diff options
Diffstat (limited to 'intern/cycles/kernel')
185 files changed, 12104 insertions, 9588 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 473bdb67920..a89c5679b27 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -37,7 +37,12 @@ set(SRC_KERNEL_DEVICE_OPTIX device/optix/kernel_shader_raytrace.cu ) +set(SRC_KERNEL_DEVICE_ONEAPI + device/oneapi/kernel.cpp +) + set(SRC_KERNEL_DEVICE_CPU_HEADERS + device/cpu/bvh.h device/cpu/compat.h device/cpu/image.h device/cpu/globals.h @@ -67,17 +72,30 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS ) set(SRC_KERNEL_DEVICE_OPTIX_HEADERS + device/optix/bvh.h device/optix/compat.h device/optix/globals.h ) set(SRC_KERNEL_DEVICE_METAL_HEADERS + device/metal/bvh.h device/metal/compat.h device/metal/context_begin.h device/metal/context_end.h + device/metal/function_constants.h device/metal/globals.h ) +set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS + device/oneapi/compat.h + device/oneapi/context_begin.h + device/oneapi/context_end.h + device/oneapi/globals.h + device/oneapi/image.h + device/oneapi/kernel.h + device/oneapi/kernel_templates.h +) + set(SRC_KERNEL_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -140,6 +158,7 @@ set(SRC_KERNEL_SVM_HEADERS svm/math_util.h svm/mix.h svm/musgrave.h + svm/node_types_template.h svm/noise.h svm/noisetex.h svm/normal.h @@ -198,8 +217,6 @@ set(SRC_KERNEL_BVH_HEADERS bvh/util.h bvh/volume.h bvh/volume_all.h - bvh/embree.h - bvh/metal.h ) set(SRC_KERNEL_CAMERA_HEADERS @@ -208,15 +225,18 @@ set(SRC_KERNEL_CAMERA_HEADERS ) set(SRC_KERNEL_FILM_HEADERS - film/accumulate.h film/adaptive_sampling.h - film/id_passes.h - film/passes.h + film/aov_passes.h + film/data_passes.h + film/denoising_passes.h + film/cryptomatte_passes.h + film/light_passes.h film/read.h - film/write_passes.h + film/write.h ) set(SRC_KERNEL_INTEGRATOR_HEADERS + integrator/displacement_shader.h integrator/init_from_bake.h integrator/init_from_camera.h integrator/intersect_closest.h @@ -228,7 +248,6 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS integrator/path_state.h integrator/shade_background.h integrator/shade_light.h - integrator/shader_eval.h integrator/shade_shadow.h integrator/shade_surface.h integrator/shade_volume.h @@ -241,6 +260,8 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS integrator/subsurface_disk.h integrator/subsurface.h integrator/subsurface_random_walk.h + integrator/surface_shader.h + integrator/volume_shader.h integrator/volume_stack.h ) @@ -257,6 +278,8 @@ set(SRC_KERNEL_SAMPLE_HEADERS sample/mapping.h sample/mis.h sample/pattern.h + sample/sobol_burley.h + sample/util.h ) set(SRC_KERNEL_UTIL_HEADERS @@ -267,8 +290,9 @@ set(SRC_KERNEL_UTIL_HEADERS ) set(SRC_KERNEL_TYPES_HEADERS + data_arrays.h + data_template.h tables.h - textures.h types.h ) @@ -299,6 +323,7 @@ set(SRC_UTIL_HEADERS ../util/math_float2.h ../util/math_float3.h ../util/math_float4.h + ../util/math_float8.h ../util/math_int2.h ../util/math_int3.h ../util/math_int4.h @@ -307,6 +332,7 @@ set(SRC_UTIL_HEADERS ../util/rect.h ../util/static_assert.h ../util/transform.h + ../util/transform_inverse.h ../util/texture.h ../util/types.h ../util/types_float2.h @@ -323,6 +349,7 @@ set(SRC_UTIL_HEADERS ../util/types_int3_impl.h ../util/types_int4.h ../util/types_int4_impl.h + ../util/types_spectrum.h ../util/types_uchar2.h ../util/types_uchar2_impl.h ../util/types_uchar3.h @@ -336,8 +363,6 @@ set(SRC_UTIL_HEADERS ../util/types_uint4.h ../util/types_uint4_impl.h ../util/types_ushort4.h - ../util/types_vector3.h - ../util/types_vector3_impl.h ) set(LIB @@ -519,8 +544,6 @@ if(WITH_CYCLES_CUDA_BINARIES) cycles_set_solution_folder(cycles_kernel_cuda) endif() -####################################################### START - # HIP module if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) @@ -595,7 +618,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) cycles_set_solution_folder(cycles_kernel_hip) endif() -####################################################### END # OptiX PTX modules if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) @@ -687,6 +709,201 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) cycles_set_solution_folder(cycles_kernel_optix) endif() +# oneAPI module + +if(WITH_CYCLES_DEVICE_ONEAPI) + if(WIN32) + set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll) + else() + set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so) + endif() + + set(cycles_oneapi_kernel_sources + ${SRC_KERNEL_DEVICE_ONEAPI} + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS} + ${SRC_UTIL_HEADERS} + ) + + # SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options + set(sycl_compiler_flags + ${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI} + -fsycl + -fsycl-unnamed-lambda + -fdelayed-template-parsing + -mllvm -inlinedefault-threshold=300 + -mllvm -inlinehint-threshold=400 + -shared + -DWITH_ONEAPI + -ffast-math + -DNDEBUG + -O2 + -o ${cycles_kernel_oneapi_lib} + -I${CMAKE_CURRENT_SOURCE_DIR}/.. + ${SYCL_CPP_FLAGS} + ) + + + if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED) + list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED) + endif() + + # Set defaults for spir64 and spir64_gen options + if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64) + set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'") + endif() + if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen) + SET (CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target") + endif() + # Enable zebin, a graphics binary format with improved compatibility. + string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ") + string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ") + + if (WITH_CYCLES_ONEAPI_BINARIES) + # Iterate over all targest and their options + list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string) + list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string}) + foreach(target ${CYCLES_ONEAPI_SYCL_TARGETS}) + if(DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_${target}) + list (APPEND sycl_compiler_flags -Xsycl-target-backend=${target} "${CYCLES_ONEAPI_SYCL_OPTIONS_${target}}") + endif() + endforeach() + else() + # If AOT is disabled, build for spir64 + list(APPEND sycl_compiler_flags + -fsycl-targets=spir64 + -Xsycl-target-backend=spir64 "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}") + endif() + + if(WITH_NANOVDB) + list(APPEND sycl_compiler_flags + -DWITH_NANOVDB + -I"${NANOVDB_INCLUDE_DIR}") + endif() + + if(WITH_CYCLES_DEBUG) + list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG) + endif() + + get_filename_component(sycl_compiler_root ${SYCL_COMPILER} DIRECTORY) + get_filename_component(sycl_compiler_compiler_name ${SYCL_COMPILER} NAME_WE) + + if(UNIX AND NOT APPLE) + if(NOT WITH_CXX11_ABI) + check_library_exists(sycl + _ZN4sycl3_V17handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI) + if(SYCL_NO_CXX11_ABI) + list(APPEND sycl_compiler_flags -D_GLIBCXX_USE_CXX11_ABI=0) + endif() + endif() + endif() + + if(WIN32) + list(APPEND sycl_compiler_flags + -fms-extensions + -fms-compatibility + -D_WINDLL + -D_MBCS + -DWIN32 + -D_WINDOWS + -D_CRT_NONSTDC_NO_DEPRECATE + -D_CRT_SECURE_NO_DEPRECATE + -DONEAPI_EXPORT) + + if(sycl_compiler_compiler_name MATCHES "dpcpp") + # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables. + add_custom_command( + OUTPUT ${cycles_kernel_oneapi_lib} + COMMAND "${sycl_compiler_root}/../../env/vars.bat" + COMMAND ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags} + DEPENDS ${cycles_oneapi_kernel_sources}) + else() + # The open source SYCL compiler just goes by clang++ and does not have such a script. + # Set the variables manually. + string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR}) + if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows + get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY) + string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir}) + get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE) + else() + set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}) + endif() + list(APPEND sycl_compiler_flags + -L "${MSVC_TOOLS_DIR}/lib/x64" + -L "${WINDOWS_KIT_DIR}/um/x64" + -L "${WINDOWS_KIT_DIR}/ucrt/x64") + add_custom_command( + OUTPUT ${cycles_kernel_oneapi_lib} + COMMAND ${CMAKE_COMMAND} -E env + "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib + "PATH=${OCLOC_INSTALL_DIR};${sycl_compiler_root}" + ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags} + DEPENDS ${cycles_oneapi_kernel_sources}) + endif() + else() + list(APPEND sycl_compiler_flags -fPIC) + + # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash + # it triggers at either AoT or JIT stages gets fixed. + list(APPEND sycl_compiler_flags -fhonor-nans) + + # add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and + # libpi_level_zero.so can be placed next to it and get found. + list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN') + + # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables. + if(sycl_compiler_compiler_name MATCHES "dpcpp") + add_custom_command( + OUTPUT ${cycles_kernel_oneapi_lib} + COMMAND bash -c \"source ${sycl_compiler_root}/../../env/vars.sh&&${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}\" + DEPENDS ${cycles_oneapi_kernel_sources}) + else() + # The open source SYCL compiler just goes by clang++ and does not have such a script. + # Set the variables manually. + if(NOT IGC_INSTALL_DIR) + get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE) + endif() + add_custom_command( + OUTPUT ${cycles_kernel_oneapi_lib} + COMMAND ${CMAKE_COMMAND} -E env + "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib" + "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld + ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags} + DEPENDS ${cycles_oneapi_kernel_sources}) + endif() + endif() + + # install dynamic libraries required at runtime + if(WIN32) + set(SYCL_RUNTIME_DEPENDENCIES + sycl.dll + pi_level_zero.dll + ) + if(NOT WITH_BLENDER) + # For the Cycles standalone put libraries next to the Cycles application. + delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}) + else() + # For Blender put the libraries next to the Blender executable. + # + # Note that the installation path in the delayed_install is relative to the versioned folder, + # which means we need to go one level up. + delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../") + endif() + elseif(UNIX AND NOT APPLE) + file(GLOB SYCL_RUNTIME_DEPENDENCIES + ${sycl_compiler_root}/../lib/libsycl.so + ${sycl_compiler_root}/../lib/libsycl.so.[0-9] + ${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9] + ) + list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so) + delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib) + endif() + + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib) + add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib}) +endif() + # OSL module if(WITH_CYCLES_OSL) @@ -752,6 +969,7 @@ cycles_add_library(cycles_kernel "${LIB}" ${SRC_KERNEL_DEVICE_HIP_HEADERS} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} ${SRC_KERNEL_DEVICE_METAL_HEADERS} + ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS} ) source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS}) @@ -764,6 +982,7 @@ source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS}) source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS}) source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}) source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS}) +source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}) source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS}) source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS}) source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS}) @@ -782,6 +1001,9 @@ endif() if(WITH_CYCLES_HIP) add_dependencies(cycles_kernel cycles_kernel_hip) endif() +if(WITH_CYCLES_DEVICE_ONEAPI) + add_dependencies(cycles_kernel cycles_kernel_oneapi) +endif() # Install kernel source for runtime compilation diff --git a/intern/cycles/kernel/bake/bake.h b/intern/cycles/kernel/bake/bake.h index 544a8217bef..384ca9168f0 100644 --- a/intern/cycles/kernel/bake/bake.h +++ b/intern/cycles/kernel/bake/bake.h @@ -4,10 +4,13 @@ #pragma once #include "kernel/camera/projection.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/displacement_shader.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/geom/geom.h" +#include "kernel/util/color.h" + CCL_NAMESPACE_BEGIN ccl_device void kernel_displace_evaluate(KernelGlobals kg, @@ -23,20 +26,20 @@ ccl_device void kernel_displace_evaluate(KernelGlobals kg, /* Evaluate displacement shader. */ const float3 P = sd.P; - shader_eval_displacement(kg, INTEGRATOR_STATE_NULL, &sd); + displacement_shader_eval(kg, INTEGRATOR_STATE_NULL, &sd); float3 D = sd.P - P; object_inverse_dir_transform(kg, &sd, &D); #ifdef __KERNEL_DEBUG_NAN__ - if (!isfinite3_safe(D)) { + if (!isfinite_safe(D)) { kernel_assert(!"Cycles displacement with non-finite value detected"); } #endif /* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible * traversal issues caused by non-finite math. */ - D = ensure_finite3(D); + D = ensure_finite(D); /* Write output. */ output[offset * 3 + 0] += D.x; @@ -62,24 +65,26 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg, /* Evaluate shader. * This is being evaluated for all BSDFs, so path flag does not contain a specific type. */ const uint32_t path_flag = PATH_RAY_EMISSION; - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT & + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT & ~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>( kg, INTEGRATOR_STATE_NULL, &sd, NULL, path_flag); - float3 color = shader_background_eval(&sd); + Spectrum color = surface_shader_background(&sd); #ifdef __KERNEL_DEBUG_NAN__ - if (!isfinite3_safe(color)) { + if (!isfinite_safe(color)) { kernel_assert(!"Cycles background with non-finite value detected"); } #endif /* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */ - color = ensure_finite3(color); + color = ensure_finite(color); + + float3 color_rgb = spectrum_to_rgb(color); /* Write output. */ - output[offset * 3 + 0] += color.x; - output[offset * 3 + 1] += color.y; - output[offset * 3 + 2] += color.z; + output[offset * 3 + 0] += color_rgb.x; + output[offset * 3 + 1] += color_rgb.y; + output[offset * 3 + 2] += color_rgb.z; } ccl_device void kernel_curve_shadow_transparency_evaluate( @@ -95,12 +100,12 @@ ccl_device void kernel_curve_shadow_transparency_evaluate( shader_setup_from_curve(kg, &sd, in.object, in.prim, __float_as_int(in.v), in.u); /* Evaluate transparency. */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW & + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW & ~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>( kg, INTEGRATOR_STATE_NULL, &sd, NULL, PATH_RAY_SHADOW); /* Write output. */ - output[offset] = clamp(average(shader_bsdf_transparency(kg, &sd)), 0.0f, 1.0f); + output[offset] = clamp(average(surface_shader_transparency(kg, &sd)), 0.0f, 1.0f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 04ccb7ceff5..29789a15b28 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -1,40 +1,47 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ -/* BVH - * - * Bounding volume hierarchy for ray tracing. We compile different variations - * of the same BVH traversal function for faster rendering when some types of - * primitives are not needed, using #includes to work around the lack of - * C++ templates in OpenCL. - * - * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", - * the code has been extended and modified to support more primitives and work - * with CPU/CUDA/OpenCL. */ - #pragma once -#ifdef __EMBREE__ -# include "kernel/bvh/embree.h" -#endif - -#ifdef __METALRT__ -# include "kernel/bvh/metal.h" -#endif - #include "kernel/bvh/types.h" #include "kernel/bvh/util.h" #include "kernel/integrator/state_util.h" +/* Device specific acceleration structures for ray tracing. */ + +#if defined(__EMBREE__) +# include "kernel/device/cpu/bvh.h" +# define __BVH2__ +#elif defined(__METALRT__) +# include "kernel/device/metal/bvh.h" +#elif defined(__KERNEL_OPTIX__) +# include "kernel/device/optix/bvh.h" +#else +# define __BVH2__ +#endif + CCL_NAMESPACE_BEGIN -#if !defined(__KERNEL_GPU_RAYTRACING__) +#ifdef __BVH2__ -/* Regular BVH traversal */ +/* BVH2 + * + * Bounding volume hierarchy for ray tracing, when no native acceleration + * structure is available for the device. + * + * We compile different variations of the same BVH traversal function for + * faster rendering when some types of primitives are not needed, using #includes + * to work around the lack of C++ templates in OpenCL. + * + * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", + * the code has been extended and modified to support more primitives and work + * with CPU and various GPU kernel languages. */ # include "kernel/bvh/nodes.h" +/* Regular BVH traversal */ + # define BVH_FUNCTION_NAME bvh_intersect # define BVH_FUNCTION_FEATURES BVH_POINTCLOUD # include "kernel/bvh/traversal.h" @@ -57,260 +64,20 @@ CCL_NAMESPACE_BEGIN # include "kernel/bvh/traversal.h" # endif -/* Subsurface scattering BVH traversal */ - -# if defined(__BVH_LOCAL__) -# define BVH_FUNCTION_NAME bvh_intersect_local -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/local.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_local_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/local.h" -# endif -# endif /* __BVH_LOCAL__ */ - -/* Volume BVH traversal */ - -# if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume.h" -# endif -# endif /* __VOLUME__ */ - -/* Record all intersections - Shadow BVH traversal */ - -# if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" - -# if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# endif /* __SHADOW_RECORD_ALL__ */ - -/* Record all intersections - Volume BVH traversal. */ - -# if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume_all.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume_all.h" -# endif -# endif /* __VOLUME_RECORD_ALL__ */ - -# undef BVH_FEATURE -# undef BVH_NAME_JOIN -# undef BVH_NAME_EVAL -# undef BVH_FUNCTION_FULL_NAME - -#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */ - -ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) -{ - /* NOTE: Due to some vectorization code non-finite origin point might - * cause lots of false-positive intersections which will overflow traversal - * stack. - * This code is a quick way to perform early output, to avoid crashes in - * such cases. - * From production scenes so far it seems it's enough to test first element - * only. - * Scene intersection may also called with empty rays for conditional trace - * calls that evaluate to false, so filter those out. - */ - return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; -} - ccl_device_intersect bool scene_intersect(KernelGlobals kg, ccl_private const Ray *ray, const uint visibility, ccl_private Intersection *isect) { -#ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - ray_flags, - 0, /* SBT offset for PG_HITD */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -#elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid ift_default"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.u = 0.0f; - payload.v = 0.0f; - payload.visibility = visibility; - - typename metalrt_intersector_type::result_type intersection; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - /* No further intersector setup required: Default MetalRT behavior is any-hit. */ - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - /* No further intersector setup required: Shadow ray early termination is controlled by the - * intersection handler */ - } - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); -# endif - - if (intersection.type == intersection_type::none) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - - return false; - } - - isect->t = intersection.distance; - - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -#else - - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { return false; } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - isect->t = ray->t; - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); - IntersectContext rtc_ctx(&ctx); - RTCRayHit ray_hit; - ctx.ray = ray; - kernel_embree_setup_rayhit(*ray, ray_hit, visibility); - rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); - if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && - ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { - kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); - return true; - } - return false; + if (kernel_data.device_bvh) { + return kernel_embree_intersect(kg, ray, visibility, isect); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -322,7 +89,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, return bvh_intersect_motion(kg, ray, isect, visibility); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -331,10 +98,22 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, # endif /* __HAIR__ */ return bvh_intersect(kg, ray, isect, visibility); -#endif /* __KERNEL_OPTIX__ */ } -#ifdef __BVH_LOCAL__ +/* Single object BVH traversal, for SSS/AO/bevel. */ + +# ifdef __BVH_LOCAL__ + +# define BVH_FUNCTION_NAME bvh_intersect_local +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/local.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_local_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/local.h" +# endif + ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, @@ -342,108 +121,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private uint *lcg_state, int max_hits) { -# ifdef __KERNEL_OPTIX__ - uint p0 = pointer_pack_to_uint_0(lcg_state); - uint p1 = pointer_pack_to_uint_1(lcg_state); - uint p2 = pointer_pack_to_uint_0(local_isect); - uint p3 = pointer_pack_to_uint_1(local_isect); - uint p4 = local_object; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ - uint p5 = max_hits; - - if (local_isect) { - local_isect->num_hits = 0; /* Initialize hit count to zero. */ - } - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - 0xFF, - /* Need to always call into __anyhit__kernel_optix_local_hit. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 2, /* SBT offset for PG_HITL */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - return p5; -# elif defined(__METALRT__) - if (!scene_intersect_valid(ray)) { - if (local_isect) { - local_isect->num_hits = 0; - } - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - if (local_isect) { - local_isect->num_hits = 0; - } - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { - if (local_isect) { - local_isect->num_hits = 0; - } - kernel_assert(!"Invalid ift_local"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionLocalPayload payload; - payload.self = ray->self; - payload.local_object = local_object; - payload.max_hits = max_hits; - payload.local_isect.num_hits = 0; - if (lcg_state) { - payload.has_lcg_state = true; - payload.lcg_state = *lcg_state; - } - payload.result = false; - - typename metalrt_intersector_type::result_type intersection; - -# if defined(__METALRT_MOTION__) - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); -# endif - - if (lcg_state) { - *lcg_state = payload.lcg_state; - } - *local_isect = payload.local_isect; - - return payload.result; - -# else - - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { if (local_isect) { local_isect->num_hits = 0; } @@ -451,59 +129,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - const bool has_bvh = !(kernel_tex_fetch(__object_flag, local_object) & - SD_OBJECT_TRANSFORM_APPLIED); - CCLIntersectContext ctx( - kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); - ctx.lcg_state = lcg_state; - ctx.max_hits = max_hits; - ctx.ray = ray; - ctx.local_isect = local_isect; - if (local_isect) { - local_isect->num_hits = 0; - } - ctx.local_object_id = local_object; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); - - /* If this object has its own BVH, use it. */ - if (has_bvh) { - RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); - if (geom) { - float3 P = ray->P; - float3 dir = ray->D; - float3 idir = ray->D; - Transform ob_itfm; - rtc_ray.tfar = ray->t * - bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); - /* bvh_instance_motion_push() returns the inverse transform but - * it's not needed here. */ - (void)ob_itfm; - - rtc_ray.org_x = P.x; - rtc_ray.org_y = P.y; - rtc_ray.org_z = P.z; - rtc_ray.dir_x = dir.x; - rtc_ray.dir_y = dir.y; - rtc_ray.dir_z = dir.z; - RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); - kernel_assert(scene); - if (scene) { - rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); - } - } - } - else { - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - } - - /* rtcOccluded1 sets tfar to -inf if a hit was found. */ - return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); - ; + if (kernel_data.device_bvh) { + return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -511,144 +140,55 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } # endif /* __OBJECT_MOTION__ */ return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); -# endif /* __KERNEL_OPTIX__ */ } -#endif +# endif -#ifdef __SHADOW_RECORD_ALL__ -ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, - IntegratorShadowState state, - ccl_private const Ray *ray, - uint visibility, - uint max_hits, - ccl_private uint *num_recorded_hits, - ccl_private float *throughput) -{ -# ifdef __KERNEL_OPTIX__ - uint p0 = state; - uint p1 = __float_as_uint(1.0f); /* Throughput. */ - uint p2 = 0; /* Number of hits. */ - uint p3 = max_hits; - uint p4 = visibility; - uint p5 = false; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } +/* Transparent shadow BVH traversal, recording multiple intersections. */ - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 1, /* SBT offset for PG_HITS */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - *num_recorded_hits = uint16_unpack_from_uint_0(p2); - *throughput = __uint_as_float(p1); - - return p5; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } +# ifdef __SHADOW_RECORD_ALL__ -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all +# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" - if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { - kernel_assert(!"Invalid ift_shadow"); - return false; - } +# if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" # endif - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionShadowPayload payload; - payload.self = ray->self; - payload.visibility = visibility; - payload.max_hits = max_hits; - payload.num_hits = 0; - payload.num_recorded_hits = 0; - payload.throughput = 1.0f; - payload.result = false; - payload.state = state; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - typename metalrt_intersector_type::result_type intersection; - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_shadow, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" # endif - *num_recorded_hits = payload.num_recorded_hits; - *throughput = payload.throughput; - - return payload.result; +# if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" +# endif -# else - if (!scene_intersect_valid(ray)) { +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + if (!intersection_ray_valid(ray)) { *num_recorded_hits = 0; *throughput = 1.0f; return false; } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); - Intersection *isect_array = (Intersection *)state->shadow_isect; - ctx.isect_s = isect_array; - ctx.max_hits = max_hits; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - - *num_recorded_hits = ctx.num_recorded_hits; - *throughput = ctx.throughput; - return ctx.opaque_hit; + if (kernel_data.device_bvh) { + return kernel_embree_intersect_shadow_all( + kg, state, ray, visibility, max_hits, num_recorded_hits, throughput); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -662,7 +202,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all_motion( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -673,180 +213,89 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); -# endif /* __KERNEL_OPTIX__ */ } -#endif /* __SHADOW_RECORD_ALL__ */ +# endif /* __SHADOW_RECORD_ALL__ */ + +/* Volume BVH traversal, for initializing or updating the volume stack. */ + +# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) + +# define BVH_FUNCTION_NAME bvh_intersect_volume +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume.h" +# endif -#ifdef __VOLUME__ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) { -# ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_volume_test. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 3, /* SBT offset for PG_HITV */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + if (!intersection_ray_valid(ray)) { return false; } - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - kernel_assert(!"Invalid ift_default"); - return false; +# ifdef __OBJECT_MOTION__ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_volume_motion(kg, ray, isect, visibility); } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; +# endif /* __OBJECT_MOTION__ */ - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } + return bvh_intersect_volume(kg, ray, isect, visibility); +} +# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */ - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.visibility = visibility; +/* Volume BVH traversal, for initializing or updating the volume stack. + * Variation that records multiple intersections at once. */ - typename metalrt_intersector_type::result_type intersection; +# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } +# define BVH_FUNCTION_NAME bvh_intersect_volume_all +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume_all.h" -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume_all.h" # endif - if (intersection.type == intersection_type::none) { +ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint max_hits, + const uint visibility) +{ + if (!intersection_ray_valid(ray)) { return false; } - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -# else - if (!scene_intersect_valid(ray)) { - return false; +# ifdef __EMBREE__ + if (kernel_data.device_bvh) { + return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility); } +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); } # endif /* __OBJECT_MOTION__ */ - return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __KERNEL_OPTIX__ */ + return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } -#endif /* __VOLUME__ */ -#ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint max_hits, - const uint visibility) -{ - if (!scene_intersect_valid(ray)) { - return false; - } +# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */ -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); - ctx.isect_s = isect; - ctx.max_hits = max_hits; - ctx.num_hits = 0; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - return ctx.num_hits; - } -# endif /* __EMBREE__ */ - -# ifdef __OBJECT_MOTION__ - if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ +# undef BVH_FEATURE +# undef BVH_NAME_JOIN +# undef BVH_NAME_EVAL +# undef BVH_FUNCTION_FULL_NAME - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); -} -#endif /* __VOLUME_RECORD_ALL__ */ +#endif /* __BVH2__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h deleted file mode 100644 index 4f7e6435daf..00000000000 --- a/intern/cycles/kernel/bvh/embree.h +++ /dev/null @@ -1,176 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2022 Blender Foundation. */ - -#pragma once - -#include <embree3/rtcore_ray.h> -#include <embree3/rtcore_scene.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/bvh/util.h" - -#include "util/vector.h" - -CCL_NAMESPACE_BEGIN - -struct CCLIntersectContext { - typedef enum { - RAY_REGULAR = 0, - RAY_SHADOW_ALL = 1, - RAY_LOCAL = 2, - RAY_SSS = 3, - RAY_VOLUME_ALL = 4, - } RayType; - - KernelGlobals kg; - RayType type; - - /* For avoiding self intersections */ - const Ray *ray; - - /* for shadow rays */ - Intersection *isect_s; - uint max_hits; - uint num_hits; - uint num_recorded_hits; - float throughput; - float max_t; - bool opaque_hit; - - /* for SSS Rays: */ - LocalIntersection *local_isect; - int local_object_id; - uint *lcg_state; - - CCLIntersectContext(KernelGlobals kg_, RayType type_) - { - kg = kg_; - type = type_; - ray = NULL; - max_hits = 1; - num_hits = 0; - num_recorded_hits = 0; - throughput = 1.0f; - max_t = FLT_MAX; - opaque_hit = false; - isect_s = NULL; - local_isect = NULL; - local_object_id = -1; - lcg_state = NULL; - } -}; - -class IntersectContext { - public: - IntersectContext(CCLIntersectContext *ctx) - { - rtcInitIntersectContext(&context); - userRayExt = ctx; - } - RTCIntersectContext context; - CCLIntersectContext *userRayExt; -}; - -ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, - RTCRay &rtc_ray, - const uint visibility) -{ - rtc_ray.org_x = ray.P.x; - rtc_ray.org_y = ray.P.y; - rtc_ray.org_z = ray.P.z; - rtc_ray.dir_x = ray.D.x; - rtc_ray.dir_y = ray.D.y; - rtc_ray.dir_z = ray.D.z; - rtc_ray.tnear = 0.0f; - rtc_ray.tfar = ray.t; - rtc_ray.time = ray.time; - rtc_ray.mask = visibility; -} - -ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, - RTCRayHit &rayhit, - const uint visibility) -{ - kernel_embree_setup_ray(ray, rayhit.ray, visibility); - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; -} - -ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg, - const RTCHit *hit, - const Ray *ray) -{ - bool status = false; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - const int oID = hit->instID[0] / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - const int pID = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - else { - const int oID = hit->geomID / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - - return status; -} - -ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect) -{ - isect->t = ray->tfar; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = hit->instID[0] / 2; - } - else { - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = hit->geomID / 2; - } - - const bool is_hair = hit->geomID & 1; - if (is_hair) { - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); - isect->type = segment.type; - isect->prim = segment.prim; - isect->u = hit->u; - isect->v = hit->v; - } - else { - isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - } -} - -ccl_device_inline void kernel_embree_convert_sss_hit( - KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) -{ - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - isect->t = ray->tfar; - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, object * 2)); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = object; - isect->type = kernel_tex_fetch(__objects, object).primitive_type; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h index 0d05e09d75f..add61adc126 100644 --- a/intern/cycles/kernel/bvh/local.h +++ b/intern/cycles/kernel/bvh/local.h @@ -41,27 +41,27 @@ ccl_device_inline /* traversal variables in registers */ int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); + int node_addr = kernel_data_fetch(object_node, local_object); /* ray parameters in registers */ float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = ray->t; + float isect_t = ray->tmax; if (local_isect != NULL) { local_isect->num_hits = 0; } kernel_assert((local_isect == NULL) == (max_hits == 0)); - const int object_flag = kernel_tex_fetch(__object_flag, local_object); + const int object_flag = kernel_data_fetch(object_flag, local_object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); + bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); #endif object = local_object; } @@ -73,7 +73,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -81,6 +81,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, PATH_RAY_ALL_VISIBILITY, @@ -117,7 +118,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); const int prim_addr2 = __float_as_int(leaf.y); @@ -132,18 +133,18 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* Only intersect with matching object, for instanced objects we * already know we are only intersecting the right object. */ if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + if (kernel_data_fetch(prim_object, prim_addr) != local_object) { continue; } } /* Skip self intersection. */ - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_local(ray->self, prim)) { continue; } @@ -155,6 +156,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { @@ -167,18 +169,18 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* Only intersect with matching object, for instanced objects we * already know we are only intersecting the right object. */ if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + if (kernel_data_fetch(prim_object, prim_addr) != local_object) { continue; } } /* Skip self intersection. */ - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_local(ray->self, prim)) { continue; } @@ -191,6 +193,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h deleted file mode 100644 index 04289e259a7..00000000000 --- a/intern/cycles/kernel/bvh/metal.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2021-2022 Blender Foundation */ - -struct MetalRTIntersectionPayload { - RaySelfPrimitives self; - uint visibility; - float u, v; - int prim; - int type; -#if defined(__METALRT_MOTION__) - float time; -#endif -}; - -struct MetalRTIntersectionLocalPayload { - RaySelfPrimitives self; - uint local_object; - uint lcg_state; - short max_hits; - bool has_lcg_state; - bool result; - LocalIntersection local_isect; -}; - -struct MetalRTIntersectionShadowPayload { - RaySelfPrimitives self; - uint visibility; -#if defined(__METALRT_MOTION__) - float time; -#endif - int state; - float throughput; - short max_hits; - short num_hits; - short num_recorded_hits; - bool result; -}; diff --git a/intern/cycles/kernel/bvh/nodes.h b/intern/cycles/kernel/bvh/nodes.h index fd475dcd5e9..e02841fad16 100644 --- a/intern/cycles/kernel/bvh/nodes.h +++ b/intern/cycles/kernel/bvh/nodes.h @@ -9,16 +9,17 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg { Transform space; const int child_addr = node_addr + child * 3; - space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1); - space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2); - space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3); + space.x = kernel_data_fetch(bvh_nodes, child_addr + 1); + space.y = kernel_data_fetch(bvh_nodes, child_addr + 2); + space.z = kernel_data_fetch(bvh_nodes, child_addr + 3); return space; } ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, const float3 P, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) @@ -26,11 +27,11 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, /* fetch node data */ #ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); #endif - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); + float4 node0 = kernel_data_fetch(bvh_nodes, node_addr + 1); + float4 node1 = kernel_data_fetch(bvh_nodes, node_addr + 2); + float4 node2 = kernel_data_fetch(bvh_nodes, node_addr + 3); /* intersect ray against child nodes */ float c0lox = (node0.x - P.x) * idir.x; @@ -39,8 +40,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c0hiy = (node1.z - P.y) * idir.y; float c0loz = (node2.x - P.z) * idir.z; float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); float c1lox = (node0.y - P.x) * idir.x; float c1hix = (node0.w - P.x) * idir.x; @@ -48,8 +49,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c1hiy = (node1.w - P.y) * idir.y; float c1loz = (node2.y - P.z) * idir.z; float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); dist[0] = c0min; dist[1] = c1min; @@ -66,7 +67,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float3 P, const float3 dir, - const float t, + const float tmin, + const float tmax, int node_addr, int child, float dist[2]) @@ -83,8 +85,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float far_x = max(lower_xyz.x, upper_xyz.x); const float far_y = max(lower_xyz.y, upper_xyz.y); const float far_z = max(lower_xyz.z, upper_xyz.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); + const float tnear = max4(tmin, near_x, near_y, near_z); + const float tfar = min4(tmax, far_x, far_y, far_z); *dist = tnear; return tnear <= tfar; } @@ -93,16 +95,17 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) { int mask = 0; #ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); #endif - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.x) & visibility)) #endif @@ -110,7 +113,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, mask |= 1; } } - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.y) & visibility)) #endif @@ -125,16 +128,17 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + float4 node = kernel_data_fetch(bvh_nodes, node_addr); if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); + return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist); } else { - return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); + return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist); } } diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h index 2f58929c1e5..2ffe1496c72 100644 --- a/intern/cycles/kernel/bvh/shadow_all.h +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -49,26 +49,15 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; uint num_hits = 0; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - /* Max distance in world space. May be dynamically reduced when max number of * recorded hits is exceeded and we no longer need to find hits beyond the max * distance found. */ - float t_max_world = ray->t; - - /* Current maximum distance to the intersection. - * Is calculated as a ray length, transformed to an object space when entering - * instance node. */ - float t_max_current = ray->t; - - /* Conversion from world to local space for the current instance if any, 1.0 - * otherwise. */ - float t_world_to_instance = 1.0f; + const float tmax = ray->tmax; + float tmax_hits = tmax; *r_num_recorded_hits = 0; *r_throughput = 1.0f; @@ -80,7 +69,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -88,7 +77,8 @@ ccl_device_inline dir, #endif idir, - t_max_current, + tmin, + tmax, node_addr, visibility, dist); @@ -124,7 +114,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -137,7 +127,7 @@ ccl_device_inline /* primitive intersection */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + kernel_assert((kernel_data_fetch(prim_type, prim_addr) & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); bool hit; @@ -147,9 +137,9 @@ ccl_device_inline Intersection isect ccl_optional_struct_init; const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { continue; } @@ -157,7 +147,7 @@ ccl_device_inline switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { hit = triangle_intersect( - kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr); + kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr); break; } #if BVH_FEATURE(BVH_MOTION) @@ -166,7 +156,8 @@ ccl_device_inline &isect, P, dir, - t_max_current, + tmin, + tmax, ray->time, visibility, prim_object, @@ -181,16 +172,16 @@ ccl_device_inline case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { hit = false; break; } } - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_type = kernel_data_fetch(prim_type, prim_addr); hit = curve_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type); break; } @@ -199,16 +190,16 @@ ccl_device_inline case PRIMITIVE_POINT: case PRIMITIVE_MOTION_POINT: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { hit = false; break; } } - const int point_type = kernel_tex_fetch(__prim_type, prim_addr); + const int point_type = kernel_data_fetch(prim_type, prim_addr); hit = point_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type); break; } #endif /* BVH_FEATURE(BVH_POINTCLOUD) */ @@ -220,9 +211,6 @@ ccl_device_inline /* shadow ray early termination */ if (hit) { - /* Convert intersection distance to world space. */ - isect.t /= t_world_to_instance; - /* detect if this surface has a shader with transparent shadows */ /* todo: optimize so primitive visibility flag indicates if * the primitive has a transparent shadow shader? */ @@ -254,7 +242,7 @@ ccl_device_inline if (record_intersection) { /* Test if we need to record this transparent intersection. */ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) { + if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) { /* If maximum number of hits was reached, replace the intersection with the * highest distance. We want to find the N closest intersections. */ const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits); @@ -276,7 +264,7 @@ ccl_device_inline } /* Limit the ray distance and stop counting hits beyond this. */ - t_max_world = max(isect.t, max_t); + tmax_hits = max(isect.t, max_t); } integrator_state_write_shadow_isect(state, &isect, isect_index); @@ -291,23 +279,19 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - t_world_to_instance = bvh_instance_motion_push( - kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif - /* Convert intersection to object space. */ - t_max_current *= t_world_to_instance; - ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } } } while (node_addr != ENTRYPOINT_SENTINEL); @@ -316,17 +300,9 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* Instance pop. */ -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - - /* Restore world space ray length. */ - t_max_current = ray->t; + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; - t_world_to_instance = 1.0f; node_addr = traversal_stack[stack_ptr]; --stack_ptr; } diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h index 1181d4bfdee..f3744aca5c0 100644 --- a/intern/cycles/kernel/bvh/traversal.h +++ b/intern/cycles/kernel/bvh/traversal.h @@ -43,13 +43,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -62,7 +59,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); { traverse_mask = NODE_INTERSECT(kg, @@ -71,6 +68,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -108,7 +106,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -121,20 +119,28 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, /* primitive intersection */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { continue; } switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - if (triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) { + if (triangle_intersect(kg, + isect, + P, + dir, + tmin, + isect->t, + visibility, + prim_object, + prim, + prim_addr)) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; @@ -147,6 +153,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ -166,15 +173,15 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { break; } } - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = curve_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -187,15 +194,15 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, case PRIMITIVE_POINT: case PRIMITIVE_MOTION_POINT: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { break; } } - const int point_type = kernel_tex_fetch(__prim_type, prim_addr); + const int point_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = point_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -209,19 +216,19 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } } } while (node_addr != ENTRYPOINT_SENTINEL); @@ -230,11 +237,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index d53198f97a3..a57703a8b8c 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -5,7 +5,59 @@ CCL_NAMESPACE_BEGIN -#if defined(__KERNEL_CPU__) +ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray) +{ + /* NOTE: Due to some vectorization code non-finite origin point might + * cause lots of false-positive intersections which will overflow traversal + * stack. + * This code is a quick way to perform early output, to avoid crashes in + * such cases. + * From production scenes so far it seems it's enough to test first element + * only. + * Scene intersection may also called with empty rays for conditional trace + * calls that evaluate to false, so filter those out. + */ + return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; +} + +/* Offset intersection distance by the smallest possible amount, to skip + * intersections at this distance. This works in cases where the ray start + * position is unchanged and only tmin is updated, since for self + * intersection we'll be comparing against the exact same distances. */ +ccl_device_forceinline float intersection_t_offset(const float t) +{ + /* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with + * non-negative and finite t. */ + kernel_assert(t >= 0.0f && isfinite_safe(t)); + const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1; + return __uint_as_float(bits); +} + +/* Ray offset to avoid self intersection. + * + * This function can be used to compute a modified ray start position for rays + * leaving from a surface. This is from: + * "A Fast and Robust Method for Avoiding Self-Intersection" + * Ray Tracing Gems, chapter 6. + */ +ccl_device_inline float3 ray_offset(const float3 P, const float3 Ng) +{ + const float int_scale = 256.0f; + const int3 of_i = make_int3( + (int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z)); + + const float3 p_i = make_float3( + __int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)), + __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)), + __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z))); + const float origin = 1.0f / 32.0f; + const float float_scale = 1.0f / 65536.0f; + return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x, + fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y, + fabsf(P.z) < origin ? P.z + float_scale * Ng.z : p_i.z); +} + +#ifndef __KERNEL_GPU__ ccl_device int intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -53,20 +105,20 @@ ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, int shader = 0; if (type & PRIMITIVE_TRIANGLE) { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_data_fetch(tri_shader, prim); } #ifdef __POINTCLOUD__ else if (type & PRIMITIVE_POINT) { - shader = kernel_tex_fetch(__points_shader, prim); + shader = kernel_data_fetch(points_shader, prim); } #endif #ifdef __HAIR__ else if (type & PRIMITIVE_CURVE) { - shader = kernel_tex_fetch(__curves, prim).shader_id; + shader = kernel_data_fetch(curves, prim).shader_id; } #endif - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + return kernel_data_fetch(shaders, (shader & SHADER_MASK)).flags; } ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, @@ -76,16 +128,16 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals int shader = 0; if (isect_type & PRIMITIVE_TRIANGLE) { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_data_fetch(tri_shader, prim); } #ifdef __POINTCLOUD__ else if (isect_type & PRIMITIVE_POINT) { - shader = kernel_tex_fetch(__points_shader, prim); + shader = kernel_data_fetch(points_shader, prim); } #endif #ifdef __HAIR__ else if (isect_type & PRIMITIVE_CURVE) { - shader = kernel_tex_fetch(__curves, prim).shader_id; + shader = kernel_data_fetch(curves, prim).shader_id; } #endif @@ -101,7 +153,7 @@ ccl_device_forceinline int intersection_get_shader( ccl_device_forceinline int intersection_get_object_flags( KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { - return kernel_tex_fetch(__object_flag, isect->object); + return kernel_data_fetch(object_flag, isect->object); } /* TODO: find a better (faster) solution for this. Maybe store offset per object for @@ -110,8 +162,8 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg, const int object, const uint id) { - uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; - AttributeMap attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + uint attr_offset = kernel_data_fetch(objects, object).attribute_map_offset; + AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset); while (attr_map.id != id) { if (UNLIKELY(attr_map.id == ATTR_STD_NONE)) { @@ -126,7 +178,7 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg, else { attr_offset += ATTR_PRIM_TYPES; } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + attr_map = kernel_data_fetch(attributes_map, attr_offset); } /* return result */ @@ -151,12 +203,12 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, } /* Interpolate transparency between curve keys. */ - const KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + const KernelCurve kcurve = kernel_data_fetch(curves, prim); const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); const int k1 = k0 + 1; - const float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - const float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + const float f0 = kernel_data_fetch(attributes_float, offset + k0); + const float f1 = kernel_data_fetch(attributes_float, offset + k1); return (1.0f - u) * f0 + u * f1; } diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h index d711b3abbf4..664c692dd3d 100644 --- a/intern/cycles/kernel/bvh/volume.h +++ b/intern/cycles/kernel/bvh/volume.h @@ -46,13 +46,10 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -65,7 +62,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -73,6 +70,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -109,7 +107,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -125,22 +123,22 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr); + kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr); } break; } @@ -148,16 +146,16 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } @@ -165,6 +163,7 @@ ccl_device_inline isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ -182,20 +181,20 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); + object = kernel_data_fetch(prim_object, -prim_addr - 1); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } else { /* pop */ @@ -211,11 +210,7 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h index a969bae14a1..721eb555d4d 100644 --- a/intern/cycles/kernel/bvh/volume_all.h +++ b/intern/cycles/kernel/bvh/volume_all.h @@ -44,21 +44,17 @@ ccl_device_inline int node_addr = kernel_data.bvh.root; /* ray parameters in registers */ - const float tmax = ray->t; float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif + float isect_t = ray->tmax; int num_hits_in_instance = 0; uint num_hits = 0; - isect_array->t = tmax; + isect_array->t = ray->tmax; /* traversal loop */ do { @@ -67,7 +63,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -75,6 +71,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, visibility, @@ -111,7 +108,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -128,21 +125,29 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } - hit = triangle_intersect( - kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr); + hit = triangle_intersect(kg, + isect_array, + P, + dir, + tmin, + isect_t, + visibility, + prim_object, + prim, + prim_addr); if (hit) { /* Move on to next entry in intersections array. */ isect_array++; @@ -150,18 +155,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -#if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -#else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -#endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -172,16 +165,16 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } @@ -189,6 +182,7 @@ ccl_device_inline isect_array, P, dir, + tmin, isect_t, ray->time, visibility, @@ -202,18 +196,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -228,13 +210,13 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); + object = kernel_data_fetch(prim_object, -prim_addr - 1); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif num_hits_in_instance = 0; @@ -244,7 +226,7 @@ ccl_device_inline kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } else { /* pop */ @@ -260,28 +242,7 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* Instance pop. */ - if (num_hits_in_instance) { - float t_fac; -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -#else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -#endif - /* Scale isect->t to adjust for instancing. */ - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - else { -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - } - - isect_t = tmax; - isect_array->t = isect_t; + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/camera/camera.h b/intern/cycles/kernel/camera/camera.h index aad68e527ac..27876677281 100644 --- a/intern/cycles/kernel/camera/camera.h +++ b/intern/cycles/kernel/camera/camera.h @@ -45,7 +45,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg, float3 raster = make_float3(raster_x, raster_y, 0.0f); float3 Pcamera = transform_perspective(&rastertocamera, raster); -#ifdef __CAMERA_MOTION__ if (kernel_data.cam.have_perspective_motion) { /* TODO(sergey): Currently we interpolate projected coordinate which * gives nice looking result and which is simple, but is in fact a bit @@ -63,7 +62,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg, Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f); } } -#endif float3 P = zero_float3(); float3 D = Pcamera; @@ -87,14 +85,12 @@ ccl_device void camera_sample_perspective(KernelGlobals kg, /* transform ray from camera to world */ Transform cameratoworld = kernel_data.cam.cameratoworld; -#ifdef __CAMERA_MOTION__ if (kernel_data.cam.num_motion_steps) { transform_motion_array_interpolate(&cameratoworld, - kernel_tex_array(__camera_motion), + kernel_data_array(camera_motion), kernel_data.cam.num_motion_steps, ray->time); } -#endif P = transform_point(&cameratoworld, P); D = normalize(transform_direction(&cameratoworld, D)); @@ -159,16 +155,13 @@ ccl_device void camera_sample_perspective(KernelGlobals kg, #endif } -#ifdef __CAMERA_CLIPPING__ /* clipping */ float z_inv = 1.0f / normalize(Pcamera).z; float nearclip = kernel_data.cam.nearclip * z_inv; ray->P += nearclip * ray->D; ray->dP += nearclip * ray->dD; - ray->t = kernel_data.cam.cliplength * z_inv; -#else - ray->t = FLT_MAX; -#endif + ray->tmin = 0.0f; + ray->tmax = kernel_data.cam.cliplength * z_inv; } /* Orthographic Camera */ @@ -207,14 +200,12 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg, /* transform ray from camera to world */ Transform cameratoworld = kernel_data.cam.cameratoworld; -#ifdef __CAMERA_MOTION__ if (kernel_data.cam.num_motion_steps) { transform_motion_array_interpolate(&cameratoworld, - kernel_tex_array(__camera_motion), + kernel_data_array(camera_motion), kernel_data.cam.num_motion_steps, ray->time); } -#endif ray->P = transform_point(&cameratoworld, P); ray->D = normalize(transform_direction(&cameratoworld, D)); @@ -229,20 +220,15 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg, ray->dD = differential_zero_compact(); #endif -#ifdef __CAMERA_CLIPPING__ /* clipping */ - ray->t = kernel_data.cam.cliplength; -#else - ray->t = FLT_MAX; -#endif + ray->tmin = 0.0f; + ray->tmax = kernel_data.cam.cliplength; } /* Panorama Camera */ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, -#ifdef __CAMERA_MOTION__ ccl_global const DecomposedTransform *cam_motion, -#endif float raster_x, float raster_y, float lens_u, @@ -258,7 +244,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, /* indicates ray should not receive any light, outside of the lens */ if (is_zero(D)) { - ray->t = 0.0f; + ray->tmax = 0.0f; return; } @@ -286,12 +272,10 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, /* transform ray from camera to world */ Transform cameratoworld = cam->cameratoworld; -#ifdef __CAMERA_MOTION__ if (cam->num_motion_steps) { transform_motion_array_interpolate( &cameratoworld, cam_motion, cam->num_motion_steps, ray->time); } -#endif /* Stereo transform */ bool use_stereo = cam->interocular_offset != 0.0f; @@ -344,15 +328,12 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, ray->dP = differential_make_compact(dP); #endif -#ifdef __CAMERA_CLIPPING__ /* clipping */ float nearclip = cam->nearclip; ray->P += nearclip * ray->D; ray->dP += nearclip * ray->dD; - ray->t = cam->cliplength; -#else - ray->t = FLT_MAX; -#endif + ray->tmin = 0.0f; + ray->tmax = cam->cliplength; } /* Common */ @@ -368,11 +349,10 @@ ccl_device_inline void camera_sample(KernelGlobals kg, ccl_private Ray *ray) { /* pixel filter */ - int filter_table_offset = kernel_data.film.filter_table_offset; + int filter_table_offset = kernel_data.tables.filter_table_offset; float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE); float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE); -#ifdef __CAMERA_MOTION__ /* motion blur */ if (kernel_data.cam.shuttertime == -1.0f) { ray->time = 0.5f; @@ -410,7 +390,6 @@ ccl_device_inline void camera_sample(KernelGlobals kg, } } } -#endif /* sample */ if (kernel_data.cam.type == CAMERA_PERSPECTIVE) { @@ -420,12 +399,8 @@ ccl_device_inline void camera_sample(KernelGlobals kg, camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray); } else { -#ifdef __CAMERA_MOTION__ - ccl_global const DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion); + ccl_global const DecomposedTransform *cam_motion = kernel_data_array(camera_motion); camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray); -#else - camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray); -#endif } } diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index a6975a63d5d..1cf06614f3b 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -8,7 +8,7 @@ CCL_NAMESPACE_BEGIN ccl_device ccl_private ShaderClosure *closure_alloc(ccl_private ShaderData *sd, int size, ClosureType type, - float3 weight) + Spectrum weight) { kernel_assert(size <= sizeof(ShaderClosure)); @@ -49,9 +49,9 @@ ccl_device ccl_private void *closure_alloc_extra(ccl_private ShaderData *sd, int ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *sd, int size, - float3 weight) + Spectrum weight) { - kernel_assert(isfinite3_safe(weight)); + kernel_assert(isfinite_safe(weight)); const float sample_weight = fabsf(average(weight)); @@ -59,39 +59,10 @@ ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData * * we will not allocate new closure. */ if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); - if (sc == NULL) { - return NULL; - } - - sc->sample_weight = sample_weight; - - return sc; - } - - return NULL; -} - -#ifdef __OSL__ -ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, - int size, - float3 weight, - void *data) -{ - kernel_assert(isfinite3_safe(weight)); - - const float sample_weight = fabsf(average(weight)); - - /* Use comparison this way to help dealing with non-finite weight: if the average is not finite - * we will not allocate new closure. */ - if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { - ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); if (!sc) { return NULL; } - memcpy((void *)sc, data, size); - - sc->weight = weight; sc->sample_weight = sample_weight; return sc; @@ -99,6 +70,5 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, return NULL; } -#endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 011155cdf5f..f0b28ff77c4 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -103,9 +103,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, ccl_private float *pdf) { /* For curves use the smooth normal, particularly for ribbons the geometric @@ -115,306 +114,80 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg, switch (sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: - label = bsdf_diffuse_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; -#ifdef __SVM__ +#if defined(__SVM__) || defined(__OSL__) case CLOSURE_BSDF_OREN_NAYAR_ID: - label = bsdf_oren_nayar_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_oren_nayar_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; # ifdef __OSL__ case CLOSURE_BSDF_PHONG_RAMP_ID: - label = bsdf_phong_ramp_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_phong_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - label = bsdf_diffuse_ramp_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_diffuse_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; # endif case CLOSURE_BSDF_TRANSLUCENT_ID: - label = bsdf_translucent_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_translucent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - label = bsdf_reflection_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - label = bsdf_refraction_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_refraction_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - label = bsdf_transparent_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_transparent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - label = bsdf_microfacet_ggx_sample(kg, - sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_microfacet_ggx_sample(kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - label = bsdf_microfacet_multi_ggx_sample(kg, - sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf, - &sd->lcg_state); + label = bsdf_microfacet_multi_ggx_sample( + kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - label = bsdf_microfacet_multi_ggx_glass_sample(kg, - sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf, - &sd->lcg_state); + label = bsdf_microfacet_multi_ggx_glass_sample( + kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - label = bsdf_microfacet_beckmann_sample(kg, - sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_microfacet_beckmann_sample( + kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - label = bsdf_ashikhmin_shirley_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_ashikhmin_shirley_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - label = bsdf_ashikhmin_velvet_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - label = bsdf_diffuse_toon_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_diffuse_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - label = bsdf_glossy_toon_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_glossy_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - label = bsdf_hair_reflection_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_hair_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - label = bsdf_hair_transmission_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_hair_transmission_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: - label = bsdf_principled_hair_sample( - kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); + label = bsdf_principled_hair_sample(kg, sc, sd, randu, randv, eval, omega_in, pdf); break; -# ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: - label = bsdf_principled_diffuse_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_principled_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: - label = bsdf_principled_sheen_sample(sc, - Ng, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + label = bsdf_principled_sheen_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf); break; -# endif /* __PRINCIPLED__ */ #endif default: label = LABEL_NONE; @@ -434,12 +207,12 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg, else { /* Shadow terminator offset. */ const float frequency_multiplier = - kernel_tex_fetch(__objects, sd->object).shadow_terminator_shading_offset; + kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset; if (frequency_multiplier > 1.0f) { *eval *= shift_cos_in(dot(*omega_in, sc->N), frequency_multiplier); } if (label & LABEL_DIFFUSE) { - if (!isequal_float3(sc->N, sd->N)) { + if (!isequal(sc->N, sd->N)) { *eval *= bump_shadowing_term((label & LABEL_TRANSMIT) ? -sd->N : sd->N, sc->N, *omega_in); } } @@ -458,7 +231,7 @@ ccl_device #else ccl_device_inline #endif - float3 + Spectrum bsdf_eval(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private const ShaderClosure *sc, @@ -466,14 +239,14 @@ ccl_device_inline const bool is_transmission, ccl_private float *pdf) { - float3 eval = zero_float3(); + Spectrum eval = zero_spectrum(); if (!is_transmission) { switch (sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); break; -#ifdef __SVM__ +#if defined(__SVM__) || defined(__OSL__) case CLOSURE_BSDF_OREN_NAYAR_ID: eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf); break; @@ -537,26 +310,24 @@ ccl_device_inline case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf); break; -# ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf); break; -# endif /* __PRINCIPLED__ */ #endif default: break; } if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - if (!isequal_float3(sc->N, sd->N)) { + if (!isequal(sc->N, sd->N)) { eval *= bump_shadowing_term(sd->N, sc->N, omega_in); } } /* Shadow terminator offset. */ const float frequency_multiplier = - kernel_tex_fetch(__objects, sd->object).shadow_terminator_shading_offset; + kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset; if (frequency_multiplier > 1.0f) { eval *= shift_cos_in(dot(omega_in, sc->N), frequency_multiplier); } @@ -566,7 +337,7 @@ ccl_device_inline case CLOSURE_BSDF_DIFFUSE_ID: eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); break; -#ifdef __SVM__ +#if defined(__SVM__) || defined(__OSL__) case CLOSURE_BSDF_OREN_NAYAR_ID: eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf); break; @@ -622,20 +393,18 @@ ccl_device_inline case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf); break; -# ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf); break; -# endif /* __PRINCIPLED__ */ #endif default: break; } if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - if (!isequal_float3(sc->N, sd->N)) { + if (!isequal(sc->N, sd->N)) { eval *= bump_shadowing_term(-sd->N, sc->N, omega_in); } } @@ -650,7 +419,7 @@ ccl_device_inline ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float roughness) { /* TODO: do we want to blur volume closures? */ -#ifdef __SVM__ +#if defined(__SVM__) || defined(__OSL__) switch (sc->type) { case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 47066542122..75995262030 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -39,7 +39,7 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough return 2.0f / (roughness * roughness) - 2.0f; } -ccl_device_forceinline float3 +ccl_device_forceinline Spectrum bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc, const float3 I, const float3 omega_in, @@ -55,7 +55,7 @@ bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc, if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } if (NdotI > 0.0f && NdotO > 0.0f) { NdotI = fmaxf(NdotI, 1e-6f); @@ -105,16 +105,16 @@ bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc, } } - return make_float3(out, out, out); + return make_spectrum(out); } -ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_ashikhmin_shirley_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, @@ -133,14 +133,10 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; @@ -214,19 +210,13 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) { /* Some high number for MIS. */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); label = LABEL_REFLECT | LABEL_SINGULAR; } else { /* leave the rest to eval_reflect */ *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); } - -#ifdef __RAY_DIFFERENTIALS__ - /* just do the reflection thing for now */ - *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; -#endif } return label; diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index 3d7906eef7d..9e68ea5d5e5 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -31,10 +31,10 @@ ccl_device int bsdf_ashikhmin_velvet_setup(ccl_private VelvetBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc; float m_invsigma2 = bsdf->invsigma2; @@ -50,7 +50,7 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClo if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float cosNHdivHO = cosNH / cosHO; cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f); @@ -68,33 +68,29 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClo float out = 0.25f * (D * G) / cosNO; *pdf = 0.5f * M_1_PI_F; - return make_float3(out, out, out); + return make_spectrum(out); } *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_ashikhmin_velvet_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc; @@ -129,22 +125,16 @@ ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc, float power = 0.25f * (D * G) / cosNO; - *eval = make_float3(power, power, power); - -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the retroreflective bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; -#endif + *eval = make_spectrum(power); } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index 759ad03f8e8..ec64c375666 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -26,39 +26,35 @@ ccl_device int bsdf_diffuse_setup(ccl_private DiffuseBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_diffuse_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; float3 N = bsdf->N; float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; *pdf = cos_pi; - return make_float3(cos_pi, cos_pi, cos_pi); + return make_spectrum(cos_pi); } -ccl_device float3 bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; @@ -68,16 +64,11 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc, sample_cos_hemisphere(N, randu, randv, omega_in, pdf); if (dot(Ng, *omega_in) > 0.0f) { - *eval = make_float3(*pdf, *pdf, *pdf); -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; -#endif + *eval = make_spectrum(*pdf); } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_DIFFUSE; } @@ -90,39 +81,35 @@ ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_translucent_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_translucent_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_translucent_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_translucent_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; float3 N = bsdf->N; float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F; *pdf = cos_pi; - return make_float3(cos_pi, cos_pi, cos_pi); + return make_spectrum(cos_pi); } ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; @@ -132,16 +119,11 @@ ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc, // distribution over the hemisphere sample_cos_hemisphere(-N, randu, randv, omega_in, pdf); if (dot(Ng, *omega_in) < 0) { - *eval = make_float3(*pdf, *pdf, *pdf); -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); -#endif + *eval = make_spectrum(*pdf); } else { *pdf = 0; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_TRANSMIT | LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index aa4c091f587..d7faf5c9e9a 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -9,6 +9,7 @@ #pragma once #include "kernel/sample/mapping.h" +#include "kernel/util/color.h" CCL_NAMESPACE_BEGIN @@ -46,38 +47,34 @@ ccl_device void bsdf_diffuse_ramp_blur(ccl_private ShaderClosure *sc, float roug { } -ccl_device float3 bsdf_diffuse_ramp_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_ramp_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc; float3 N = bsdf->N; float cos_pi = fmaxf(dot(N, omega_in), 0.0f); *pdf = cos_pi * M_1_PI_F; - return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F; + return rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F); } -ccl_device float3 bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc; @@ -87,15 +84,11 @@ ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc, sample_cos_hemisphere(N, randu, randv, omega_in, pdf); if (dot(Ng, *omega_in) > 0.0f) { - *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F; -# ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; -# endif + *eval = rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F); } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index a136ed05800..a29f7c444ae 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -37,10 +37,10 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc; float offset = bsdf->offset; @@ -61,7 +61,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClos if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { *pdf = 0.0f; - return make_float3(*pdf, *pdf, *pdf); + return zero_spectrum(); } float roughness1_inv = 1.0f / roughness1; @@ -81,31 +81,31 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClos (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i); *pdf = phi_pdf * theta_pdf; - return make_float3(*pdf, *pdf, *pdf); + return make_spectrum(*pdf); } -ccl_device float3 bsdf_hair_transmission_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_hair_transmission_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_hair_reflection_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_hair_reflection_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_hair_transmission_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc; float offset = bsdf->offset; @@ -125,7 +125,7 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderC if (M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; - return make_float3(*pdf, *pdf, *pdf); + return zero_spectrum(); } float costheta_i = fast_cosf(theta_i); @@ -145,20 +145,16 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderC float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); *pdf = phi_pdf * theta_pdf; - return make_float3(*pdf, *pdf, *pdf); + return make_spectrum(*pdf); } ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc; @@ -194,17 +190,11 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc, fast_sincosf(phi, &sinphi, &cosphi); *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; - // differentials - TODO: find a better approximation for the reflective bounce -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; - *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; -#endif - *pdf = fabsf(phi_pdf * theta_pdf); if (M_PI_2_F - fabsf(theta_i) < 0.001f) *pdf = 0.0f; - *eval = make_float3(*pdf, *pdf, *pdf); + *eval = make_spectrum(*pdf); return LABEL_REFLECT | LABEL_GLOSSY; } @@ -212,14 +202,10 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc, ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc; @@ -255,18 +241,12 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc fast_sincosf(phi, &sinphi, &cosphi); *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; - // differentials - TODO: find a better approximation for the transmission bounce -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; - *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; -#endif - *pdf = fabsf(phi_pdf * theta_pdf); if (M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; } - *eval = make_float3(*pdf, *pdf, *pdf); + *eval = make_spectrum(*pdf); /* TODO(sergey): Should always be negative, but seems some precision issue * is involved here. diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 33706213403..2236bc62050 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -3,7 +3,7 @@ #pragma once -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # include <fenv.h> #endif @@ -20,7 +20,7 @@ typedef struct PrincipledHairBSDF { SHADER_CLOSURE_BASE; /* Absorption coefficient. */ - float3 sigma; + Spectrum sigma; /* Variance of the underlying logistic distribution. */ float v; /* Scale factor of the underlying logistic distribution. */ @@ -166,12 +166,6 @@ ccl_device_inline float longitudinal_scattering( } } -/* Combine the three values using their luminances. */ -ccl_device_inline float4 combine_with_energy(KernelGlobals kg, float3 c) -{ - return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c)); -} - #ifdef __HAIR__ /* Set up the hair closure. */ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd, @@ -203,7 +197,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd, float h = (sd->type & PRIMITIVE_CURVE_RIBBON) ? -sd->v : dot(cross(sd->Ng, X), Z); kernel_assert(fabsf(h) < 1.0f + 1e-4f); - kernel_assert(isfinite3_safe(Y)); + kernel_assert(isfinite_safe(Y)); kernel_assert(isfinite_safe(h)); bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h); @@ -214,34 +208,36 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd, #endif /* __HAIR__ */ /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */ -ccl_device_inline void hair_attenuation(KernelGlobals kg, - float f, - float3 T, - ccl_private float4 *Ap) +ccl_device_inline void hair_attenuation( + KernelGlobals kg, float f, Spectrum T, ccl_private Spectrum *Ap, ccl_private float *Ap_energy) { /* Primary specular (R). */ - Ap[0] = make_float4(f, f, f, f); + Ap[0] = make_spectrum(f); + Ap_energy[0] = f; /* Transmission (TT). */ - float3 col = sqr(1.0f - f) * T; - Ap[1] = combine_with_energy(kg, col); + Spectrum col = sqr(1.0f - f) * T; + Ap[1] = col; + Ap_energy[1] = spectrum_to_gray(kg, col); /* Secondary specular (TRT). */ col *= T * f; - Ap[2] = combine_with_energy(kg, col); + Ap[2] = col; + Ap_energy[2] = spectrum_to_gray(kg, col); /* Residual component (TRRT+). */ - col *= safe_divide_color(T * f, make_float3(1.0f, 1.0f, 1.0f) - T * f); - Ap[3] = combine_with_energy(kg, col); + col *= safe_divide(T * f, one_spectrum() - T * f); + Ap[3] = col; + Ap_energy[3] = spectrum_to_gray(kg, col); /* Normalize sampling weights. */ - float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w; + float totweight = Ap_energy[0] + Ap_energy[1] + Ap_energy[2] + Ap_energy[3]; float fac = safe_divide(1.0f, totweight); - Ap[0].w *= fac; - Ap[1].w *= fac; - Ap[2].w *= fac; - Ap[3].w *= fac; + Ap_energy[0] *= fac; + Ap_energy[1] *= fac; + Ap_energy[2] *= fac; + Ap_energy[3] *= fac; } /* Given the tilt angle, generate the rotated theta_i for the different bounces. */ @@ -266,13 +262,13 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i, } /* Evaluation function for our shader. */ -ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderClosure *sc, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_principled_hair_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderClosure *sc, + const float3 omega_in, + ccl_private float *pdf) { - kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length)); + kernel_assert(isfinite_safe(sd->P) && isfinite_safe(sd->ray_length)); ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc; float3 Y = float4_to_float3(bsdf->extra->geom); @@ -299,9 +295,11 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg, float cos_gamma_t = cos_from_sin(sin_gamma_t); float gamma_t = safe_asinf(sin_gamma_t); - float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); - float4 Ap[4]; - hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); + Spectrum T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); + Spectrum Ap[4]; + float Ap_energy[4]; + hair_attenuation( + kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap, Ap_energy); float sin_theta_i = wi.x; float cos_theta_i = cos_from_sin(sin_theta_i); @@ -312,35 +310,40 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg, float angles[6]; hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); - float4 F; + Spectrum F; + float F_energy; float Mp, Np; /* Primary specular (R). */ Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); F = Ap[0] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy = Ap_energy[0] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Transmission (TT). */ Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v); Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); F += Ap[1] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[1] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Secondary specular (TRT). */ Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v); Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); F += Ap[2] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[2] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Residual component (TRRT+). */ Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v); Np = M_1_2PI_F; F += Ap[3] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[3] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); - *pdf = F.w; - return float4_to_float3(F); + *pdf = F_energy; + return F; } /* Sampling function for the hair shader. */ @@ -349,10 +352,8 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg, ccl_private ShaderData *sd, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc; @@ -385,16 +386,18 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg, float cos_gamma_t = cos_from_sin(sin_gamma_t); float gamma_t = safe_asinf(sin_gamma_t); - float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); - float4 Ap[4]; - hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); + Spectrum T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); + Spectrum Ap[4]; + float Ap_energy[4]; + hair_attenuation( + kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap, Ap_energy); int p = 0; for (; p < 3; p++) { - if (u[0].x < Ap[p].w) { + if (u[0].x < Ap_energy[p]) { break; } - u[0].x -= Ap[p].w; + u[0].x -= Ap_energy[p]; } float v = bsdf->v; @@ -429,44 +432,43 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg, hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); - float4 F; + Spectrum F; + float F_energy; float Mp, Np; /* Primary specular (R). */ Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); F = Ap[0] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy = Ap_energy[0] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Transmission (TT). */ Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v); Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); F += Ap[1] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[1] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Secondary specular (TRT). */ Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v); Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); F += Ap[2] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[2] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); /* Residual component (TRRT+). */ Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v); Np = M_1_2PI_F; F += Ap[3] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + F_energy += Ap_energy[3] * Mp * Np; + kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy)); - *eval = float4_to_float3(F); - *pdf = F.w; + *eval = F; + *pdf = F_energy; *omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i); -#ifdef __RAY_DIFFERENTIALS__ - float3 N = safe_normalize(sd->I + *omega_in); - *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx; - *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy; -#endif - return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT); } @@ -489,25 +491,28 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale( return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f; } -ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc) +ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc) { ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc; - return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v)); + return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v)); } -ccl_device_inline float3 -bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness) +ccl_device_inline Spectrum +bsdf_principled_hair_sigma_from_reflectance(const Spectrum color, const float azimuthal_roughness) { - const float3 sigma = log3(color) / - bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness); + const Spectrum sigma = log(color) / + bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness); return sigma * sigma; } -ccl_device_inline float3 bsdf_principled_hair_sigma_from_concentration(const float eumelanin, - const float pheomelanin) +ccl_device_inline Spectrum bsdf_principled_hair_sigma_from_concentration(const float eumelanin, + const float pheomelanin) { - return eumelanin * make_float3(0.506f, 0.841f, 1.653f) + - pheomelanin * make_float3(0.343f, 0.733f, 1.924f); + const float3 eumelanin_color = make_float3(0.506f, 0.841f, 1.653f); + const float3 pheomelanin_color = make_float3(0.343f, 0.733f, 1.924f); + + return eumelanin * rgb_to_spectrum(eumelanin_color) + + pheomelanin * rgb_to_spectrum(pheomelanin_color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index db50712f9f0..04d5ca90bfd 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -17,8 +17,8 @@ CCL_NAMESPACE_BEGIN typedef struct MicrofacetExtra { - float3 color, cspec0; - float3 fresnel_color; + Spectrum color, cspec0; + Spectrum fresnel_color; float clearcoat; } MicrofacetExtra; @@ -233,11 +233,11 @@ ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals kg, * * Else it is simply white */ -ccl_device_forceinline float3 reflection_color(ccl_private const MicrofacetBsdf *bsdf, - float3 L, - float3 H) +ccl_device_forceinline Spectrum reflection_color(ccl_private const MicrofacetBsdf *bsdf, + float3 L, + float3 H) { - float3 F = make_float3(1.0f, 1.0f, 1.0f); + Spectrum F = one_spectrum(); bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID); if (use_fresnel) { @@ -310,7 +310,7 @@ ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *b ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf, ccl_private const ShaderData *sd) { - bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); + bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0); bsdf->alpha_x = saturatef(bsdf->alpha_x); bsdf->alpha_y = saturatef(bsdf->alpha_y); @@ -325,7 +325,7 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsd ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *bsdf, ccl_private const ShaderData *sd) { - bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); + bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0); bsdf->alpha_x = saturatef(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; @@ -357,10 +357,10 @@ ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float ro bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } -ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -370,7 +370,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosu if (m_refractive || alpha_x * alpha_y <= 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float cosNO = dot(N, I); @@ -451,12 +451,12 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosu /* eq. 20 */ float common = D * 0.25f / cosNO; - float3 F = reflection_color(bsdf, omega_in, m); + Spectrum F = reflection_color(bsdf, omega_in, m); if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { F *= 0.25f * bsdf->extra->clearcoat; } - float3 out = F * G * common; + Spectrum out = F * G * common; /* eq. 2 in distribution of visible normals sampling * `pm = Dw = G1o * dot(m, I) * D / dot(N, I);` */ @@ -469,13 +469,13 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosu return out; } - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -486,7 +486,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos if (!m_refractive || alpha_x * alpha_y <= 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float cosNO = dot(N, I); @@ -494,7 +494,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos if (cosNO <= 0 || cosNI >= 0) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */ + return zero_spectrum(); /* vectors on same side -- not possible */ } /* compute half-vector of the refraction (eq. 16) */ float3 ht = -(m_eta * omega_in + I); @@ -530,21 +530,17 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos float out = G * fabsf(cosHI * cosHO) * common; *pdf = G1o * fabsf(cosHO * cosHI) * common; - return make_float3(out, out, out); + return make_spectrum(out); } ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; @@ -588,7 +584,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, if (alpha_x * alpha_y <= 1e-7f) { /* some high number for MIS */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID); @@ -664,7 +660,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, float common = (G1o * D) * 0.25f / cosNO; *pdf = common; - float3 F = reflection_color(bsdf, *omega_in, m); + Spectrum F = reflection_color(bsdf, *omega_in, m); *eval = G1i * common * F; } @@ -672,14 +668,9 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { *eval *= 0.25f * bsdf->extra->clearcoat; } - -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; -#endif } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } } @@ -690,39 +681,18 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, /* CAUTION: the i and o variables are inverted relative to the paper * eq. 39 - compute actual refractive direction */ float3 R, T; -#ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; -#endif float m_eta = bsdf->ior, fresnel; bool inside; - fresnel = fresnel_dielectric(m_eta, - m, - I, - &R, - &T, -#ifdef __RAY_DIFFERENTIALS__ - dIdx, - dIdy, - &dRdx, - &dRdy, - &dTdx, - &dTdy, -#endif - &inside); + fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside); if (!inside && fresnel != 1.0f) { - *omega_in = T; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; -#endif if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { /* some high number for MIS */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); label = LABEL_TRANSMIT | LABEL_SINGULAR; } else { @@ -750,11 +720,11 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, float out = G1i * fabsf(cosHI * cosHO) * common; *pdf = cosHO * fabsf(cosHI) * common; - *eval = make_float3(out, out, out); + *eval = make_spectrum(out); } } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } } @@ -835,10 +805,10 @@ ccl_device_inline float bsdf_beckmann_aniso_G1( return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f); } -ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_microfacet_beckmann_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -848,7 +818,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const Shader if (m_refractive || alpha_x * alpha_y <= 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float cosNO = dot(N, I); @@ -910,16 +880,16 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const Shader * pdf = pm * 0.25 / dot(m, I); */ *pdf = G1o * common; - return make_float3(out, out, out); + return make_spectrum(out); } - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_microfacet_beckmann_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -930,7 +900,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade if (!m_refractive || alpha_x * alpha_y <= 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float cosNO = dot(N, I); @@ -938,7 +908,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade if (cosNO <= 0 || cosNI >= 0) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } /* compute half-vector of the refraction (eq. 16) */ float3 ht = -(m_eta * omega_in + I); @@ -971,21 +941,17 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade float out = G * fabsf(cosHI * cosHO) * common; *pdf = G1o * fabsf(cosHO * cosHI) * common; - return make_float3(out, out, out); + return make_spectrum(out); } ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; @@ -1028,7 +994,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, if (alpha_x * alpha_y <= 1e-7f) { /* some high number for MIS */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); label = LABEL_REFLECT | LABEL_SINGULAR; } else { @@ -1074,16 +1040,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, float out = G * common; *pdf = G1o * common; - *eval = make_float3(out, out, out); + *eval = make_spectrum(out); } - -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; -#endif } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } } @@ -1094,39 +1055,18 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, /* CAUTION: the i and o variables are inverted relative to the paper * eq. 39 - compute actual refractive direction */ float3 R, T; -#ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; -#endif float m_eta = bsdf->ior, fresnel; bool inside; - fresnel = fresnel_dielectric(m_eta, - m, - I, - &R, - &T, -#ifdef __RAY_DIFFERENTIALS__ - dIdx, - dIdy, - &dRdx, - &dRdy, - &dTdx, - &dTdy, -#endif - &inside); + fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside); if (!inside && fresnel != 1.0f) { *omega_in = T; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; -#endif - if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { /* some high number for MIS */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); label = LABEL_TRANSMIT | LABEL_SINGULAR; } else { @@ -1155,11 +1095,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, float out = G * fabsf(cosHI * cosHO) * common; *pdf = G1o * cosHO * fabsf(cosHI) * common; - *eval = make_float3(out, out, out); + *eval = make_spectrum(out); } } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } } diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 10027ae9f77..9402ce11f7a 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -95,29 +95,29 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, /* Phase function for reflective materials. */ ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, - ccl_private float3 *weight, + ccl_private Spectrum *weight, const float3 wm) { return -wi + 2.0f * wm * dot(wi, wm); } -ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, - const float lambda, - const float3 wo, - const float2 alpha) +ccl_device_forceinline Spectrum mf_eval_phase_glossy(const float3 w, + const float lambda, + const float3 wo, + const float2 alpha) { if (w.z > 0.9999f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); const float3 wh = normalize(wo - w); if (wh.z < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z; const float dotW_WH = dot(-w, wh); if (dotW_WH < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f); if (alpha.x == alpha.y) @@ -125,7 +125,7 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, else phase *= D_ggx_aniso(wh, alpha); - return make_float3(phase, phase, phase); + return make_spectrum(phase); } /* Phase function for dielectric transmissive materials, including both reflection and refraction @@ -148,22 +148,22 @@ ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, return normalize(wm * (cosI * inv_eta + cosT) - wi * inv_eta); } -ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, - const float lambda, - const float3 wo, - const bool wo_outside, - const float2 alpha, - const float eta) +ccl_device_forceinline Spectrum mf_eval_phase_glass(const float3 w, + const float lambda, + const float3 wo, + const bool wo_outside, + const float2 alpha, + const float eta) { if (w.z > 0.9999f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z; float v; if (wo_outside) { const float3 wh = normalize(wo - w); if (wh.z < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); const float dotW_WH = dot(-w, wh); v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / @@ -175,14 +175,14 @@ ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, wh = -wh; const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh); if (dotW_WH < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); float temp = dotW_WH + eta * dotWO_WH; v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * D_ggx(wh, alpha.x) / (pArea * temp * temp); } - return make_float3(v, v, v); + return make_spectrum(v); } /* === Utility functions for the random walks === */ @@ -371,14 +371,14 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ccl_private ShaderClosure *sc, fl /* === Closure implementations === */ -/* Multiscattering GGX Glossy closure */ +/* Multi-scattering GGX Glossy closure */ ccl_device int bsdf_microfacet_multi_ggx_common_setup(ccl_private MicrofacetBsdf *bsdf) { bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); - bsdf->extra->color = saturate3(bsdf->extra->color); - bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); + bsdf->extra->color = saturate(bsdf->extra->color); + bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0); return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG; } @@ -415,27 +415,27 @@ ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(ccl_private Microfacet return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf, - ccl_private uint *lcg_state) +ccl_device Spectrum bsdf_microfacet_multi_ggx_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf, + ccl_private uint *lcg_state) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf, - ccl_private uint *lcg_state) +ccl_device Spectrum bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf, + ccl_private uint *lcg_state) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float3 X, Y, Z; @@ -444,7 +444,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const Shade /* Ensure that the both directions are on the outside w.r.t. the shading normal. */ if (dot(Z, I) <= 0.0f || dot(Z, omega_in) <= 0.0f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID); @@ -478,14 +478,10 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf, ccl_private uint *lcg_state) { @@ -509,11 +505,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg, return LABEL_NONE; } *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; -#endif + *eval = make_spectrum(1e6f); return LABEL_REFLECT | LABEL_SINGULAR; } @@ -551,21 +543,17 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg, *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); *eval *= *pdf; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; -#endif return LABEL_REFLECT | LABEL_GLOSSY; } -/* Multiscattering GGX Glass closure */ +/* Multi-scattering GGX Glass closure */ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf *bsdf) { bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); bsdf->alpha_y = bsdf->alpha_x; bsdf->ior = max(0.0f, bsdf->ior); - bsdf->extra->color = saturate3(bsdf->extra->color); + bsdf->extra->color = saturate(bsdf->extra->color); bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; @@ -578,8 +566,8 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private Microfa bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); bsdf->alpha_y = bsdf->alpha_x; bsdf->ior = max(0.0f, bsdf->ior); - bsdf->extra->color = saturate3(bsdf->extra->color); - bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0); + bsdf->extra->color = saturate(bsdf->extra->color); + bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0); bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID; @@ -588,7 +576,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private Microfa return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG; } -ccl_device float3 +ccl_device Spectrum bsdf_microfacet_multi_ggx_glass_eval_transmit(ccl_private const ShaderClosure *sc, const float3 I, const float3 omega_in, @@ -599,7 +587,7 @@ bsdf_microfacet_multi_ggx_glass_eval_transmit(ccl_private const ShaderClosure *s if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float3 X, Y, Z; @@ -622,17 +610,18 @@ bsdf_microfacet_multi_ggx_glass_eval_transmit(ccl_private const ShaderClosure *s bsdf->extra->color); } -ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf, - ccl_private uint *lcg_state) +ccl_device Spectrum +bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf, + ccl_private uint *lcg_state) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID); @@ -661,14 +650,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf, ccl_private uint *lcg_state) { @@ -679,41 +664,17 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg, if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { float3 R, T; -#ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; -#endif bool inside; - float fresnel = fresnel_dielectric(bsdf->ior, - Z, - I, - &R, - &T, -#ifdef __RAY_DIFFERENTIALS__ - dIdx, - dIdy, - &dRdx, - &dRdy, - &dTdx, - &dTdy, -#endif - &inside); + float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T, &inside); *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); if (randu < fresnel) { *omega_in = R; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dRdx; - *domega_in_dy = dRdy; -#endif return LABEL_REFLECT | LABEL_SINGULAR; } else { *omega_in = T; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; -#endif return LABEL_TRANSMIT | LABEL_SINGULAR; } } @@ -739,22 +700,9 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg, *omega_in = X * localO.x + Y * localO.y + Z * localO.z; if (localO.z * localI.z > 0.0f) { -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; -#endif return LABEL_REFLECT | LABEL_GLOSSY; } else { -#ifdef __RAY_DIFFERENTIALS__ - float cosI = dot(Z, I); - float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI * cosI))), 1e-7f); - *domega_in_dx = -(bsdf->ior * dIdx) + - ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z; - *domega_in_dy = -(bsdf->ior * dIdy) + - ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z; -#endif - return LABEL_TRANSMIT | LABEL_GLOSSY; } } diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h index e4fcf0e6ba3..91fb9158050 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h @@ -12,16 +12,16 @@ * multi-scattered energy is used. In combination with MIS, that is enough to produce an unbiased * result, although the balance heuristic isn't necessarily optimal anymore. */ -ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, - float3 wo, - const bool wo_outside, - const float3 color, - const float alpha_x, - const float alpha_y, - ccl_private uint *lcg_state, - const float eta, - bool use_fresnel, - const float3 cspec0) +ccl_device_forceinline Spectrum MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, + float3 wo, + const bool wo_outside, + const Spectrum color, + const float alpha_x, + const float alpha_y, + ccl_private uint *lcg_state, + const float eta, + bool use_fresnel, + const Spectrum cspec0) { /* Evaluating for a shallower incoming direction produces less noise, and the properties of the * BSDF guarantee reciprocity. */ @@ -46,7 +46,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, } if (wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside)) - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); const float2 alpha = make_float2(alpha_x, alpha_y); @@ -54,8 +54,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float shadowing_lambda = mf_lambda(wo_outside ? wo : -wo, alpha); /* Analytically compute single scattering for lower noise. */ - float3 eval; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + Spectrum eval; + Spectrum throughput = one_spectrum(); const float3 wh = normalize(wi + wo); #ifdef MF_MULTI_GLASS eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta); @@ -70,7 +70,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, val *= D_ggx(wh, alpha.x); else val *= D_ggx_aniso(wh, alpha); - eval = make_float3(val, val, val); + eval = make_spectrum(val); #endif float F0 = fresnel_dielectric_cos(1.0f, eta); @@ -99,7 +99,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, #ifdef MF_MULTI_GLASS if (order == 0 && use_fresnel) { /* Evaluate amount of scattering towards wo on this microfacet. */ - float3 phase; + Spectrum phase; if (outside) phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); else @@ -113,7 +113,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, #endif if (order > 0) { /* Evaluate amount of scattering towards wo on this microfacet. */ - float3 phase; + Spectrum phase; #ifdef MF_MULTI_GLASS if (outside) phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); @@ -172,19 +172,19 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, * walk escaped the surface in wo. The function returns the throughput between wi and wo. Without * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal. */ -ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, - ccl_private float3 *wo, - const float3 color, - const float alpha_x, - const float alpha_y, - ccl_private uint *lcg_state, - const float eta, - bool use_fresnel, - const float3 cspec0) +ccl_device_forceinline Spectrum MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, + ccl_private float3 *wo, + const Spectrum color, + const float alpha_x, + const float alpha_y, + ccl_private uint *lcg_state, + const float eta, + bool use_fresnel, + const Spectrum cspec0) { const float2 alpha = make_float2(alpha_x, alpha_y); - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + Spectrum throughput = one_spectrum(); float3 wr = -wi; float lambda_r = mf_lambda(wr, alpha); float hr = 1.0f; @@ -229,7 +229,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, throughput *= color; } else { - float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); + Spectrum t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); if (order == 0) throughput = t_color; @@ -239,7 +239,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, } #else /* MF_MULTI_GLOSSY */ if (use_fresnel) { - float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); + Spectrum t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); if (order == 0) throughput = t_color; @@ -254,7 +254,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, G1_r = mf_G1(wr, C1_r, lambda_r); } *wo = make_float3(0.0f, 0.0f, 1.0f); - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } #undef MF_MULTI_GLASS diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 56c7ec869c7..b85390f0676 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -15,10 +15,10 @@ typedef struct OrenNayarBsdf { static_assert(sizeof(ShaderClosure) >= sizeof(OrenNayarBsdf), "OrenNayarBsdf is too large!"); -ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc, - float3 n, - float3 v, - float3 l) +ccl_device Spectrum bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc, + float3 n, + float3 v, + float3 l) { ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc; float nl = max(dot(n, l), 0.0f); @@ -28,7 +28,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure if (t > 0.0f) t /= max(nl, nv) + FLT_MIN; float is = nl * (bsdf->a + bsdf->b * t); - return make_float3(is, is, is); + return make_spectrum(is); } ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf) @@ -47,10 +47,10 @@ ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc; if (dot(bsdf->N, omega_in) > 0.0f) { @@ -59,30 +59,26 @@ ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure * } else { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } } -ccl_device float3 bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc; @@ -90,16 +86,10 @@ ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc, if (dot(Ng, *omega_in) > 0.0f) { *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in); - -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; -#endif } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_DIFFUSE; diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 74a1f7ae090..4236e77ae6c 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -8,6 +8,8 @@ #pragma once +#include "kernel/util/color.h" + CCL_NAMESPACE_BEGIN #ifdef __OSL__ @@ -42,10 +44,10 @@ ccl_device int bsdf_phong_ramp_setup(ccl_private PhongRampBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc; float m_exponent = bsdf->exponent; @@ -61,11 +63,11 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure * float common = 0.5f * M_1_PI_F * cosp; float out = cosNI * (m_exponent + 2) * common; *pdf = (m_exponent + 1) * common; - return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; + return rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out); } } *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure *sc, @@ -80,14 +82,10 @@ ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc; @@ -97,12 +95,6 @@ ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc, if (cosNO > 0) { // reflect the view vector float3 R = (2 * cosNO) * bsdf->N - I; - -# ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; -# endif - float3 T, B; make_orthonormals(R, &T, &B); float phi = M_2PI_F * randu; @@ -119,12 +111,12 @@ ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc, float common = 0.5f * M_1_PI_F * cosp; *pdf = (m_exponent + 1) * common; float out = cosNI * (m_exponent + 2) * common; - *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; + *eval = rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out); } } } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } return LABEL_REFLECT | LABEL_GLOSSY; diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 5a7020e82d2..39cca1bd970 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -42,7 +42,7 @@ ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf * return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 +ccl_device Spectrum bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, @@ -52,7 +52,7 @@ bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bs const float NdotL = dot(N, L); if (NdotL <= 0) { - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } const float NdotV = dot(N, V); @@ -82,7 +82,7 @@ bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bs float value = M_1_PI_F * NdotL * f; - return make_float3(value, value, value); + return make_spectrum(value); } /* Compute Fresnel at entry point, to be combined with #PRINCIPLED_DIFFUSE_LAMBERT_EXIT @@ -109,10 +109,10 @@ ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf * return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc; @@ -126,30 +126,26 @@ ccl_device float3 bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderC } else { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } } -ccl_device float3 bsdf_principled_diffuse_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_principled_diffuse_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc; @@ -160,16 +156,10 @@ ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *s if (dot(Ng, *omega_in) > 0) { *eval = bsdf_principled_diffuse_compute_brdf(bsdf, N, I, *omega_in, pdf); - -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); -#endif } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h index 3a96a93db73..fa46f47eb21 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h +++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h @@ -32,7 +32,7 @@ ccl_device_inline float calculate_avg_principled_sheen_brdf(float3 N, float3 I) return schlick_fresnel(NdotI) * NdotI; } -ccl_device float3 +ccl_device Spectrum calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_private float *pdf) { float NdotL = dot(N, L); @@ -40,14 +40,14 @@ calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_priv if (NdotL < 0 || NdotV < 0) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } float LdotH = dot(L, H); float value = schlick_fresnel(LdotH) * NdotL; - return make_float3(value, value, value); + return make_spectrum(value); } ccl_device int bsdf_principled_sheen_setup(ccl_private const ShaderData *sd, @@ -59,10 +59,10 @@ ccl_device int bsdf_principled_sheen_setup(ccl_private const ShaderData *sd, return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc; @@ -77,30 +77,26 @@ ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClo } else { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } } -ccl_device float3 bsdf_principled_sheen_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_principled_sheen_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc; @@ -113,15 +109,9 @@ ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc, float3 H = normalize(I + *omega_in); *eval = calculate_principled_sheen_brdf(N, I, *omega_in, H, pdf); - -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); -#endif } else { - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); *pdf = 0.0f; } return LABEL_REFLECT | LABEL_DIFFUSE; diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h index c8db2b7cf13..5e6c6cdcde6 100644 --- a/intern/cycles/kernel/closure/bsdf_reflection.h +++ b/intern/cycles/kernel/closure/bsdf_reflection.h @@ -18,35 +18,31 @@ ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf) return SD_BSDF; } -ccl_device float3 bsdf_reflection_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_reflection_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_reflection_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_reflection_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; @@ -57,18 +53,14 @@ ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc, if (cosNO > 0) { *omega_in = (2 * cosNO) * N - I; if (dot(Ng, *omega_in) > 0) { -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx; - *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy; -#endif /* Some high number for MIS. */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); } } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_REFLECT | LABEL_SINGULAR; } diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h index 862e774da87..e680a9617db 100644 --- a/intern/cycles/kernel/closure/bsdf_refraction.h +++ b/intern/cycles/kernel/closure/bsdf_refraction.h @@ -18,35 +18,31 @@ ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf) return SD_BSDF; } -ccl_device float3 bsdf_refraction_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_refraction_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_refraction_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_refraction_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc; @@ -54,39 +50,19 @@ ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc, float3 N = bsdf->N; float3 R, T; -#ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; -#endif bool inside; float fresnel; - fresnel = fresnel_dielectric(m_eta, - N, - I, - &R, - &T, -#ifdef __RAY_DIFFERENTIALS__ - dIdx, - dIdy, - &dRdx, - &dRdy, - &dTdx, - &dTdy, -#endif - &inside); + fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, &inside); if (!inside && fresnel != 1.0f) { /* Some high number for MIS. */ *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + *eval = make_spectrum(1e6f); *omega_in = T; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; -#endif } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } return LABEL_TRANSMIT | LABEL_SINGULAR; } diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index 0400fc61860..c9086823de9 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -30,7 +30,7 @@ ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle) +ccl_device float bsdf_toon_get_intensity(float max_angle, float smooth, float angle) { float is; @@ -41,7 +41,7 @@ ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float a else is = 0.0f; - return make_float3(is, is, is); + return is; } ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth) @@ -49,48 +49,44 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth) return fminf(max_angle + smooth, M_PI_2_F); } -ccl_device float3 bsdf_diffuse_toon_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_toon_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc; float max_angle = bsdf->size * M_PI_2_F; float smooth = bsdf->smooth * M_PI_2_F; float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f)); - float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); + float eval = bsdf_toon_get_intensity(max_angle, smooth, angle); - if (eval.x > 0.0f) { + if (eval > 0.0f) { float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); - return *pdf * eval; + return make_spectrum(*pdf * eval); } *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc; @@ -103,21 +99,15 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc, sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf); if (dot(Ng, *omega_in) > 0.0f) { - *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); - -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; -#endif + *eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle)); } else { - *eval = make_float3(0.f, 0.f, 0.f); + *eval = zero_spectrum(); *pdf = 0.0f; } } else { - *eval = make_float3(0.f, 0.f, 0.f); + *eval = zero_spectrum(); *pdf = 0.0f; } @@ -135,10 +125,10 @@ ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf) return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc; float max_angle = bsdf->size * M_PI_2_F; @@ -153,36 +143,32 @@ ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure float angle = safe_acosf(fmaxf(cosRI, 0.0f)); - float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); + float eval = bsdf_toon_get_intensity(max_angle, smooth, angle); float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); - return *pdf * eval; + return make_spectrum(*pdf * eval); } *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc; @@ -204,21 +190,16 @@ ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc, /* make sure the direction we chose is still in the right hemisphere */ if (cosNI > 0) { - *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); - -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; -#endif + *eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle)); } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } } else { *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); + *eval = zero_spectrum(); } } diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h index 636d9d664f2..c2aee1e1633 100644 --- a/intern/cycles/kernel/closure/bsdf_transparent.h +++ b/intern/cycles/kernel/closure/bsdf_transparent.h @@ -11,7 +11,7 @@ CCL_NAMESPACE_BEGIN ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd, - const float3 weight, + const Spectrum weight, uint32_t path_flag) { /* Check cutoff weight. */ @@ -59,45 +59,37 @@ ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd, } } -ccl_device float3 bsdf_transparent_eval_reflect(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_transparent_eval_reflect(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } -ccl_device float3 bsdf_transparent_eval_transmit(ccl_private const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum bsdf_transparent_eval_transmit(ccl_private const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + ccl_private float *pdf) { *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return zero_spectrum(); } ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc, float3 Ng, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { // only one direction is possible *omega_in = -I; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = -dIdx; - *domega_in_dy = -dIdy; -#endif *pdf = 1; - *eval = make_float3(1, 1, 1); + *eval = one_spectrum(); return LABEL_TRANSMIT | LABEL_TRANSPARENT; } diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index e3b24d487f1..3c48b98fed9 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -15,14 +15,6 @@ ccl_device float fresnel_dielectric(float eta, const float3 I, ccl_private float3 *R, ccl_private float3 *T, -#ifdef __RAY_DIFFERENTIALS__ - const float3 dIdx, - const float3 dIdy, - ccl_private float3 *dRdx, - ccl_private float3 *dRdy, - ccl_private float3 *dTdx, - ccl_private float3 *dTdy, -#endif ccl_private bool *is_inside) { float cos = dot(N, I), neta; @@ -45,28 +37,16 @@ ccl_device float fresnel_dielectric(float eta, // compute reflection *R = (2 * cos) * Nn - I; -#ifdef __RAY_DIFFERENTIALS__ - *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx; - *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy; -#endif float arg = 1 - (neta * neta * (1 - (cos * cos))); if (arg < 0) { *T = make_float3(0.0f, 0.0f, 0.0f); -#ifdef __RAY_DIFFERENTIALS__ - *dTdx = make_float3(0.0f, 0.0f, 0.0f); - *dTdy = make_float3(0.0f, 0.0f, 0.0f); -#endif return 1; // total internal reflection } else { float dnp = max(sqrtf(arg), 1e-7f); float nK = (neta * cos) - dnp; *T = -(neta * I) + (nK * Nn); -#ifdef __RAY_DIFFERENTIALS__ - *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn; - *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn; -#endif // compute Fresnel terms float cosTheta1 = cos; // N.R float cosTheta2 = -dot(Nn, *T); @@ -110,8 +90,8 @@ ccl_device float schlick_fresnel(float u) } /* Calculate the fresnel color which is a blend between white and the F0 color (cspec0) */ -ccl_device_forceinline float3 -interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0) +ccl_device_forceinline Spectrum +interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, Spectrum cspec0) { /* Calculate the fresnel interpolation factor * The value from fresnel_dielectric_cos(...) has to be normalized because @@ -121,7 +101,7 @@ interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0 float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm; /* Blend between white and a specular color with respect to the fresnel */ - return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; + return cspec0 * (1.0f - FH) + make_spectrum(FH); } ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index b87790f5f8a..7131d9d8f38 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -8,8 +8,8 @@ CCL_NAMESPACE_BEGIN typedef struct Bssrdf { SHADER_CLOSURE_BASE; - float3 radius; - float3 albedo; + Spectrum radius; + Spectrum albedo; float roughness; float anisotropy; } Bssrdf; @@ -69,12 +69,13 @@ ccl_device void bssrdf_setup_radius(ccl_private Bssrdf *bssrdf, const float fourthirdA = (4.0f / 3.0f) * (1.0f + F_dr) / (1.0f - F_dr); /* From Jensen's `Fdr` ratio formula. */ - const float3 alpha_prime = make_float3( - bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.x, fourthirdA), - bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.y, fourthirdA), - bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.z, fourthirdA)); + Spectrum alpha_prime; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(alpha_prime, i) = bssrdf_dipole_compute_alpha_prime( + GET_SPECTRUM_CHANNEL(bssrdf->albedo, i), fourthirdA); + } - bssrdf->radius *= sqrt(3.0f * (one_float3() - alpha_prime)); + bssrdf->radius *= sqrt(3.0f * (one_spectrum() - alpha_prime)); } } @@ -98,7 +99,7 @@ ccl_device_inline float bssrdf_burley_fitting(float A) /* Scale mean free path length so it gives similar looking result * to Cubic and Gaussian models. */ -ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r) +ccl_device_inline Spectrum bssrdf_burley_compatible_mfp(Spectrum r) { return 0.25f * M_1_PI_F * r; } @@ -106,11 +107,13 @@ ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r) ccl_device void bssrdf_burley_setup(ccl_private Bssrdf *bssrdf) { /* Mean free path length. */ - const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius); + const Spectrum l = bssrdf_burley_compatible_mfp(bssrdf->radius); /* Surface albedo. */ - const float3 A = bssrdf->albedo; - const float3 s = make_float3( - bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z)); + const Spectrum A = bssrdf->albedo; + Spectrum s; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(s, i) = bssrdf_burley_fitting(GET_SPECTRUM_CHANNEL(A, i)); + } bssrdf->radius = l / s; } @@ -198,22 +201,18 @@ ccl_device void bssrdf_burley_sample(const float d, *h = safe_sqrtf(Rm * Rm - r_ * r_); } -ccl_device float bssrdf_num_channels(const float3 radius) +ccl_device float bssrdf_num_channels(const Spectrum radius) { float channels = 0; - if (radius.x > 0.0f) { - channels += 1.0f; - } - if (radius.y > 0.0f) { - channels += 1.0f; - } - if (radius.z > 0.0f) { - channels += 1.0f; + FOREACH_SPECTRUM_CHANNEL (i) { + if (GET_SPECTRUM_CHANNEL(radius, i) > 0.0f) { + channels += 1.0f; + } } return channels; } -ccl_device void bssrdf_sample(const float3 radius, +ccl_device void bssrdf_sample(const Spectrum radius, float xi, ccl_private float *r, ccl_private float *h) @@ -224,39 +223,44 @@ ccl_device void bssrdf_sample(const float3 radius, /* Sample color channel and reuse random number. Only a subset of channels * may be used if their radius was too small to handle as BSSRDF. */ xi *= num_channels; - - if (xi < 1.0f) { - sampled_radius = (radius.x > 0.0f) ? radius.x : (radius.y > 0.0f) ? radius.y : radius.z; - } - else if (xi < 2.0f) { - xi -= 1.0f; - sampled_radius = (radius.x > 0.0f && radius.y > 0.0f) ? radius.y : radius.z; - } - else { - xi -= 2.0f; - sampled_radius = radius.z; + sampled_radius = 0.0f; + + float sum = 0.0f; + FOREACH_SPECTRUM_CHANNEL (i) { + const float channel_radius = GET_SPECTRUM_CHANNEL(radius, i); + if (channel_radius > 0.0f) { + const float next_sum = sum + 1.0f; + if (xi < next_sum) { + xi -= sum; + sampled_radius = channel_radius; + break; + } + sum = next_sum; + } } /* Sample BSSRDF. */ bssrdf_burley_sample(sampled_radius, xi, r, h); } -ccl_device_forceinline float3 bssrdf_eval(const float3 radius, float r) +ccl_device_forceinline Spectrum bssrdf_eval(const Spectrum radius, float r) { - return make_float3(bssrdf_burley_pdf(radius.x, r), - bssrdf_burley_pdf(radius.y, r), - bssrdf_burley_pdf(radius.z, r)); + Spectrum result; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(result, i) = bssrdf_burley_pdf(GET_SPECTRUM_CHANNEL(radius, i), r); + } + return result; } -ccl_device_forceinline float bssrdf_pdf(const float3 radius, float r) +ccl_device_forceinline float bssrdf_pdf(const Spectrum radius, float r) { - float3 pdf = bssrdf_eval(radius, r); - return (pdf.x + pdf.y + pdf.z) / bssrdf_num_channels(radius); + Spectrum pdf = bssrdf_eval(radius, r); + return reduce_add(pdf) / bssrdf_num_channels(radius); } /* Setup */ -ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, float3 weight) +ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, Spectrum weight) { ccl_private Bssrdf *bssrdf = (ccl_private Bssrdf *)closure_alloc( sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight); @@ -294,31 +298,20 @@ ccl_device int bssrdf_setup(ccl_private ShaderData *sd, } /* Verify if the radii are large enough to sample without precision issues. */ - int bssrdf_channels = 3; - float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f); - - if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) { - diffuse_weight.x = bssrdf->weight.x; - bssrdf->weight.x = 0.0f; - bssrdf->radius.x = 0.0f; - bssrdf_channels--; - } - if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) { - diffuse_weight.y = bssrdf->weight.y; - bssrdf->weight.y = 0.0f; - bssrdf->radius.y = 0.0f; - bssrdf_channels--; - } - if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) { - diffuse_weight.z = bssrdf->weight.z; - bssrdf->weight.z = 0.0f; - bssrdf->radius.z = 0.0f; - bssrdf_channels--; + int bssrdf_channels = SPECTRUM_CHANNELS; + Spectrum diffuse_weight = zero_spectrum(); + + FOREACH_SPECTRUM_CHANNEL (i) { + if (GET_SPECTRUM_CHANNEL(bssrdf->radius, i) < BSSRDF_MIN_RADIUS) { + GET_SPECTRUM_CHANNEL(diffuse_weight, i) = GET_SPECTRUM_CHANNEL(bssrdf->weight, i); + GET_SPECTRUM_CHANNEL(bssrdf->weight, i) = 0.0f; + GET_SPECTRUM_CHANNEL(bssrdf->radius, i) = 0.0f; + bssrdf_channels--; + } } - if (bssrdf_channels < 3) { + if (bssrdf_channels < SPECTRUM_CHANNELS) { /* Add diffuse BSDF if any radius too small. */ -#ifdef __PRINCIPLED__ if (bssrdf->roughness != FLT_MAX) { ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight); @@ -329,9 +322,7 @@ ccl_device int bssrdf_setup(ccl_private ShaderData *sd, flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT); } } - else -#endif /* __PRINCIPLED__ */ - { + else { ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( sd, sizeof(DiffuseBsdf), diffuse_weight); diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h index 03e19cbde21..d896721f77b 100644 --- a/intern/cycles/kernel/closure/emissive.h +++ b/intern/cycles/kernel/closure/emissive.h @@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN /* BACKGROUND CLOSURE */ -ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight) +ccl_device void background_setup(ccl_private ShaderData *sd, const Spectrum weight) { if (sd->flag & SD_EMISSION) { sd->closure_emission_background += weight; @@ -25,7 +25,7 @@ ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight /* EMISSION CLOSURE */ -ccl_device void emission_setup(ccl_private ShaderData *sd, const float3 weight) +ccl_device void emission_setup(ccl_private ShaderData *sd, const Spectrum weight) { if (sd->flag & SD_EMISSION) { sd->closure_emission_background += weight; @@ -54,11 +54,11 @@ ccl_device void emissive_sample(const float3 Ng, /* todo: not implemented and used yet */ } -ccl_device float3 emissive_simple_eval(const float3 Ng, const float3 I) +ccl_device Spectrum emissive_simple_eval(const float3 Ng, const float3 I) { float res = emissive_pdf(Ng, I); - return make_float3(res, res, res); + return make_spectrum(res); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h index 6e24b60af39..9dbb5154457 100644 --- a/intern/cycles/kernel/closure/volume.h +++ b/intern/cycles/kernel/closure/volume.h @@ -7,7 +7,7 @@ CCL_NAMESPACE_BEGIN /* VOLUME EXTINCTION */ -ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, float3 weight) +ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, Spectrum weight) { if (sd->flag & SD_EXTINCTION) { sd->closure_transparent_extinction += weight; @@ -48,10 +48,10 @@ ccl_device int volume_henyey_greenstein_setup(ccl_private HenyeyGreensteinVolume return SD_SCATTER; } -ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc, - const float3 I, - float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc, + const float3 I, + float3 omega_in, + ccl_private float *pdf) { float g = svc->g; @@ -64,7 +64,7 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVo *pdf = single_peaked_henyey_greenstein(cos_theta, g); } - return make_float3(*pdf, *pdf, *pdf); + return make_spectrum(*pdf); } ccl_device float3 @@ -101,37 +101,27 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_priva ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc, float3 I, - float3 dIdx, - float3 dIdy, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private float3 *domega_in_dx, - ccl_private float3 *domega_in_dy, ccl_private float *pdf) { float g = svc->g; /* note that I points towards the viewer and so is used negated */ *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf); - *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */ - -#ifdef __RAY_DIFFERENTIALS__ - /* todo: implement ray differential estimation */ - *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f); - *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f); -#endif + *eval = make_spectrum(*pdf); /* perfect importance sampling */ return LABEL_VOLUME_SCATTER; } /* VOLUME CLOSURE */ -ccl_device float3 volume_phase_eval(ccl_private const ShaderData *sd, - ccl_private const ShaderVolumeClosure *svc, - float3 omega_in, - ccl_private float *pdf) +ccl_device Spectrum volume_phase_eval(ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *svc, + float3 omega_in, + ccl_private float *pdf) { return volume_henyey_greenstein_eval_phase(svc, sd->I, omega_in, pdf); } @@ -140,22 +130,11 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd, ccl_private const ShaderVolumeClosure *svc, float randu, float randv, - ccl_private float3 *eval, + ccl_private Spectrum *eval, ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, ccl_private float *pdf) { - return volume_henyey_greenstein_sample(svc, - sd->I, - sd->dI.dx, - sd->dI.dy, - randu, - randv, - eval, - omega_in, - &domega_in->dx, - &domega_in->dy, - pdf); + return volume_henyey_greenstein_sample(svc, sd->I, randu, randv, eval, omega_in, pdf); } /* Volume sampling utilities. */ @@ -164,45 +143,44 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd, * unnecessary work in volumes and subsurface scattering. */ #define VOLUME_THROUGHPUT_EPSILON 1e-6f -ccl_device float3 volume_color_transmittance(float3 sigma, float t) +ccl_device Spectrum volume_color_transmittance(Spectrum sigma, float t) { - return exp3(-sigma * t); + return exp(-sigma * t); } -ccl_device float volume_channel_get(float3 value, int channel) +ccl_device float volume_channel_get(Spectrum value, int channel) { - return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z); + return GET_SPECTRUM_CHANNEL(value, channel); } -ccl_device int volume_sample_channel(float3 albedo, - float3 throughput, +ccl_device int volume_sample_channel(Spectrum albedo, + Spectrum throughput, float rand, - ccl_private float3 *pdf) + ccl_private Spectrum *pdf) { /* Sample color channel proportional to throughput and single scattering * albedo, to significantly reduce noise with many bounce, following: * * "Practical and Controllable Subsurface Scattering for Production Path * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ - float3 weights = fabs(throughput * albedo); - float sum_weights = weights.x + weights.y + weights.z; + Spectrum weights = fabs(throughput * albedo); + float sum_weights = reduce_add(weights); if (sum_weights > 0.0f) { *pdf = weights / sum_weights; } else { - *pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f); + *pdf = make_spectrum(1.0f / SPECTRUM_CHANNELS); } - if (rand < pdf->x) { - return 0; - } - else if (rand < pdf->x + pdf->y) { - return 1; - } - else { - return 2; + float pdf_sum = 0.0f; + FOREACH_SPECTRUM_CHANNEL (i) { + pdf_sum += GET_SPECTRUM_CHANNEL(*pdf, i); + if (rand < pdf_sum) { + return i; + } } + return SPECTRUM_CHANNELS - 1; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/data_arrays.h b/intern/cycles/kernel/data_arrays.h new file mode 100644 index 00000000000..f2877e6c37f --- /dev/null +++ b/intern/cycles/kernel/data_arrays.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef KERNEL_DATA_ARRAY +# define KERNEL_DATA_ARRAY(type, name) +#endif + +/* BVH2, not used for OptiX or Embree. */ +KERNEL_DATA_ARRAY(float4, bvh_nodes) +KERNEL_DATA_ARRAY(float4, bvh_leaf_nodes) +KERNEL_DATA_ARRAY(uint, prim_type) +KERNEL_DATA_ARRAY(uint, prim_visibility) +KERNEL_DATA_ARRAY(uint, prim_index) +KERNEL_DATA_ARRAY(uint, prim_object) +KERNEL_DATA_ARRAY(uint, object_node) +KERNEL_DATA_ARRAY(float2, prim_time) + +/* objects */ +KERNEL_DATA_ARRAY(KernelObject, objects) +KERNEL_DATA_ARRAY(Transform, object_motion_pass) +KERNEL_DATA_ARRAY(DecomposedTransform, object_motion) +KERNEL_DATA_ARRAY(uint, object_flag) +KERNEL_DATA_ARRAY(float, object_volume_step) +KERNEL_DATA_ARRAY(uint, object_prim_offset) + +/* cameras */ +KERNEL_DATA_ARRAY(DecomposedTransform, camera_motion) + +/* triangles */ +KERNEL_DATA_ARRAY(uint, tri_shader) +KERNEL_DATA_ARRAY(packed_float3, tri_vnormal) +KERNEL_DATA_ARRAY(uint4, tri_vindex) +KERNEL_DATA_ARRAY(uint, tri_patch) +KERNEL_DATA_ARRAY(float2, tri_patch_uv) +KERNEL_DATA_ARRAY(packed_float3, tri_verts) + +/* curves */ +KERNEL_DATA_ARRAY(KernelCurve, curves) +KERNEL_DATA_ARRAY(float4, curve_keys) +KERNEL_DATA_ARRAY(KernelCurveSegment, curve_segments) + +/* patches */ +KERNEL_DATA_ARRAY(uint, patches) + +/* pointclouds */ +KERNEL_DATA_ARRAY(float4, points) +KERNEL_DATA_ARRAY(uint, points_shader) + +/* attributes */ +KERNEL_DATA_ARRAY(AttributeMap, attributes_map) +KERNEL_DATA_ARRAY(float, attributes_float) +KERNEL_DATA_ARRAY(float2, attributes_float2) +KERNEL_DATA_ARRAY(packed_float3, attributes_float3) +KERNEL_DATA_ARRAY(float4, attributes_float4) +KERNEL_DATA_ARRAY(uchar4, attributes_uchar4) + +/* lights */ +KERNEL_DATA_ARRAY(KernelLightDistribution, light_distribution) +KERNEL_DATA_ARRAY(KernelLight, lights) +KERNEL_DATA_ARRAY(float2, light_background_marginal_cdf) +KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf) + +/* particles */ +KERNEL_DATA_ARRAY(KernelParticle, particles) + +/* shaders */ +KERNEL_DATA_ARRAY(uint4, svm_nodes) +KERNEL_DATA_ARRAY(KernelShader, shaders) + +/* lookup tables */ +KERNEL_DATA_ARRAY(float, lookup_table) + +/* PMJ sample pattern */ +KERNEL_DATA_ARRAY(float, sample_pattern_lut) + +/* image textures */ +KERNEL_DATA_ARRAY(TextureInfo, texture_info) + +/* ies lights */ +KERNEL_DATA_ARRAY(float, ies) + +#undef KERNEL_DATA_ARRAY diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h new file mode 100644 index 00000000000..807d0650fc3 --- /dev/null +++ b/intern/cycles/kernel/data_template.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef KERNEL_STRUCT_BEGIN +# define KERNEL_STRUCT_BEGIN(name, parent) +#endif +#ifndef KERNEL_STRUCT_END +# define KERNEL_STRUCT_END(name) +#endif +#ifndef KERNEL_STRUCT_MEMBER +# define KERNEL_STRUCT_MEMBER(parent, type, name) +#endif + +/* Background. */ + +KERNEL_STRUCT_BEGIN(KernelBackground, background) +/* xyz store direction, w the angle. float4 instead of float3 is used + * to ensure consistent padding/alignment across devices. */ +KERNEL_STRUCT_MEMBER(background, float4, sun) +/* Only shader index. */ +KERNEL_STRUCT_MEMBER(background, int, surface_shader) +KERNEL_STRUCT_MEMBER(background, int, volume_shader) +KERNEL_STRUCT_MEMBER(background, float, volume_step_size) +KERNEL_STRUCT_MEMBER(background, int, transparent) +KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold) +/* Portal sampling. */ +KERNEL_STRUCT_MEMBER(background, float, portal_weight) +KERNEL_STRUCT_MEMBER(background, int, num_portals) +KERNEL_STRUCT_MEMBER(background, int, portal_offset) +/* Sun sampling. */ +KERNEL_STRUCT_MEMBER(background, float, sun_weight) +/* Importance map sampling. */ +KERNEL_STRUCT_MEMBER(background, float, map_weight) +KERNEL_STRUCT_MEMBER(background, int, map_res_x) +KERNEL_STRUCT_MEMBER(background, int, map_res_y) +/* Multiple importance sampling. */ +KERNEL_STRUCT_MEMBER(background, int, use_mis) +/* Lightgroup. */ +KERNEL_STRUCT_MEMBER(background, int, lightgroup) +/* Padding. */ +KERNEL_STRUCT_MEMBER(background, int, pad1) +KERNEL_STRUCT_MEMBER(background, int, pad2) +KERNEL_STRUCT_MEMBER(background, int, pad3) +KERNEL_STRUCT_END(KernelBackground) + +/* BVH: own BVH2 if no native device acceleration struct used. */ + +KERNEL_STRUCT_BEGIN(KernelBVH, bvh) +KERNEL_STRUCT_MEMBER(bvh, int, root) +KERNEL_STRUCT_MEMBER(bvh, int, have_motion) +KERNEL_STRUCT_MEMBER(bvh, int, have_curves) +KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout) +KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps) +KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions) +KERNEL_STRUCT_MEMBER(bvh, int, pad1) +KERNEL_STRUCT_MEMBER(bvh, int, pad2) +KERNEL_STRUCT_END(KernelBVH) + +/* Film. */ + +KERNEL_STRUCT_BEGIN(KernelFilm, film) +/* XYZ to rendering color space transform. float4 instead of float3 to + * ensure consistent padding/alignment across devices. */ +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_r) +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_g) +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_b) +KERNEL_STRUCT_MEMBER(film, float4, rgb_to_y) +/* Rec709 to rendering color space. */ +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r) +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g) +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b) +KERNEL_STRUCT_MEMBER(film, int, is_rec709) +/* Exposure. */ +KERNEL_STRUCT_MEMBER(film, float, exposure) +/* Passed used. */ +KERNEL_STRUCT_MEMBER(film, int, pass_flag) +KERNEL_STRUCT_MEMBER(film, int, light_pass_flag) +/* Pass offsets. */ +KERNEL_STRUCT_MEMBER(film, int, pass_stride) +KERNEL_STRUCT_MEMBER(film, int, pass_combined) +KERNEL_STRUCT_MEMBER(film, int, pass_depth) +KERNEL_STRUCT_MEMBER(film, int, pass_position) +KERNEL_STRUCT_MEMBER(film, int, pass_normal) +KERNEL_STRUCT_MEMBER(film, int, pass_roughness) +KERNEL_STRUCT_MEMBER(film, int, pass_motion) +KERNEL_STRUCT_MEMBER(film, int, pass_motion_weight) +KERNEL_STRUCT_MEMBER(film, int, pass_uv) +KERNEL_STRUCT_MEMBER(film, int, pass_object_id) +KERNEL_STRUCT_MEMBER(film, int, pass_material_id) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_color) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_color) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_color) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_emission) +KERNEL_STRUCT_MEMBER(film, int, pass_background) +KERNEL_STRUCT_MEMBER(film, int, pass_ao) +KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow) +KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte) +/* Cryptomatte. */ +KERNEL_STRUCT_MEMBER(film, int, cryptomatte_passes) +KERNEL_STRUCT_MEMBER(film, int, cryptomatte_depth) +KERNEL_STRUCT_MEMBER(film, int, pass_cryptomatte) +/* Adaptive sampling. */ +KERNEL_STRUCT_MEMBER(film, int, pass_adaptive_aux_buffer) +KERNEL_STRUCT_MEMBER(film, int, pass_sample_count) +/* Mist. */ +KERNEL_STRUCT_MEMBER(film, int, pass_mist) +KERNEL_STRUCT_MEMBER(film, float, mist_start) +KERNEL_STRUCT_MEMBER(film, float, mist_inv_depth) +KERNEL_STRUCT_MEMBER(film, float, mist_falloff) +/* Denoising. */ +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_normal) +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_albedo) +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_depth) +/* AOVs. */ +KERNEL_STRUCT_MEMBER(film, int, pass_aov_color) +KERNEL_STRUCT_MEMBER(film, int, pass_aov_value) +/* Light groups. */ +KERNEL_STRUCT_MEMBER(film, int, pass_lightgroup) +/* Baking. */ +KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive) +KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential) +/* Shadow catcher. */ +KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher) +/* Padding. */ +KERNEL_STRUCT_MEMBER(film, int, pad1) +KERNEL_STRUCT_MEMBER(film, int, pad2) +KERNEL_STRUCT_END(KernelFilm) + +/* Integrator. */ + +KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator) +/* Emission. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light) +KERNEL_STRUCT_MEMBER(integrator, int, num_distribution) +KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights) +KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles) +KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights) +KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold) +/* Bounces. */ +KERNEL_STRUCT_MEMBER(integrator, int, min_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_diffuse_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_glossy_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_transmission_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_volume_bounce) +/* AO bounces. */ +KERNEL_STRUCT_MEMBER(integrator, int, ao_bounces) +KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_distance) +KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_factor) +KERNEL_STRUCT_MEMBER(integrator, float, ao_additive_factor) +/* Transparency. */ +KERNEL_STRUCT_MEMBER(integrator, int, transparent_min_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, transparent_max_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, transparent_shadows) +/* Caustics. */ +KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective) +KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive) +KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy) +/* Seed. */ +KERNEL_STRUCT_MEMBER(integrator, int, seed) +/* Clamp. */ +KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct) +KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect) +/* MIS. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis) +/* Caustics. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_caustics) +/* Sampling pattern. */ +KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern) +KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance) +/* Volume render. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_volumes) +KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps) +KERNEL_STRUCT_MEMBER(integrator, float, volume_step_rate) +/* Shadow catcher. */ +KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher) +/* Closure filter. */ +KERNEL_STRUCT_MEMBER(integrator, int, filter_closures) +/* MIS debugging. */ +KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type) +/* Padding */ +KERNEL_STRUCT_MEMBER(integrator, int, pad1) +KERNEL_STRUCT_END(KernelIntegrator) + +/* SVM. For shader specialization. */ + +KERNEL_STRUCT_BEGIN(KernelSVMUsage, svm_usage) +#define SHADER_NODE_TYPE(type) KERNEL_STRUCT_MEMBER(svm_usage, int, type) +#include "kernel/svm/node_types_template.h" +KERNEL_STRUCT_END(KernelSVMUsage) + +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_END diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h new file mode 100644 index 00000000000..d9267e1cd6d --- /dev/null +++ b/intern/cycles/kernel/device/cpu/bvh.h @@ -0,0 +1,582 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* CPU Embree implementation of ray-scene intersection. */ + +#pragma once + +#include <embree3/rtcore_ray.h> +#include <embree3/rtcore_scene.h> + +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" +#include "kernel/geom/object.h" +#include "kernel/integrator/state.h" +#include "kernel/sample/lcg.h" + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +#define EMBREE_IS_HAIR(x) (x & 1) + +/* Intersection context. */ + +struct CCLIntersectContext { + typedef enum { + RAY_REGULAR = 0, + RAY_SHADOW_ALL = 1, + RAY_LOCAL = 2, + RAY_SSS = 3, + RAY_VOLUME_ALL = 4, + } RayType; + + KernelGlobals kg; + RayType type; + + /* For avoiding self intersections */ + const Ray *ray; + + /* for shadow rays */ + Intersection *isect_s; + uint max_hits; + uint num_hits; + uint num_recorded_hits; + float throughput; + float max_t; + bool opaque_hit; + + /* for SSS Rays: */ + LocalIntersection *local_isect; + int local_object_id; + uint *lcg_state; + + CCLIntersectContext(KernelGlobals kg_, RayType type_) + { + kg = kg_; + type = type_; + ray = NULL; + max_hits = 1; + num_hits = 0; + num_recorded_hits = 0; + throughput = 1.0f; + max_t = FLT_MAX; + opaque_hit = false; + isect_s = NULL; + local_isect = NULL; + local_object_id = -1; + lcg_state = NULL; + } +}; + +class IntersectContext { + public: + IntersectContext(CCLIntersectContext *ctx) + { + rtcInitIntersectContext(&context); + userRayExt = ctx; + } + RTCIntersectContext context; + CCLIntersectContext *userRayExt; +}; + +/* Utilities. */ + +ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, + RTCRay &rtc_ray, + const uint visibility) +{ + rtc_ray.org_x = ray.P.x; + rtc_ray.org_y = ray.P.y; + rtc_ray.org_z = ray.P.z; + rtc_ray.dir_x = ray.D.x; + rtc_ray.dir_y = ray.D.y; + rtc_ray.dir_z = ray.D.z; + rtc_ray.tnear = ray.tmin; + rtc_ray.tfar = ray.tmax; + rtc_ray.time = ray.time; + rtc_ray.mask = visibility; +} + +ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, + RTCRayHit &rayhit, + const uint visibility) +{ + kernel_embree_setup_ray(ray, rayhit.ray, visibility); + rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; +} + +ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg, + const RTCHit *hit, + const Ray *ray) +{ + int object, prim; + + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + object = hit->instID[0] / 2; + if ((ray->self.object == object) || (ray->self.light_object == object)) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->instID[0])); + prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + } + else { + return false; + } + } + else { + object = hit->geomID / 2; + if ((ray->self.object == object) || (ray->self.light_object == object)) { + prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.device_bvh, hit->geomID)); + } + else { + return false; + } + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + prim = kernel_data_fetch(curve_segments, prim).prim; + } + + return intersection_skip_self_shadow(ray->self, object, prim); +} + +ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, + const RTCRay *ray, + const RTCHit *hit, + Intersection *isect) +{ + isect->t = ray->tfar; + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->instID[0])); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = hit->instID[0] / 2; + } + else { + isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->geomID)); + isect->object = hit->geomID / 2; + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim); + isect->type = segment.type; + isect->prim = segment.prim; + isect->u = hit->u; + isect->v = hit->v; + } + else { + isect->type = kernel_data_fetch(objects, isect->object).primitive_type; + isect->u = hit->u; + isect->v = hit->v; + } +} + +ccl_device_inline void kernel_embree_convert_sss_hit( + KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) +{ + isect->u = hit->u; + isect->v = hit->v; + isect->t = ray->tfar; + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, object * 2)); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = object; + isect->type = kernel_data_fetch(objects, object).primitive_type; +} + +/* Ray filter functions. */ + +/* This gets called by Embree at every valid ray/object intersection. + * Things like recording subsurface or shadow hits for later evaluation + * as well as filtering for volume objects happen here. + * Cycles' own BVH does that directly inside the traversal calls. */ +ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. */ + assert(args->N == 1); + + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } +} + +/* This gets called by Embree at every valid ray/object intersection. + * Things like recording subsurface or shadow hits for later evaluation + * as well as filtering for volume objects happen here. + * Cycles' own BVH does that directly inside the traversal calls. + */ +ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. */ + assert(args->N == 1); + + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + switch (ctx->type) { + case CCLIntersectContext::RAY_SHADOW_ALL: { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } + /* If no transparent shadows or max number of hits exceeded, all light is blocked. */ + const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); + if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) { + ctx->opaque_hit = true; + return; + } + + ++ctx->num_hits; + + /* Always use baked shadow transparency for curves. */ + if (current_isect.type & PRIMITIVE_CURVE) { + ctx->throughput *= intersection_curve_shadow_transparency( + kg, current_isect.object, current_isect.prim, current_isect.u); + + if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + ctx->opaque_hit = true; + return; + } + else { + *args->valid = 0; + return; + } + } + + /* Test if we need to record this transparent intersection. */ + const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) { + /* If maximum number of hits was reached, replace the intersection with the + * highest distance. We want to find the N closest intersections. */ + const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits); + uint isect_index = num_recorded_hits; + if (num_recorded_hits + 1 >= max_record_hits) { + float max_t = ctx->isect_s[0].t; + uint max_recorded_hit = 0; + + for (uint i = 1; i < num_recorded_hits; ++i) { + if (ctx->isect_s[i].t > max_t) { + max_recorded_hit = i; + max_t = ctx->isect_s[i].t; + } + } + + if (num_recorded_hits >= max_record_hits) { + isect_index = max_recorded_hit; + } + + /* Limit the ray distance and stop counting hits beyond this. + * TODO: is there some way we can tell Embree to stop intersecting beyond + * this distance when max number of hits is reached?. Or maybe it will + * become irrelevant if we make max_hits a very high number on the CPU. */ + ctx->max_t = max(current_isect.t, max_t); + } + + ctx->isect_s[isect_index] = current_isect; + } + + /* Always increase the number of recorded hits, even beyond the maximum, + * so that we can detect this and trace another ray if needed. */ + ++ctx->num_recorded_hits; + + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_LOCAL: + case CCLIntersectContext::RAY_SSS: { + /* Check if it's hitting the correct object. */ + Intersection current_isect; + if (ctx->type == CCLIntersectContext::RAY_SSS) { + kernel_embree_convert_sss_hit(kg, ray, hit, ¤t_isect, ctx->local_object_id); + } + else { + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (ctx->local_object_id != current_isect.object) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + } + if (intersection_skip_self_local(cray->self, current_isect.prim)) { + *args->valid = 0; + return; + } + + /* No intersection information requested, just return a hit. */ + if (ctx->max_hits == 0) { + break; + } + + /* Ignore curves. */ + if (EMBREE_IS_HAIR(hit->geomID)) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + + LocalIntersection *local_isect = ctx->local_isect; + int hit_idx = 0; + + if (ctx->lcg_state) { + /* See triangle_intersect_subsurface() for the native equivalent. */ + for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == ray->tfar) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= ctx->max_hits) { + hit_idx = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits; + + if (hit_idx >= ctx->max_hits) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) { + *args->valid = 0; + return; + } + + local_isect->num_hits = 1; + } + + /* record intersection */ + local_isect->hits[hit_idx] = current_isect; + local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)); + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_VOLUME_ALL: { + /* Append the intersection to the end of the array. */ + if (ctx->num_hits < ctx->max_hits) { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } + + Intersection *isect = &ctx->isect_s[ctx->num_hits]; + ++ctx->num_hits; + *isect = current_isect; + /* Only primitives from volume object. */ + uint tri_object = isect->object; + int object_flag = kernel_data_fetch(object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + --ctx->num_hits; + } + /* This tells Embree to continue tracing. */ + *args->valid = 0; + } + break; + } + case CCLIntersectContext::RAY_REGULAR: + default: + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + return; + } + break; + } +} + +ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. */ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } + + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } +} + +ccl_device void kernel_embree_filter_occluded_func_backface_cull( + const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. */ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } + + kernel_embree_filter_occluded_func(args); +} + +/* Scene intersection. */ + +ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + isect->t = ray->tmax; + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); + IntersectContext rtc_ctx(&ctx); + RTCRayHit ray_hit; + ctx.ray = ray; + kernel_embree_setup_rayhit(*ray, ray_hit, visibility); + rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit); + if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID || + ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID) { + return false; + } + + kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); + return true; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) & + SD_OBJECT_TRANSFORM_APPLIED); + CCLIntersectContext ctx(kg, + has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); + ctx.lcg_state = lcg_state; + ctx.max_hits = max_hits; + ctx.ray = ray; + ctx.local_isect = local_isect; + if (local_isect) { + local_isect->num_hits = 0; + } + ctx.local_object_id = local_object; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); + + /* If this object has its own BVH, use it. */ + if (has_bvh) { + RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2); + if (geom) { + float3 P = ray->P; + float3 dir = ray->D; + float3 idir = ray->D; + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); + + rtc_ray.org_x = P.x; + rtc_ray.org_y = P.y; + rtc_ray.org_z = P.z; + rtc_ray.dir_x = dir.x; + rtc_ray.dir_y = dir.y; + rtc_ray.dir_z = dir.z; + rtc_ray.tnear = ray->tmin; + rtc_ray.tfar = ray->tmax; + RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); + kernel_assert(scene); + if (scene) { + rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); + } + } + } + else { + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + } + + /* rtcOccluded1 sets tfar to -inf if a hit was found. */ + return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowStateCPU *state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); + Intersection *isect_array = (Intersection *)state->shadow_isect; + ctx.isect_s = isect_array; + ctx.max_hits = max_hits; + ctx.ray = ray; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, visibility); + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + + *num_recorded_hits = ctx.num_recorded_hits; + *throughput = ctx.throughput; + return ctx.opaque_hit; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint max_hits, + const uint visibility) +{ + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); + ctx.isect_s = isect; + ctx.max_hits = max_hits; + ctx.num_hits = 0; + ctx.ray = ray; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, visibility); + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + return ctx.num_hits; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h index e1c20169582..1e3e790ca1f 100644 --- a/intern/cycles/kernel/device/cpu/compat.h +++ b/intern/cycles/kernel/device/cpu/compat.h @@ -3,8 +3,6 @@ #pragma once -#define __KERNEL_CPU__ - /* Release kernel has too much false-positive maybe-uninitialized warnings, * which makes it possible to miss actual warnings. */ @@ -35,52 +33,4 @@ CCL_NAMESPACE_BEGIN #define kernel_assert(cond) assert(cond) -/* Texture types to be compatible with CUDA textures. These are really just - * simple arrays and after inlining fetch hopefully revert to being a simple - * pointer lookup. */ -template<typename T> struct texture { - ccl_always_inline const T &fetch(int index) const - { - kernel_assert(index >= 0 && index < width); - return data[index]; - } - - T *data; - int width; -}; - -/* Macros to handle different memory storage on different devices */ - -#ifdef __KERNEL_SSE2__ -typedef vector3<sseb> sse3b; -typedef vector3<ssef> sse3f; -typedef vector3<ssei> sse3i; - -ccl_device_inline void print_sse3b(const char *label, sse3b &a) -{ - print_sseb(label, a.x); - print_sseb(label, a.y); - print_sseb(label, a.z); -} - -ccl_device_inline void print_sse3f(const char *label, sse3f &a) -{ - print_ssef(label, a.x); - print_ssef(label, a.y); - print_ssef(label, a.z); -} - -ccl_device_inline void print_sse3i(const char *label, sse3i &a) -{ - print_ssei(label, a.x); - print_ssei(label, a.y); - print_ssei(label, a.z); -} - -# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -typedef vector3<avxf> avx3f; -# endif - -#endif - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index 7e080d428ea..309afae412e 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in - * the kernel, to access constant data. These are all stored as "textures", but + * the kernel, to access constant data. These are all stored as flat arrays. * these are really just standard arrays. We can't use actually globals because * multiple renders may be running inside the same process. */ @@ -22,11 +22,23 @@ struct OSLThreadData; struct OSLShadingSystem; #endif +/* Array for kernel data, with size to be able to assert on invalid data access. */ +template<typename T> struct kernel_array { + ccl_always_inline const T &fetch(int index) const + { + kernel_assert(index >= 0 && index < width); + return data[index]; + } + + T *data; + int width; +}; + typedef struct KernelGlobalsCPU { -#define KERNEL_TEX(type, name) texture<type> name; -#include "kernel/textures.h" +#define KERNEL_DATA_ARRAY(type, name) kernel_array<type> name; +#include "kernel/data_arrays.h" - KernelData __data; + KernelData data; #ifdef __OSL__ /* On the CPU, we also have the OSL globals here. Most data structures are shared @@ -44,8 +56,8 @@ typedef struct KernelGlobalsCPU { typedef const KernelGlobalsCPU *ccl_restrict KernelGlobals; /* Abstraction macros */ -#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) -#define kernel_tex_array(tex) (kg->tex.data) -#define kernel_data (kg->__data) +#define kernel_data_fetch(name, index) (kg->name.fetch(index)) +#define kernel_data_array(name) (kg->name.data) +#define kernel_data (kg->data) CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h index 7809ec5f4a7..320e6309128 100644 --- a/intern/cycles/kernel/device/cpu/image.h +++ b/intern/cycles/kernel/device/cpu/image.h @@ -733,7 +733,7 @@ template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator { ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y) { - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + const TextureInfo &info = kernel_data_fetch(texture_info, id); if (UNLIKELY(!info.data)) { return zero_float4(); @@ -776,7 +776,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, float3 P, InterpolationType interp) { - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + const TextureInfo &info = kernel_data_fetch(texture_info, id); if (UNLIKELY(!info.data)) { return zero_float4(); diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index b12e3089378..01087c96dd6 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -53,8 +53,8 @@ CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t) { - if (strcmp(name, "__data") == 0) { - kg->__data = *(KernelData *)host; + if (strcmp(name, "data") == 0) { + kg->data = *(KernelData *)host; } else { assert(0); @@ -66,13 +66,13 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem if (0) { } -#define KERNEL_TEX(type, tname) \ +#define KERNEL_DATA_ARRAY(type, tname) \ else if (strcmp(name, #tname) == 0) \ { \ kg->tname.data = (type *)mem; \ kg->tname.width = size; \ } -#include "kernel/textures.h" +#include "kernel/data_arrays.h" else { assert(0); } diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 0e5f7b4a2fd..0d7c06f4fc6 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -34,7 +34,7 @@ # include "kernel/integrator/megakernel.h" # include "kernel/film/adaptive_sampling.h" -# include "kernel/film/id_passes.h" +# include "kernel/film/cryptomatte_passes.h" # include "kernel/film/read.h" # include "kernel/bake/bake.h" @@ -169,7 +169,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check); return false; #else - return kernel_adaptive_sampling_convergence_check( + return film_adaptive_sampling_convergence_check( kg, render_buffer, x, y, threshold, reset, offset, stride); #endif } @@ -185,7 +185,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x); #else - kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride); + film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride); #endif } @@ -200,7 +200,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y); #else - kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride); + film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride); #endif } @@ -215,7 +215,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU * #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess); #else - kernel_cryptomatte_post(kg, render_buffer, pixel_index); + film_cryptomatte_post(kg, render_buffer, pixel_index); #endif } diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h index e77fcd2b424..f5f7bcf58ee 100644 --- a/intern/cycles/kernel/device/cuda/globals.h +++ b/intern/cycles/kernel/device/cuda/globals.h @@ -20,18 +20,24 @@ struct KernelGlobalsGPU { }; typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; -/* Global scene data and textures */ -__constant__ KernelData __data; -#define KERNEL_TEX(type, name) const __constant__ __device__ type *name; -#include "kernel/textures.h" +struct KernelParamsCUDA { + /* Global scene data and textures */ + KernelData data; +#define KERNEL_DATA_ARRAY(type, name) const type *name; +#include "kernel/data_arrays.h" + + /* Integrator state */ + IntegratorStateGPU integrator_state; +}; -/* Integrator state */ -__constant__ IntegratorStateGPU __integrator_state; +#ifdef __KERNEL_GPU__ +__constant__ KernelParamsCUDA kernel_params; +#endif /* Abstraction macros */ -#define kernel_data __data -#define kernel_tex_fetch(t, index) t[(index)] -#define kernel_tex_array(t) (t) -#define kernel_integrator_state __integrator_state +#define kernel_data kernel_params.data +#define kernel_data_fetch(name, index) kernel_params.name[(index)] +#define kernel_data_array(name) (kernel_params.name) +#define kernel_integrator_state kernel_params.integrator_state CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h index 29d851ae478..a8c72645569 100644 --- a/intern/cycles/kernel/device/gpu/image.h +++ b/intern/cycles/kernel/device/gpu/image.h @@ -181,7 +181,7 @@ ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_in ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y) { - ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id); /* float4, byte4, ushort4 and half4 */ const int texture_type = info.data_type; @@ -216,7 +216,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, float3 P, InterpolationType interp) { - ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id); if (info.use_transform_3d) { P = transform_point(&info.transform_3d, P); diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index d657571a5fa..d7d2000775f 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -14,6 +14,8 @@ #ifdef __KERNEL_METAL__ # include "kernel/device/metal/context_begin.h" +#elif defined(__KERNEL_ONEAPI__) +# include "kernel/device/oneapi/context_begin.h" #endif #include "kernel/device/gpu/work_stealing.h" @@ -40,6 +42,8 @@ #ifdef __KERNEL_METAL__ # include "kernel/device/metal/context_end.h" +#elif defined(__KERNEL_ONEAPI__) +# include "kernel/device/oneapi/context_end.h" #endif #include "kernel/film/read.h" @@ -242,7 +246,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_postfix #if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__) -constant int __dummy_constant [[function_constant(0)]]; +constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]]; #endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) @@ -522,7 +526,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) bool converged = true; if (x < sw && y < sh) { - converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check( + converged = ccl_gpu_kernel_call(film_adaptive_sampling_convergence_check( nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride)); } @@ -549,7 +553,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) if (y < sh) { ccl_gpu_kernel_call( - kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride)); + film_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride)); } } ccl_gpu_kernel_postfix @@ -568,7 +572,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) if (x < sw) { ccl_gpu_kernel_call( - kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride)); + film_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride)); } } ccl_gpu_kernel_postfix @@ -585,7 +589,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) const int pixel_index = ccl_gpu_global_id_x(); if (pixel_index < num_pixels) { - ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index)); + ccl_gpu_kernel_call(film_cryptomatte_post(nullptr, render_buffer, pixel_index)); } } ccl_gpu_kernel_postfix diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index 7d7266d5edf..c1df49c4f49 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -18,15 +18,68 @@ CCL_NAMESPACE_BEGIN # define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512 #endif -#ifndef __KERNEL_METAL__ +/* TODO: abstract more device differences, define ccl_gpu_local_syncthreads, + * ccl_gpu_thread_warp, ccl_gpu_warp_index, ccl_gpu_num_warps for all devices + * and keep device specific code in compat.h */ + +#ifdef __KERNEL_ONEAPI__ +# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED +template<typename IsActiveOp> +void cpu_serial_active_index_array_impl(const uint num_states, + ccl_global int *ccl_restrict indices, + ccl_global int *ccl_restrict num_indices, + IsActiveOp is_active_op) +{ + int write_index = 0; + for (int state_index = 0; state_index < num_states; state_index++) { + if (is_active_op(state_index)) + indices[write_index++] = state_index; + } + *num_indices = write_index; + return; +} +# endif /* WITH_ONEAPI_SYCL_HOST_ENABLED */ + +template<typename IsActiveOp> +void gpu_parallel_active_index_array_impl(const uint num_states, + ccl_global int *ccl_restrict indices, + ccl_global int *ccl_restrict num_indices, + IsActiveOp is_active_op) +{ + const sycl::nd_item<1> &item_id = sycl::ext::oneapi::experimental::this_nd_item<1>(); + const uint blocksize = item_id.get_local_range(0); + + sycl::multi_ptr<int[GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE + 1], + sycl::access::address_space::local_space> + ptr = sycl::ext::oneapi::group_local_memory< + int[GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE + 1]>(item_id.get_group()); + int *warp_offset = *ptr; + + /* NOTE(@nsirgien): Here we calculate the same value as below but + * faster for DPC++ : seems CUDA converting "%", "/", "*" based calculations below into + * something faster already but DPC++ doesn't, so it's better to use + * direct request of needed parameters - switching from this computation to computation below + * will cause 2.5x performance slowdown. */ + const uint thread_index = item_id.get_local_id(0); + const uint thread_warp = item_id.get_sub_group().get_local_id(); + + const uint warp_index = item_id.get_sub_group().get_group_id(); + const uint num_warps = item_id.get_sub_group().get_group_range()[0]; + + const uint state_index = item_id.get_global_id(0); + + /* Test if state corresponding to this thread is active. */ + const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0; +#else /* !__KERNEL__ONEAPI__ */ +# ifndef __KERNEL_METAL__ template<uint blocksize, typename IsActiveOp> __device__ -#endif +# endif void gpu_parallel_active_index_array_impl(const uint num_states, ccl_global int *indices, ccl_global int *num_indices, -#ifdef __KERNEL_METAL__ +# ifdef __KERNEL_METAL__ const uint is_active, const uint blocksize, const int thread_index, @@ -37,7 +90,7 @@ __device__ const int num_warps, threadgroup int *warp_offset) { -#else +# else IsActiveOp is_active_op) { extern ccl_gpu_shared int warp_offset[]; @@ -52,18 +105,33 @@ __device__ /* Test if state corresponding to this thread is active. */ const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0; -#endif - +# endif +#endif /* !__KERNEL_ONEAPI__ */ /* For each thread within a warp compute how many other active states precede it. */ +#ifdef __KERNEL_ONEAPI__ + const uint thread_offset = sycl::exclusive_scan_over_group( + item_id.get_sub_group(), is_active, std::plus<>()); +#else const uint thread_offset = popcount(ccl_gpu_ballot(is_active) & ccl_gpu_thread_mask(thread_warp)); +#endif /* Last thread in warp stores number of active states for each warp. */ +#ifdef __KERNEL_ONEAPI__ + if (thread_warp == item_id.get_sub_group().get_local_range()[0] - 1) { +#else if (thread_warp == ccl_gpu_warp_size - 1) { +#endif warp_offset[warp_index] = thread_offset + is_active; } +#ifdef __KERNEL_ONEAPI__ + /* NOTE(@nsirgien): For us here only local memory writing (warp_offset) is important, + * so faster local barriers can be used. */ + ccl_gpu_local_syncthreads(); +#else ccl_gpu_syncthreads(); +#endif /* Last thread in block converts per-warp sizes to offsets, increments global size of * index array and gets offset to write to. */ @@ -80,7 +148,13 @@ __device__ warp_offset[num_warps] = atomic_fetch_and_add_uint32(num_indices, block_num_active); } +#ifdef __KERNEL_ONEAPI__ + /* NOTE(@nsirgien): For us here only important local memory writing (warp_offset), + * so faster local barriers can be used. */ + ccl_gpu_local_syncthreads(); +#else ccl_gpu_syncthreads(); +#endif /* Write to index array. */ if (is_active) { @@ -107,7 +181,19 @@ __device__ simd_group_index, \ num_simd_groups, \ simdgroup_offset) - +#elif defined(__KERNEL_ONEAPI__) +# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED +# define gpu_parallel_active_index_array( \ + blocksize, num_states, indices, num_indices, is_active_op) \ + if (ccl_gpu_global_size_x() == 1) \ + cpu_serial_active_index_array_impl(num_states, indices, num_indices, is_active_op); \ + else \ + gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op); +# else +# define gpu_parallel_active_index_array( \ + blocksize, num_states, indices, num_indices, is_active_op) \ + gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op) +# endif #else # define gpu_parallel_active_index_array( \ diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index 667352ed12e..648988c31b6 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ b/intern/cycles/kernel/device/hip/compat.h @@ -62,7 +62,7 @@ typedef unsigned long long uint64_t; #define ccl_gpu_block_idx_x (blockIdx.x) #define ccl_gpu_grid_dim_x (gridDim.x) #define ccl_gpu_warp_size (warpSize) -#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) +#define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1) #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x) #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x) diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h index 50f117038a2..3a334b21a9e 100644 --- a/intern/cycles/kernel/device/hip/globals.h +++ b/intern/cycles/kernel/device/hip/globals.h @@ -20,18 +20,24 @@ struct KernelGlobalsGPU { }; typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; -/* Global scene data and textures */ -__constant__ KernelData __data; -#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name; -#include "kernel/textures.h" +struct KernelParamsHIP { + /* Global scene data and textures */ + KernelData data; +#define KERNEL_DATA_ARRAY(type, name) const type *name; +#include "kernel/data_arrays.h" + + /* Integrator state */ + IntegratorStateGPU integrator_state; +}; -/* Integrator state */ -__constant__ IntegratorStateGPU __integrator_state; +#ifdef __KERNEL_GPU__ +__constant__ KernelParamsHIP kernel_params; +#endif /* Abstraction macros */ -#define kernel_data __data -#define kernel_tex_fetch(t, index) t[(index)] -#define kernel_tex_array(t) (t) -#define kernel_integrator_state __integrator_state +#define kernel_data kernel_params.data +#define kernel_data_fetch(name, index) kernel_params.name[(index)] +#define kernel_data_array(name) (kernel_params.name) +#define kernel_integrator_state kernel_params.integrator_state CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h new file mode 100644 index 00000000000..03faa3f020f --- /dev/null +++ b/intern/cycles/kernel/device/metal/bvh.h @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* MetalRT implementation of ray-scene intersection. */ + +#pragma once + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" + +CCL_NAMESPACE_BEGIN + +/* Payload types. */ + +struct MetalRTIntersectionPayload { + RaySelfPrimitives self; + uint visibility; + float u, v; + int prim; + int type; +#if defined(__METALRT_MOTION__) + float time; +#endif +}; + +struct MetalRTIntersectionLocalPayload { + RaySelfPrimitives self; + uint local_object; + uint lcg_state; + short max_hits; + bool has_lcg_state; + bool result; + LocalIntersection local_isect; +}; + +struct MetalRTIntersectionShadowPayload { + RaySelfPrimitives self; + uint visibility; +#if defined(__METALRT_MOTION__) + float time; +#endif + int state; + float throughput; + short max_hits; + short num_hits; + short num_recorded_hits; + bool result; +}; + +/* Scene intersection. */ + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + if (!intersection_ray_valid(ray)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + return false; + } + +#if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid ift_default"); + return false; + } +#endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.self = ray->self; + payload.u = 0.0f; + payload.v = 0.0f; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + /* No further intersector setup required: Default MetalRT behavior is any-hit. */ + } + else if (visibility & PATH_RAY_SHADOW_OPAQUE) { + /* No further intersector setup required: Shadow ray early termination is controlled by the + * intersection handler */ + } + +#if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +#else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +#endif + + if (intersection.type == intersection_type::none) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + + return false; + } + + isect->t = intersection.distance; + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.y; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + if (!intersection_ray_valid(ray)) { + if (local_isect) { + local_isect->num_hits = 0; + } + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid ift_local"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionLocalPayload payload; + payload.self = ray->self; + payload.local_object = local_object; + payload.max_hits = max_hits; + payload.local_isect.num_hits = 0; + if (lcg_state) { + payload.has_lcg_state = true; + payload.lcg_state = *lcg_state; + } + payload.result = false; + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); +# endif + + if (lcg_state) { + *lcg_state = payload.lcg_state; + } + *local_isect = payload.local_isect; + + return payload.result; +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + if (!intersection_ray_valid(ray)) { + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { + kernel_assert(!"Invalid ift_shadow"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionShadowPayload payload; + payload.self = ray->self; + payload.visibility = visibility; + payload.max_hits = max_hits; + payload.num_hits = 0; + payload.num_recorded_hits = 0; + payload.throughput = 1.0f; + payload.result = false; + payload.state = state; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_shadow, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); +# endif + + *num_recorded_hits = payload.num_recorded_hits; + *throughput = payload.throughput; + + return payload.result; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + if (!intersection_ray_valid(ray)) { + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + kernel_assert(!"Invalid ift_default"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.self = ray->self; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# endif + + if (intersection.type == intersection_type::none) { + return false; + } + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.y; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 0ed52074a90..130a9ebafae 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,11 +29,12 @@ using namespace metal::raytracing; /* Qualifiers */ -#if defined(__KERNEL_METAL_APPLE__) +/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill + * reduction for integator_shade_surface. However it comes at the cost of + * longer compile times (~4.5 minutes on M1 Max) and is disabled for that + * reason, until there is a user option to manually enable it. */ -/* Inline everything for Apple GPUs. - * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface - * at the cost of longer compile times (~4.5 minutes on M1 Max). */ +#if 0 // defined(__KERNEL_METAL_APPLE__) # define ccl_device __attribute__((always_inline)) # define ccl_device_inline __attribute__((always_inline)) @@ -45,8 +46,11 @@ using namespace metal::raytracing; # define ccl_device # define ccl_device_inline ccl_device # define ccl_device_forceinline ccl_device -# define ccl_device_noinline ccl_device __attribute__((noinline)) - +# if defined(__KERNEL_METAL_APPLE__) +# define ccl_device_noinline ccl_device +# else +# define ccl_device_noinline ccl_device __attribute__((noinline)) +# endif #endif #define ccl_device_noinline_cpu ccl_device @@ -189,35 +193,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ } volume_write_lambda_pass{kg, this, state}; /* make_type definitions with Metal style element initializers */ -#ifdef make_float2 -# undef make_float2 -#endif -#ifdef make_float3 -# undef make_float3 -#endif -#ifdef make_float4 -# undef make_float4 -#endif -#ifdef make_int2 -# undef make_int2 -#endif -#ifdef make_int3 -# undef make_int3 -#endif -#ifdef make_int4 -# undef make_int4 -#endif -#ifdef make_uchar4 -# undef make_uchar4 -#endif - -#define make_float2(x, y) float2(x, y) -#define make_float3(x, y, z) float3(x, y, z) -#define make_float4(x, y, z, w) float4(x, y, z, w) -#define make_int2(x, y) int2(x, y) -#define make_int3(x, y, z) int3(x, y, z) -#define make_int4(x, y, z, w) int4(x, y, z, w) -#define make_uchar4(x, y, z, w) uchar4(x, y, z, w) +ccl_device_forceinline float2 make_float2(const float x, const float y) +{ + return float2(x, y); +} + +ccl_device_forceinline float3 make_float3(const float x, const float y, const float z) +{ + return float3(x, y, z); +} + +ccl_device_forceinline float4 make_float4(const float x, + const float y, + const float z, + const float w) +{ + return float4(x, y, z, w); +} + +ccl_device_forceinline int2 make_int2(const int x, const int y) +{ + return int2(x, y); +} + +ccl_device_forceinline int3 make_int3(const int x, const int y, const int z) +{ + return int3(x, y, z); +} + +ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w) +{ + return int4(x, y, z, w); +} + +ccl_device_forceinline uchar4 make_uchar4(const uchar x, + const uchar y, + const uchar z, + const uchar w) +{ + return uchar4(x, y, z, w); +} /* Math functions */ @@ -260,8 +275,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ #ifdef __METALRT__ -# define __KERNEL_GPU_RAYTRACING__ - # if defined(__METALRT_MOTION__) # define METALRT_TAGS instancing, instance_motion, primitive_motion # else diff --git a/intern/cycles/kernel/device/metal/context_end.h b/intern/cycles/kernel/device/metal/context_end.h index b4c8661c401..44ac0478266 100644 --- a/intern/cycles/kernel/device/metal/context_end.h +++ b/intern/cycles/kernel/device/metal/context_end.h @@ -7,4 +7,4 @@ /* NOTE: These macros will need maintaining as entry-points change. */ #undef kernel_integrator_state -#define kernel_integrator_state context.launch_params_metal.__integrator_state +#define kernel_integrator_state context.launch_params_metal.integrator_state diff --git a/intern/cycles/kernel/device/metal/function_constants.h b/intern/cycles/kernel/device/metal/function_constants.h new file mode 100644 index 00000000000..3adf390c7f6 --- /dev/null +++ b/intern/cycles/kernel/device/metal/function_constants.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +enum { + Kernel_DummyConstant, +#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name, +#include "kernel/data_template.h" +}; + +#ifdef __KERNEL_METAL__ +# define KERNEL_STRUCT_MEMBER(parent, type, name) \ + constant type kernel_data_##parent##_##name \ + [[function_constant(KernelData_##parent##_##name)]]; +# include "kernel/data_template.h" +#endif diff --git a/intern/cycles/kernel/device/metal/globals.h b/intern/cycles/kernel/device/metal/globals.h index 1c3e775dbae..a336c096440 100644 --- a/intern/cycles/kernel/device/metal/globals.h +++ b/intern/cycles/kernel/device/metal/globals.h @@ -12,11 +12,11 @@ CCL_NAMESPACE_BEGIN typedef struct KernelParamsMetal { -#define KERNEL_TEX(type, name) ccl_global const type *name; -#include "kernel/textures.h" -#undef KERNEL_TEX +#define KERNEL_DATA_ARRAY(type, name) ccl_global const type *name; +#include "kernel/data_arrays.h" +#undef KERNEL_DATA_ARRAY - const IntegratorStateGPU __integrator_state; + const IntegratorStateGPU integrator_state; const KernelData data; } KernelParamsMetal; @@ -27,12 +27,10 @@ typedef struct KernelGlobalsGPU { typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; +/* Abstraction macros */ #define kernel_data launch_params_metal.data -#define kernel_integrator_state launch_params_metal.__integrator_state - -/* data lookup defines */ - -#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index] -#define kernel_tex_array(tex) launch_params_metal.tex +#define kernel_data_fetch(name, index) launch_params_metal.name[index] +#define kernel_data_array(name) launch_params_metal.name +#define kernel_integrator_state launch_params_metal.integrator_state CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index a7252570e64..5646c7446db 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -1,40 +1,44 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2021-2022 Blender Foundation */ -/* Metal kernel entry points */ +/* Metal kernel entry points. */ #include "kernel/device/metal/compat.h" #include "kernel/device/metal/globals.h" +#include "kernel/device/metal/function_constants.h" #include "kernel/device/gpu/kernel.h" -/* MetalRT intersection handlers */ +/* MetalRT intersection handlers. */ + #ifdef __METALRT__ -/* Return type for a bounding box intersection function. */ -struct BoundingBoxIntersectionResult -{ +/* Intersection return types. */ + +/* For a bounding box intersection function. */ +struct BoundingBoxIntersectionResult { bool accept [[accept_intersection]]; bool continue_search [[continue_search]]; float distance [[distance]]; }; -/* Return type for a triangle intersection function. */ -struct TriangleIntersectionResult -{ +/* For a triangle intersection function. */ +struct TriangleIntersectionResult { bool accept [[accept_intersection]]; - bool continue_search [[continue_search]]; + bool continue_search [[continue_search]]; }; enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; -ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self, +/* Utilities. */ + +ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self, const int object, const int prim) { return (self.prim == prim) && (self.object == object); } -ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self, +ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self, const int object, const int prim) { @@ -42,12 +46,14 @@ ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimi ((self.light_prim == prim) && (self.light_object == object)); } -ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self, +ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self, const int prim) { return (self.prim == prim); } +/* Hit functions. */ + template<typename TReturn, uint intersection_type> TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, @@ -57,9 +63,9 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, const float ray_tmax) { TReturn result; - + #ifdef __BVH_LOCAL__ - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { /* Only intersect with matching object and skip self-intersecton. */ @@ -100,7 +106,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, } else { if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { - /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */ + /* Record closest intersection only. Do not terminate ray here, since there is no guarantee + * about distance ordering in any-hit */ result.accept = false; result.continue_search = true; return result; @@ -113,16 +120,16 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, isect->t = ray_tmax; isect->prim = prim; isect->object = object; - isect->type = kernel_tex_fetch(__objects, object).primitive_type; + isect->type = kernel_data_fetch(objects, object).primitive_type; - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; + isect->u = barycentrics.x; + isect->v = barycentrics.y; /* Record geometric normal */ - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w; - const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); - const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); - const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w; + const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0)); + const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1)); + const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2)); payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); /* Continue tracing (without this the trace call would return after the first hit) */ @@ -132,21 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, #endif } -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], - uint instance_id [[user_instance_id]], - uint primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_local_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint instance_id [[user_instance_id]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) { return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); + launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) { /* unused function */ @@ -168,30 +174,21 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, #ifdef __SHADOW_RECORD_ALL__ # ifdef __VISIBILITY_FLAG__ const uint visibility = payload.visibility; - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { /* continue search */ return true; } # endif - if (intersection_skip_self_shadow(payload.self, object, prim)) { - /* continue search */ - return true; - } - - float u = 0.0f, v = 0.0f; + const float u = barycentrics.x; + const float v = barycentrics.y; int type = 0; if (intersection_type == METALRT_HIT_TRIANGLE) { - u = 1.0f - barycentrics.y - barycentrics.x; - v = barycentrics.x; - type = kernel_tex_fetch(__objects, object).primitive_type; + type = kernel_data_fetch(objects, object).primitive_type; } # ifdef __HAIR__ else { - u = barycentrics.x; - v = barycentrics.y; - - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); type = segment.type; prim = segment.prim; @@ -203,6 +200,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, } # endif + if (intersection_skip_self_shadow(payload.self, object, prim)) { + /* continue search */ + return true; + } + # ifndef __TRANSPARENT_SHADOWS__ /* No transparent shadows support compiled in, make opaque. */ payload.result = true; @@ -214,7 +216,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, short num_recorded_hits = payload.num_recorded_hits; MetalKernelContext context(launch_params_metal); - + /* If no transparent shadows, all light is blocked and we can stop immediately. */ if (num_hits >= max_hits || !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { @@ -222,7 +224,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, /* terminate ray */ return false; } - + /* Always use baked shadow transparency for curves. */ if (type & PRIMITIVE_CURVE) { float throughput = payload.throughput; @@ -239,10 +241,10 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, return true; } } - + payload.num_hits += 1; payload.num_recorded_hits += 1; - + uint record_index = num_recorded_hits; const IntegratorShadowState state = payload.state; @@ -277,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; - + /* Continue tracing. */ # endif /* __TRANSPARENT_SHADOWS__ */ #endif /* __SHADOW_RECORD_ALL__ */ @@ -285,26 +287,25 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, return true; } -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); TriangleIntersectionResult result; result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, barycentrics, ray_tmax); + launch_params_metal, payload, object, prim, barycentrics, ray_tmax); result.accept = !result.continue_search; return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) { /* unused function */ @@ -316,15 +317,16 @@ __anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance] } template<typename TReturnType, uint intersection_type> -inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const float u) +inline TReturnType metalrt_visibility_test( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + uint prim, + const float u) { TReturnType result; - -# ifdef __HAIR__ + +#ifdef __HAIR__ if (intersection_type == METALRT_HIT_BOUNDING_BOX) { /* Filter out curve endcaps. */ if (u == 0.0f || u == 1.0f) { @@ -333,15 +335,23 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa return result; } } -# endif +#endif uint visibility = payload.visibility; -# ifdef __VISIBILITY_FLAG__ - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { +#ifdef __VISIBILITY_FLAG__ + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { result.accept = false; result.continue_search = true; return result; } +#endif + + if (intersection_type == METALRT_HIT_TRIANGLE) { + } +# ifdef __HAIR__ + else { + prim = kernel_data_fetch(curve_segments, prim).prim; + } # endif /* Shadow ray early termination. */ @@ -370,25 +380,25 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa return result; } -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]]) +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_visibility_test_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, 0.0f); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + TriangleIntersectionResult result = + metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, 0.0f); if (result.accept) { payload.prim = prim; - payload.type = kernel_tex_fetch(__objects, object).primitive_type; + payload.type = kernel_data_fetch(objects, object).primitive_type; } return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) { /* Unused function */ @@ -399,45 +409,39 @@ __anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance return result; } +/* Primitive intersection functions. */ + #ifdef __HAIR__ -ccl_device_inline -void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) +ccl_device_inline void metalrt_intersection_curve( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) { # ifdef __VISIBILITY_FLAG__ const uint visibility = payload.visibility; - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { return; } # endif - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - Intersection isect; isect.t = ray_tmax; - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) - isect.t *= len; MetalKernelContext context(launch_params_metal); - if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); + launch_params_metal, payload, object, prim, isect.u); if (result.accept) { - result.distance = isect.t / len; + result.distance = isect.t; payload.u = isect.u; payload.v = isect.v; payload.prim = prim; @@ -446,57 +450,46 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal, } } -ccl_device_inline -void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) +ccl_device_inline void metalrt_intersection_curve_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) { const uint visibility = payload.visibility; - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - Intersection isect; isect.t = ray_tmax; - /* Transform maximum distance into object space */ - if (isect.t != FLT_MAX) - isect.t *= len; MetalKernelContext context(launch_params_metal); - if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); result.accept = !result.continue_search; - - if (result.accept) { - result.distance = isect.t / len; - } } } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], const uint object [[user_instance_id]], const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], const float ray_tmax [[max_distance]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); BoundingBoxIntersectionResult result; result.accept = false; @@ -504,30 +497,39 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b result.distance = ray_tmax; if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, # if defined(__METALRT_MOTION__) payload.time, # else 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); } return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_ribbon_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); BoundingBoxIntersectionResult result; result.accept = false; @@ -535,115 +537,133 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me result.distance = ray_tmax; if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, + metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, # if defined(__METALRT_MOTION__) - payload.time, + payload.time, # else - 0.0f, + 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); } return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], const uint object [[user_instance_id]], const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], const float ray_tmax [[max_distance]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); - + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + BoundingBoxIntersectionResult result; result.accept = false; result.continue_search = true; result.distance = ray_tmax; - metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, # if defined(__METALRT_MOTION__) payload.time, # else 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_all_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) { - uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); BoundingBoxIntersectionResult result; result.accept = false; result.continue_search = true; result.distance = ray_tmax; - metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, + metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, # if defined(__METALRT_MOTION__) - payload.time, + payload.time, # else - 0.0f, + 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); return result; } #endif /* __HAIR__ */ #ifdef __POINTCLOUD__ -ccl_device_inline -void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) +ccl_device_inline void metalrt_intersection_point( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) { # ifdef __VISIBILITY_FLAG__ const uint visibility = payload.visibility; - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { return; } # endif - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the point intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - Intersection isect; isect.t = ray_tmax; - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) - isect.t *= len; MetalKernelContext context(launch_params_metal); - if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); + launch_params_metal, payload, object, prim, isect.u); if (result.accept) { - result.distance = isect.t / len; + result.distance = isect.t; payload.u = isect.u; payload.v = isect.v; payload.prim = prim; @@ -652,99 +672,108 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal, } } -ccl_device_inline -void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) +ccl_device_inline void metalrt_intersection_point_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) { const uint visibility = payload.visibility; - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the point intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - Intersection isect; isect.t = ray_tmax; - /* Transform maximum distance into object space */ - if (isect.t != FLT_MAX) - isect.t *= len; MetalKernelContext context(launch_params_metal); - if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); result.accept = !result.continue_search; if (result.accept) { - result.distance = isect.t / len; + result.distance = isect.t; } } } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) { - const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const int type = kernel_tex_fetch(__objects, object).primitive_type; + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; BoundingBoxIntersectionResult result; result.accept = false; result.continue_search = true; result.distance = ray_tmax; - metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, + metalrt_intersection_point(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, # if defined(__METALRT_MOTION__) payload.time, # else 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); return result; } -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) { - const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - const int type = kernel_tex_fetch(__objects, object).primitive_type; + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; BoundingBoxIntersectionResult result; result.accept = false; result.continue_search = true; result.distance = ray_tmax; - metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, + metalrt_intersection_point_shadow(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, # if defined(__METALRT_MOTION__) - payload.time, + payload.time, # else - 0.0f, + 0.0f, # endif - ray_tmax, result); + ray_tmin, + ray_tmax, + result); return result; } diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h new file mode 100644 index 00000000000..5c49674f247 --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/compat.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#pragma once + +#define __KERNEL_GPU__ +#define __KERNEL_ONEAPI__ + +#define CCL_NAMESPACE_BEGIN +#define CCL_NAMESPACE_END + +#include <cstdint> + +#ifndef __NODES_MAX_GROUP__ +# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX +#endif +#ifndef __NODES_FEATURES__ +# define __NODES_FEATURES__ NODE_FEATURE_ALL +#endif + +/* This one does not have an abstraction. + * It's used by other devices directly. + */ + +#define __device__ + +/* Qualifier wrappers for different names on different devices */ + +#define ccl_device +#define ccl_global +#define ccl_always_inline __attribute__((always_inline)) +#define ccl_device_inline inline +#define ccl_noinline __attribute__((noinline)) +#define ccl_inline_constant const constexpr +#define ccl_static_constant const +#define ccl_device_forceinline __attribute__((always_inline)) +#define ccl_device_noinline ccl_device ccl_noinline +#define ccl_device_noinline_cpu ccl_device +#define ccl_device_inline_method ccl_device +#define ccl_restrict __restrict__ +#define ccl_loop_no_unroll +#define ccl_optional_struct_init +#define ccl_private +#define ATTR_FALLTHROUGH __attribute__((fallthrough)) +#define ccl_constant const +#define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) +#define ccl_align(n) __attribute__((aligned(n))) +#define kernel_assert(cond) +#define ccl_may_alias + +/* clang-format off */ + +/* kernel.h adapters */ +#define ccl_gpu_kernel(block_num_threads, thread_num_registers) +#define ccl_gpu_kernel_threads(block_num_threads) + +#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED +# define KG_ND_ITEMS \ + kg->nd_item_local_id_0 = item.get_local_id(0); \ + kg->nd_item_local_range_0 = item.get_local_range(0); \ + kg->nd_item_group_0 = item.get_group(0); \ + kg->nd_item_group_range_0 = item.get_group_range(0); \ + kg->nd_item_global_id_0 = item.get_global_id(0); \ + kg->nd_item_global_range_0 = item.get_global_range(0); +#else +# define KG_ND_ITEMS +#endif + +#define ccl_gpu_kernel_signature(name, ...) \ +void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ + size_t kernel_global_size, \ + size_t kernel_local_size, \ + sycl::handler &cgh, \ + __VA_ARGS__) { \ + (kg); \ + cgh.parallel_for<class kernel_##name>( \ + sycl::nd_range<1>(kernel_global_size, kernel_local_size), \ + [=](sycl::nd_item<1> item) { \ + KG_ND_ITEMS + +#define ccl_gpu_kernel_postfix \ + }); \ + } + +#define ccl_gpu_kernel_call(x) ((ONEAPIKernelContext*)kg)->x + +#define ccl_gpu_kernel_lambda(func, ...) \ + struct KernelLambda \ + { \ + KernelLambda(const ONEAPIKernelContext *_kg) : kg(_kg) {} \ + ccl_private const ONEAPIKernelContext *kg; \ + __VA_ARGS__; \ + int operator()(const int state) const { return (func); } \ + } ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg) + +/* GPU thread, block, grid size and index */ +#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED +# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0)) +# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0)) +# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0)) +# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0)) +# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) +# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) + +# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0)) +# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0)) +#else +# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0) +# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0) +# define ccl_gpu_block_idx_x (kg->nd_item_group_0) +# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0) +# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) +# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) + +# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0) +# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0) +#endif + + +/* GPU warp synchronization */ + +#define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier() +#define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space) +#ifdef __SYCL_DEVICE_ONLY__ + #define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count()) +#else + #define ccl_gpu_ballot(predicate) (predicate ? 1 : 0) +#endif + +/* Debug defines */ +#if defined(__SYCL_DEVICE_ONLY__) +# define CONSTANT __attribute__((opencl_constant)) +#else +# define CONSTANT +#endif + +#define sycl_printf(format, ...) { \ + static const CONSTANT char fmt[] = format; \ + sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \ + } + +#define sycl_printf_(format) { \ + static const CONSTANT char fmt[] = format; \ + sycl::ext::oneapi::experimental::printf(fmt); \ + } + +/* GPU texture objects */ + +/* clang-format on */ + +/* Types */ + +/* It's not possible to use sycl types like sycl::float3, sycl::int3, etc + * because these types have different interfaces from blender version. */ + +using uchar = unsigned char; +using sycl::half; + +/* math functions */ +#define fabsf(x) sycl::fabs((x)) +#define copysignf(x, y) sycl::copysign((x), (y)) +#define asinf(x) sycl::asin((x)) +#define acosf(x) sycl::acos((x)) +#define atanf(x) sycl::atan((x)) +#define floorf(x) sycl::floor((x)) +#define ceilf(x) sycl::ceil((x)) +#define sinhf(x) sycl::sinh((x)) +#define coshf(x) sycl::cosh((x)) +#define tanhf(x) sycl::tanh((x)) +#define hypotf(x, y) sycl::hypot((x), (y)) +#define atan2f(x, y) sycl::atan2((x), (y)) +#define fmaxf(x, y) sycl::fmax((x), (y)) +#define fminf(x, y) sycl::fmin((x), (y)) +#define fmodf(x, y) sycl::fmod((x), (y)) +#define lgammaf(x) sycl::lgamma((x)) + +#define __forceinline __attribute__((always_inline)) + +/* Types */ +#include "util/half.h" +#include "util/types.h" + +/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they + * include oneAPI headers, which transitively include math.h headers which will cause redefinitions + * of the math defines because math.h also uses them and having them defined before math.h include + * is actually UB. */ +/* Use fast math functions - get them from sycl::native namespace for native math function + * implementations */ +#define cosf(x) sycl::native::cos(((float)(x))) +#define sinf(x) sycl::native::sin(((float)(x))) +#define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y))) +#define tanf(x) sycl::native::tan(((float)(x))) +#define logf(x) sycl::native::log(((float)(x))) +#define expf(x) sycl::native::exp(((float)(x))) diff --git a/intern/cycles/kernel/device/oneapi/context_begin.h b/intern/cycles/kernel/device/oneapi/context_begin.h new file mode 100644 index 00000000000..6d6f8cec4ca --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/context_begin.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#ifdef WITH_NANOVDB +# include <nanovdb/NanoVDB.h> +# include <nanovdb/util/SampleFromVoxels.h> +#endif + +/* clang-format off */ +struct ONEAPIKernelContext : public KernelGlobalsGPU { + public: +# include "kernel/device/oneapi/image.h" + /* clang-format on */ diff --git a/intern/cycles/kernel/device/oneapi/context_end.h b/intern/cycles/kernel/device/oneapi/context_end.h new file mode 100644 index 00000000000..ddf0d1f1712 --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/context_end.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ +} +; /* end of ONEAPIKernelContext class definition */ + +#undef kernel_integrator_state +#define kernel_integrator_state (*(kg->integrator_state)) diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h new file mode 100644 index 00000000000..5dd0d4203a4 --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/dll_interface_template.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2022 Intel Corporation */ + +/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */ +DLL_INTERFACE_CALL(oneapi_device_capabilities, char *) +DLL_INTERFACE_CALL(oneapi_free, void, void *) +DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue) + +DLL_INTERFACE_CALL(oneapi_get_num_multiprocessors, int, SyclQueue *queue) +DLL_INTERFACE_CALL(oneapi_get_max_num_threads_per_multiprocessor, int, SyclQueue *queue) +DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr) +DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr) + +DLL_INTERFACE_CALL(oneapi_create_queue, bool, SyclQueue *&external_queue, int device_index) +DLL_INTERFACE_CALL(oneapi_free_queue, void, SyclQueue *queue) +DLL_INTERFACE_CALL( + oneapi_usm_aligned_alloc_host, void *, SyclQueue *queue, size_t memory_size, size_t alignment) +DLL_INTERFACE_CALL(oneapi_usm_alloc_device, void *, SyclQueue *queue, size_t memory_size) +DLL_INTERFACE_CALL(oneapi_usm_free, void, SyclQueue *queue, void *usm_ptr) + +DLL_INTERFACE_CALL( + oneapi_usm_memcpy, bool, SyclQueue *queue, void *dest, void *src, size_t num_bytes) +DLL_INTERFACE_CALL(oneapi_queue_synchronize, bool, SyclQueue *queue) +DLL_INTERFACE_CALL(oneapi_usm_memset, + bool, + SyclQueue *queue, + void *usm_ptr, + unsigned char value, + size_t num_bytes) + +DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue) + +/* Operation with Kernel globals structure - map of global/constant allocation - filled before + * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as + * pointer. */ +DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size) +DLL_INTERFACE_CALL(oneapi_set_global_memory, + void, + SyclQueue *queue, + void *kernel_globals, + const char *memory_name, + void *memory_device_pointer) + +DLL_INTERFACE_CALL(oneapi_kernel_preferred_local_size, + size_t, + SyclQueue *queue, + const DeviceKernel kernel, + const size_t kernel_global_size) +DLL_INTERFACE_CALL(oneapi_enqueue_kernel, + bool, + KernelContext *context, + int kernel, + size_t global_size, + void **args) diff --git a/intern/cycles/kernel/device/oneapi/globals.h b/intern/cycles/kernel/device/oneapi/globals.h new file mode 100644 index 00000000000..d60f4f135ba --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/globals.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#pragma once + +#include "kernel/integrator/state.h" +#include "kernel/types.h" +#include "kernel/util/profiling.h" + +CCL_NAMESPACE_BEGIN + +/* NOTE(@nsirgien): With SYCL we can't declare __constant__ global variable, which will be + * accessible from device code, like it has been done for Cycles CUDA backend. So, the backend will + * allocate this "constant" memory regions and store pointers to them in oneAPI context class */ + +struct IntegratorStateGPU; +struct IntegratorQueueCounter; + +typedef struct KernelGlobalsGPU { + +#define KERNEL_DATA_ARRAY(type, name) const type *__##name = nullptr; +#include "kernel/data_arrays.h" +#undef KERNEL_DATA_ARRAY + IntegratorStateGPU *integrator_state; + const KernelData *__data; +#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED + size_t nd_item_local_id_0; + size_t nd_item_local_range_0; + size_t nd_item_group_0; + size_t nd_item_group_range_0; + + size_t nd_item_global_id_0; + size_t nd_item_global_range_0; +#endif +} KernelGlobalsGPU; + +typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals; + +#define kernel_data (*(__data)) +#define kernel_integrator_state (*(integrator_state)) + +/* data lookup defines */ + +#define kernel_data_fetch(name, index) __##name[index] +#define kernel_data_array(name) __##name + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h new file mode 100644 index 00000000000..2417b8eac3b --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/image.h @@ -0,0 +1,383 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +CCL_NAMESPACE_BEGIN + +/* For oneAPI implementation we do manual lookup and interpolation. */ +/* TODO: share implementation with ../cpu/image.h. */ + +template<typename T> ccl_device_forceinline T tex_fetch(const TextureInfo &info, int index) +{ + return reinterpret_cast<ccl_global T *>(info.data)[index]; +} + +ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) +{ + x %= width; + if (x < 0) + x += width; + return x; +} + +ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) +{ + return clamp(x, 0, width - 1); +} + +ccl_device_inline float4 svm_image_texture_read(const TextureInfo &info, int x, int y, int z) +{ + const int data_offset = x + info.width * y + info.width * info.height * z; + const int texture_type = info.data_type; + + /* Float4 */ + if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { + return tex_fetch<float4>(info, data_offset); + } + /* Byte4 */ + else if (texture_type == IMAGE_DATA_TYPE_BYTE4) { + uchar4 r = tex_fetch<uchar4>(info, data_offset); + float f = 1.0f / 255.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + /* Ushort4 */ + else if (texture_type == IMAGE_DATA_TYPE_USHORT4) { + ushort4 r = tex_fetch<ushort4>(info, data_offset); + float f = 1.0f / 65535.f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + /* Float */ + else if (texture_type == IMAGE_DATA_TYPE_FLOAT) { + float f = tex_fetch<float>(info, data_offset); + return make_float4(f, f, f, 1.0f); + } + /* UShort */ + else if (texture_type == IMAGE_DATA_TYPE_USHORT) { + ushort r = tex_fetch<ushort>(info, data_offset); + float f = r * (1.0f / 65535.0f); + return make_float4(f, f, f, 1.0f); + } + else if (texture_type == IMAGE_DATA_TYPE_HALF) { + float f = tex_fetch<half>(info, data_offset); + return make_float4(f, f, f, 1.0f); + } + else if (texture_type == IMAGE_DATA_TYPE_HALF4) { + half4 r = tex_fetch<half4>(info, data_offset); + return make_float4(r.x, r.y, r.z, r.w); + } + /* Byte */ + else { + uchar r = tex_fetch<uchar>(info, data_offset); + float f = r * (1.0f / 255.0f); + return make_float4(f, f, f, 1.0f); + } +} + +ccl_device_inline float4 svm_image_texture_read_2d(int id, int x, int y) +{ + const TextureInfo &info = kernel_data_fetch(texture_info, id); + + /* Wrap */ + if (info.extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info.width); + y = svm_image_texture_wrap_periodic(y, info.height); + } + else if (info.extension == EXTENSION_EXTEND) { + x = svm_image_texture_wrap_clamp(x, info.width); + y = svm_image_texture_wrap_clamp(y, info.height); + } + else { + if (x < 0 || x >= info.width || y < 0 || y >= info.height) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + + return svm_image_texture_read(info, x, y, 0); +} + +ccl_device_inline float4 svm_image_texture_read_3d(int id, int x, int y, int z) +{ + const TextureInfo &info = kernel_data_fetch(texture_info, id); + + /* Wrap */ + if (info.extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info.width); + y = svm_image_texture_wrap_periodic(y, info.height); + z = svm_image_texture_wrap_periodic(z, info.depth); + } + else if (info.extension == EXTENSION_EXTEND) { + x = svm_image_texture_wrap_clamp(x, info.width); + y = svm_image_texture_wrap_clamp(y, info.height); + z = svm_image_texture_wrap_clamp(z, info.depth); + } + else { + if (x < 0 || x >= info.width || y < 0 || y >= info.height || z < 0 || z >= info.depth) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + + return svm_image_texture_read(info, x, y, z); +} + +static float svm_image_texture_frac(float x, int *ix) +{ + int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); + *ix = i; + return x - (float)i; +} + +#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ + u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ + u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } \ + (void)0 + +ccl_device float4 kernel_tex_image_interp(KernelGlobals, int id, float x, float y) +{ + const TextureInfo &info = kernel_data_fetch(texture_info, id); + + if (info.interpolation == INTERPOLATION_CLOSEST) { + /* Closest interpolation. */ + int ix, iy; + svm_image_texture_frac(x * info.width, &ix); + svm_image_texture_frac(y * info.height, &iy); + + return svm_image_texture_read_2d(id, ix, iy); + } + else if (info.interpolation == INTERPOLATION_LINEAR) { + /* Bilinear interpolation. */ + int ix, iy; + float tx = svm_image_texture_frac(x * info.width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info.height - 0.5f, &iy); + + float4 r; + r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(id, ix, iy); + r += (1.0f - ty) * tx * svm_image_texture_read_2d(id, ix + 1, iy); + r += ty * (1.0f - tx) * svm_image_texture_read_2d(id, ix, iy + 1); + r += ty * tx * svm_image_texture_read_2d(id, ix + 1, iy + 1); + return r; + } + else { + /* Bicubic interpolation. */ + int ix, iy; + float tx = svm_image_texture_frac(x * info.width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info.height - 0.5f, &iy); + + float u[4], v[4]; + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float weight = u[x] * v[y]; + r += weight * svm_image_texture_read_2d(id, ix + x - 1, iy + y - 1); + } + } + return r; + } +} + +#ifdef WITH_NANOVDB +template<typename T> struct NanoVDBInterpolator { + + typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType; + + static ccl_always_inline float4 read(float r) + { + return make_float4(r, r, r, 1.0f); + } + + static ccl_always_inline float4 read(nanovdb::Vec3f r) + { + return make_float4(r[0], r[1], r[2], 1.0f); + } + + static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc, + float x, + float y, + float z) + { + const nanovdb::Vec3f xyz(x, y, z); + return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz)); + } + + static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc, + float x, + float y, + float z) + { + const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f); + return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz)); + } + + static float4 interp_3d_cubic(const AccessorType &acc, float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + int pix, piy, piz; + int nnix, nniy, nniz; + /* Tri-cubic b-spline interpolation. */ + const float tx = svm_image_texture_frac(x - 0.5f, &ix); + const float ty = svm_image_texture_frac(y - 0.5f, &iy); + const float tz = svm_image_texture_frac(z - 0.5f, &iz); + pix = ix - 1; + piy = iy - 1; + piz = iz - 1; + nix = ix + 1; + niy = iy + 1; + niz = iz + 1; + nnix = ix + 2; + nniy = iy + 2; + nniz = iz + 2; + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {piy, iy, niy, nniy}; + const int zc[4] = {piz, iz, niz, nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z])))) +# define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +# define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +# undef COL_TERM +# undef ROW_TERM +# undef DATA + } + + static ccl_always_inline float4 + interp_3d(const TextureInfo &info, float x, float y, float z, int interp) + { + using namespace nanovdb; + + NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; + AccessorType acc = grid->getAccessor(); + + switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { + case INTERPOLATION_CLOSEST: + return interp_3d_closest(acc, x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_linear(acc, x, y, z); + default: + return interp_3d_cubic(acc, x, y, z); + } + } +}; +#endif /* WITH_NANOVDB */ + +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, int interp) +{ + const TextureInfo &info = kernel_data_fetch(texture_info, id); + + if (info.use_transform_3d) { + Transform tfm = info.transform_3d; + P = transform_point(&tfm, P); + } + + float x = P.x; + float y = P.y; + float z = P.z; + + uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp; + +#ifdef WITH_NANOVDB + if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { + return NanoVDBInterpolator<float>::interp_3d(info, x, y, z, interpolation); + } + else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { + return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, x, y, z, interpolation); + } + else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FPN) { + return NanoVDBInterpolator<nanovdb::FpN>::interp_3d(info, x, y, z, interpolation); + } + else if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FP16) { + return NanoVDBInterpolator<nanovdb::Fp16>::interp_3d(info, x, y, z, interpolation); + } +#else + if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || + info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3 || + info.data_type == IMAGE_DATA_TYPE_NANOVDB_FPN || + info.data_type == IMAGE_DATA_TYPE_NANOVDB_FP16) { + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } +#endif + else { + x *= info.width; + y *= info.height; + z *= info.depth; + } + + if (interpolation == INTERPOLATION_CLOSEST) { + /* Closest interpolation. */ + int ix, iy, iz; + svm_image_texture_frac(x, &ix); + svm_image_texture_frac(y, &iy); + svm_image_texture_frac(z, &iz); + + return svm_image_texture_read_3d(id, ix, iy, iz); + } + else if (interpolation == INTERPOLATION_LINEAR) { + /* Trilinear interpolation. */ + int ix, iy, iz; + float tx = svm_image_texture_frac(x - 0.5f, &ix); + float ty = svm_image_texture_frac(y - 0.5f, &iy); + float tz = svm_image_texture_frac(z - 0.5f, &iz); + + float4 r; + r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy, iz); + r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(id, ix + 1, iy, iz); + r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy + 1, iz); + r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(id, ix + 1, iy + 1, iz); + + r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy, iz + 1); + r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(id, ix + 1, iy, iz + 1); + r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy + 1, iz + 1); + r += tz * ty * tx * svm_image_texture_read_3d(id, ix + 1, iy + 1, iz + 1); + return r; + } + else { + /* Tri-cubic interpolation. */ + int ix, iy, iz; + float tx = svm_image_texture_frac(x - 0.5f, &ix); + float ty = svm_image_texture_frac(y - 0.5f, &iy); + float tz = svm_image_texture_frac(z - 0.5f, &iz); + + float u[4], v[4], w[4]; + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int z = 0; z < 4; z++) { + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float weight = u[x] * v[y] * w[z]; + r += weight * svm_image_texture_read_3d(id, ix + x - 1, iy + y - 1, iz + z - 1); + } + } + } + return r; + } +} + +#undef SET_CUBIC_SPLINE_WEIGHTS + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp new file mode 100644 index 00000000000..097d21b963f --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/kernel.cpp @@ -0,0 +1,929 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#ifdef WITH_ONEAPI + +/* clang-format off */ +# include "kernel.h" +# include <iostream> +# include <map> +# include <set> + +# include <CL/sycl.hpp> + +# include "kernel/device/oneapi/compat.h" +# include "kernel/device/oneapi/globals.h" +# include "kernel/device/oneapi/kernel_templates.h" + +# include "kernel/device/gpu/kernel.h" +/* clang-format on */ + +static OneAPIErrorCallback s_error_cb = nullptr; +static void *s_error_user_ptr = nullptr; + +static std::vector<sycl::device> oneapi_available_devices(); + +void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr) +{ + s_error_cb = cb; + s_error_user_ptr = user_ptr; +} + +void oneapi_check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false) +{ +# ifdef _DEBUG + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + sycl::info::device_type device_type = + queue->get_device().get_info<sycl::info::device::device_type>(); + sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context()); + (void)usm_type; + assert(usm_type == sycl::usm::alloc::device || + ((device_type == sycl::info::device_type::host || + device_type == sycl::info::device_type::is_cpu || allow_host) && + usm_type == sycl::usm::alloc::host)); +# endif +} + +bool oneapi_create_queue(SyclQueue *&external_queue, int device_index) +{ + bool finished_correct = true; + try { + std::vector<sycl::device> devices = oneapi_available_devices(); + if (device_index < 0 || device_index >= devices.size()) { + return false; + } + sycl::queue *created_queue = new sycl::queue(devices[device_index], + sycl::property::queue::in_order()); + external_queue = reinterpret_cast<SyclQueue *>(created_queue); + } + catch (sycl::exception const &e) { + finished_correct = false; + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + } + } + return finished_correct; +} + +void oneapi_free_queue(SyclQueue *queue_) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + delete queue; +} + +void *oneapi_usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + return sycl::aligned_alloc_host(alignment, memory_size, *queue); +} + +void *oneapi_usm_alloc_device(SyclQueue *queue_, size_t memory_size) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + return sycl::malloc_device(memory_size, *queue); +} + +void oneapi_usm_free(SyclQueue *queue_, void *usm_ptr) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + oneapi_check_usm(queue_, usm_ptr, true); + sycl::free(usm_ptr, *queue); +} + +bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + oneapi_check_usm(queue_, dest, true); + oneapi_check_usm(queue_, src, true); + sycl::event mem_event = queue->memcpy(dest, src, num_bytes); +# ifdef WITH_CYCLES_DEBUG + try { + /* NOTE(@nsirgien) Waiting on memory operation may give more precise error + * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. + */ + mem_event.wait_and_throw(); + return true; + } + catch (sycl::exception const &e) { + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + } + return false; + } +# else + sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context()); + sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context()); + bool from_device_to_host = dest_type == sycl::usm::alloc::host && + src_type == sycl::usm::alloc::device; + bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown || + src_type == sycl::usm::alloc::unknown; + /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host + * may not wait until the end of the transfer before using the memory. + */ + if (from_device_to_host || host_or_device_memop_with_offset) + mem_event.wait(); + return true; +# endif +} + +bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + oneapi_check_usm(queue_, usm_ptr, true); + sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes); +# ifdef WITH_CYCLES_DEBUG + try { + /* NOTE(@nsirgien) Waiting on memory operation may give more precise error + * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. + */ + mem_event.wait_and_throw(); + return true; + } + catch (sycl::exception const &e) { + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + } + return false; + } +# else + (void)mem_event; + return true; +# endif +} + +bool oneapi_queue_synchronize(SyclQueue *queue_) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + try { + queue->wait_and_throw(); + return true; + } + catch (sycl::exception const &e) { + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + } + return false; + } +} + +/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality and + * also trigger runtime compilation of all existing oneAPI kernels */ +bool oneapi_run_test_kernel(SyclQueue *queue_) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + size_t N = 8; + sycl::buffer<float, 1> A(N); + sycl::buffer<float, 1> B(N); + + { + sycl::host_accessor A_host_acc(A, sycl::write_only); + for (size_t i = (size_t)0; i < N; i++) + A_host_acc[i] = rand() % 32; + } + + try { + queue->submit([&](sycl::handler &cgh) { + sycl::accessor A_acc(A, cgh, sycl::read_only); + sycl::accessor B_acc(B, cgh, sycl::write_only, sycl::no_init); + + cgh.parallel_for(N, [=](sycl::id<1> idx) { B_acc[idx] = A_acc[idx] + idx.get(0); }); + }); + queue->wait_and_throw(); + + sycl::host_accessor A_host_acc(A, sycl::read_only); + sycl::host_accessor B_host_acc(B, sycl::read_only); + + for (size_t i = (size_t)0; i < N; i++) { + float result = A_host_acc[i] + B_host_acc[i]; + (void)result; + } + } + catch (sycl::exception const &e) { + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + } + return false; + } + + return true; +} + +bool oneapi_kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size) +{ + kernel_global_size = sizeof(KernelGlobalsGPU); + + return true; +} + +void oneapi_set_global_memory(SyclQueue *queue_, + void *kernel_globals, + const char *memory_name, + void *memory_device_pointer) +{ + assert(queue_); + assert(kernel_globals); + assert(memory_name); + assert(memory_device_pointer); + KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals; + oneapi_check_usm(queue_, memory_device_pointer); + oneapi_check_usm(queue_, kernel_globals, true); + + std::string matched_name(memory_name); + +/* This macro will change global ptr of KernelGlobals via name matching. */ +# define KERNEL_DATA_ARRAY(type, name) \ + else if (#name == matched_name) \ + { \ + globals->__##name = (type *)memory_device_pointer; \ + return; \ + } + if (false) { + } + else if ("integrator_state" == matched_name) { + globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer; + return; + } + KERNEL_DATA_ARRAY(KernelData, data) +# include "kernel/data_arrays.h" + else + { + std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!" + << std::endl; + assert(false); + } +# undef KERNEL_DATA_ARRAY +} + +/* TODO: Move device information to OneapiDevice initialized on creation and use it. */ +/* TODO: Move below function to oneapi/queue.cpp. */ +size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_, + const DeviceKernel kernel, + const size_t kernel_global_size) +{ + assert(queue_); + sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); + (void)kernel_global_size; + const static size_t preferred_work_group_size_intersect_shading = 32; + const static size_t preferred_work_group_size_technical = 1024; + + size_t preferred_work_group_size = 0; + switch (kernel) { + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA: + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE: + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + preferred_work_group_size = preferred_work_group_size_intersect_shading; + break; + + case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES: + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES: + case DEVICE_KERNEL_INTEGRATOR_RESET: + case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS: + preferred_work_group_size = preferred_work_group_size_technical; + break; + + default: + preferred_work_group_size = 512; + } + + const size_t limit_work_group_size = + queue->get_device().get_info<sycl::info::device::max_work_group_size>(); + return std::min(limit_work_group_size, preferred_work_group_size); +} + +bool oneapi_enqueue_kernel(KernelContext *kernel_context, + int kernel, + size_t global_size, + void **args) +{ + bool success = true; + ::DeviceKernel device_kernel = (::DeviceKernel)kernel; + KernelGlobalsGPU *kg = (KernelGlobalsGPU *)kernel_context->kernel_globals; + sycl::queue *queue = reinterpret_cast<sycl::queue *>(kernel_context->queue); + assert(queue); + if (!queue) { + return false; + } + + size_t local_size = oneapi_kernel_preferred_local_size( + kernel_context->queue, device_kernel, global_size); + assert(global_size % local_size == 0); + + /* Local size for DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY needs to be enforced so we + * overwrite it outside of oneapi_kernel_preferred_local_size. */ + if (device_kernel == DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY) { + local_size = GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE; + } + + /* Kernels listed below need a specific number of work groups. */ + if (device_kernel == DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY || + device_kernel == DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY) { + int num_states = *((int *)(args[0])); + /* Round up to the next work-group. */ + size_t groups_count = (num_states + local_size - 1) / local_size; + /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices, + * we extend work size to fit uniformity requirements. */ + global_size = groups_count * local_size; + +# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED + if (queue->get_device().is_host()) { + global_size = 1; + local_size = 1; + } +# endif + } + + /* Let the compiler throw an error if there are any kernels missing in this implementation. */ +# if defined(_WIN32) +# pragma warning(error : 4062) +# elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic error "-Wswitch" +# endif + + try { + queue->submit([&](sycl::handler &cgh) { + switch (device_kernel) { + case DEVICE_KERNEL_INTEGRATOR_RESET: { + oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_camera); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_bake); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_closest); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_shadow); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_intersect_subsurface); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_intersect_volume_stack); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_background); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_light); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_shadow); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_shade_surface_raytrace); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface_mnee); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_volume); + break; + } + case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_queued_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_queued_shadow_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_active_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_terminated_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_terminated_shadow_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_compact_paths_array); + break; + } + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_compact_shadow_paths_array); + break; + } + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_adaptive_sampling_convergence_check); + break; + } + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_x); + break; + } + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_y); + break; + } + case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: { + oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_displace); + break; + } + case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_background); + break; + } + case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_shader_eval_curve_shadow_transparency); + break; + } + case DEVICE_KERNEL_PREFIX_SUM: { + oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum); + break; + } + + /* clang-format off */ + # define DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \ + case DEVICE_KERNEL_FILM_CONVERT_##VARIANT: { \ + oneapi_call(kg, cgh, \ + global_size, \ + local_size, \ + args, \ + oneapi_kernel_film_convert_##variant); \ + break; \ + } + +# define DEVICE_KERNEL_FILM_CONVERT(variant, VARIANT) \ + DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \ + DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT##_HALF_RGBA, variant##_half_rgba) + + DEVICE_KERNEL_FILM_CONVERT(depth, DEPTH); + DEVICE_KERNEL_FILM_CONVERT(mist, MIST); + DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT); + DEVICE_KERNEL_FILM_CONVERT(float, FLOAT); + DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH); + DEVICE_KERNEL_FILM_CONVERT(float3, FLOAT3); + DEVICE_KERNEL_FILM_CONVERT(motion, MOTION); + DEVICE_KERNEL_FILM_CONVERT(cryptomatte, CRYPTOMATTE); + DEVICE_KERNEL_FILM_CONVERT(shadow_catcher, SHADOW_CATCHER); + DEVICE_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow, + SHADOW_CATCHER_MATTE_WITH_SHADOW); + DEVICE_KERNEL_FILM_CONVERT(combined, COMBINED); + DEVICE_KERNEL_FILM_CONVERT(float4, FLOAT4); + +# undef DEVICE_KERNEL_FILM_CONVERT +# undef DEVICE_KERNEL_FILM_CONVERT_PARTIAL + /* clang-format on */ + + case DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_filter_guiding_preprocess); + break; + } + case DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_filter_guiding_set_fake_albedo); + break; + } + case DEVICE_KERNEL_FILTER_COLOR_PREPROCESS: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_preprocess); + break; + } + case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_postprocess); + break; + } + case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_cryptomatte_postprocess); + break; + } + case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_compact_states); + break; + } + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_compact_shadow_states); + break; + } + case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS: { + oneapi_call(kg, + cgh, + global_size, + local_size, + args, + oneapi_kernel_integrator_shadow_catcher_count_possible_splits); + break; + } + /* Unsupported kernels */ + case DEVICE_KERNEL_NUM: + case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL: + kernel_assert(0); + break; + } + }); + } + catch (sycl::exception const &e) { + if (s_error_cb) { + s_error_cb(e.what(), s_error_user_ptr); + success = false; + } + } + +# if defined(_WIN32) +# pragma warning(default : 4062) +# elif defined(__GNUC__) +# pragma GCC diagnostic pop +# endif + return success; +} + +/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows + * since Windows driver 101.3268. */ +/* The same min compute-runtime version is currently required across Windows and Linux. + * For Windows driver 101.3268, compute-runtime version is 23570. */ +static const int lowest_supported_driver_version_win = 1013268; +static const int lowest_supported_driver_version_neo = 23570; + +static int parse_driver_build_version(const sycl::device &device) +{ + const std::string &driver_version = device.get_info<sycl::info::device::driver_version>(); + int driver_build_version = 0; + + size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1); + if (second_dot_position == std::string::npos) { + std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version + << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0)," + << " xx.xx.xxx.xxxx (Windows) for device \"" + << device.get_info<sycl::info::device::name>() << "\"." << std::endl; + } + else { + try { + size_t third_dot_position = driver_version.find('.', second_dot_position + 1); + if (third_dot_position != std::string::npos) { + const std::string &third_number_substr = driver_version.substr( + second_dot_position + 1, third_dot_position - second_dot_position - 1); + const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1); + if (third_number_substr.length() == 3 && forth_number_substr.length() == 4) + driver_build_version = std::stoi(third_number_substr) * 10000 + + std::stoi(forth_number_substr); + } + else { + const std::string &third_number_substr = driver_version.substr(second_dot_position + 1); + driver_build_version = std::stoi(third_number_substr); + } + } + catch (std::invalid_argument &e) { + std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version + << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0)," + << " xx.xx.xxx.xxxx (Windows) for device \"" + << device.get_info<sycl::info::device::name>() << "\"." << std::endl; + } + } + + return driver_build_version; +} + +static std::vector<sycl::device> oneapi_available_devices() +{ + bool allow_all_devices = false; + if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr) + allow_all_devices = true; + + /* Host device is useful only for debugging at the moment + * so we hide this device with default build settings. */ +# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED + bool allow_host = true; +# else + bool allow_host = false; +# endif + + const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms(); + + std::vector<sycl::device> available_devices; + for (const sycl::platform &platform : oneapi_platforms) { + /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL. + */ + if (platform.get_backend() == sycl::backend::opencl) { + continue; + } + + const std::vector<sycl::device> &oneapi_devices = + (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) : + platform.get_devices(sycl::info::device_type::gpu); + + for (const sycl::device &device : oneapi_devices) { + if (allow_all_devices) { + /* still filter out host device if build doesn't support it. */ + if (allow_host || !device.is_host()) { + available_devices.push_back(device); + } + } + else { + bool filter_out = false; + + /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU, + * assuming they have either more than 96 Execution Units or not 7 threads per EU. + * Official support can be broaden to older and smaller GPUs once ready. */ + if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) { + /* Filtered-out defaults in-case these values aren't available through too old L0 + * runtime. */ + int number_of_eus = 96; + int threads_per_eu = 7; + if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) { + number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>(); + } + if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { + threads_per_eu = + device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + } + /* This filters out all Level-Zero supported GPUs from older generation than Arc. */ + if (number_of_eus <= 96 && threads_per_eu == 7) { + filter_out = true; + } + /* if not already filtered out, check driver version. */ + if (!filter_out) { + int driver_build_version = parse_driver_build_version(device); + if ((driver_build_version > 100000 && + driver_build_version < lowest_supported_driver_version_win) || + driver_build_version < lowest_supported_driver_version_neo) { + filter_out = true; + } + } + } + else if (!allow_host && device.is_host()) { + filter_out = true; + } + else if (!allow_all_devices) { + filter_out = true; + } + + if (!filter_out) { + available_devices.push_back(device); + } + } + } + } + + return available_devices; +} + +char *oneapi_device_capabilities() +{ + std::stringstream capabilities; + + const std::vector<sycl::device> &oneapi_devices = oneapi_available_devices(); + for (const sycl::device &device : oneapi_devices) { + const std::string &name = device.get_info<sycl::info::device::name>(); + + capabilities << std::string("\t") << name << "\n"; +# define WRITE_ATTR(attribute_name, attribute_variable) \ + capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \ + << "\n"; +# define GET_NUM_ATTR(attribute) \ + { \ + size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \ + capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \ + } + + GET_NUM_ATTR(vendor_id) + GET_NUM_ATTR(max_compute_units) + GET_NUM_ATTR(max_work_item_dimensions) + + sycl::id<3> max_work_item_sizes = + device.get_info<sycl::info::device::max_work_item_sizes<3>>(); + WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0))) + WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1))) + WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2))) + + GET_NUM_ATTR(max_work_group_size) + GET_NUM_ATTR(max_num_sub_groups) + GET_NUM_ATTR(sub_group_independent_forward_progress) + + GET_NUM_ATTR(preferred_vector_width_char) + GET_NUM_ATTR(preferred_vector_width_short) + GET_NUM_ATTR(preferred_vector_width_int) + GET_NUM_ATTR(preferred_vector_width_long) + GET_NUM_ATTR(preferred_vector_width_float) + GET_NUM_ATTR(preferred_vector_width_double) + GET_NUM_ATTR(preferred_vector_width_half) + + GET_NUM_ATTR(native_vector_width_char) + GET_NUM_ATTR(native_vector_width_short) + GET_NUM_ATTR(native_vector_width_int) + GET_NUM_ATTR(native_vector_width_long) + GET_NUM_ATTR(native_vector_width_float) + GET_NUM_ATTR(native_vector_width_double) + GET_NUM_ATTR(native_vector_width_half) + + size_t max_clock_frequency = + (size_t)(device.is_host() ? (size_t)0 : + device.get_info<sycl::info::device::max_clock_frequency>()); + WRITE_ATTR("max_clock_frequency", max_clock_frequency) + + GET_NUM_ATTR(address_bits) + GET_NUM_ATTR(max_mem_alloc_size) + + /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't + * supported so we always return false, even if device supports HW texture usage acceleration. + */ + bool image_support = false; + WRITE_ATTR("image_support", (size_t)image_support) + + GET_NUM_ATTR(max_parameter_size) + GET_NUM_ATTR(mem_base_addr_align) + GET_NUM_ATTR(global_mem_size) + GET_NUM_ATTR(local_mem_size) + GET_NUM_ATTR(error_correction_support) + GET_NUM_ATTR(profiling_timer_resolution) + GET_NUM_ATTR(is_available) + +# undef GET_NUM_ATTR +# undef WRITE_ATTR + capabilities << "\n"; + } + + return ::strdup(capabilities.str().c_str()); +} + +void oneapi_free(void *p) +{ + if (p) { + ::free(p); + } +} + +void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr) +{ + int num = 0; + std::vector<sycl::device> devices = oneapi_available_devices(); + for (sycl::device &device : devices) { + const std::string &platform_name = + device.get_platform().get_info<sycl::info::platform::name>(); + std::string name = device.get_info<sycl::info::device::name>(); + std::string id = "ONEAPI_" + platform_name + "_" + name; + if (device.has(sycl::aspect::ext_intel_pci_address)) { + id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>()); + } + (cb)(id.c_str(), name.c_str(), num, user_ptr); + num++; + } +} + +size_t oneapi_get_memcapacity(SyclQueue *queue) +{ + return reinterpret_cast<sycl::queue *>(queue) + ->get_device() + .get_info<sycl::info::device::global_mem_size>(); +} + +int oneapi_get_num_multiprocessors(SyclQueue *queue) +{ + const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device(); + if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) { + return device.get_info<sycl::info::device::ext_intel_gpu_eu_count>(); + } + else + return 0; +} + +int oneapi_get_max_num_threads_per_multiprocessor(SyclQueue *queue) +{ + const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device(); + if (device.has(sycl::aspect::ext_intel_gpu_eu_simd_width) && + device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { + return device.get_info<sycl::info::device::ext_intel_gpu_eu_simd_width>() * + device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + } + else + return 0; +} + +#endif /* WITH_ONEAPI */ diff --git a/intern/cycles/kernel/device/oneapi/kernel.h b/intern/cycles/kernel/device/oneapi/kernel.h new file mode 100644 index 00000000000..c5f853742ed --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/kernel.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#pragma once + +#ifdef WITH_ONEAPI + +# include <stddef.h> + +/* NOTE(@nsirgien): Should match underlying type in the declaration inside "kernel/types.h" + * TODO: use kernel/types.h directly. */ +enum DeviceKernel : int; + +# ifndef CYCLES_KERNEL_ONEAPI_EXPORT +# ifdef _WIN32 +# if defined(ONEAPI_EXPORT) +# define CYCLES_KERNEL_ONEAPI_EXPORT extern __declspec(dllexport) +# else +# define CYCLES_KERNEL_ONEAPI_EXPORT extern __declspec(dllimport) +# endif +# else +# define CYCLES_KERNEL_ONEAPI_EXPORT +# endif +# endif + +class SyclQueue; + +typedef void (*OneAPIDeviceIteratorCallback)(const char *id, + const char *name, + int num, + void *user_ptr); + +typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr); + +struct KernelContext { + /* Queue, associated with selected device */ + SyclQueue *queue; + /* Pointer to USM device memory with all global/constant allocation on this device */ + void *kernel_globals; +}; + +/* Use extern C linking so that the symbols can be easily load from the dynamic library at runtime. + */ +# ifdef __cplusplus +extern "C" { +# endif + +# define DLL_INTERFACE_CALL(function, return_type, ...) \ + CYCLES_KERNEL_ONEAPI_EXPORT return_type function(__VA_ARGS__); +# include "kernel/device/oneapi/dll_interface_template.h" +# undef DLL_INTERFACE_CALL + +# ifdef __cplusplus +} +# endif + +#endif /* WITH_ONEAPI */ diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h new file mode 100644 index 00000000000..0ae925cf748 --- /dev/null +++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + +#pragma once + +/* Some macro magic to generate templates for kernel arguments. + * The resulting oneapi_call() template allows to call a SYCL/C++ kernel + * with typed arguments by only giving it a void `**args` as given by Cycles. + * The template will automatically cast from void* to the expected type. */ + +/* When expanded by the preprocessor, the generated templates will look like this example: */ +#if 0 +template<typename T0, typename T1, typename T2> +void oneapi_call( + KernelGlobalsGPU *kg, + sycl::handler &cgh, + size_t global_size, + size_t local_size, + void **args, + void (*func)(const KernelGlobalsGPU *, size_t, size_t, sycl::handler &, T0, T1, T2)) +{ + func(kg, global_size, local_size, cgh, *(T0 *)(args[0]), *(T1 *)(args[1]), *(T2 *)(args[2])); +} +#endif + +/* clang-format off */ +#define ONEAPI_TYP(x) typename T##x +#define ONEAPI_CAST(x) *(T##x *)(args[x]) +#define ONEAPI_T(x) T##x + +#define ONEAPI_GET_NTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, N, ...) N +#define ONEAPI_0(_call, ...) +#define ONEAPI_1(_call, x) _call(x) +#define ONEAPI_2(_call, x, ...) _call(x), ONEAPI_1(_call, __VA_ARGS__) +#define ONEAPI_3(_call, x, ...) _call(x), ONEAPI_2(_call, __VA_ARGS__) +#define ONEAPI_4(_call, x, ...) _call(x), ONEAPI_3(_call, __VA_ARGS__) +#define ONEAPI_5(_call, x, ...) _call(x), ONEAPI_4(_call, __VA_ARGS__) +#define ONEAPI_6(_call, x, ...) _call(x), ONEAPI_5(_call, __VA_ARGS__) +#define ONEAPI_7(_call, x, ...) _call(x), ONEAPI_6(_call, __VA_ARGS__) +#define ONEAPI_8(_call, x, ...) _call(x), ONEAPI_7(_call, __VA_ARGS__) +#define ONEAPI_9(_call, x, ...) _call(x), ONEAPI_8(_call, __VA_ARGS__) +#define ONEAPI_10(_call, x, ...) _call(x), ONEAPI_9(_call, __VA_ARGS__) +#define ONEAPI_11(_call, x, ...) _call(x), ONEAPI_10(_call, __VA_ARGS__) +#define ONEAPI_12(_call, x, ...) _call(x), ONEAPI_11(_call, __VA_ARGS__) +#define ONEAPI_13(_call, x, ...) _call(x), ONEAPI_12(_call, __VA_ARGS__) +#define ONEAPI_14(_call, x, ...) _call(x), ONEAPI_13(_call, __VA_ARGS__) +#define ONEAPI_15(_call, x, ...) _call(x), ONEAPI_14(_call, __VA_ARGS__) +#define ONEAPI_16(_call, x, ...) _call(x), ONEAPI_15(_call, __VA_ARGS__) +#define ONEAPI_17(_call, x, ...) _call(x), ONEAPI_16(_call, __VA_ARGS__) +#define ONEAPI_18(_call, x, ...) _call(x), ONEAPI_17(_call, __VA_ARGS__) +#define ONEAPI_19(_call, x, ...) _call(x), ONEAPI_18(_call, __VA_ARGS__) +#define ONEAPI_20(_call, x, ...) _call(x), ONEAPI_19(_call, __VA_ARGS__) +#define ONEAPI_21(_call, x, ...) _call(x), ONEAPI_20(_call, __VA_ARGS__) + +#define ONEAPI_CALL_FOR(x, ...) \ + ONEAPI_GET_NTH_ARG("ignored", \ + ##__VA_ARGS__, \ + ONEAPI_21, \ + ONEAPI_20, \ + ONEAPI_19, \ + ONEAPI_18, \ + ONEAPI_17, \ + ONEAPI_16, \ + ONEAPI_15, \ + ONEAPI_14, \ + ONEAPI_13, \ + ONEAPI_12, \ + ONEAPI_11, \ + ONEAPI_10, \ + ONEAPI_9, \ + ONEAPI_8, \ + ONEAPI_7, \ + ONEAPI_6, \ + ONEAPI_5, \ + ONEAPI_4, \ + ONEAPI_3, \ + ONEAPI_2, \ + ONEAPI_1, \ + ONEAPI_0) \ + (x, ##__VA_ARGS__) + +/* This template automatically casts entries in the void **args array to the types requested by the kernel func. + * Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */ +#define oneapi_template(...) \ + template<ONEAPI_CALL_FOR(ONEAPI_TYP, __VA_ARGS__)> \ + void oneapi_call( \ + KernelGlobalsGPU *kg, \ + sycl::handler &cgh, \ + size_t global_size, \ + size_t local_size, \ + void **args, \ + void (*func)(KernelGlobalsGPU*, size_t, size_t, sycl::handler &, ONEAPI_CALL_FOR(ONEAPI_T, __VA_ARGS__))) \ + { \ + func(kg, \ + global_size, \ + local_size, \ + cgh, \ + ONEAPI_CALL_FOR(ONEAPI_CAST, __VA_ARGS__)); \ + } + +oneapi_template(0) +oneapi_template(0, 1) +oneapi_template(0, 1, 2) +oneapi_template(0, 1, 2, 3) +oneapi_template(0, 1, 2, 3, 4) +oneapi_template(0, 1, 2, 3, 4, 5) +oneapi_template(0, 1, 2, 3, 4, 5, 6) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) +oneapi_template(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) + + /* clang-format on */ diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h new file mode 100644 index 00000000000..fb9907709ce --- /dev/null +++ b/intern/cycles/kernel/device/optix/bvh.h @@ -0,0 +1,659 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* OptiX implementation of ray-scene intersection. */ + +#pragma once + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" + +#define OPTIX_DEFINE_ABI_VERSION_ONLY +#include <optix_function_table.h> + +CCL_NAMESPACE_BEGIN + +/* Utilities. */ + +template<typename T> ccl_device_forceinline T *get_payload_ptr_0() +{ + return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1()); +} +template<typename T> ccl_device_forceinline T *get_payload_ptr_2() +{ + return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3()); +} + +template<typename T> ccl_device_forceinline T *get_payload_ptr_6() +{ + return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6()); +} + +ccl_device_forceinline int get_object_id() +{ +#ifdef __OBJECT_MOTION__ + /* Always get the instance ID from the TLAS + * There might be a motion transform node between TLAS and BLAS which does not have one. */ + return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); +#else + return optixGetInstanceId(); +#endif +} + +/* Hit/miss functions. */ + +extern "C" __global__ void __miss__kernel_optix_miss() +{ + /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */ + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); + optixSetPayload_5(PRIMITIVE_NONE); +} + +extern "C" __global__ void __anyhit__kernel_optix_local_hit() +{ +#if defined(__HAIR__) || defined(__POINTCLOUD__) + if (!optixIsTriangleHit()) { + /* Ignore curves and points. */ + return optixIgnoreIntersection(); + } +#endif + +#ifdef __BVH_LOCAL__ + const int object = get_object_id(); + if (object != optixGetPayload_4() /* local_object */) { + /* Only intersect with matching object. */ + return optixIgnoreIntersection(); + } + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_local(ray->self, prim)) { + return optixIgnoreIntersection(); + } + + const uint max_hits = optixGetPayload_5(); + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + optixSetPayload_5(true); + return optixTerminateRay(); + } + + int hit = 0; + uint *const lcg_state = get_payload_ptr_0<uint>(); + LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); + + if (lcg_state) { + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (optixGetRayTmax() == local_isect->hits[i].t) { + return optixIgnoreIntersection(); + } + } + + hit = local_isect->num_hits++; + + if (local_isect->num_hits > max_hits) { + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + if (hit >= max_hits) { + return optixIgnoreIntersection(); + } + } + } + else { + if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { + /* Record closest intersection only. + * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit. + */ + return optixIgnoreIntersection(); + } + + local_isect->num_hits = 1; + } + + Intersection *isect = &local_isect->hits[hit]; + isect->t = optixGetRayTmax(); + isect->prim = prim; + isect->object = get_object_id(); + isect->type = kernel_data_fetch(objects, isect->object).primitive_type; + + const float2 barycentrics = optixGetTriangleBarycentrics(); + isect->u = barycentrics.x; + isect->v = barycentrics.y; + + /* Record geometric normal. */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w; + const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0); + const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1); + const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit). */ + optixIgnoreIntersection(); +#endif +} + +extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() +{ +#ifdef __SHADOW_RECORD_ALL__ + int prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +# endif + + float u = 0.0f, v = 0.0f; + int type = 0; + if (optixIsTriangleHit()) { + /* Triangle. */ + const float2 barycentrics = optixGetTriangleBarycentrics(); + u = barycentrics.x; + v = barycentrics.y; + type = kernel_data_fetch(objects, object).primitive_type; + } +# ifdef __HAIR__ + else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { + /* Curve. */ + u = __uint_as_float(optixGetAttribute_0()); + v = __uint_as_float(optixGetAttribute_1()); + + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + type = segment.type; + prim = segment.prim; + +# if OPTIX_ABI_VERSION < 55 + /* Filter out curve end-caps. */ + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } +# endif + } +# endif + else { + /* Point. */ + type = kernel_data_fetch(objects, object).primitive_type; + u = 0.0f; + v = 0.0f; + } + + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + optixSetPayload_5(true); + return optixTerminateRay(); +# else + const uint max_hits = optixGetPayload_3(); + const uint num_hits_packed = optixGetPayload_2(); + const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed); + const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + optixSetPayload_5(true); + return optixTerminateRay(); + } + + /* Always use baked shadow transparency for curves. */ + if (type & PRIMITIVE_CURVE) { + float throughput = __uint_as_float(optixGetPayload_1()); + throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u); + optixSetPayload_1(__float_as_uint(throughput)); + optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + optixSetPayload_5(true); + return optixTerminateRay(); + } + else { + /* Continue tracing. */ + optixIgnoreIntersection(); + return; + } + } + + /* Record transparent intersection. */ + optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1)); + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = optixGetPayload_0(); + + const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. */ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (optixGetRayTmax() >= max_recorded_t) { + /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the + * current hit anymore. */ + return; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax(); + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. */ + optixIgnoreIntersection(); +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ +} + +extern "C" __global__ void __anyhit__kernel_optix_volume_test() +{ +#if defined(__HAIR__) || defined(__POINTCLOUD__) + if (!optixIsTriangleHit()) { + /* Ignore curves. */ + return optixIgnoreIntersection(); + } +#endif + + const uint object = get_object_id(); +#ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +#endif + + if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { + return optixIgnoreIntersection(); + } + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } +} + +extern "C" __global__ void __anyhit__kernel_optix_visibility_test() +{ +#ifdef __HAIR__ +# if OPTIX_ABI_VERSION < 55 + if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) { + /* Filter out curve end-caps. */ + const float u = __uint_as_float(optixGetAttribute_0()); + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } + } +# endif +#endif + + const uint object = get_object_id(); + const uint visibility = optixGetPayload_4(); +#ifdef __VISIBILITY_FLAG__ + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +#endif + + int prim = optixGetPrimitiveIndex(); + if (optixIsTriangleHit()) { + /* Triangle. */ + } +#ifdef __HAIR__ + else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { + /* Curve. */ + prim = kernel_data_fetch(curve_segments, prim).prim; + } +#endif + + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + else { + /* Shadow ray early termination. */ + return optixTerminateRay(); + } + } + else { + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + } +} + +extern "C" __global__ void __closesthit__kernel_optix_hit() +{ + const int object = get_object_id(); + const int prim = optixGetPrimitiveIndex(); + + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ + optixSetPayload_4(object); + + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + optixSetPayload_1(__float_as_uint(barycentrics.x)); + optixSetPayload_2(__float_as_uint(barycentrics.y)); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); + } + else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ + optixSetPayload_2(optixGetAttribute_1()); + optixSetPayload_3(segment.prim); + optixSetPayload_5(segment.type); + } + else { + optixSetPayload_1(0); + optixSetPayload_2(0); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); + } +} + +/* Custom primitive intersection functions. */ + +#ifdef __HAIR__ +ccl_device_inline void optix_intersection_curve(const int prim, const int type) +{ + const int object = get_object_id(); + +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + const float3 ray_P = optixGetObjectRayOrigin(); + const float3 ray_D = optixGetObjectRayDirection(); + const float ray_tmin = optixGetRayTmin(); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection isect; + isect.t = optixGetRayTmax(); + + if (curve_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); + optixReportIntersection(isect.t, + type & PRIMITIVE_ALL, + __float_as_int(isect.u), /* Attribute_0 */ + __float_as_int(isect.v)); /* Attribute_1 */ + } +} + +extern "C" __global__ void __intersection__curve_ribbon() +{ + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex()); + const int prim = segment.prim; + const int type = segment.type; + if (type & PRIMITIVE_CURVE_RIBBON) { + optix_intersection_curve(prim, type); + } +} + +#endif + +#ifdef __POINTCLOUD__ +extern "C" __global__ void __intersection__point() +{ + const int prim = optixGetPrimitiveIndex(); + const int object = get_object_id(); + const int type = kernel_data_fetch(objects, object).primitive_type; + +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + const float3 ray_P = optixGetObjectRayOrigin(); + const float3 ray_D = optixGetObjectRayDirection(); + const float ray_tmin = optixGetRayTmin(); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection isect; + isect.t = optixGetRayTmax(); + + if (point_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); + optixReportIntersection(isect.t, type & PRIMITIVE_ALL); + } +} +#endif + +/* Scene intersection. */ + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + else if (visibility & PATH_RAY_SHADOW_OPAQUE) { + ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; + } + + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + ray_flags, + 0, /* SBT offset for PG_HITD */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + uint p0 = pointer_pack_to_uint_0(lcg_state); + uint p1 = pointer_pack_to_uint_1(lcg_state); + uint p2 = pointer_pack_to_uint_0(local_isect); + uint p3 = pointer_pack_to_uint_1(local_isect); + uint p4 = local_object; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ + uint p5 = max_hits; + + if (local_isect) { + local_isect->num_hits = 0; /* Initialize hit count to zero. */ + } + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + 0xFF, + /* Need to always call into __anyhit__kernel_optix_local_hit. */ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 2, /* SBT offset for PG_HITL */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + return p5; +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + uint p0 = state; + uint p1 = __float_as_uint(1.0f); /* Throughput. */ + uint p2 = 0; /* Number of hits. */ + uint p3 = max_hits; + uint p4 = visibility; + uint p5 = false; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 1, /* SBT offset for PG_HITS */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + *num_recorded_hits = uint16_unpack_from_uint_0(p2); + *throughput = __uint_as_float(p1); + + return p5; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + /* Need to always call into __anyhit__kernel_optix_volume_test. */ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 3, /* SBT offset for PG_HITV */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index aa4a6321a8b..1a11a533b7e 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -8,7 +8,6 @@ #include <optix.h> #define __KERNEL_GPU__ -#define __KERNEL_GPU_RAYTRACING__ #define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */ #define __KERNEL_OPTIX__ #define CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h index bb752c531f0..7af2e421378 100644 --- a/intern/cycles/kernel/device/optix/globals.h +++ b/intern/cycles/kernel/device/optix/globals.h @@ -28,21 +28,21 @@ struct KernelParamsOptiX { /* Global scene data and textures */ KernelData data; -#define KERNEL_TEX(type, name) const type *name; -#include "kernel/textures.h" +#define KERNEL_DATA_ARRAY(type, name) const type *name; +#include "kernel/data_arrays.h" /* Integrator state */ - IntegratorStateGPU __integrator_state; + IntegratorStateGPU integrator_state; }; #ifdef __NVCC__ -extern "C" static __constant__ KernelParamsOptiX __params; +extern "C" static __constant__ KernelParamsOptiX kernel_params; #endif /* Abstraction macros */ -#define kernel_data __params.data -#define kernel_tex_array(t) __params.t -#define kernel_tex_fetch(t, index) __params.t[(index)] -#define kernel_integrator_state __params.__integrator_state +#define kernel_data kernel_params.data +#define kernel_data_array(name) kernel_params.name +#define kernel_data_fetch(name, index) kernel_params.name[(index)] +#define kernel_integrator_state kernel_params.integrator_state CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 9843b2e99be..6abb5aeacb9 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -20,469 +20,39 @@ #include "kernel/integrator/intersect_volume_stack.h" // clang-format on -#define OPTIX_DEFINE_ABI_VERSION_ONLY -#include <optix_function_table.h> - -template<typename T> ccl_device_forceinline T *get_payload_ptr_0() -{ - return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1()); -} -template<typename T> ccl_device_forceinline T *get_payload_ptr_2() -{ - return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3()); -} - -template<typename T> ccl_device_forceinline T *get_payload_ptr_6() -{ - return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6()); -} - -ccl_device_forceinline int get_object_id() -{ -#ifdef __OBJECT_MOTION__ - /* Always get the instance ID from the TLAS - * There might be a motion transform node between TLAS and BLAS which does not have one. */ - return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); -#else - return optixGetInstanceId(); -#endif -} - extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : - global_index; - integrator_intersect_closest(nullptr, path_index, __params.render_buffer); + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; + integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_shadow(nullptr, path_index); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_subsurface(nullptr, path_index); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_volume_stack(nullptr, path_index); } -extern "C" __global__ void __miss__kernel_optix_miss() -{ - /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */ - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); - optixSetPayload_5(PRIMITIVE_NONE); -} - -extern "C" __global__ void __anyhit__kernel_optix_local_hit() -{ -#if defined(__HAIR__) || defined(__POINTCLOUD__) - if (!optixIsTriangleHit()) { - /* Ignore curves and points. */ - return optixIgnoreIntersection(); - } -#endif - -#ifdef __BVH_LOCAL__ - const int object = get_object_id(); - if (object != optixGetPayload_4() /* local_object */) { - /* Only intersect with matching object. */ - return optixIgnoreIntersection(); - } - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self_local(ray->self, prim)) { - return optixIgnoreIntersection(); - } - - const uint max_hits = optixGetPayload_5(); - if (max_hits == 0) { - /* Special case for when no hit information is requested, just report that something was hit */ - optixSetPayload_5(true); - return optixTerminateRay(); - } - - int hit = 0; - uint *const lcg_state = get_payload_ptr_0<uint>(); - LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); - - if (lcg_state) { - for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (optixGetRayTmax() == local_isect->hits[i].t) { - return optixIgnoreIntersection(); - } - } - - hit = local_isect->num_hits++; - - if (local_isect->num_hits > max_hits) { - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - if (hit >= max_hits) { - return optixIgnoreIntersection(); - } - } - } - else { - if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { - /* Record closest intersection only. - * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit. - */ - return optixIgnoreIntersection(); - } - - local_isect->num_hits = 1; - } - - Intersection *isect = &local_isect->hits[hit]; - isect->t = optixGetRayTmax(); - isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; - - const float2 barycentrics = optixGetTriangleBarycentrics(); - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - - /* Record geometric normal. */ - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; - const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0); - const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1); - const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - /* Continue tracing (without this the trace call would return after the first hit). */ - optixIgnoreIntersection(); -#endif -} - -extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() -{ -#ifdef __SHADOW_RECORD_ALL__ - int prim = optixGetPrimitiveIndex(); - const uint object = get_object_id(); -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -# endif - - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self_shadow(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - - float u = 0.0f, v = 0.0f; - int type = 0; - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - u = 1.0f - barycentrics.y - barycentrics.x; - v = barycentrics.x; - type = kernel_tex_fetch(__objects, object).primitive_type; - } -# ifdef __HAIR__ - else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { - u = __uint_as_float(optixGetAttribute_0()); - v = __uint_as_float(optixGetAttribute_1()); - - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); - type = segment.type; - prim = segment.prim; - -# if OPTIX_ABI_VERSION < 55 - /* Filter out curve endcaps. */ - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } -# endif - } -# endif - else { - type = kernel_tex_fetch(__objects, object).primitive_type; - u = 0.0f; - v = 0.0f; - } - -# ifndef __TRANSPARENT_SHADOWS__ - /* No transparent shadows support compiled in, make opaque. */ - optixSetPayload_5(true); - return optixTerminateRay(); -# else - const uint max_hits = optixGetPayload_3(); - const uint num_hits_packed = optixGetPayload_2(); - const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed); - const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed); - - /* If no transparent shadows, all light is blocked and we can stop immediately. */ - if (num_hits >= max_hits || - !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { - optixSetPayload_5(true); - return optixTerminateRay(); - } - - /* Always use baked shadow transparency for curves. */ - if (type & PRIMITIVE_CURVE) { - float throughput = __uint_as_float(optixGetPayload_1()); - throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u); - optixSetPayload_1(__float_as_uint(throughput)); - optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); - - if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - optixSetPayload_5(true); - return optixTerminateRay(); - } - else { - /* Continue tracing. */ - optixIgnoreIntersection(); - return; - } - } - - /* Record transparent intersection. */ - optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1)); - - uint record_index = num_recorded_hits; - - const IntegratorShadowState state = optixGetPayload_0(); - - const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (record_index >= max_record_hits) { - /* If maximum number of hits reached, find a hit to replace. */ - float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); - uint max_recorded_hit = 0; - - for (int i = 1; i < max_record_hits; i++) { - const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); - if (isect_t > max_recorded_t) { - max_recorded_t = isect_t; - max_recorded_hit = i; - } - } - - if (optixGetRayTmax() >= max_recorded_t) { - /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the - * current hit anymore. */ - return; - } - - record_index = max_recorded_hit; - } - - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax(); - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; - - /* Continue tracing. */ - optixIgnoreIntersection(); -# endif /* __TRANSPARENT_SHADOWS__ */ -#endif /* __SHADOW_RECORD_ALL__ */ -} - -extern "C" __global__ void __anyhit__kernel_optix_volume_test() -{ -#if defined(__HAIR__) || defined(__POINTCLOUD__) - if (!optixIsTriangleHit()) { - /* Ignore curves. */ - return optixIgnoreIntersection(); - } -#endif - - const uint object = get_object_id(); -#ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -#endif - - if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { - return optixIgnoreIntersection(); - } - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } -} - -extern "C" __global__ void __anyhit__kernel_optix_visibility_test() -{ -#ifdef __HAIR__ -# if OPTIX_ABI_VERSION < 55 - if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) { - /* Filter out curve endcaps. */ - const float u = __uint_as_float(optixGetAttribute_0()); - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } - } -# endif -#endif - - const uint object = get_object_id(); - const uint visibility = optixGetPayload_4(); -#ifdef __VISIBILITY_FLAG__ - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -#endif - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - - if (visibility & PATH_RAY_SHADOW_OPAQUE) { - if (intersection_skip_self_shadow(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - else { - /* Shadow ray early termination. */ - return optixTerminateRay(); - } - } - else { - if (intersection_skip_self(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - } -} - -extern "C" __global__ void __closesthit__kernel_optix_hit() -{ - const int object = get_object_id(); - const int prim = optixGetPrimitiveIndex(); - - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ - optixSetPayload_4(object); - - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); - optixSetPayload_2(__float_as_uint(barycentrics.x)); - optixSetPayload_3(prim); - optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); - } - else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); - optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ - optixSetPayload_2(optixGetAttribute_1()); - optixSetPayload_3(segment.prim); - optixSetPayload_5(segment.type); - } - else { - optixSetPayload_1(0); - optixSetPayload_2(0); - optixSetPayload_3(prim); - optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); - } -} - -#ifdef __HAIR__ -ccl_device_inline void optix_intersection_curve(const int prim, const int type) -{ - const int object = get_object_id(); - -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = optixGetObjectRayOrigin(); - float3 dir = optixGetObjectRayDirection(); - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - -# ifdef __OBJECT_MOTION__ - const float time = optixGetRayTime(); -# else - const float time = 0.0f; -# endif - - Intersection isect; - isect.t = optixGetRayTmax(); - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) - isect.t *= len; - - if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); - optixReportIntersection(isect.t / len, - type & PRIMITIVE_ALL, - __float_as_int(isect.u), /* Attribute_0 */ - __float_as_int(isect.v)); /* Attribute_1 */ - } -} - -extern "C" __global__ void __intersection__curve_ribbon() -{ - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex()); - const int prim = segment.prim; - const int type = segment.type; - if (type & PRIMITIVE_CURVE_RIBBON) { - optix_intersection_curve(prim, type); - } -} - -#endif - -#ifdef __POINTCLOUD__ -extern "C" __global__ void __intersection__point() -{ - const int prim = optixGetPrimitiveIndex(); - const int object = get_object_id(); - const int type = kernel_tex_fetch(__objects, object).primitive_type; - -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = optixGetObjectRayOrigin(); - float3 dir = optixGetObjectRayDirection(); - - /* The direction is not normalized by default, the point intersection routine expects that. */ - float len; - dir = normalize_len(dir, &len); - -# ifdef __OBJECT_MOTION__ - const float time = optixGetRayTime(); -# else - const float time = 0.0f; -# endif - - Intersection isect; - isect.t = optixGetRayTmax(); - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) { - isect.t *= len; - } - - if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); - optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL); - } -} -#endif diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index 3bd57bc0f1a..41e6224f6da 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -11,15 +11,15 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytrace() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : + const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : global_index; - integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer); + integrator_shade_surface_raytrace(nullptr, path_index, kernel_params.render_buffer); } extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : + const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : global_index; - integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer); + integrator_shade_surface_mnee(nullptr, path_index, kernel_params.render_buffer); } diff --git a/intern/cycles/kernel/film/adaptive_sampling.h b/intern/cycles/kernel/film/adaptive_sampling.h index 16867c39d99..d28c87747c3 100644 --- a/intern/cycles/kernel/film/adaptive_sampling.h +++ b/intern/cycles/kernel/film/adaptive_sampling.h @@ -3,15 +3,15 @@ #pragma once -#include "kernel/film/write_passes.h" +#include "kernel/film/write.h" CCL_NAMESPACE_BEGIN /* Check whether the pixel has converged and should not be sampled anymore. */ -ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *render_buffer) +ccl_device_forceinline bool film_need_sample_pixel(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *render_buffer) { if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { return true; @@ -28,14 +28,14 @@ ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ -ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int y, - float threshold, - bool reset, - int offset, - int stride) +ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int y, + float threshold, + bool reset, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); @@ -78,13 +78,13 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, /* This is a simple box filter in two passes. * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ -ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, - ccl_global float *render_buffer, - int y, - int start_x, - int width, - int offset, - int stride) +ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, + ccl_global float *render_buffer, + int y, + int start_x, + int width, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); @@ -111,13 +111,13 @@ ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, } } -ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int start_y, - int height, - int offset, - int stride) +ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int start_y, + int height, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); diff --git a/intern/cycles/kernel/film/aov_passes.h b/intern/cycles/kernel/film/aov_passes.h new file mode 100644 index 00000000000..3fbb250340f --- /dev/null +++ b/intern/cycles/kernel/film/aov_passes.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void film_write_aov_pass_value(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + const int aov_id, + const float value) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_pass_float(buffer + kernel_data.film.pass_aov_value + aov_id, value); +} + +ccl_device_inline void film_write_aov_pass_color(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + const int aov_id, + const float3 color) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_pass_float4(buffer + kernel_data.film.pass_aov_color + aov_id, + make_float4(color.x, color.y, color.z, 1.0f)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/id_passes.h b/intern/cycles/kernel/film/cryptomatte_passes.h index c8317512bb2..4765777e7e2 100644 --- a/intern/cycles/kernel/film/id_passes.h +++ b/intern/cycles/kernel/film/cryptomatte_passes.h @@ -8,15 +8,15 @@ CCL_NAMESPACE_BEGIN /* Element of ID pass stored in the render buffers. * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the * render buffers might not meet expected by compiler alignment. */ -typedef struct IDPassBufferElement { +typedef struct CryptoPassBufferElement { float x; float y; -} IDPassBufferElement; +} CryptoPassBufferElement; -ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, - int num_slots, - float id, - float weight) +ccl_device_inline void film_write_cryptomatte_slots(ccl_global float *buffer, + int num_slots, + float id, + float weight) { kernel_assert(id != ID_NONE); if (weight == 0.0f) { @@ -24,7 +24,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, } for (int slot = 0; slot < num_slots; slot++) { - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer; #ifdef __ATOMIC_PASS_WRITE__ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ if (id_buffer[slot].x == ID_NONE) { @@ -60,9 +60,9 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, } } -ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +ccl_device_inline void film_sort_cryptomatte_slots(ccl_global float *buffer, int num_slots) { - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer; for (int slot = 1; slot < num_slots; ++slot) { if (id_buffer[slot].x == ID_NONE) { return; @@ -70,7 +70,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ int i = slot; while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { - const IDPassBufferElement swap = id_buffer[i]; + const CryptoPassBufferElement swap = id_buffer[i]; id_buffer[i] = id_buffer[i - 1]; id_buffer[i - 1] = swap; --i; @@ -79,15 +79,15 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl } /* post-sorting for Cryptomatte */ -ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, - ccl_global float *render_buffer, - int pixel_index) +ccl_device_inline void film_cryptomatte_post(KernelGlobals kg, + ccl_global float *render_buffer, + int pixel_index) { const int pass_stride = kernel_data.film.pass_stride; const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + kernel_data.film.pass_cryptomatte; - kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + film_sort_cryptomatte_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/data_passes.h b/intern/cycles/kernel/film/data_passes.h new file mode 100644 index 00000000000..efdf616749f --- /dev/null +++ b/intern/cycles/kernel/film/data_passes.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/camera/camera.h" + +#include "kernel/film/cryptomatte_passes.h" +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline size_t film_write_cryptomatte_pass(ccl_global float *ccl_restrict buffer, + size_t depth, + float id, + float matte_weight) +{ + film_write_cryptomatte_slots(buffer, depth * 2, id, matte_weight); + return depth * 4; +} + +ccl_device_inline void film_write_data_passes(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + return; + } + + const int flag = kernel_data.film.pass_flag; + + if (!(flag & PASS_ANY)) { + return; + } + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { + if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || + average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + if (INTEGRATOR_STATE(state, path, sample) == 0) { + if (flag & PASSMASK(DEPTH)) { + const float depth = camera_z_depth(kg, sd->P); + film_write_pass_float(buffer + kernel_data.film.pass_depth, depth); + } + if (flag & PASSMASK(OBJECT_ID)) { + const float id = object_pass_id(kg, sd->object); + film_write_pass_float(buffer + kernel_data.film.pass_object_id, id); + } + if (flag & PASSMASK(MATERIAL_ID)) { + const float id = shader_pass_id(kg, sd); + film_write_pass_float(buffer + kernel_data.film.pass_material_id, id); + } + if (flag & PASSMASK(POSITION)) { + const float3 position = sd->P; + film_write_pass_float3(buffer + kernel_data.film.pass_position, position); + } + } + + if (flag & PASSMASK(NORMAL)) { + const float3 normal = surface_shader_average_normal(kg, sd); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); + } + if (flag & PASSMASK(ROUGHNESS)) { + const float roughness = surface_shader_average_roughness(sd); + film_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); + } + if (flag & PASSMASK(UV)) { + const float3 uv = primitive_uv(kg, sd); + film_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); + } + if (flag & PASSMASK(MOTION)) { + const float4 speed = primitive_motion_vector(kg, sd); + film_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); + film_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; + } + } + + if (kernel_data.film.cryptomatte_passes) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const float matte_weight = average(throughput) * + (1.0f - average(surface_shader_transparency(kg, sd))); + if (matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + const float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + const float id = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).cryptomatte_id; + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + const float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + } + } + + if (flag & PASSMASK(DIFFUSE_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_diffuse_color, + surface_shader_diffuse(kg, sd) * throughput); + } + if (flag & PASSMASK(GLOSSY_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_glossy_color, + surface_shader_glossy(kg, sd) * throughput); + } + if (flag & PASSMASK(TRANSMISSION_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_transmission_color, + surface_shader_transmission(kg, sd) * throughput); + } + if (flag & PASSMASK(MIST)) { + /* Bring depth into 0..1 range. */ + const float mist_start = kernel_data.film.mist_start; + const float mist_inv_depth = kernel_data.film.mist_inv_depth; + + const float depth = camera_distance(kg, sd->P); + float mist = saturatef((depth - mist_start) * mist_inv_depth); + + /* Falloff */ + const float mist_falloff = kernel_data.film.mist_falloff; + + if (mist_falloff == 1.0f) + ; + else if (mist_falloff == 2.0f) + mist = mist * mist; + else if (mist_falloff == 0.5f) + mist = sqrtf(mist); + else + mist = powf(mist, mist_falloff); + + /* Modulate by transparency */ + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum alpha = surface_shader_alpha(kg, sd); + const float mist_output = (1.0f - mist) * average(throughput * alpha); + + /* Note that the final value in the render buffer we want is 1 - mist_output, + * to avoid having to tracking this in the Integrator state we do the negation + * after rendering. */ + film_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/denoising_passes.h b/intern/cycles/kernel/film/denoising_passes.h new file mode 100644 index 00000000000..dfc21d787f2 --- /dev/null +++ b/intern/cycles/kernel/film/denoising_passes.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __DENOISING_FEATURES__ +ccl_device_forceinline void film_write_denoising_features_surface(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict + render_buffer) +{ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { + return; + } + + /* Skip implicitly transparent surfaces. */ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return; + } + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float depth = sd->ray_length - INTEGRATOR_STATE(state, ray, tmin); + const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * depth); + film_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); + } + + float3 normal = zero_float3(); + Spectrum diffuse_albedo = zero_spectrum(); + Spectrum specular_albedo = zero_spectrum(); + float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + continue; + } + + /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ + normal += sc->N * sc->sample_weight; + sum_weight += sc->sample_weight; + + Spectrum closure_albedo = sc->weight; + /* Closures that include a Fresnel term typically have weights close to 1 even though their + * actual contribution is significantly lower. + * To account for this, we scale their weight by the average fresnel factor (the same is also + * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ + if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; + closure_albedo *= bsdf->extra->fresnel_color; + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; + closure_albedo *= bsdf->avg_value; + } + else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { + closure_albedo *= bsdf_principled_hair_albedo(sc); + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { + /* BSSRDF already accounts for weight, retro-reflection would double up. */ + ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *) + sc; + if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) { + continue; + } + } + + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + diffuse_albedo += closure_albedo; + sum_nonspecular_weight += sc->sample_weight; + } + else { + specular_albedo += closure_albedo; + } + } + + /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ + if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { + if (sum_weight != 0.0f) { + normal /= sum_weight; + } + + if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Transform normal into camera space. */ + const Transform worldtocamera = kernel_data.cam.worldtocamera; + normal = transform_direction(&worldtocamera, normal); + + const float3 denoising_normal = ensure_finite(normal); + film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * + diffuse_albedo); + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + } + else { + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; + } +} + +ccl_device_forceinline void film_write_denoising_features_volume(KernelGlobals kg, + IntegratorState state, + const Spectrum albedo, + const bool scatter, + ccl_global float *ccl_restrict + render_buffer) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + + if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + + /* Write view direction as normal. */ + const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); + film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + /* Write albedo. */ + const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * albedo); + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } +} +#endif /* __DENOISING_FEATURES__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/accumulate.h b/intern/cycles/kernel/film/light_passes.h index e10acfd7eb5..b45b5305119 100644 --- a/intern/cycles/kernel/film/accumulate.h +++ b/intern/cycles/kernel/film/light_passes.h @@ -4,7 +4,7 @@ #pragma once #include "kernel/film/adaptive_sampling.h" -#include "kernel/film/write_passes.h" +#include "kernel/film/write.h" #include "kernel/integrator/shadow_catcher.h" @@ -21,10 +21,10 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval, const ClosureType closure_type, - float3 value) + Spectrum value) { - eval->diffuse = zero_float3(); - eval->glossy = zero_float3(); + eval->diffuse = zero_spectrum(); + eval->glossy = zero_spectrum(); if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) { eval->diffuse = value; @@ -38,7 +38,7 @@ ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval, ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval, const ClosureType closure_type, - float3 value) + Spectrum value) { if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) { eval->diffuse += value; @@ -62,30 +62,30 @@ ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value) eval->sum *= value; } -ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value) +ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, Spectrum value) { eval->diffuse *= value; eval->glossy *= value; eval->sum *= value; } -ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval) +ccl_device_inline Spectrum bsdf_eval_sum(ccl_private const BsdfEval *eval) { return eval->sum; } -ccl_device_inline float3 bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval) +ccl_device_inline Spectrum bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval) { /* Ratio of diffuse weight to recover proportions for writing to render pass. * We assume reflection, transmission and volume scatter to be exclusive. */ - return safe_divide_float3_float3(eval->diffuse, eval->sum); + return safe_divide(eval->diffuse, eval->sum); } -ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval) +ccl_device_inline Spectrum bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval) { /* Ratio of glossy weight to recover proportions for writing to render pass. * We assume reflection, transmission and volume scatter to be exclusive. */ - return safe_divide_float3_float3(eval->glossy, eval->sum); + return safe_divide(eval->glossy, eval->sum); } /* -------------------------------------------------------------------- @@ -95,17 +95,17 @@ ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval * to render buffers instead of using per-thread memory, and to avoid the * impact of clamping on other contributions. */ -ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) +ccl_device_forceinline void film_clamp_light(KernelGlobals kg, ccl_private Spectrum *L, int bounce) { #ifdef __KERNEL_DEBUG_NAN__ - if (!isfinite3_safe(*L)) { + if (!isfinite_safe(*L)) { kernel_assert(!"Cycles sample with non-finite value detected"); } #endif /* Make sure all components are finite, allowing the contribution to be usable by adaptive * sampling convergence check, but also to make it so render result never causes issues with * post-processing. */ - *L = ensure_finite3(*L); + *L = ensure_finite(*L); #ifdef __CLAMP_SAMPLE__ float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : @@ -121,55 +121,49 @@ ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private flo * Pass accumulation utilities. */ -/* Get pointer to pixel in render buffer. */ -ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - /* -------------------------------------------------------------------- * Adaptive sampling. */ -ccl_device_inline int kernel_accum_sample(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *ccl_restrict render_buffer, - int sample, - int sample_offset) +ccl_device_inline int film_write_sample(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + int sample, + int sample_offset) { if (kernel_data.film.pass_sample_count == PASS_UNUSED) { return sample; } - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); return atomic_fetch_and_add_uint32( (ccl_global uint *)(buffer) + kernel_data.film.pass_sample_count, 1) + sample_offset; } -ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, - const int sample, - const float3 contribution, - ccl_global float *ccl_restrict buffer) +ccl_device void film_write_adaptive_buffer(KernelGlobals kg, + const int sample, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { - /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping - * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte - * Carlo global illumination" except that here it is applied per pixel and not in hierarchical - * tiles. */ + /* Adaptive Sampling. Fill the additional buffer with only one half of the samples and + * calculate our stopping criteria. This is the heuristic from "A hierarchical automatic + * stopping condition for Monte Carlo global illumination" except that here it is applied + * per pixel and not in hierarchical tiles. */ if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { return; } - if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { - kernel_write_pass_float4( - buffer + kernel_data.film.pass_adaptive_aux_buffer, - make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f)); + if (sample_is_class_A(kernel_data.integrator.sampling_pattern, sample)) { + const float3 contribution_rgb = spectrum_to_rgb(contribution); + + film_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(contribution_rgb.x * 2.0f, + contribution_rgb.y * 2.0f, + contribution_rgb.z * 2.0f, + 0.0f)); } } @@ -184,10 +178,10 @@ ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, * Returns truth if the contribution is fully handled here and is not to be added to the other * passes (like combined, adaptive sampling). */ -ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, - const uint32_t path_flag, - const float3 contribution, - ccl_global float *ccl_restrict buffer) +ccl_device bool film_write_shadow_catcher(KernelGlobals kg, + const uint32_t path_flag, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return false; @@ -198,7 +192,7 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, /* Matte pass. */ if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive * sampling is based on how noisy the combined pass is as if there were no catchers in the * scene. */ @@ -206,18 +200,18 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, /* Shadow catcher pass. */ if (kernel_shadow_catcher_is_object_pass(path_flag)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } return false; } -ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, - const uint32_t path_flag, - const float3 contribution, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device bool film_write_shadow_catcher_transparent(KernelGlobals kg, + const uint32_t path_flag, + const Spectrum contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return false; @@ -232,9 +226,11 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, /* Matte pass. */ if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float4( + const float3 contribution_rgb = spectrum_to_rgb(contribution); + + film_write_pass_float4( buffer + kernel_data.film.pass_shadow_catcher_matte, - make_float4(contribution.x, contribution.y, contribution.z, transparent)); + make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent)); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive * sampling is based on how noisy the combined pass is as if there were no catchers in the * scene. */ @@ -245,17 +241,17 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the * calculation and the alpha channel of the pass contains numbers of samples contributed to a * pixel of the pass. */ - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } return false; } -ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device void film_write_shadow_catcher_transparent_only(KernelGlobals kg, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return; @@ -265,10 +261,29 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, /* Matte pass. */ if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); } } +/* Write shadow catcher passes on a bounce from the shadow catcher object. */ +ccl_device_forceinline void film_write_shadow_catcher_bounce_data( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + /* Count sample for the shadow catcher object. */ + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); + + /* Since the split is done, the sample does not contribute to the matte, so accumulate it as + * transparency to the matte. */ + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, + average(throughput)); +} + #endif /* __SHADOW_CATCHER__ */ /* -------------------------------------------------------------------- @@ -276,54 +291,55 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, */ /* Write combined pass. */ -ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const float3 contribution, - ccl_global float *ccl_restrict buffer) +ccl_device_inline void film_write_combined_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) { + if (film_write_shadow_catcher(kg, path_flag, contribution, buffer)) { return; } #endif if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_combined, contribution); } - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); + film_write_adaptive_buffer(kg, sample, contribution, buffer); } /* Write combined pass with transparency. */ -ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const float3 contribution, - const float transparent, - ccl_global float *ccl_restrict - buffer) +ccl_device_inline void film_write_combined_transparent_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const Spectrum contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { + if (film_write_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { return; } #endif if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float4( + const float3 contribution_rgb = spectrum_to_rgb(contribution); + + film_write_pass_float4( buffer + kernel_data.film.pass_combined, - make_float4(contribution.x, contribution.y, contribution.z, transparent)); + make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent)); } - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); + film_write_adaptive_buffer(kg, sample, contribution, buffer); } /* Write background or emission to appropriate pass. */ -ccl_device_inline void kernel_accum_emission_or_background_pass( +ccl_device_inline void film_write_emission_or_background_pass( KernelGlobals kg, ConstIntegratorState state, - float3 contribution, + Spectrum contribution, ccl_global float *ccl_restrict buffer, const int pass, const int lightgroup = LIGHTGROUP_NONE) @@ -340,16 +356,16 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( # ifdef __DENOISING_FEATURES__ if (path_flag & PATH_RAY_DENOISING_FEATURES) { if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( state, path, denoising_feature_throughput); - const float3 denoising_albedo = denoising_feature_throughput * contribution; - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + const Spectrum denoising_albedo = denoising_feature_throughput * contribution; + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); } } # endif /* __DENOISING_FEATURES__ */ if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, + film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, contribution); } @@ -366,15 +382,15 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( if (path_flag & PATH_RAY_SURFACE_PASS) { /* Indirectly visible through reflection. */ - const float3 diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight); - const float3 glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight); + const Spectrum diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight); + const Spectrum glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight); /* Glossy */ const int glossy_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect); if (glossy_pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution); + film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); } /* Transmission */ @@ -385,8 +401,8 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( if (transmission_pass_offset != PASS_UNUSED) { /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save * GPU memory. */ - const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight; - kernel_write_pass_float3(buffer + transmission_pass_offset, + const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight; + film_write_pass_spectrum(buffer + transmission_pass_offset, transmission_weight * contribution); } @@ -408,19 +424,19 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( /* Single write call for GPU coherence. */ if (pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + pass_offset, contribution); + film_write_pass_spectrum(buffer + pass_offset, contribution); } #endif /* __PASSES__ */ } /* Write light contribution to render buffer. */ -ccl_device_inline void kernel_accum_light(KernelGlobals kg, - ConstIntegratorShadowState state, - ccl_global float *ccl_restrict render_buffer) +ccl_device_inline void film_write_direct_light(KernelGlobals kg, + ConstIntegratorShadowState state, + ccl_global float *ccl_restrict render_buffer) { /* The throughput for shadow paths already contains the light shader evaluation. */ - float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); + Spectrum contribution = INTEGRATOR_STATE(state, shadow_path, throughput); + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * @@ -433,17 +449,17 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Ambient occlusion. */ if (path_flag & PATH_RAY_SHADOW_FOR_AO) { if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_ao, contribution); } if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { - const float3 ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); - kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); + const Spectrum ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); + film_write_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); } return; } /* Direct light shadow. */ - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); #ifdef __PASSES__ if (kernel_data.film.light_pass_flag & PASS_ANY) { @@ -458,7 +474,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Write lightgroup pass. LIGHTGROUP_NONE is ~0 so decode from unsigned to signed */ const int lightgroup = (int)(INTEGRATOR_STATE(state, shadow_path, lightgroup)) - 1; if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, + film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, contribution); } @@ -467,15 +483,15 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, if (path_flag & PATH_RAY_SURFACE_PASS) { /* Indirectly visible through reflection. */ - const float3 diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight); - const float3 glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight); + const Spectrum diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight); + const Spectrum glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight); /* Glossy */ const int glossy_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect); if (glossy_pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution); + film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); } /* Transmission */ @@ -486,8 +502,8 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, if (transmission_pass_offset != PASS_UNUSED) { /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save * GPU memory. */ - const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight; - kernel_write_pass_float3(buffer + transmission_pass_offset, + const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight; + film_write_pass_spectrum(buffer + transmission_pass_offset, transmission_weight * contribution); } @@ -508,19 +524,19 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Single write call for GPU coherence. */ if (pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + pass_offset, contribution); + film_write_pass_spectrum(buffer + pass_offset, contribution); } } /* Write shadow pass. */ if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - const float3 unshadowed_throughput = INTEGRATOR_STATE( + const Spectrum unshadowed_throughput = INTEGRATOR_STATE( state, shadow_path, unshadowed_throughput); - const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); - const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * - kernel_data.film.pass_shadow_scale; - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); + const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) * + kernel_data.film.pass_shadow_scale; + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow); } } #endif @@ -531,78 +547,96 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, * Note that we accumulate transparency = 1 - alpha in the render buffer. * Otherwise we'd have to write alpha on path termination, which happens * in many places. */ -ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device_inline void film_write_transparent(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); + film_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); } - kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); + film_write_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); } /* Write holdout to render buffer. */ -ccl_device_inline void kernel_accum_holdout(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict render_buffer) +ccl_device_inline void film_write_holdout(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict render_buffer) { - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_transparent(kg, state, path_flag, transparent, buffer); } /* Write background contribution to render buffer. * - * Includes transparency, matching kernel_accum_transparent. */ -ccl_device_inline void kernel_accum_background(KernelGlobals kg, - ConstIntegratorState state, - const float3 L, - const float transparent, - const bool is_transparent_background_ray, - ccl_global float *ccl_restrict render_buffer) + * Includes transparency, matching film_write_transparent. */ +ccl_device_inline void film_write_background(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + const float transparent, + const bool is_transparent_background_ray, + ccl_global float *ccl_restrict render_buffer) { - float3 contribution = float3(INTEGRATOR_STATE(state, path, throughput)) * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L; + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (is_transparent_background_ray) { - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + film_write_transparent(kg, state, path_flag, transparent, buffer); } else { const int sample = INTEGRATOR_STATE(state, path, sample); - kernel_accum_combined_transparent_pass( - kg, path_flag, sample, contribution, transparent, buffer); - } - kernel_accum_emission_or_background_pass(kg, - state, - contribution, - buffer, - kernel_data.film.pass_background, - kernel_data.background.lightgroup); + film_write_combined_transparent_pass(kg, path_flag, sample, contribution, transparent, buffer); + } + film_write_emission_or_background_pass(kg, + state, + contribution, + buffer, + kernel_data.film.pass_background, + kernel_data.background.lightgroup); } /* Write emission to render buffer. */ -ccl_device_inline void kernel_accum_emission(KernelGlobals kg, - ConstIntegratorState state, - const float3 L, - ccl_global float *ccl_restrict render_buffer, - const int lightgroup = LIGHTGROUP_NONE) +ccl_device_inline void film_write_volume_emission(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + ccl_global float *ccl_restrict render_buffer, + const int lightgroup = LIGHTGROUP_NONE) +{ + Spectrum contribution = L; + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const int sample = INTEGRATOR_STATE(state, path, sample); + + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup); +} + +ccl_device_inline void film_write_surface_emission(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + const float mis_weight, + ccl_global float *ccl_restrict render_buffer, + const int lightgroup = LIGHTGROUP_NONE) { - float3 contribution = L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L * mis_weight; + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const int sample = INTEGRATOR_STATE(state, path, sample); - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); - kernel_accum_emission_or_background_pass( + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_emission_or_background_pass( kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup); } diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h deleted file mode 100644 index 773f5726850..00000000000 --- a/intern/cycles/kernel/film/passes.h +++ /dev/null @@ -1,303 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#pragma once - -#include "kernel/geom/geom.h" - -#include "kernel/film/id_passes.h" -#include "kernel/film/write_passes.h" - -CCL_NAMESPACE_BEGIN - -/* Get pointer to pixel in render buffer. */ -ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - -#ifdef __DENOISING_FEATURES__ - -ccl_device_forceinline void kernel_write_denoising_features_surface( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { - return; - } - - /* Skip implicitly transparent surfaces. */ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * - sd->ray_length); - kernel_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); - } - - float3 normal = zero_float3(); - float3 diffuse_albedo = zero_float3(); - float3 specular_albedo = zero_float3(); - float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - continue; - } - - /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ - normal += sc->N * sc->sample_weight; - sum_weight += sc->sample_weight; - - float3 closure_albedo = sc->weight; - /* Closures that include a Fresnel term typically have weights close to 1 even though their - * actual contribution is significantly lower. - * To account for this, we scale their weight by the average fresnel factor (the same is also - * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ - if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; - closure_albedo *= bsdf->extra->fresnel_color; - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { - ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; - closure_albedo *= bsdf->avg_value; - } - else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { - closure_albedo *= bsdf_principled_hair_albedo(sc); - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { - /* BSSRDF already accounts for weight, retro-reflection would double up. */ - ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *) - sc; - if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) { - continue; - } - } - - if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { - diffuse_albedo += closure_albedo; - sum_nonspecular_weight += sc->sample_weight; - } - else { - specular_albedo += closure_albedo; - } - } - - /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ - if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { - if (sum_weight != 0.0f) { - normal /= sum_weight; - } - - if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Transform normal into camera space. */ - const Transform worldtocamera = kernel_data.cam.worldtocamera; - normal = transform_direction(&worldtocamera, normal); - - const float3 denoising_normal = ensure_finite3(normal); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * - diffuse_albedo); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - } - else { - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; - } -} - -ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, - IntegratorState state, - const float3 albedo, - const bool scatter, - ccl_global float *ccl_restrict - render_buffer) -{ - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - - if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - - /* Write view direction as normal. */ - const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - /* Write albedo. */ - const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * albedo); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } -} -#endif /* __DENOISING_FEATURES__ */ - -ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, - size_t depth, - float id, - float matte_weight) -{ - kernel_write_id_slots(buffer, depth * 2, id, matte_weight); - return depth * 4; -} - -ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - return; - } - - const int flag = kernel_data.film.pass_flag; - - if (!(flag & PASS_ANY)) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || - average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { - if (INTEGRATOR_STATE(state, path, sample) == 0) { - if (flag & PASSMASK(DEPTH)) { - const float depth = camera_z_depth(kg, sd->P); - kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); - } - if (flag & PASSMASK(OBJECT_ID)) { - const float id = object_pass_id(kg, sd->object); - kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); - } - if (flag & PASSMASK(MATERIAL_ID)) { - const float id = shader_pass_id(kg, sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); - } - if (flag & PASSMASK(POSITION)) { - const float3 position = sd->P; - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position); - } - } - - if (flag & PASSMASK(NORMAL)) { - const float3 normal = shader_bsdf_average_normal(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); - } - if (flag & PASSMASK(ROUGHNESS)) { - const float roughness = shader_bsdf_average_roughness(sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); - } - if (flag & PASSMASK(UV)) { - const float3 uv = primitive_uv(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); - } - if (flag & PASSMASK(MOTION)) { - const float4 speed = primitive_motion_vector(kg, sd); - kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); - kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; - } - } - - if (kernel_data.film.cryptomatte_passes) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float matte_weight = average(throughput) * - (1.0f - average(shader_bsdf_transparency(kg, sd))); - if (matte_weight > 0.0f) { - ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - const float id = object_cryptomatte_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - const float id = shader_cryptomatte_id(kg, sd->shader); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - const float id = object_cryptomatte_asset_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - } - } - - if (flag & PASSMASK(DIFFUSE_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, - shader_bsdf_diffuse(kg, sd) * throughput); - } - if (flag & PASSMASK(GLOSSY_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, - shader_bsdf_glossy(kg, sd) * throughput); - } - if (flag & PASSMASK(TRANSMISSION_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, - shader_bsdf_transmission(kg, sd) * throughput); - } - if (flag & PASSMASK(MIST)) { - /* Bring depth into 0..1 range. */ - const float mist_start = kernel_data.film.mist_start; - const float mist_inv_depth = kernel_data.film.mist_inv_depth; - - const float depth = camera_distance(kg, sd->P); - float mist = saturatef((depth - mist_start) * mist_inv_depth); - - /* Falloff */ - const float mist_falloff = kernel_data.film.mist_falloff; - - if (mist_falloff == 1.0f) - ; - else if (mist_falloff == 2.0f) - mist = mist * mist; - else if (mist_falloff == 0.5f) - mist = sqrtf(mist); - else - mist = powf(mist, mist_falloff); - - /* Modulate by transparency */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 alpha = shader_bsdf_alpha(kg, sd); - const float mist_output = (1.0f - mist) * average(throughput * alpha); - - /* Note that the final value in the render buffer we want is 1 - mist_output, - * to avoid having to tracking this in the Integrator state we do the negation - * after rendering. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); - } -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h index a0236909f4b..108f992e29d 100644 --- a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/read.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +/* Functions to retrieving render passes for display or output. Reading from + * the raw render buffer and normalizing based on the number of samples, + * computing alpha, compositing shadow catchers, etc. */ + #pragma once CCL_NAMESPACE_BEGIN @@ -235,6 +239,21 @@ ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmCon pixel[0] = f.x; pixel[1] = f.y; pixel[2] = f.z; + + /* Optional alpha channel. */ + if (kfilm_convert->num_components >= 4) { + if (kfilm_convert->pass_combined != PASS_UNUSED) { + float scale, scale_exposure; + film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); + + ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined; + const float alpha = in_combined[3] * scale; + pixel[3] = film_transparency_to_alpha(alpha); + } + else { + pixel[3] = 1.0f; + } + } } /* -------------------------------------------------------------------- diff --git a/intern/cycles/kernel/film/write_passes.h b/intern/cycles/kernel/film/write.h index 9148d73518f..c630a522ee3 100644 --- a/intern/cycles/kernel/film/write_passes.h +++ b/intern/cycles/kernel/film/write.h @@ -3,13 +3,26 @@ #pragma once +#include "kernel/util/color.h" + #ifdef __KERNEL_GPU__ # define __ATOMIC_PASS_WRITE__ #endif CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +/* Write to pixel. */ +ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value) { #ifdef __ATOMIC_PASS_WRITE__ atomic_add_and_fetch_float(buffer, value); @@ -18,8 +31,7 @@ ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict bu #endif } -ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, - float3 value) +ccl_device_inline void film_write_pass_float3(ccl_global float *ccl_restrict buffer, float3 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; @@ -36,8 +48,13 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict b #endif } -ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, - float4 value) +ccl_device_inline void film_write_pass_spectrum(ccl_global float *ccl_restrict buffer, + Spectrum value) +{ + film_write_pass_float3(buffer, spectrum_to_rgb(value)); +} + +ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buffer, float4 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; diff --git a/intern/cycles/kernel/geom/attribute.h b/intern/cycles/kernel/geom/attribute.h index 774b25a76ff..3a0ee1b09d1 100644 --- a/intern/cycles/kernel/geom/attribute.h +++ b/intern/cycles/kernel/geom/attribute.h @@ -16,14 +16,14 @@ CCL_NAMESPACE_BEGIN /* Patch index for triangle, -1 if not subdivision triangle */ -ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd) +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, int prim) { - return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0; + return (prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, prim) : ~0; } -ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd) +ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, int prim, int type) { - if ((sd->type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { + if ((type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, prim) != ~0) { return ATTR_PRIM_SUBD; } else { @@ -42,21 +42,20 @@ ccl_device_inline AttributeDescriptor attribute_not_found() ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object) { - return kernel_tex_fetch(__objects, object).attribute_map_offset; + return kernel_data_fetch(objects, object).attribute_map_offset; } -ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, - ccl_private const ShaderData *sd, - uint id) +ccl_device_inline AttributeDescriptor +find_attribute(KernelGlobals kg, int object, int prim, int type, uint64_t id) { - if (sd->object == OBJECT_NONE) { + if (object == OBJECT_NONE) { return attribute_not_found(); } /* for SVM, find attribute by unique id */ - uint attr_offset = object_attribute_map_offset(kg, sd->object); - attr_offset += attribute_primitive_type(kg, sd); - AttributeMap attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + uint attr_offset = object_attribute_map_offset(kg, object); + attr_offset += attribute_primitive_type(kg, prim, type); + AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset); while (attr_map.id != id) { if (UNLIKELY(attr_map.id == ATTR_STD_NONE)) { @@ -71,13 +70,13 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, else { attr_offset += ATTR_PRIM_TYPES; } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + attr_map = kernel_data_fetch(attributes_map, attr_offset); } AttributeDescriptor desc; desc.element = (AttributeElement)attr_map.element; - if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && + if (prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { return attribute_not_found(); } @@ -91,17 +90,22 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, return desc; } +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, + ccl_private const ShaderData *sd, + uint64_t id) +{ + return find_attribute(kg, sd->object, sd->prim, sd->type, id); +} + /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, const AttributeDescriptor desc) { Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float4, desc.offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float4, desc.offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float4, desc.offset + 2); + tfm.x = kernel_data_fetch(attributes_float4, desc.offset + 0); + tfm.y = kernel_data_fetch(attributes_float4, desc.offset + 1); + tfm.z = kernel_data_fetch(attributes_float4, desc.offset + 2); return tfm; } diff --git a/intern/cycles/kernel/geom/curve.h b/intern/cycles/kernel/geom/curve.h index 4dbc6d4f6db..e243adfde21 100644 --- a/intern/cycles/kernel/geom/curve.h +++ b/intern/cycles/kernel/geom/curve.h @@ -23,12 +23,12 @@ ccl_device float curve_attribute_float(KernelGlobals kg, ccl_private float *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); + float f0 = kernel_data_fetch(attributes_float, desc.offset + k0); + float f1 = kernel_data_fetch(attributes_float, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -50,7 +50,7 @@ ccl_device float curve_attribute_float(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float, offset); + return kernel_data_fetch(attributes_float, offset); } else { return 0.0f; @@ -65,12 +65,12 @@ ccl_device float2 curve_attribute_float2(KernelGlobals kg, ccl_private float2 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); + float2 f0 = kernel_data_fetch(attributes_float2, desc.offset + k0); + float2 f1 = kernel_data_fetch(attributes_float2, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -96,7 +96,7 @@ ccl_device float2 curve_attribute_float2(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float2, offset); + return kernel_data_fetch(attributes_float2, offset); } else { return make_float2(0.0f, 0.0f); @@ -111,12 +111,12 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg, ccl_private float3 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); - float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1); + float3 f0 = kernel_data_fetch(attributes_float3, desc.offset + k0); + float3 f1 = kernel_data_fetch(attributes_float3, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -138,7 +138,7 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); + return kernel_data_fetch(attributes_float3, offset); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -153,12 +153,12 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg, ccl_private float4 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + k0); - float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + k1); + float4 f0 = kernel_data_fetch(attributes_float4, desc.offset + k0); + float4 f1 = kernel_data_fetch(attributes_float4, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -180,7 +180,7 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float4, offset); + return kernel_data_fetch(attributes_float4, offset); } else { return zero_float4(); @@ -195,15 +195,15 @@ ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData float r = 0.0f; if (sd->type & PRIMITIVE_CURVE) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; if (!(sd->type & PRIMITIVE_MOTION)) { - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + P_curve[0] = kernel_data_fetch(curve_keys, k0); + P_curve[1] = kernel_data_fetch(curve_keys, k1); } else { motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); @@ -232,14 +232,14 @@ ccl_device float curve_random(KernelGlobals kg, ccl_private const ShaderData *sd ccl_device float3 curve_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + KernelCurve curve = kernel_data_fetch(curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + P_curve[0] = kernel_data_fetch(curve_keys, k0); + P_curve[1] = kernel_data_fetch(curve_keys, k1); return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); } diff --git a/intern/cycles/kernel/geom/curve_intersect.h b/intern/cycles/kernel/geom/curve_intersect.h index e1a1f9c02c5..97644aacaa8 100644 --- a/intern/cycles/kernel/geom/curve_intersect.h +++ b/intern/cycles/kernel/geom/curve_intersect.h @@ -72,7 +72,7 @@ ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const floa ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, const float3 cylinder_end, const float cylinder_radius, - const float3 ray_dir, + const float3 ray_D, ccl_private float2 *t_o, ccl_private float *u0_o, ccl_private float3 *Ng0_o, @@ -82,7 +82,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculate quadratic equation to solve. */ const float rl = 1.0f / len(cylinder_end - cylinder_start); const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl; - const float3 O = -P0, dO = ray_dir; + const float3 O = -P0, dO = ray_D; const float dOdO = dot(dO, dO); const float OdO = dot(dO, O); @@ -123,7 +123,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculates u and Ng for near hit. */ { *u0_o = (t0 * dOz + Oz) * rl; - const float3 Pr = t0 * ray_dir; + const float3 Pr = t0 * ray_D; const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start; *Ng0_o = Pr - Pl; } @@ -131,7 +131,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculates u and Ng for far hit. */ { *u1_o = (t1 * dOz + Oz) * rl; - const float3 Pr = t1 * ray_dir; + const float3 Pr = t1 * ray_D; const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start; *Ng1_o = Pr - Pl; } @@ -141,10 +141,10 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, return true; } -ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir) +ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_D) { const float3 O = -P; - const float3 D = ray_dir; + const float3 D = ray_D; const float ON = dot(O, N); const float DN = dot(D, N); const float min_rcp_input = 1e-18f; @@ -155,8 +155,9 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co return make_float2(lower, upper); } -ccl_device bool curve_intersect_iterative(const float3 ray_dir, - ccl_private float *ray_tfar, +ccl_device bool curve_intersect_iterative(const float3 ray_D, + const float ray_tmin, + ccl_private float *ray_tmax, const float dt, const float4 curve[4], float u, @@ -164,7 +165,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const bool use_backfacing, ccl_private Intersection *isect) { - const float length_ray_dir = len(ray_dir); + const float length_ray_D = len(ray_D); /* Error of curve evaluations is proportional to largest coordinate. */ const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3])); @@ -175,9 +176,9 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const float radius_max = box_max.w; for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) { - const float3 Q = ray_dir * t; - const float3 dQdt = ray_dir; - const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t; + const float3 Q = ray_D * t; + const float3 dQdt = ray_D; + const float Q_err = 16.0f * FLT_EPSILON * length_ray_D * t; const float4 P4 = catmull_rom_basis_eval(curve, u); const float4 dPdu4 = catmull_rom_basis_derivative(curve, u); @@ -220,7 +221,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, if (fabsf(f) < f_err && fabsf(g) < g_err) { t += dt; - if (!(0.0f <= t && t <= *ray_tfar)) { + if (!(t >= ray_tmin && t <= *ray_tmax)) { return false; /* Rejects NaNs */ } if (!(u >= 0.0f && u <= 1.0f)) { @@ -232,12 +233,12 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const float3 U = dradiusdu * R + dPdu; const float3 V = cross(dPdu, R); const float3 Ng = cross(V, U); - if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) { + if (!use_backfacing && dot(ray_D, Ng) > 0.0f) { return false; } /* Record intersection. */ - *ray_tfar = t; + *ray_tmax = t; isect->t = t; isect->u = u; isect->v = 0.0f; @@ -248,16 +249,17 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, return false; } -ccl_device bool curve_intersect_recursive(const float3 ray_orig, - const float3 ray_dir, - float ray_tfar, +ccl_device bool curve_intersect_recursive(const float3 ray_P, + const float3 ray_D, + const float ray_tmin, + float ray_tmax, float4 curve[4], ccl_private Intersection *isect) { /* Move ray closer to make intersection stable. */ const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3])); - const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir); - const float3 ref = ray_orig + ray_dir * dt; + const float dt = dot(center - ray_P, ray_D) / dot(ray_D, ray_D); + const float3 ref = ray_P + ray_D * dt; const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f); curve[0] -= ref4; curve[1] -= ref4; @@ -320,7 +322,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, valid = cylinder_intersect(float4_to_float3(P0), float4_to_float3(P3), r_outer, - ray_dir, + ray_D, &tc_outer, &u_outer0, &Ng_outer0, @@ -331,13 +333,12 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, } /* Intersect with cap-planes. */ - float2 tp = make_float2(-dt, ray_tfar - dt); + float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt); tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y)); - const float2 h0 = half_plane_intersect( - float4_to_float3(P0), float4_to_float3(dP0du), ray_dir); + const float2 h0 = half_plane_intersect(float4_to_float3(P0), float4_to_float3(dP0du), ray_D); tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y)); const float2 h1 = half_plane_intersect( - float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir); + float4_to_float3(P3), -float4_to_float3(dP3du), ray_D); tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y)); valid = tp.x <= tp.y; if (!valid) { @@ -357,7 +358,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, const bool valid_inner = cylinder_intersect(float4_to_float3(P0), float4_to_float3(P3), r_inner, - ray_dir, + ray_D, &tc_inner, &u_inner0, &Ng_inner0, @@ -367,9 +368,9 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, /* At the unstable area we subdivide deeper. */ # if 0 const bool unstable0 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f); + (fabsf(dot(normalize(ray_D), normalize(Ng_inner0))) < 0.3f); const bool unstable1 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f); + (fabsf(dot(normalize(ray_D), normalize(Ng_inner1))) < 0.3f); # else /* On the GPU appears to be a little faster if always enabled. */ (void)valid_inner; @@ -394,19 +395,20 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, CURVE_NUM_BEZIER_SUBDIVISIONS; if (depth >= termDepth) { found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect); + ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect); } else { recurse = true; } } - if (valid1 && (tp1.x + dt <= ray_tfar)) { + const float t1 = tp1.x + dt; + if (valid1 && (t1 >= ray_tmin && t1 <= ray_tmax)) { const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : CURVE_NUM_BEZIER_SUBDIVISIONS; if (depth >= termDepth) { found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect); + ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect); } else { recurse = true; @@ -456,7 +458,8 @@ ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, c * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two * triangles gets intersected. */ -ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, +ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin, + const float ray_tmax, const float3 quad_v0, const float3 quad_v1, const float3 quad_v2, @@ -497,7 +500,7 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, /* Perform depth test? */ const float t = rcpDen * dot(v0, Ng); - if (!(0.0f <= t && t <= ray_tfar)) { + if (!(t >= ray_tmin && t <= ray_tmax)) { return false; } @@ -515,13 +518,16 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, return true; } -ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3]) +ccl_device_inline void ribbon_ray_space(const float3 ray_D, + const float ray_D_invlen, + float3 ray_space[3]) { - const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y); - const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x); + const float3 D = ray_D * ray_D_invlen; + const float3 dx0 = make_float3(0, D.z, -D.y); + const float3 dx1 = make_float3(-D.z, 0, D.x); ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1); - ray_space[1] = normalize(cross(ray_dir, ray_space[0])); - ray_space[2] = ray_dir; + ray_space[1] = normalize(cross(D, ray_space[0])); + ray_space[2] = D * ray_D_invlen; } ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], @@ -533,15 +539,17 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], } ccl_device_inline bool ribbon_intersect(const float3 ray_org, - const float3 ray_dir, - float ray_tfar, + const float3 ray_D, + const float ray_tmin, + float ray_tmax, const int N, float4 curve[4], ccl_private Intersection *isect) { /* Transform control points into ray space. */ + const float ray_D_invlen = 1.0f / len(ray_D); float3 ray_space[3]; - ribbon_ray_space(ray_dir, ray_space); + ribbon_ray_space(ray_D, ray_D_invlen, ray_space); curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]); curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]); @@ -555,7 +563,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, /* Evaluate first point and radius scaled normal direction. */ float4 p0 = catmull_rom_basis_eval(curve, 0.0f); float3 dp0dt = float4_to_float3(catmull_rom_basis_derivative(curve, 0.0f)); - if (max3(fabs(dp0dt)) < eps) { + if (reduce_max(fabs(dp0dt)) < eps) { const float4 p1 = catmull_rom_basis_eval(curve, step_size); dp0dt = float4_to_float3(p1 - p0); } @@ -570,7 +578,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, /* Evaluate next point. */ float3 dp1dt = float4_to_float3(catmull_rom_basis_derivative(curve, u + step_size)); - dp1dt = (max3(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt; + dp1dt = (reduce_max(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt; const float3 wn1 = normalize(make_float3(dp1dt.y, -dp1dt.x, 0.0f)) * p1.w; if (valid) { @@ -582,21 +590,21 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, /* Intersect quad. */ float vu, vv, vt; - bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt); + bool valid0 = ribbon_intersect_quad(ray_tmin, ray_tmax, lp0, lp1, up1, up0, &vu, &vv, &vt); if (valid0) { /* ignore self intersections */ const float avoidance_factor = 2.0f; if (avoidance_factor != 0.0f) { float r = mix(p0.w, p1.w, vu); - valid0 = vt > avoidance_factor * r; + valid0 = vt > avoidance_factor * r * ray_D_invlen; } if (valid0) { vv = 2.0f * vv - 1.0f; /* Record intersection. */ - ray_tfar = vt; + ray_tmax = vt; isect->t = vt; isect->u = u + vu * step_size; isect->v = vv; @@ -614,8 +622,9 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, ccl_device_forceinline bool curve_intersect(KernelGlobals kg, ccl_private Intersection *isect, - const float3 P, - const float3 dir, + const float3 ray_P, + const float3 ray_D, + const float tmin, const float tmax, int object, int prim, @@ -624,7 +633,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, { const bool is_motion = (type & PRIMITIVE_MOTION); - KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + KernelCurve kcurve = kernel_data_fetch(curves, prim); int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); int k1 = k0 + 1; @@ -633,10 +642,10 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, float4 curve[4]; if (!is_motion) { - curve[0] = kernel_tex_fetch(__curve_keys, ka); - curve[1] = kernel_tex_fetch(__curve_keys, k0); - curve[2] = kernel_tex_fetch(__curve_keys, k1); - curve[3] = kernel_tex_fetch(__curve_keys, kb); + curve[0] = kernel_data_fetch(curve_keys, ka); + curve[1] = kernel_data_fetch(curve_keys, k0); + curve[2] = kernel_data_fetch(curve_keys, k1); + curve[3] = kernel_data_fetch(curve_keys, kb); } else { motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); @@ -645,7 +654,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, if (type & PRIMITIVE_CURVE_RIBBON) { /* todo: adaptive number of subdivisions could help performance here. */ const int subdivisions = kernel_data.bvh.curve_subdivisions; - if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { + if (ribbon_intersect(ray_P, ray_D, tmin, tmax, subdivisions, curve, isect)) { isect->prim = prim; isect->object = object; isect->type = type; @@ -655,7 +664,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, return false; } else { - if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { + if (curve_intersect_recursive(ray_P, ray_D, tmin, tmax, curve, isect)) { isect->prim = prim; isect->object = object; isect->type = type; @@ -682,7 +691,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg, D = safe_normalize_len(D, &t); } - KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); + KernelCurve kcurve = kernel_data_fetch(curves, isect_prim); int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; @@ -692,10 +701,10 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg, float4 P_curve[4]; if (!(sd->type & PRIMITIVE_MOTION)) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + P_curve[0] = kernel_data_fetch(curve_keys, ka); + P_curve[1] = kernel_data_fetch(curve_keys, k0); + P_curve[2] = kernel_data_fetch(curve_keys, k1); + P_curve[3] = kernel_data_fetch(curve_keys, kb); } else { motion_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); @@ -729,7 +738,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg, /* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small * radius). In this case use the view direction to approximate the normal. */ const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u)); - const float3 N = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I; + const float3 N = (!isequal(P, P_inside)) ? normalize(P - P_inside) : -sd->I; sd->N = N; sd->v = 0.0f; @@ -750,7 +759,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals kg, sd->P = P; sd->Ng = (sd->type & PRIMITIVE_CURVE_RIBBON) ? sd->I : sd->N; sd->dPdv = cross(sd->dPdu, sd->Ng); - sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; + sd->shader = kernel_data_fetch(curves, sd->prim).shader_id; } #endif diff --git a/intern/cycles/kernel/geom/motion_curve.h b/intern/cycles/kernel/geom/motion_curve.h index b5289b6dda1..448e4b95e0b 100644 --- a/intern/cycles/kernel/geom/motion_curve.h +++ b/intern/cycles/kernel/geom/motion_curve.h @@ -27,8 +27,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, { if (step == numsteps) { /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); + keys[0] = kernel_data_fetch(curve_keys, k0); + keys[1] = kernel_data_fetch(curve_keys, k1); } else { /* center step is not stored in this array */ @@ -37,8 +37,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, offset += step * numkeys; - keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1); + keys[0] = kernel_data_fetch(attributes_float4, offset + k0); + keys[1] = kernel_data_fetch(attributes_float4, offset + k1); } } @@ -83,10 +83,10 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, { if (step == numsteps) { /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - keys[2] = kernel_tex_fetch(__curve_keys, k2); - keys[3] = kernel_tex_fetch(__curve_keys, k3); + keys[0] = kernel_data_fetch(curve_keys, k0); + keys[1] = kernel_data_fetch(curve_keys, k1); + keys[2] = kernel_data_fetch(curve_keys, k2); + keys[3] = kernel_data_fetch(curve_keys, k3); } else { /* center step is not stored in this array */ @@ -95,10 +95,10 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, offset += step * numkeys; - keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1); - keys[2] = kernel_tex_fetch(__attributes_float4, offset + k2); - keys[3] = kernel_tex_fetch(__attributes_float4, offset + k3); + keys[0] = kernel_data_fetch(attributes_float4, offset + k0); + keys[1] = kernel_data_fetch(attributes_float4, offset + k1); + keys[2] = kernel_data_fetch(attributes_float4, offset + k2); + keys[3] = kernel_data_fetch(attributes_float4, offset + k3); } } diff --git a/intern/cycles/kernel/geom/motion_point.h b/intern/cycles/kernel/geom/motion_point.h index c1952ab090a..4916ae702ff 100644 --- a/intern/cycles/kernel/geom/motion_point.h +++ b/intern/cycles/kernel/geom/motion_point.h @@ -19,7 +19,7 @@ motion_point_for_step(KernelGlobals kg, int offset, int numkeys, int numsteps, i { if (step == numsteps) { /* center step: regular key location */ - return kernel_tex_fetch(__points, prim); + return kernel_data_fetch(points, prim); } else { /* center step is not stored in this array */ @@ -28,7 +28,7 @@ motion_point_for_step(KernelGlobals kg, int offset, int numkeys, int numsteps, i offset += step * numkeys; - return kernel_tex_fetch(__attributes_float4, offset + prim); + return kernel_data_fetch(attributes_float4, offset + prim); } } diff --git a/intern/cycles/kernel/geom/motion_triangle.h b/intern/cycles/kernel/geom/motion_triangle.h index a87eb11f4f4..06308071700 100644 --- a/intern/cycles/kernel/geom/motion_triangle.h +++ b/intern/cycles/kernel/geom/motion_triangle.h @@ -30,9 +30,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, { if (step == numsteps) { /* center step: regular vertex location */ - verts[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - verts[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - verts[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + verts[0] = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + verts[1] = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + verts[2] = kernel_data_fetch(tri_verts, tri_vindex.w + 2); } else { /* center step not store in this array */ @@ -41,9 +41,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, offset += step * numverts; - verts[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x); - verts[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y); - verts[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z); + verts[0] = kernel_data_fetch(attributes_float3, offset + tri_vindex.x); + verts[1] = kernel_data_fetch(attributes_float3, offset + tri_vindex.y); + verts[2] = kernel_data_fetch(attributes_float3, offset + tri_vindex.z); } } @@ -57,9 +57,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, { if (step == numsteps) { /* center step: regular vertex location */ - normals[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); - normals[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); - normals[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); + normals[0] = kernel_data_fetch(tri_vnormal, tri_vindex.x); + normals[1] = kernel_data_fetch(tri_vnormal, tri_vindex.y); + normals[2] = kernel_data_fetch(tri_vnormal, tri_vindex.z); } else { /* center step is not stored in this array */ @@ -68,9 +68,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, offset += step * numverts; - normals[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x); - normals[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y); - normals[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z); + normals[0] = kernel_data_fetch(attributes_float3, offset + tri_vindex.x); + normals[1] = kernel_data_fetch(attributes_float3, offset + tri_vindex.y); + normals[2] = kernel_data_fetch(attributes_float3, offset + tri_vindex.z); } } @@ -92,7 +92,7 @@ ccl_device_inline void motion_triangle_vertices( /* fetch vertex coordinates */ float3 next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); @@ -121,7 +121,7 @@ ccl_device_inline void motion_triangle_vertices_and_normals( /* Fetch vertex coordinates. */ float3 next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); @@ -167,7 +167,7 @@ ccl_device_inline float3 motion_triangle_smooth_normal( /* fetch normals */ float3 normals[3], next_normals[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); motion_triangle_normals_for_step( diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h index fb951fa151d..b30ee7258dc 100644 --- a/intern/cycles/kernel/geom/motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h @@ -27,8 +27,8 @@ ccl_device_inline float3 motion_triangle_point_from_uv(KernelGlobals kg, const float v, float3 verts[3]) { - float w = 1.0f - u - v; - float3 P = u * verts[0] + v * verts[1] + w * verts[2]; + /* This appears to give slightly better precision than interpolating with w = (1 - u - v). */ + float3 P = verts[0] + u * (verts[1] - verts[0]) + v * (verts[2] - verts[0]); if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); @@ -46,6 +46,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, + float tmin, float tmax, float time, uint visibility, @@ -58,12 +59,12 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, motion_triangle_vertices(kg, object, prim, time, verts); /* Ray-triangle intersection, unoptimized. */ float t, u, v; - if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { + if (ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { #ifdef __VISIBILITY_FLAG__ /* Visibility flag test. we do it here under the assumption * that most triangles are culled by node flags. */ - if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + if (kernel_data_fetch(prim_visibility, prim_addr) & visibility) #endif { isect->t = t; @@ -92,6 +93,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, int object, int prim, int prim_addr, + float tmin, float tmax, ccl_private uint *lcg_state, int max_hits) @@ -101,7 +103,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, motion_triangle_vertices(kg, object, prim, time, verts); /* Ray-triangle intersection, unoptimized. */ float t, u, v; - if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { + if (!ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { return false; } diff --git a/intern/cycles/kernel/geom/motion_triangle_shader.h b/intern/cycles/kernel/geom/motion_triangle_shader.h index 2b2bb858816..413a61b380a 100644 --- a/intern/cycles/kernel/geom/motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/motion_triangle_shader.h @@ -31,7 +31,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, bool is_local) { /* Get shader. */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + sd->shader = kernel_data_fetch(tri_shader, sd->prim); /* Get motion info. */ /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), * can we de-duplicate something here? @@ -47,7 +47,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, kernel_assert(offset != ATTR_STD_NOT_FOUND); /* Fetch vertex coordinates. */ float3 verts[3], next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); /* Interpolate between steps. */ @@ -68,8 +68,8 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, sd->N = Ng; /* Compute derivatives of P w.r.t. uv. */ #ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); + sd->dPdu = (verts[1] - verts[0]); + sd->dPdv = (verts[2] - verts[0]); #endif /* Compute smooth normal. */ if (sd->shader & SHADER_SMOOTH_NORMAL) { @@ -89,7 +89,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, float u = sd->u; float v = sd->v; float w = 1.0f - u - v; - sd->N = (u * normals[0] + v * normals[1] + w * normals[2]); + sd->N = (w * normals[0] + u * normals[1] + v * normals[2]); } } diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h index 3faab7fa905..14ceb636e2e 100644 --- a/intern/cycles/kernel/geom/object.h +++ b/intern/cycles/kernel/geom/object.h @@ -31,10 +31,10 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals kg, enum ObjectTransform type) { if (type == OBJECT_INVERSE_TRANSFORM) { - return kernel_tex_fetch(__objects, object).itfm; + return kernel_data_fetch(objects, object).itfm; } else { - return kernel_tex_fetch(__objects, object).tfm; + return kernel_data_fetch(objects, object).tfm; } } @@ -43,10 +43,10 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals kg, ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse) { if (inverse) { - return kernel_tex_fetch(__lights, lamp).itfm; + return kernel_data_fetch(lights, lamp).itfm; } else { - return kernel_tex_fetch(__lights, lamp).tfm; + return kernel_data_fetch(lights, lamp).tfm; } } @@ -57,7 +57,7 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg, enum ObjectVectorTransform type) { int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type; - return kernel_tex_fetch(__object_motion_pass, offset); + return kernel_data_fetch(object_motion_pass, offset); } /* Motion blurred object transformations */ @@ -65,9 +65,9 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg, #ifdef __OBJECT_MOTION__ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time) { - const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; - ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); - const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; + const uint motion_offset = kernel_data_fetch(objects, object).motion_offset; + ccl_global const DecomposedTransform *motion = &kernel_data_fetch(object_motion, motion_offset); + const uint num_steps = kernel_data_fetch(objects, object).numsteps * 2 + 1; Transform tfm; transform_motion_array_interpolate(&tfm, motion, num_steps, time); @@ -80,13 +80,13 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg, float time, ccl_private Transform *itfm) { - int object_flag = kernel_tex_fetch(__object_flag, object); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_MOTION) { /* if we do motion blur */ Transform tfm = object_fetch_transform_motion(kg, object, time); if (itfm) - *itfm = transform_quick_inverse(tfm); + *itfm = transform_inverse(tfm); return tfm; } @@ -259,7 +259,7 @@ ccl_device_inline float3 object_color(KernelGlobals kg, int object) if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object); return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]); } @@ -270,7 +270,7 @@ ccl_device_inline float object_alpha(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0.0f; - return kernel_tex_fetch(__objects, object).alpha; + return kernel_data_fetch(objects, object).alpha; } /* Pass ID number of object */ @@ -280,7 +280,7 @@ ccl_device_inline float object_pass_id(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0.0f; - return kernel_tex_fetch(__objects, object).pass_id; + return kernel_data_fetch(objects, object).pass_id; } /* Lightgroup of lamp */ @@ -290,7 +290,7 @@ ccl_device_inline int lamp_lightgroup(KernelGlobals kg, int lamp) if (lamp == LAMP_NONE) return LIGHTGROUP_NONE; - return kernel_tex_fetch(__lights, lamp).lightgroup; + return kernel_data_fetch(lights, lamp).lightgroup; } /* Lightgroup of object */ @@ -300,7 +300,7 @@ ccl_device_inline int object_lightgroup(KernelGlobals kg, int object) if (object == OBJECT_NONE) return LIGHTGROUP_NONE; - return kernel_tex_fetch(__objects, object).lightgroup; + return kernel_data_fetch(objects, object).lightgroup; } /* Per lamp random number for shader variation */ @@ -310,7 +310,7 @@ ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp) if (lamp == LAMP_NONE) return 0.0f; - return kernel_tex_fetch(__lights, lamp).random; + return kernel_data_fetch(lights, lamp).random; } /* Per object random number for shader variation */ @@ -320,7 +320,7 @@ ccl_device_inline float object_random_number(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0.0f; - return kernel_tex_fetch(__objects, object).random_number; + return kernel_data_fetch(objects, object).random_number; } /* Particle ID from which this object was generated */ @@ -330,7 +330,7 @@ ccl_device_inline int object_particle_id(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0; - return kernel_tex_fetch(__objects, object).particle_index; + return kernel_data_fetch(objects, object).particle_index; } /* Generated texture coordinate on surface from where object was instanced */ @@ -340,7 +340,7 @@ ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object) if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object); return make_float3( kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]); } @@ -352,7 +352,7 @@ ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object) if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object); return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f); } @@ -365,13 +365,13 @@ ccl_device_inline void object_motion_info(KernelGlobals kg, ccl_private int *numkeys) { if (numkeys) { - *numkeys = kernel_tex_fetch(__objects, object).numkeys; + *numkeys = kernel_data_fetch(objects, object).numkeys; } if (numsteps) - *numsteps = kernel_tex_fetch(__objects, object).numsteps; + *numsteps = kernel_data_fetch(objects, object).numsteps; if (numverts) - *numverts = kernel_tex_fetch(__objects, object).numverts; + *numverts = kernel_data_fetch(objects, object).numverts; } /* Offset to an objects patch map */ @@ -381,7 +381,7 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0; - return kernel_tex_fetch(__objects, object).patch_map_offset; + return kernel_data_fetch(objects, object).patch_map_offset; } /* Volume step size */ @@ -392,7 +392,7 @@ ccl_device_inline float object_volume_density(KernelGlobals kg, int object) return 1.0f; } - return kernel_tex_fetch(__objects, object).volume_density; + return kernel_data_fetch(objects, object).volume_density; } ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object) @@ -401,14 +401,14 @@ ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object) return kernel_data.background.volume_step_size; } - return kernel_tex_fetch(__object_volume_step, object); + return kernel_data_fetch(object_volume_step, object); } /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd) { - return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; + return kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).pass_id; } /* Cryptomatte ID */ @@ -418,7 +418,7 @@ ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object) if (object == OBJECT_NONE) return 0.0f; - return kernel_tex_fetch(__objects, object).cryptomatte_object; + return kernel_data_fetch(objects, object).cryptomatte_object; } ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object) @@ -426,49 +426,49 @@ ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object if (object == OBJECT_NONE) return 0; - return kernel_tex_fetch(__objects, object).cryptomatte_asset; + return kernel_data_fetch(objects, object).cryptomatte_asset; } /* Particle data from which object was instanced */ ccl_device_inline uint particle_index(KernelGlobals kg, int particle) { - return kernel_tex_fetch(__particles, particle).index; + return kernel_data_fetch(particles, particle).index; } ccl_device float particle_age(KernelGlobals kg, int particle) { - return kernel_tex_fetch(__particles, particle).age; + return kernel_data_fetch(particles, particle).age; } ccl_device float particle_lifetime(KernelGlobals kg, int particle) { - return kernel_tex_fetch(__particles, particle).lifetime; + return kernel_data_fetch(particles, particle).lifetime; } ccl_device float particle_size(KernelGlobals kg, int particle) { - return kernel_tex_fetch(__particles, particle).size; + return kernel_data_fetch(particles, particle).size; } ccl_device float4 particle_rotation(KernelGlobals kg, int particle) { - return kernel_tex_fetch(__particles, particle).rotation; + return kernel_data_fetch(particles, particle).rotation; } ccl_device float3 particle_location(KernelGlobals kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).location); + return float4_to_float3(kernel_data_fetch(particles, particle).location); } ccl_device float3 particle_velocity(KernelGlobals kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); + return float4_to_float3(kernel_data_fetch(particles, particle).velocity); } ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); + return float4_to_float3(kernel_data_fetch(particles, particle).angular_velocity); } /* Object intersection in BVH */ @@ -488,127 +488,54 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline float bvh_instance_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - *P = transform_point(&tfm, ray->P); - - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); - - return len; -} - -/* Transform ray to exit static object in BVH. */ - -ccl_device_inline float bvh_instance_pop(KernelGlobals kg, +ccl_device_inline void bvh_instance_push(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, ccl_private float3 *dir, - ccl_private float3 *idir, - float t) -{ - if (t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - t /= len(transform_direction(&tfm, ray->D)); - } - - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); - - return t; -} - -/* Same as above, but returns scale factor to apply to multiple intersection distances */ - -ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac) + ccl_private float3 *idir) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); + *P = transform_point(&tfm, ray->P); + + *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D)); *idir = bvh_inverse_direction(*dir); } #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private Transform *itfm) -{ - object_fetch_transform_motion_test(kg, object, ray->time, itfm); - - *P = transform_point(itfm, ray->P); - - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); - - return len; -} - -/* Transform ray to exit motion blurred object in BVH. */ - -ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg, +ccl_device_inline void bvh_instance_motion_push(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, ccl_private float3 *dir, - ccl_private float3 *idir, - float t, - ccl_private Transform *itfm) + ccl_private float3 *idir) { - if (t != FLT_MAX) { - t /= len(transform_direction(itfm, ray->D)); - } + Transform tfm; + object_fetch_transform_motion_test(kg, object, ray->time, &tfm); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = transform_point(&tfm, ray->P); - return t; + *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D)); + *idir = bvh_inverse_direction(*dir); } -/* Same as above, but returns scale factor to apply to multiple intersection distances */ +#endif + +/* Transform ray to exit static object in BVH. */ -ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac, - ccl_private Transform *itfm) +ccl_device_inline void bvh_instance_pop(ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir) { - *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); *P = ray->P; *dir = bvh_clamp_direction(ray->D); *idir = bvh_inverse_direction(*dir); } -#endif - /* TODO: This can be removed when we know if no devices will require explicit * address space qualifiers for this case. */ diff --git a/intern/cycles/kernel/geom/patch.h b/intern/cycles/kernel/geom/patch.h index 1c63a00e30d..ec98ddf51f0 100644 --- a/intern/cycles/kernel/geom/patch.h +++ b/intern/cycles/kernel/geom/patch.h @@ -62,7 +62,7 @@ patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v) int quadrant = patch_map_resolve_quadrant(median, &u, &v); kernel_assert(quadrant >= 0); - uint child = kernel_tex_fetch(__patches, node + quadrant); + uint child = kernel_data_fetch(patches, node + quadrant); /* is the quadrant a hole? */ if (!(child & PATCH_MAP_NODE_IS_SET)) { @@ -73,9 +73,9 @@ patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v) uint index = child & PATCH_MAP_NODE_INDEX_MASK; if (child & PATCH_MAP_NODE_IS_LEAF) { - handle.array_index = kernel_tex_fetch(__patches, index + 0); - handle.patch_index = kernel_tex_fetch(__patches, index + 1); - handle.vert_index = kernel_tex_fetch(__patches, index + 2); + handle.array_index = kernel_data_fetch(patches, index + 0); + handle.patch_index = kernel_data_fetch(patches, index + 1); + handle.vert_index = kernel_data_fetch(patches, index + 2); return handle; } @@ -189,11 +189,11 @@ ccl_device_inline int patch_eval_indices(KernelGlobals kg, int channel, int indices[PATCH_MAX_CONTROL_VERTS]) { - int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + int index_base = kernel_data_fetch(patches, handle->array_index + 2) + handle->vert_index; /* XXX: regular patches only */ for (int i = 0; i < 16; i++) { - indices[i] = kernel_tex_fetch(__patches, index_base + i); + indices[i] = kernel_data_fetch(patches, index_base + i); } return 16; @@ -209,7 +209,7 @@ ccl_device_inline void patch_eval_basis(KernelGlobals kg, float weights_du[PATCH_MAX_CONTROL_VERTS], float weights_dv[PATCH_MAX_CONTROL_VERTS]) { - uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + uint patch_bits = kernel_data_fetch(patches, handle->patch_index + 1); /* read patch param */ float d_scale = 1 << patch_eval_depth(patch_bits); bool non_quad_root = (patch_bits >> 4) & 0x1; @@ -287,7 +287,7 @@ ccl_device float patch_eval_float(KernelGlobals kg, *dv = 0.0f; for (int i = 0; i < num_control; i++) { - float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + float v = kernel_data_fetch(attributes_float, offset + indices[i]); val += v * weights[i]; if (du) @@ -324,7 +324,7 @@ ccl_device float2 patch_eval_float2(KernelGlobals kg, *dv = make_float2(0.0f, 0.0f); for (int i = 0; i < num_control; i++) { - float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); + float2 v = kernel_data_fetch(attributes_float2, offset + indices[i]); val += v * weights[i]; if (du) @@ -361,7 +361,7 @@ ccl_device float3 patch_eval_float3(KernelGlobals kg, *dv = make_float3(0.0f, 0.0f, 0.0f); for (int i = 0; i < num_control; i++) { - float3 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]); + float3 v = kernel_data_fetch(attributes_float3, offset + indices[i]); val += v * weights[i]; if (du) @@ -398,7 +398,7 @@ ccl_device float4 patch_eval_float4(KernelGlobals kg, *dv = zero_float4(); for (int i = 0; i < num_control; i++) { - float4 v = kernel_tex_fetch(__attributes_float4, offset + indices[i]); + float4 v = kernel_data_fetch(attributes_float4, offset + indices[i]); val += v * weights[i]; if (du) @@ -436,7 +436,7 @@ ccl_device float4 patch_eval_uchar4(KernelGlobals kg, for (int i = 0; i < num_control; i++) { float4 v = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, offset + indices[i]))); val += v * weights[i]; if (du) diff --git a/intern/cycles/kernel/geom/point.h b/intern/cycles/kernel/geom/point.h index ee7eca9e0c6..726d829c329 100644 --- a/intern/cycles/kernel/geom/point.h +++ b/intern/cycles/kernel/geom/point.h @@ -26,7 +26,7 @@ ccl_device float point_attribute_float(KernelGlobals kg, # endif if (desc.element == ATTR_ELEMENT_VERTEX) { - return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); + return kernel_data_fetch(attributes_float, desc.offset + sd->prim); } else { return 0.0f; @@ -47,7 +47,7 @@ ccl_device float2 point_attribute_float2(KernelGlobals kg, # endif if (desc.element == ATTR_ELEMENT_VERTEX) { - return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); + return kernel_data_fetch(attributes_float2, desc.offset + sd->prim); } else { return make_float2(0.0f, 0.0f); @@ -68,7 +68,7 @@ ccl_device float3 point_attribute_float3(KernelGlobals kg, # endif if (desc.element == ATTR_ELEMENT_VERTEX) { - return kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim); + return kernel_data_fetch(attributes_float3, desc.offset + sd->prim); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -89,7 +89,7 @@ ccl_device float4 point_attribute_float4(KernelGlobals kg, # endif if (desc.element == ATTR_ELEMENT_VERTEX) { - return kernel_tex_fetch(__attributes_float4, desc.offset + sd->prim); + return kernel_data_fetch(attributes_float4, desc.offset + sd->prim); } else { return zero_float4(); @@ -104,7 +104,7 @@ ccl_device float3 point_position(KernelGlobals kg, ccl_private const ShaderData /* World space center. */ float3 P = (sd->type & PRIMITIVE_MOTION) ? float4_to_float3(motion_point(kg, sd->object, sd->prim, sd->time)) : - float4_to_float3(kernel_tex_fetch(__points, sd->prim)); + float4_to_float3(kernel_data_fetch(points, sd->prim)); if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, &P); @@ -122,7 +122,7 @@ ccl_device float point_radius(KernelGlobals kg, ccl_private const ShaderData *sd { if (sd->type & PRIMITIVE_POINT) { /* World space radius. */ - const float r = kernel_tex_fetch(__points, sd->prim).w; + const float r = kernel_data_fetch(points, sd->prim).w; if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { return r; @@ -155,7 +155,7 @@ ccl_device float point_random(KernelGlobals kg, ccl_private const ShaderData *sd ccl_device float3 point_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd) { - return float4_to_float3(kernel_tex_fetch(__points, sd->prim)); + return float4_to_float3(kernel_data_fetch(points, sd->prim)); } #endif /* __POINTCLOUD__ */ diff --git a/intern/cycles/kernel/geom/point_intersect.h b/intern/cycles/kernel/geom/point_intersect.h index c7ae72bb488..15fb814c58d 100644 --- a/intern/cycles/kernel/geom/point_intersect.h +++ b/intern/cycles/kernel/geom/point_intersect.h @@ -9,17 +9,21 @@ CCL_NAMESPACE_BEGIN #ifdef __POINTCLOUD__ -ccl_device_forceinline bool point_intersect_test( - const float4 point, const float3 P, const float3 dir, const float tmax, ccl_private float *t) +ccl_device_forceinline bool point_intersect_test(const float4 point, + const float3 ray_P, + const float3 ray_D, + const float ray_tmin, + const float ray_tmax, + ccl_private float *t) { const float3 center = float4_to_float3(point); const float radius = point.w; - const float rd2 = 1.0f / dot(dir, dir); + const float rd2 = 1.0f / dot(ray_D, ray_D); - const float3 c0 = center - P; - const float projC0 = dot(c0, dir) * rd2; - const float3 perp = c0 - projC0 * dir; + const float3 c0 = center - ray_P; + const float projC0 = dot(c0, ray_D) * rd2; + const float3 perp = c0 - projC0 * ray_D; const float l2 = dot(perp, perp); const float r2 = radius * radius; if (!(l2 <= r2)) { @@ -28,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test( const float td = sqrt((r2 - l2) * rd2); const float t_front = projC0 - td; - const bool valid_front = (0.0f <= t_front) & (t_front <= tmax); + const bool valid_front = (ray_tmin <= t_front) & (t_front <= ray_tmax); /* Always back-face culling for now. */ # if 0 const float t_back = projC0 + td; - const bool valid_back = (0.0f <= t_back) & (t_back <= tmax); + const bool valid_back = (ray_tmin <= t_back) & (t_back <= ray_tmax); /* check if there is a first hit */ const bool valid_first = valid_front | valid_back; @@ -54,18 +58,19 @@ ccl_device_forceinline bool point_intersect_test( ccl_device_forceinline bool point_intersect(KernelGlobals kg, ccl_private Intersection *isect, - const float3 P, - const float3 dir, - const float tmax, + const float3 ray_P, + const float3 ray_D, + const float ray_tmin, + const float ray_tmax, const int object, const int prim, const float time, const int type) { const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) : - kernel_tex_fetch(__points, prim); + kernel_data_fetch(points, prim); - if (!point_intersect_test(point, P, dir, tmax, &isect->t)) { + if (!point_intersect_test(point, ray_P, ray_D, ray_tmin, ray_tmax, &isect->t)) { return false; } @@ -82,7 +87,7 @@ ccl_device_inline void point_shader_setup(KernelGlobals kg, ccl_private const Intersection *isect, ccl_private const Ray *ray) { - sd->shader = kernel_tex_fetch(__points_shader, isect->prim); + sd->shader = kernel_data_fetch(points_shader, isect->prim); sd->P = ray->P + ray->D * isect->t; /* Texture coordinates, zero for now. */ @@ -94,7 +99,7 @@ ccl_device_inline void point_shader_setup(KernelGlobals kg, /* Compute point center for normal. */ float3 center = float4_to_float3((isect->type & PRIMITIVE_MOTION) ? motion_point(kg, sd->object, sd->prim, sd->time) : - kernel_tex_fetch(__points, sd->prim)); + kernel_data_fetch(points, sd->prim)); if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { object_position_transform_auto(kg, sd, ¢er); } diff --git a/intern/cycles/kernel/geom/primitive.h b/intern/cycles/kernel/geom/primitive.h index 9b4b61fbd84..04b04ff5985 100644 --- a/intern/cycles/kernel/geom/primitive.h +++ b/intern/cycles/kernel/geom/primitive.h @@ -18,14 +18,14 @@ CCL_NAMESPACE_BEGIN * attributes for performance, mainly for GPU performance to avoid bringing in * heavy volume interpolation code. */ -ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float *dx, - ccl_private float *dy) +ccl_device_forceinline float primitive_surface_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float *dx, + ccl_private float *dy) { if (sd->type & PRIMITIVE_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) + if (subd_triangle_patch(kg, sd->prim) == ~0) return triangle_attribute_float(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float(kg, sd, desc, dx, dy); @@ -49,14 +49,14 @@ ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg, } } -ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float2 *dx, - ccl_private float2 *dy) +ccl_device_forceinline float2 primitive_surface_attribute_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float2 *dx, + ccl_private float2 *dy) { if (sd->type & PRIMITIVE_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) + if (subd_triangle_patch(kg, sd->prim) == ~0) return triangle_attribute_float2(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); @@ -80,14 +80,14 @@ ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg, } } -ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float3 *dx, - ccl_private float3 *dy) +ccl_device_forceinline float3 primitive_surface_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float3 *dx, + ccl_private float3 *dy) { if (sd->type & PRIMITIVE_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) + if (subd_triangle_patch(kg, sd->prim) == ~0) return triangle_attribute_float3(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); @@ -118,7 +118,7 @@ ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals k ccl_private float4 *dy) { if (sd->type & PRIMITIVE_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) + if (subd_triangle_patch(kg, sd->prim) == ~0) return triangle_attribute_float4(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float4(kg, sd, desc, dx, dy); @@ -149,15 +149,15 @@ ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals k * attributes for performance, mainly for GPU performance to avoid bringing in * heavy volume interpolation code. */ -ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device_forceinline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd, + const AttributeDescriptor desc) { return sd->type == PRIMITIVE_VOLUME; } -ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device_forceinline float primitive_volume_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) { if (primitive_is_volume_attribute(sd, desc)) { return volume_attribute_value_to_float(volume_attribute_float4(kg, sd, desc)); @@ -167,9 +167,9 @@ ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg, } } -ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device_forceinline float3 primitive_volume_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) { if (primitive_is_volume_attribute(sd, desc)) { return volume_attribute_value_to_float3(volume_attribute_float4(kg, sd, desc)); @@ -179,9 +179,9 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg, } } -ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device_forceinline float4 primitive_volume_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) { if (primitive_is_volume_attribute(sd, desc)) { return volume_attribute_float4(kg, sd, desc); @@ -194,7 +194,7 @@ ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg, /* Default UV coordinate */ -ccl_device_inline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd) +ccl_device_forceinline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd) { const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); @@ -262,8 +262,8 @@ ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd /* Motion vector for motion pass */ -ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, - ccl_private const ShaderData *sd) +ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg, + ccl_private const ShaderData *sd) { /* center position */ float3 center; @@ -320,7 +320,7 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, #endif if (sd->type & PRIMITIVE_TRIANGLE) { /* Triangle */ - if (subd_triangle_patch(kg, sd) == ~0) { + if (subd_triangle_patch(kg, sd->prim) == ~0) { motion_pre = triangle_attribute_float3(kg, sd, desc, NULL, NULL); desc.offset += numverts; motion_post = triangle_attribute_float3(kg, sd, desc, NULL, NULL); diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h index 7a439da427a..b67d19365a3 100644 --- a/intern/cycles/kernel/geom/shader_data.h +++ b/intern/cycles/kernel/geom/shader_data.h @@ -7,6 +7,8 @@ #pragma once +#include "kernel/util/differential.h" + CCL_NAMESPACE_BEGIN /* ShaderData setup from incoming ray */ @@ -18,7 +20,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg, { if (sd->object_flag & SD_OBJECT_MOTION) { sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion); + sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion); } } #endif @@ -40,7 +42,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, sd->ray_length = isect->t; sd->type = isect->type; sd->object = isect->object; - sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); + sd->object_flag = kernel_data_fetch(object_flag, sd->object); sd->prim = isect->prim; sd->lamp = LAMP_NONE; sd->flag = 0; @@ -73,7 +75,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, if (sd->type == PRIMITIVE_TRIANGLE) { /* static triangle */ float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + sd->shader = kernel_data_fetch(tri_shader, sd->prim); /* vectors */ sd->P = triangle_point_from_uv(kg, sd, isect->object, isect->prim, isect->u, isect->v); @@ -106,7 +108,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, } } - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; /* backfacing test */ bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); @@ -123,9 +125,9 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - differential_transfer_compact(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, sd->ray_length); - differential_incoming_compact(&sd->dI, ray->D, ray->dD); - differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + sd->dP = differential_transfer_compact(ray->dP, ray->D, ray->dD, sd->ray_length); + sd->dI = differential_incoming_compact(ray->dD); + differential_dudv_compact(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); #endif } @@ -169,10 +171,10 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, sd->time = time; sd->ray_length = t; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; sd->object_flag = 0; if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); + sd->object_flag |= kernel_data_fetch(object_flag, sd->object); #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, time); @@ -240,8 +242,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ /* no ray differentials here yet */ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); + sd->dP = differential_zero_compact(); + sd->dI = differential_zero_compact(); sd->du = differential_zero(); sd->dv = differential_zero(); #endif @@ -264,21 +266,20 @@ ccl_device void shader_setup_from_displace(KernelGlobals kg, /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; - shader_setup_from_sample( - kg, - sd, - P, - Ng, - I, - shader, - object, - prim, - u, - v, - 0.0f, - 0.5f, - !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), - LAMP_NONE); + shader_setup_from_sample(kg, + sd, + P, + Ng, + I, + shader, + object, + prim, + u, + v, + 0.0f, + 0.5f, + !(kernel_data_fetch(object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), + LAMP_NONE); } /* ShaderData setup for point on curve. */ @@ -300,18 +301,18 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg, sd->ray_length = 0.0f; /* Shader */ - sd->shader = kernel_tex_fetch(__curves, prim).shader_id; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->shader = kernel_data_fetch(curves, prim).shader_id; + sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; /* Object */ sd->object = object; - sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); + sd->object_flag = kernel_data_fetch(object_flag, sd->object); #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, sd->time); #endif /* Get control points. */ - KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + KernelCurve kcurve = kernel_data_fetch(curves, prim); int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; @@ -320,10 +321,10 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg, float4 P_curve[4]; - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + P_curve[0] = kernel_data_fetch(curve_keys, ka); + P_curve[1] = kernel_data_fetch(curve_keys, k0); + P_curve[2] = kernel_data_fetch(curve_keys, k1); + P_curve[3] = kernel_data_fetch(curve_keys, kb); /* Interpolate position and tangent. */ sd->P = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u)); @@ -349,8 +350,8 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg, /* No ray differentials currently. */ #ifdef __RAY_DIFFERENTIALS__ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); + sd->dP = differential_zero_compact(); + sd->dI = differential_zero_compact(); sd->du = differential_zero(); sd->dv = differential_zero(); #endif @@ -373,7 +374,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals kg, sd->Ng = -ray_D; sd->I = -ray_D; sd->shader = kernel_data.background.surface_shader; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->flag = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; sd->object_flag = 0; sd->time = ray_time; sd->ray_length = 0.0f; @@ -392,8 +393,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - sd->dP = differential3_zero(); /* TODO: ray->dP */ - differential_incoming(&sd->dI, sd->dP); + sd->dP = differential_zero_compact(); /* TODO: ray->dP */ + sd->dI = differential_zero_compact(); sd->du = differential_zero(); sd->dv = differential_zero(); #endif @@ -408,7 +409,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, { /* vectors */ - sd->P = ray->P; + sd->P = ray->P + ray->D * ray->tmin; sd->N = -ray->D; sd->Ng = -ray->D; sd->I = -ray->D; @@ -434,15 +435,14 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, # ifdef __RAY_DIFFERENTIALS__ /* differentials */ - sd->dP = differential3_zero(); /* TODO ray->dD */ - differential_incoming(&sd->dI, sd->dP); + sd->dP = differential_zero_compact(); /* TODO ray->dD */ + sd->dI = differential_zero_compact(); sd->du = differential_zero(); sd->dv = differential_zero(); # endif /* for NDC coordinates */ sd->ray_P = ray->P; - sd->ray_dP = ray->dP; } #endif /* __VOLUME__ */ diff --git a/intern/cycles/kernel/geom/subd_triangle.h b/intern/cycles/kernel/geom/subd_triangle.h index 24e1e454b8c..784ba377318 100644 --- a/intern/cycles/kernel/geom/subd_triangle.h +++ b/intern/cycles/kernel/geom/subd_triangle.h @@ -13,11 +13,11 @@ ccl_device_inline void subd_triangle_patch_uv(KernelGlobals kg, ccl_private const ShaderData *sd, float2 uv[3]) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); - uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); - uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); - uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); + uv[0] = kernel_data_fetch(tri_patch_uv, tri_vindex.x); + uv[1] = kernel_data_fetch(tri_patch_uv, tri_vindex.y); + uv[2] = kernel_data_fetch(tri_patch_uv, tri_vindex.z); } /* Vertex indices of patch */ @@ -26,10 +26,10 @@ ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch) { uint4 indices; - indices.x = kernel_tex_fetch(__patches, patch + 0); - indices.y = kernel_tex_fetch(__patches, patch + 1); - indices.z = kernel_tex_fetch(__patches, patch + 2); - indices.w = kernel_tex_fetch(__patches, patch + 3); + indices.x = kernel_data_fetch(patches, patch + 0); + indices.y = kernel_data_fetch(patches, patch + 1); + indices.z = kernel_data_fetch(patches, patch + 2); + indices.w = kernel_data_fetch(patches, patch + 3); return indices; } @@ -38,14 +38,14 @@ ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch) ccl_device_inline uint subd_triangle_patch_face(KernelGlobals kg, int patch) { - return kernel_tex_fetch(__patches, patch + 4); + return kernel_data_fetch(patches, patch + 4); } /* Number of corners on originating face */ ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals kg, int patch) { - return kernel_tex_fetch(__patches, patch + 5) & 0xffff; + return kernel_data_fetch(patches, patch + 5) & 0xffff; } /* Indices of the four corners that are used by the patch */ @@ -54,10 +54,10 @@ ccl_device_inline void subd_triangle_patch_corners(KernelGlobals kg, int patch, { uint4 data; - data.x = kernel_tex_fetch(__patches, patch + 4); - data.y = kernel_tex_fetch(__patches, patch + 5); - data.z = kernel_tex_fetch(__patches, patch + 6); - data.w = kernel_tex_fetch(__patches, patch + 7); + data.x = kernel_data_fetch(patches, patch + 4); + data.y = kernel_data_fetch(patches, patch + 5); + data.z = kernel_data_fetch(patches, patch + 6); + data.w = kernel_data_fetch(patches, patch + 7); int num_corners = data.y & 0xffff; @@ -87,18 +87,18 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, ccl_private float *dx, ccl_private float *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd->prim); #ifdef __PATCH_EVAL__ if (desc.flags & ATTR_SUBDIVIDED) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; + float2 dpdu = uv[1] - uv[0]; + float2 dpdv = uv[2] - uv[0]; /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[0]; float a, dads, dadt; a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); @@ -141,7 +141,7 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, if (dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); + return kernel_data_fetch(attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; @@ -149,10 +149,10 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); - float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + float f0 = kernel_data_fetch(attributes_float, desc.offset + v.x); + float f1 = kernel_data_fetch(attributes_float, desc.offset + v.y); + float f2 = kernel_data_fetch(attributes_float, desc.offset + v.z); + float f3 = kernel_data_fetch(attributes_float, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -165,12 +165,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_CORNER) { float2 uv[3]; @@ -179,10 +179,10 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, int corners[4]; subd_triangle_patch_corners(kg, patch, corners); - float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); - float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); - float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); - float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); + float f0 = kernel_data_fetch(attributes_float, corners[0] + desc.offset); + float f1 = kernel_data_fetch(attributes_float, corners[1] + desc.offset); + float f2 = kernel_data_fetch(attributes_float, corners[2] + desc.offset); + float f3 = kernel_data_fetch(attributes_float, corners[3] + desc.offset); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -195,12 +195,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { if (dx) @@ -208,7 +208,7 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, if (dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, desc.offset); + return kernel_data_fetch(attributes_float, desc.offset); } else { if (dx) @@ -226,18 +226,18 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, ccl_private float2 *dx, ccl_private float2 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd->prim); #ifdef __PATCH_EVAL__ if (desc.flags & ATTR_SUBDIVIDED) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; + float2 dpdu = uv[1] - uv[0]; + float2 dpdv = uv[2] - uv[0]; /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[0]; float2 a, dads, dadt; @@ -281,8 +281,7 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, if (dy) *dy = make_float2(0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float2, - desc.offset + subd_triangle_patch_face(kg, patch)); + return kernel_data_fetch(attributes_float2, desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; @@ -290,10 +289,10 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); - float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); + float2 f0 = kernel_data_fetch(attributes_float2, desc.offset + v.x); + float2 f1 = kernel_data_fetch(attributes_float2, desc.offset + v.y); + float2 f2 = kernel_data_fetch(attributes_float2, desc.offset + v.z); + float2 f3 = kernel_data_fetch(attributes_float2, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -306,12 +305,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_CORNER) { float2 uv[3]; @@ -322,10 +321,10 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, float2 f0, f1, f2, f3; - f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); + f0 = kernel_data_fetch(attributes_float2, corners[0] + desc.offset); + f1 = kernel_data_fetch(attributes_float2, corners[1] + desc.offset); + f2 = kernel_data_fetch(attributes_float2, corners[2] + desc.offset); + f3 = kernel_data_fetch(attributes_float2, corners[3] + desc.offset); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -338,12 +337,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { if (dx) @@ -351,7 +350,7 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, if (dy) *dy = make_float2(0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float2, desc.offset); + return kernel_data_fetch(attributes_float2, desc.offset); } else { if (dx) @@ -369,18 +368,18 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, ccl_private float3 *dx, ccl_private float3 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd->prim); #ifdef __PATCH_EVAL__ if (desc.flags & ATTR_SUBDIVIDED) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; + float2 dpdu = uv[1] - uv[0]; + float2 dpdv = uv[2] - uv[0]; /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[0]; float3 a, dads, dadt; a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); @@ -423,8 +422,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, if (dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float3, - desc.offset + subd_triangle_patch_face(kg, patch)); + return kernel_data_fetch(attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; @@ -432,10 +430,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x); - float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y); - float3 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z); - float3 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w); + float3 f0 = kernel_data_fetch(attributes_float3, desc.offset + v.x); + float3 f1 = kernel_data_fetch(attributes_float3, desc.offset + v.y); + float3 f2 = kernel_data_fetch(attributes_float3, desc.offset + v.z); + float3 f3 = kernel_data_fetch(attributes_float3, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -448,12 +446,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_CORNER) { float2 uv[3]; @@ -464,10 +462,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, float3 f0, f1, f2, f3; - f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset); + f0 = kernel_data_fetch(attributes_float3, corners[0] + desc.offset); + f1 = kernel_data_fetch(attributes_float3, corners[1] + desc.offset); + f2 = kernel_data_fetch(attributes_float3, corners[2] + desc.offset); + f3 = kernel_data_fetch(attributes_float3, corners[3] + desc.offset); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -480,12 +478,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { if (dx) @@ -493,7 +491,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, if (dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float3, desc.offset); + return kernel_data_fetch(attributes_float3, desc.offset); } else { if (dx) @@ -511,18 +509,18 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, ccl_private float4 *dx, ccl_private float4 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd->prim); #ifdef __PATCH_EVAL__ if (desc.flags & ATTR_SUBDIVIDED) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; + float2 dpdu = uv[1] - uv[0]; + float2 dpdv = uv[2] - uv[0]; /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[0]; float4 a, dads, dadt; if (desc.type == NODE_ATTR_RGBA) { @@ -570,8 +568,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, if (dy) *dy = zero_float4(); - return kernel_tex_fetch(__attributes_float4, - desc.offset + subd_triangle_patch_face(kg, patch)); + return kernel_data_fetch(attributes_float4, desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; @@ -579,10 +576,10 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + v.x); - float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + v.y); - float4 f2 = kernel_tex_fetch(__attributes_float4, desc.offset + v.z); - float4 f3 = kernel_tex_fetch(__attributes_float4, desc.offset + v.w); + float4 f0 = kernel_data_fetch(attributes_float4, desc.offset + v.x); + float4 f1 = kernel_data_fetch(attributes_float4, desc.offset + v.y); + float4 f2 = kernel_data_fetch(attributes_float4, desc.offset + v.z); + float4 f3 = kernel_data_fetch(attributes_float4, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -595,12 +592,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { float2 uv[3]; @@ -613,19 +610,19 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, if (desc.element == ATTR_ELEMENT_CORNER_BYTE) { f0 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, corners[0] + desc.offset))); f1 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, corners[1] + desc.offset))); f2 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, corners[2] + desc.offset))); f3 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, corners[3] + desc.offset))); } else { - f0 = kernel_tex_fetch(__attributes_float4, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float4, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float4, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float4, corners[3] + desc.offset); + f0 = kernel_data_fetch(attributes_float4, corners[0] + desc.offset); + f1 = kernel_data_fetch(attributes_float4, corners[1] + desc.offset); + f2 = kernel_data_fetch(attributes_float4, corners[2] + desc.offset); + f3 = kernel_data_fetch(attributes_float4, corners[3] + desc.offset); } if (subd_triangle_patch_num_corners(kg, patch) != 4) { @@ -639,12 +636,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a; if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; + *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a; #endif - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a; } else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { if (dx) @@ -652,7 +649,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, if (dy) *dy = zero_float4(); - return kernel_tex_fetch(__attributes_float4, desc.offset); + return kernel_data_fetch(attributes_float4, desc.offset); } else { if (dx) diff --git a/intern/cycles/kernel/geom/triangle.h b/intern/cycles/kernel/geom/triangle.h index 8ac7e67ff05..6b9450d59ef 100644 --- a/intern/cycles/kernel/geom/triangle.h +++ b/intern/cycles/kernel/geom/triangle.h @@ -15,10 +15,10 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderData *sd) { /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - const float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - const float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); + const float3 v0 = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + const float3 v1 = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + const float3 v2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2); /* return normal */ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { @@ -40,15 +40,15 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg, ccl_private int *shader) { /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + float3 v0 = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + float3 v1 = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + float3 v2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2); /* compute point */ - float t = 1.0f - u - v; - *P = (u * v0 + v * v1 + t * v2); + float w = 1.0f - u - v; + *P = (w * v0 + u * v1 + v * v2); /* get object flags */ - int object_flag = kernel_tex_fetch(__object_flag, object); + int object_flag = kernel_data_fetch(object_flag, object); /* compute normal */ if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { *Ng = normalize(cross(v2 - v0, v1 - v0)); @@ -57,17 +57,17 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg, *Ng = normalize(cross(v1 - v0, v2 - v0)); } /* shader`*/ - *shader = kernel_tex_fetch(__tri_shader, prim); + *shader = kernel_data_fetch(tri_shader, prim); } /* Triangle vertex locations */ ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3]) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + P[0] = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + P[1] = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + P[2] = kernel_data_fetch(tri_verts, tri_vindex.w + 2); } /* Triangle vertex locations and vertex normals */ @@ -77,13 +77,13 @@ ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg, float3 P[3], float3 N[3]) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); - N[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); - N[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); - N[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + P[0] = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + P[1] = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + P[2] = kernel_data_fetch(tri_verts, tri_vindex.w + 2); + N[0] = kernel_data_fetch(tri_vnormal, tri_vindex.x); + N[1] = kernel_data_fetch(tri_vnormal, tri_vindex.y); + N[2] = kernel_data_fetch(tri_vnormal, tri_vindex.z); } /* Interpolate smooth vertex normal from vertices */ @@ -92,12 +92,12 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v) { /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); - float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); - float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + float3 n0 = kernel_data_fetch(tri_vnormal, tri_vindex.x); + float3 n1 = kernel_data_fetch(tri_vnormal, tri_vindex.y); + float3 n2 = kernel_data_fetch(tri_vnormal, tri_vindex.z); - float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); + float3 N = safe_normalize((1.0f - u - v) * n0 + u * n1 + v * n2); return is_zero(N) ? Ng : N; } @@ -106,10 +106,10 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized( KernelGlobals kg, ccl_private const ShaderData *sd, float3 Ng, int prim, float u, float v) { /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); - float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); - float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + float3 n0 = kernel_data_fetch(tri_vnormal, tri_vindex.x); + float3 n1 = kernel_data_fetch(tri_vnormal, tri_vindex.y); + float3 n2 = kernel_data_fetch(tri_vnormal, tri_vindex.z); /* ensure that the normals are in object space */ if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { @@ -118,7 +118,7 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized( object_inverse_normal_transform(kg, sd, &n2); } - float3 N = (1.0f - u - v) * n2 + u * n0 + v * n1; + float3 N = (1.0f - u - v) * n0 + u * n1 + v * n2; return is_zero(N) ? Ng : N; } @@ -131,14 +131,14 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals kg, ccl_private float3 *dPdv) { /* fetch triangle vertex coordinates */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); - const float3 p1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); - const float3 p2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, prim); + const float3 p0 = kernel_data_fetch(tri_verts, tri_vindex.w + 0); + const float3 p1 = kernel_data_fetch(tri_verts, tri_vindex.w + 1); + const float3 p2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2); /* compute derivatives of P w.r.t. uv */ - *dPdu = (p0 - p2); - *dPdv = (p1 - p2); + *dPdu = (p1 - p0); + *dPdv = (p2 - p0); } /* Reading attributes on various triangle elements */ @@ -153,26 +153,26 @@ ccl_device float triangle_attribute_float(KernelGlobals kg, float f0, f1, f2; if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); + f0 = kernel_data_fetch(attributes_float, desc.offset + tri_vindex.x); + f1 = kernel_data_fetch(attributes_float, desc.offset + tri_vindex.y); + f2 = kernel_data_fetch(attributes_float, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; - f0 = kernel_tex_fetch(__attributes_float, tri + 0); - f1 = kernel_tex_fetch(__attributes_float, tri + 1); - f2 = kernel_tex_fetch(__attributes_float, tri + 2); + f0 = kernel_data_fetch(attributes_float, tri + 0); + f1 = kernel_data_fetch(attributes_float, tri + 1); + f2 = kernel_data_fetch(attributes_float, tri + 2); } #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0; if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; + *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0; #endif - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -185,7 +185,7 @@ ccl_device float triangle_attribute_float(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float, offset); + return kernel_data_fetch(attributes_float, offset); } else { return 0.0f; @@ -203,26 +203,26 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals kg, float2 f0, f1, f2; if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); + f0 = kernel_data_fetch(attributes_float2, desc.offset + tri_vindex.x); + f1 = kernel_data_fetch(attributes_float2, desc.offset + tri_vindex.y); + f2 = kernel_data_fetch(attributes_float2, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; - f0 = kernel_tex_fetch(__attributes_float2, tri + 0); - f1 = kernel_tex_fetch(__attributes_float2, tri + 1); - f2 = kernel_tex_fetch(__attributes_float2, tri + 2); + f0 = kernel_data_fetch(attributes_float2, tri + 0); + f1 = kernel_data_fetch(attributes_float2, tri + 1); + f2 = kernel_data_fetch(attributes_float2, tri + 2); } #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0; if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; + *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0; #endif - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -235,7 +235,7 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float2, offset); + return kernel_data_fetch(attributes_float2, offset); } else { return make_float2(0.0f, 0.0f); @@ -253,26 +253,26 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg, float3 f0, f1, f2; if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); + f0 = kernel_data_fetch(attributes_float3, desc.offset + tri_vindex.x); + f1 = kernel_data_fetch(attributes_float3, desc.offset + tri_vindex.y); + f2 = kernel_data_fetch(attributes_float3, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; - f0 = kernel_tex_fetch(__attributes_float3, tri + 0); - f1 = kernel_tex_fetch(__attributes_float3, tri + 1); - f2 = kernel_tex_fetch(__attributes_float3, tri + 2); + f0 = kernel_data_fetch(attributes_float3, tri + 0); + f1 = kernel_data_fetch(attributes_float3, tri + 1); + f2 = kernel_data_fetch(attributes_float3, tri + 2); } #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0; if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; + *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0; #endif - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -285,7 +285,7 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); + return kernel_data_fetch(attributes_float3, offset); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -304,36 +304,36 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg, float4 f0, f1, f2; if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.z); + const uint4 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim); + f0 = kernel_data_fetch(attributes_float4, desc.offset + tri_vindex.x); + f1 = kernel_data_fetch(attributes_float4, desc.offset + tri_vindex.y); + f2 = kernel_data_fetch(attributes_float4, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; if (desc.element == ATTR_ELEMENT_CORNER) { - f0 = kernel_tex_fetch(__attributes_float4, tri + 0); - f1 = kernel_tex_fetch(__attributes_float4, tri + 1); - f2 = kernel_tex_fetch(__attributes_float4, tri + 2); + f0 = kernel_data_fetch(attributes_float4, tri + 0); + f1 = kernel_data_fetch(attributes_float4, tri + 1); + f2 = kernel_data_fetch(attributes_float4, tri + 2); } else { f0 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, tri + 0))); f1 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, tri + 1))); f2 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2))); + color_uchar4_to_float4(kernel_data_fetch(attributes_uchar4, tri + 2))); } } #ifdef __RAY_DIFFERENTIALS__ if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0; if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; + *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0; #endif - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -346,7 +346,7 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float4, offset); + return kernel_data_fetch(attributes_float4, offset); } else { return zero_float4(); diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h index fe531e6868a..847ed22fddd 100644 --- a/intern/cycles/kernel/geom/triangle_intersect.h +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -17,23 +17,24 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, + float tmin, float tmax, uint visibility, int object, int prim, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; - const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w; + const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); float t, u, v; - if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { + if (ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { #ifdef __VISIBILITY_FLAG__ /* Visibility flag test. we do it here under the assumption * that most triangles are culled by node flags. */ - if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + if (kernel_data_fetch(prim_visibility, prim_addr) & visibility) #endif { isect->object = object; @@ -62,16 +63,17 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, int object, int prim, int prim_addr, + float tmin, float tmax, ccl_private uint *lcg_state, int max_hits) { - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; - const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w; + const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); float t, u, v; - if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { + if (!ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { return false; } @@ -139,13 +141,13 @@ ccl_device_inline float3 triangle_point_from_uv(KernelGlobals kg, const float u, const float v) { - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - float w = 1.0f - u - v; + const uint tri_vindex = kernel_data_fetch(tri_vindex, isect_prim).w; + const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); - float3 P = u * tri_a + v * tri_b + w * tri_c; + /* This appears to give slightly better precision than interpolating with w = (1 - u - v). */ + float3 P = tri_a + u * (tri_b - tri_a) + v * (tri_c - tri_a); if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); diff --git a/intern/cycles/kernel/geom/volume.h b/intern/cycles/kernel/geom/volume.h index 22715dee5bf..885a420c97f 100644 --- a/intern/cycles/kernel/geom/volume.h +++ b/intern/cycles/kernel/geom/volume.h @@ -29,7 +29,7 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals kg, object_inverse_position_transform(kg, sd, &P); if (desc.offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, desc); + Transform tfm = primitive_attribute_matrix(kg, desc); P = transform_point(&tfm, P); } @@ -62,7 +62,7 @@ ccl_device float4 volume_attribute_float4(KernelGlobals kg, const AttributeDescriptor desc) { if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - return kernel_tex_fetch(__attributes_float4, desc.offset); + return kernel_data_fetch(attributes_float4, desc.offset); } else if (desc.element == ATTR_ELEMENT_VOXEL) { /* todo: optimize this so we don't have to transform both here and in diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h new file mode 100644 index 00000000000..839dfe244ac --- /dev/null +++ b/intern/cycles/kernel/integrator/displacement_shader.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate displacement shader. */ + +#pragma once + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +template<typename ConstIntegratorGenericState> +ccl_device void displacement_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd) +{ + sd->num_closure = 0; + sd->num_closure_left = 0; + + /* this will modify sd->P */ +#ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_displacement(kg, state, sd); + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( + kg, state, sd, NULL, 0); +#endif + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 0db4241b6e3..eca2c0b9ffb 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" @@ -49,7 +49,8 @@ ccl_device const float2 bake_offset_towards_center(KernelGlobals kg, const float3 to_center = center - P; const float3 offset_P = P + normalize(to_center) * - min(len(to_center), max(max3(fabs(P)), 1.0f) * position_offset); + min(len(to_center), + max(reduce_max(fabs(P)), 1.0f) * position_offset); /* Compute barycentric coordinates at new position. */ const float3 v1 = tri_verts[1] - tri_verts[0]; @@ -91,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Setup render buffers. */ @@ -111,8 +112,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, int prim = __float_as_uint(primitive[1]); if (prim == -1) { /* Accumulate transparency for empty pixels. */ - kernel_accum_transparent(kg, state, 0, 1.0f, buffer); - return false; + film_write_transparent(kg, state, 0, 1.0f, buffer); + return true; } prim += kernel_data.bake.tri_offset; @@ -120,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Random number generator. */ const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; - float filter_x, filter_y; - if (sample == 0) { - filter_x = filter_y = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Initialize path state for path integration. */ path_state_init_integrator(kg, state, sample, rng_hash); @@ -149,18 +145,24 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Sub-pixel offset. */ if (sample > 0) { - u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); - v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f), + 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f), 1.0f - u); } + /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */ + const float tmp = u; + u = v; + v = 1.0f - tmp - v; + /* Position and normal on triangle. */ const int object = kernel_data.bake.object_index; float3 P, Ng; int shader; triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); P = transform_point_auto(&tfm, P); @@ -173,14 +175,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = zero_float3(); ray.D = normalize(P); - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); integrator_state_write_ray(kg, state, &ray); /* Setup next kernel to execute. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } else { /* Surface baking. */ @@ -193,15 +196,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, } const int shader_index = shader & SHADER_MASK; - const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags; + const int shader_flags = kernel_data_fetch(shaders, shader_index).flags; /* Fast path for position and normal passes not affected by shaders. */ if (kernel_data.film.pass_position != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P); + film_write_pass_float3(buffer + kernel_data.film.pass_position, P); return true; } else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, N); return true; } @@ -209,7 +212,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = P + N; ray.D = -N; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; /* Setup differentials. */ @@ -246,13 +250,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); } } diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index 9fe27cdda9a..8df3e1b9fb3 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg, ccl_private Ray *ray) { /* Filter sampling. */ - float filter_u, filter_v; - - if (sample == 0) { - filter_u = 0.5f; - filter_v = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Depth of field sampling. */ - float lens_u = 0.0f, lens_v = 0.0f; - if (kernel_data.cam.aperturesize > 0.0f) { - path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); - } + const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ? + path_rng_2D(kg, rng_hash, sample, PRNG_LENS) : + zero_float2(); /* Motion blur time sampling. */ - float time = 0.0f; -#ifdef __CAMERA_MOTION__ - if (kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); -#endif + const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ? + path_rng_1D(kg, rng_hash, sample, PRNG_TIME) : + 0.0f; /* Generate camera ray. */ - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); + camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray); } /* Return false to indicate that this pixel is finished. @@ -67,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } @@ -76,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, * This logic allows to both count actual number of samples per pixel, and to add samples to this * pixel after it was converged and samples were added somewhere else (in which case the * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Initialize random number seed for path. */ @@ -86,7 +76,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Generate camera ray. */ Ray ray; integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); - if (ray.t == 0.0f) { + if (ray.tmax == 0.0f) { return true; } @@ -100,10 +90,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Continue with intersect_closest kernel, optionally initializing volume * stack before that if the camera may be inside a volume. */ if (kernel_data.cam.is_inside_volume) { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); } else { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } return true; diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 2dfac44b414..c7c3d74fa21 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -5,13 +5,13 @@ #include "kernel/camera/projection.h" +#include "kernel/film/light_passes.h" + #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" #include "kernel/light/light.h" -#include "kernel/util/differential.h" - #include "kernel/geom/geom.h" #include "kernel/bvh/bvh.h" @@ -87,7 +87,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher( return; } - kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); + film_write_shadow_catcher_bounce_data(kg, state, render_buffer); /* Mark state as having done a shadow catcher split so that it stops contributing to * the shadow catcher matte pass, but keeps contributing to the combined pass. */ @@ -109,37 +109,38 @@ ccl_device_forceinline void integrator_split_shadow_catcher( /* If using background pass, schedule background shading kernel so that we have a background * to alpha-over on. The background kernel will then continue the path afterwards. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } /* Continue with shading shadow catcher surface. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after updating volume stack for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume( KernelGlobals kg, IntegratorState state) { @@ -149,27 +150,28 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche integrator_state_read_isect(kg, state, &isect); const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, &isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after executing background shader for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background( KernelGlobals kg, IntegratorState state) { @@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche * * Note that current_kernel is a template value since making this a variable * leads to poor performance with CUDA atomics. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel( KernelGlobals kg, IntegratorState state, @@ -203,13 +206,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (!integrator_state_volume_stack_is_empty(kg, state)) { const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP); const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE; - const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; + const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0; if (!integrator_intersect_terminate(kg, state, flags)) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } return; } @@ -218,12 +221,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (hit) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); } else { /* Hit a surface, continue with surface kernel unless terminated. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; if (!integrator_intersect_terminate(kg, state, flags)) { const int object_flags = intersection_get_object_flags(kg, isect); @@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel( (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #endif } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } } @@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( * * The logic here matches integrator_intersect_next_kernel, except that * volume shading and termination testing have already been done. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( KernelGlobals kg, IntegratorState state, @@ -273,29 +276,29 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( if (isect->prim != PRIM_NONE) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { /* Hit a surface, continue with surface kernel unless terminated. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } } @@ -321,7 +324,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; integrator_state_read_ray(kg, state, &ray); - kernel_assert(ray.t != 0.0f); + kernel_assert(ray.tmax != 0.0f); const uint visibility = path_state_ray_visibility(state); const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); @@ -329,12 +332,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Trick to use short AO rays to approximate indirect light at the end of the path. */ if (path_state_ao_bounce(kg, state)) { - ray.t = kernel_data.integrator.ao_bounces_distance; + ray.tmax = kernel_data.integrator.ao_bounces_distance; if (last_isect_object != OBJECT_NONE) { - const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; + const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance; if (object_ao_distance != 0.0f) { - ray.t = object_ao_distance; + ray.tmax = object_ao_distance; } } } @@ -366,7 +369,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, bool from_caustic_caster = false; bool from_caustic_receiver = false; if (!(path_flag & PATH_RAY_CAMERA) && last_isect_object != OBJECT_NONE) { - const int object_flags = kernel_tex_fetch(__object_flag, last_isect_object); + const int object_flags = kernel_data_fetch(object_flag, last_isect_object); from_caustic_receiver = (object_flags & SD_OBJECT_CAUSTICS_RECEIVER); from_caustic_caster = (object_flags & SD_OBJECT_CAUSTICS_CASTER); } diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 3e746998225..25ff3d5b23f 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k } #ifdef __TRANSPARENT_SHADOWS__ -# if defined(__KERNEL_CPU__) +# ifndef __KERNEL_GPU__ ccl_device int shadow_intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt if (opaque_hit) { /* Hit an opaque surface, shadow path ends here. */ - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { @@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt * * TODO: could also write to render buffer directly if no transparent shadows? * Could save a kernel execution for the common case. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h index 0a2c4ad680d..f439d6905a0 100644 --- a/intern/cycles/kernel/integrator/intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 49ef01dc870..c2490581e4d 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -5,7 +5,6 @@ #include "kernel/bvh/bvh.h" #include "kernel/geom/geom.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -24,7 +23,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, Ray volume_ray ccl_optional_struct_init; volume_ray.P = from_P; - volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); + volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax); + volume_ray.tmin = 0.0f; volume_ray.self.object = INTEGRATOR_STATE(state, isect, object); volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim); volume_ray.self.light_object = OBJECT_NONE; @@ -37,8 +37,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { Intersection *isect = hits; @@ -58,12 +57,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, volume_stack_enter_exit(kg, state, stack_sd); /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; - if (volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); - } ++step; } #endif @@ -82,7 +78,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s /* Trace ray in random direction. Any direction works, Z up is a guess to get the * fewest hits. */ volume_ray.D = make_float3(0.0f, 0.0f, 1.0f); - volume_ray.t = FLT_MAX; + volume_ray.tmin = 0.0f; + volume_ray.tmax = FLT_MAX; volume_ray.self.object = OBJECT_NONE; volume_ray.self.prim = PRIM_NONE; volume_ray.self.light_object = OBJECT_NONE; @@ -109,8 +106,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; Intersection *isect = hits; @@ -199,7 +195,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s } /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; ++step; @@ -222,7 +218,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt } else { /* Volume stack init for camera rays, continue with intersection of camera ray. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h index ad83f82d091..a0ad7afe591 100644 --- a/intern/cycles/kernel/integrator/mnee.h +++ b/intern/cycles/kernel/integrator/mnee.h @@ -115,7 +115,7 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, { /* correct light sample position/direction and pdf * NOTE: preserve pdf in area measure */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) { ls->D = normalize_len(ls->P - P, &ls->t); @@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, } } else if (ls->type == LIGHT_AREA) { + float invarea = fabsf(klight->area.invarea); ls->D = normalize_len(ls->P - P, &ls->t); - ls->pdf = fabsf(klight->area.invarea); + ls->pdf = invarea; + if (klight->area.tan_spread > 0.f) { + ls->eval_fac = 0.25f * invarea; + ls->eval_fac *= light_spread_attenuation( + ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread); + } } ls->pdf *= kernel_data.integrator.pdf_lights; @@ -154,12 +160,12 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, ccl_private const Intersection *isect, ccl_private ShaderData *sd_vtx) { - sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : + sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_data_fetch(prim_object, isect->prim) : isect->object; sd_vtx->type = isect->type; sd_vtx->flag = 0; - sd_vtx->object_flag = kernel_tex_fetch(__object_flag, sd_vtx->object); + sd_vtx->object_flag = kernel_data_fetch(object_flag, sd_vtx->object); /* Matrices and time. */ shader_setup_object_transforms(kg, sd_vtx, ray->time); @@ -171,7 +177,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, sd_vtx->u = isect->u; sd_vtx->v = isect->v; - sd_vtx->shader = kernel_tex_fetch(__tri_shader, sd_vtx->prim); + sd_vtx->shader = kernel_data_fetch(tri_shader, sd_vtx->prim); float3 verts[3]; float3 normals[3]; @@ -180,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals); /* Compute refined position (same code as in triangle_point_from_uv). */ - sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2]; + sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2]; if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd_vtx); sd_vtx->P = transform_point(&tfm, sd_vtx->P); @@ -207,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, } /* Tangent space (position derivatives) WRT barycentric (u, v). */ - float3 dp_du = verts[0] - verts[2]; - float3 dp_dv = verts[1] - verts[2]; + float3 dp_du = verts[1] - verts[0]; + float3 dp_dv = verts[2] - verts[0]; /* Geometric normal. */ vtx->ng = normalize(cross(dp_du, dp_dv)); @@ -217,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, /* Shading normals: Interpolate normals between vertices. */ float n_len; - vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v + - normals[2] * (1.0f - sd_vtx->u - sd_vtx->v), + vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u + + normals[2] * sd_vtx->v, &n_len); /* Shading normal derivatives WRT barycentric (u, v) * we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using: * d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */ const float inv_n_len = 1.f / n_len; - float3 dn_du = inv_n_len * (normals[0] - normals[2]); - float3 dn_dv = inv_n_len * (normals[1] - normals[2]); + float3 dn_du = inv_n_len * (normals[1] - normals[0]); + float3 dn_dv = inv_n_len * (normals[2] - normals[0]); dn_du -= vtx->n * dot(vtx->n, dn_du); dn_dv -= vtx->n * dot(vtx->n, dn_dv); @@ -386,7 +392,7 @@ ccl_device_forceinline bool mnee_compute_constraint_derivatives( /* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx: * dh / dx = A * dx = inverse(A) x dh - * to use for specular specular manifold walk + * to use for specular manifold walk * (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm * for block tridiagonal matrix based linear system solve) */ ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count, @@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.light_prim = PRIM_NONE; projection_ray.dP = differential_make_compact(sd->dP); projection_ray.dD = differential_zero_compact(); + projection_ray.tmin = 0.0f; projection_ray.time = sd->time; Intersection projection_isect; @@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.prim = pv.prim; projection_ray.P = pv.p; } - projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t); - projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; + projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax); + projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; bool projection_success = false; for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) { @@ -509,7 +516,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, break; int hit_object = (projection_isect.object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, projection_isect.prim) : + kernel_data_fetch(prim_object, projection_isect.prim) : projection_isect.object; if (hit_object == mv.object) { @@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.object = projection_isect.object; projection_ray.self.prim = projection_isect.prim; - projection_ray.P += projection_isect.t * projection_ray.D; - projection_ray.t -= projection_isect.t; + projection_ray.tmin = intersection_t_offset(projection_isect.t); } if (!projection_success) { reduce_stepsize = true; @@ -628,9 +634,9 @@ mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample * We assume here that the pdf (in half-vector measure) is the same as * the one calculation when sampling the microfacet normals from the * specular chain above: this allows us to simplify the bsdf weight */ -ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, - float3 wi, - float3 wo) +ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, + float3 wi, + float3 wo) { ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure; @@ -801,7 +807,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len); /* Initialize throughput and evaluate receiver bsdf * |n.wo|. */ - shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); + surface_shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); /* Update light sample with new position / direct.ion * and keep pdf in vertex area measure */ @@ -829,8 +835,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, 1; INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count; - float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); - bsdf_eval_mul3(throughput, light_eval / ls->pdf); + Spectrum light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); + bsdf_eval_mul(throughput, light_eval / ls->pdf); /* Generalized geometry term. */ @@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, Ray probe_ray; probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; + probe_ray.tmin = 0.0f; probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); probe_ray.time = sd->time; @@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, ccl_private const ManifoldVertex &v = vertices[vi]; /* Check visibility. */ - probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t); + probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax); if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) { int hit_object = (probe_isect.object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, probe_isect.prim) : + kernel_data_fetch(prim_object, probe_isect.prim) : probe_isect.object; /* Test whether the ray hit the appropriate object at its intended location. */ - if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE) + if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE) return false; } probe_ray.self.object = v.object; @@ -906,7 +913,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi; /* Evaluate shader nodes at solution vi. */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Set light looking dir. */ @@ -917,8 +924,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, /* Evaluate product term inside eq.6 at solution interface. vi * divided by corresponding sampled pdf: * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */ - float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); - bsdf_eval_mul3(throughput, bsdf_contribution); + Spectrum bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); + bsdf_eval_mul(throughput, bsdf_contribution); } /* Restore original state path bounce info. */ @@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; probe_ray.P = sd->P; + probe_ray.tmin = 0.0f; if (ls->t == FLT_MAX) { /* Distant / env light. */ probe_ray.D = ls->D; - probe_ray.t = ls->t; + probe_ray.tmax = ls->t; } else { /* Other lights, avoid self-intersection. */ probe_ray.D = ls->P - probe_ray.P; - probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t); + probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax); } probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); @@ -998,7 +1006,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, return 0; /* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Get and sample refraction bsdf */ @@ -1025,10 +1033,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, float2 h = zero_float2(); if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) { /* Sample transmissive microfacet bsdf. */ - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - h = mnee_sample_bsdf_dh( - bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v); + const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + h = mnee_sample_bsdf_dh(bsdf->type, + microfacet_bsdf->alpha_x, + microfacet_bsdf->alpha_y, + bsdf_uv.x, + bsdf_uv.y); } /* Setup differential geometry on vertex. */ @@ -1042,9 +1052,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.object = probe_isect.object; probe_ray.self.prim = probe_isect.prim; - probe_ray.P += probe_isect.t * probe_ray.D; - if (ls->t != FLT_MAX) - probe_ray.t -= probe_isect.t; + probe_ray.tmin = intersection_t_offset(probe_isect.t); }; /* Mark the manifold walk invalid to keep mollification on by default. */ diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index ec93ac6d46f..54560905397 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void path_state_init_queues(IntegratorState state) { INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; #endif @@ -48,14 +48,13 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; - INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM; INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND; INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; - INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + INTEGRATOR_STATE_WRITE(state, path, throughput) = one_spectrum(); #ifdef __MNEE__ INTEGRATOR_STATE_WRITE(state, path, mnee) = 0; @@ -75,7 +74,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, #ifdef __DENOISING_FEATURES__ if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_spectrum(); } #endif } @@ -250,7 +249,7 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, /* Probabilistic termination: use sqrt() to roughly match typical view * transform and do path termination a bit later on average. */ - return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); + return min(sqrtf(reduce_max(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); } ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) @@ -299,38 +298,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta ccl_device_inline float path_state_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int dimension) + const int dimension) { return path_rng_1D( kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int dimension) { - path_rng_2D( - kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); -} - -ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, - ccl_private const RNGState *rng_state, - uint hash) -{ - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. */ - return path_rng_1D( - kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); + return path_rng_2D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension) + const int branch, + const int num_branches, + const int dimension) { return path_rng_1D(kg, rng_state->rng_hash, @@ -338,20 +324,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, rng_state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int branch, + const int num_branches, + const int dimension) { - path_rng_2D(kg, - rng_state->rng_hash, - rng_state->sample * num_branches + branch, - rng_state->rng_offset + dimension, - fx, - fy); + return path_rng_2D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension); } /* Utility functions to get light termination value, diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 72ecf67e8a0..30ce0999258 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -3,18 +3,19 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" + +#include "kernel/integrator/surface_shader.h" + #include "kernel/light/light.h" #include "kernel/light/sample.h" CCL_NAMESPACE_BEGIN -ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) +ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) { -#ifdef __BACKGROUND__ const int shader = kernel_data.background.surface_shader; const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); @@ -26,56 +27,35 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return zero_float3(); + return zero_spectrum(); } /* Use fast constant background color if available. */ - float3 L = zero_float3(); - if (!shader_constant_emission_eval(kg, shader, &L)) { - /* Evaluate background shader. */ - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); - shader_setup_from_background(kg, - emission_sd, - INTEGRATOR_STATE(state, ray, P), - INTEGRATOR_STATE(state, ray, D), - INTEGRATOR_STATE(state, ray, time)); - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( - kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - - L = shader_background_eval(emission_sd); + Spectrum L = zero_spectrum(); + if (surface_shader_constant_emission(kg, shader, &L)) { + return L; } - /* Background MIS weights. */ -# ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. */ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && - kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); - L *= mis_weight; - } -# endif + /* Evaluate background shader. */ - return L; -#else - return make_float3(0.8f, 0.8f, 0.8f); -#endif + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + + return surface_shader_background(emission_sd); } ccl_device_inline void integrate_background(KernelGlobals kg, @@ -107,7 +87,7 @@ ccl_device_inline void integrate_background(KernelGlobals kg, for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { /* This path should have been resolved with mnee, it will * generate a firefly for small lights since it is improbable. */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (klight->type == LIGHT_BACKGROUND && klight->use_caustics) { eval_background = false; break; @@ -118,17 +98,37 @@ ccl_device_inline void integrate_background(KernelGlobals kg, #endif /* __MNEE__ */ /* Evaluate background shader. */ - float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : - zero_float3(); + Spectrum L = zero_spectrum(); + + if (eval_background) { + L = integrator_eval_background_shader(kg, state, render_buffer); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + L *= kernel_data.integrator.ao_bounces_factor; + } + + /* Background MIS weights. */ + float mis_weight = 1.0f; + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P, ray_D); + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); + } - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if (path_state_ao_bounce(kg, state)) { - L *= kernel_data.integrator.ao_bounces_factor; + L *= mis_weight; } /* Write to render buffer. */ - kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); + film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); } ccl_device_inline void integrate_distant_lights(KernelGlobals kg, @@ -160,7 +160,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) { /* This path should have been resolved with mnee, it will * generate a firefly for small lights since it is improbable. */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (klight->use_caustics) return; } @@ -170,24 +170,23 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup); + film_write_surface_emission( + kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup); } } } @@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg, } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index be926c78439..f2d65eddfbb 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -3,8 +3,8 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/light/light.h" #include "kernel/light/sample.h" @@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg, const float3 ray_D = INTEGRATOR_STATE(state, ray, D); const float ray_time = INTEGRATOR_STATE(state, ray, time); - /* Advance ray beyond light. */ - /* TODO: can we make this more numerically robust to avoid reintersecting the - * same light in some cases? Ray should not intersect surface anymore as the - * object and prim ids will prevent self intersection. */ - const float3 new_ray_P = ray_P + ray_D * isect.t; - INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; - INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; - - /* Set position to where the BSDF was sampled, for correct MIS PDF. */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - ray_P -= ray_D * mis_ray_t; - isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t; + /* Advance ray to new start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t); LightSample ls ccl_optional_struct_init; const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); @@ -62,23 +51,22 @@ ccl_device_inline void integrate_light(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group); + film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group); } ccl_device void integrator_shade_light(KernelGlobals kg, @@ -99,11 +87,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 2b929b7b62e..ba18aed6ff0 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -4,7 +4,7 @@ #pragma once #include "kernel/integrator/shade_volume.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -15,9 +15,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits) } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, - IntegratorShadowState state, - const int hit) +ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg, + IntegratorShadowState state, + const int hit) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); @@ -40,7 +40,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, /* Evaluate shader. */ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); } @@ -50,7 +50,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, # endif /* Compute transparency from closures. */ - return shader_bsdf_transparency(kg, shadow_sd); + return surface_shader_transparency(kg, shadow_sd); } # ifdef __VOLUME__ @@ -58,7 +58,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, IntegratorShadowState state, const int hit, const int num_recorded_hits, - ccl_private float3 *ccl_restrict + ccl_private Spectrum *ccl_restrict throughput) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); @@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; /* Modify ray position and length to match current segment. */ - const float start_t = (hit == 0) ? 0.0f : - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); - const float end_t = (hit < num_recorded_hits) ? - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : - ray.t; - ray.P += start_t * ray.D; - ray.t = end_t - start_t; + ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); + ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : + ray.tmax; shader_setup_from_volume(kg, shadow_sd, &ray); @@ -104,7 +100,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { # ifdef __VOLUME__ if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { - float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput); integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); if (is_zero(throughput)) { return true; @@ -117,8 +113,8 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* Surface shaders. */ if (hit < num_recorded_hits) { - const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); - const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; + const Spectrum shadow = integrate_transparent_surface_shadow(kg, state, hit); + const Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; if (is_zero(throughput)) { return true; } @@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* There are more hits that we could not recorded due to memory usage, * adjust ray to intersect again from the last hit. */ const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); - const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t); } return false; @@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, /* Evaluate transparent shadows. */ const bool opaque = integrate_transparent_shadow(kg, state, num_hits); if (opaque) { - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } #endif if (shadow_intersections_has_remaining(num_hits)) { /* More intersections to find, continue shadow ray. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { - kernel_accum_light(kg, state, render_buffer); - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + film_write_direct_light(kg, state, render_buffer); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index ce1398859b7..c19f56a9b70 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -3,14 +3,15 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/mnee.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -31,7 +32,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, shader_setup_from_ray(kg, sd, &ray, &isect); } -#ifdef __HOLDOUT__ +ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg, + const ccl_private ShaderData *sd, + const float3 ray_P, + const float3 ray_D) +{ + /* No ray offset needed for other primitive types. */ + if (!(sd->type & PRIMITIVE_TRIANGLE)) { + return ray_P; + } + + /* Self intersection tests already account for the case where a ray hits the + * same primitive. However precision issues can still cause neighboring + * triangles to be hit. Here we test if the ray-triangle intersection with + * the same primitive would miss, implying that a neighboring triangle would + * be hit instead. + * + * This relies on triangle intersection to be watertight, and the object inverse + * object transform to match the one used by ray intersection exactly. + * + * Potential improvements: + * - It appears this happens when either barycentric coordinates are small, + * or dot(sd->Ng, ray_D) is small. Detect such cases and skip test? + * - Instead of ray offset, can we tweak P to lie within the triangle? + */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w; + const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); + + float3 local_ray_P = ray_P; + float3 local_ray_D = ray_D; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform itfm = object_get_inverse_transform(kg, sd); + local_ray_P = transform_point(&itfm, local_ray_P); + local_ray_D = transform_direction(&itfm, local_ray_D); + } + + if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) { + return ray_P; + } + else { + return ray_offset(ray_P, sd->Ng); + } +} + ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, ConstIntegratorState state, ccl_private ShaderData *sd, @@ -42,22 +88,18 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - const float3 holdout_weight = shader_holdout_apply(kg, sd); - if (kernel_data.background.transparent) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float transparent = average(holdout_weight * throughput); - kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); - } - if (isequal_float3(holdout_weight, one_float3())) { + const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const float transparent = average(holdout_weight * throughput); + film_write_holdout(kg, state, path_flag, transparent, render_buffer); + if (isequal(holdout_weight, one_spectrum())) { return false; } } return true; } -#endif /* __HOLDOUT__ */ -#ifdef __EMISSION__ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, ConstIntegratorState state, ccl_private const ShaderData *sd, @@ -67,32 +109,29 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Evaluate emissive closure. */ - float3 L = shader_emissive_eval(sd); + Spectrum L = surface_shader_emission(sd); + float mis_weight = 1.0f; -# ifdef __HAIR__ +#ifdef __HAIR__ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_TRIANGLE)) -# else +#else if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) -# endif +#endif { const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); + const float t = sd->ray_length; /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. */ float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); - L *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); } - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object)); + film_write_surface_emission( + kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object)); } -#endif /* __EMISSION__ */ -#ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ template<uint node_feature_mask> @@ -111,11 +150,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) { return; } } @@ -133,15 +171,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, Ray ray ccl_optional_struct_init; BsdfEval bsdf_eval ccl_optional_struct_init; - const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); + const bool is_transmission = surface_shader_is_transmission(sd, ls.D); -# ifdef __MNEE__ +#ifdef __MNEE__ int mnee_vertex_count = 0; IF_KERNEL_FEATURE(MNEE) { if (ls.lamp != LAMP_NONE) { /* Is this a caustic light? */ - const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics; + const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics; if (use_caustics) { /* Are we on a caustic caster? */ if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) @@ -161,16 +199,17 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray); } else -# endif /* __MNEE__ */ +#endif /* __MNEE__ */ { - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. */ - const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); - bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + const float bsdf_pdf = surface_shader_bsdf_eval( + kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf); if (ls.shader & SHADER_USE_MIS) { const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf); @@ -190,16 +229,20 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const bool is_light = light_sample_is_light(&ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); if (is_transmission) { -# ifdef __VOLUME__ +#ifdef __VOLUME__ shadow_volume_stack_enter_exit(kg, shadow_state, sd); -# endif +#endif + } + + if (ray.self.object != OBJECT_NONE) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); } /* Write shadow ray and associated state to global memory. */ @@ -213,11 +256,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, /* Copy state from main path to shadow path. */ uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) * + bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -227,8 +271,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, else { /* Direct light, use BSDFs at this bounce. */ shadow_flag |= PATH_RAY_SURFACE_PASS; - pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); - pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval)); + pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); + pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval)); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -250,7 +294,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( state, path, glossy_bounce); -# ifdef __MNEE__ +#ifdef __MNEE__ if (mnee_vertex_count > 0) { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1; @@ -262,7 +306,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count; } else -# endif +#endif { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( state, path, transmission_bounce); @@ -284,7 +328,6 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, ls.group + 1 : kernel_data.background.lightgroup + 1; } -#endif /* Path tracing: bounce off or through surface with new direction. */ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( @@ -298,9 +341,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( return LABEL_NONE; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); + float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf); #ifdef __SUBSURFACE__ /* BSSRDF closure, we schedule subsurface intersection kernel. */ @@ -313,29 +355,33 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( float bsdf_pdf; BsdfEval bsdf_eval ccl_optional_struct_init; float3 bsdf_omega_in ccl_optional_struct_init; - differential3 bsdf_domega_in ccl_optional_struct_init; int label; - label = shader_bsdf_sample_closure( - kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + label = surface_shader_bsdf_sample_closure( + kg, sd, sc, rand_bsdf, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf); if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { return LABEL_NONE; } - /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? - INTEGRATOR_STATE(state, ray, t) - sd->ray_length : - FLT_MAX; + if (label & LABEL_TRANSPARENT) { + /* Only need to modify start distance for transparent. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); + } + else { + /* Setup ray with changed origin and direction. */ + const float3 D = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D); + INTEGRATOR_STATE_WRITE(state, ray, D) = D; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); #endif + } /* Update throughput. */ - float3 throughput = INTEGRATOR_STATE(state, path, throughput); + Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; @@ -349,12 +395,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } /* Update path state */ - if (label & LABEL_TRANSPARENT) { - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; - } - else { + if (!(label & LABEL_TRANSPARENT)) { INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); } @@ -371,17 +413,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState return LABEL_NONE; } - /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - - /* Clipping works through transparent. */ - INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; - -# ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); -# endif - - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; + /* Only modify start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); return LABEL_TRANSMIT | LABEL_TRANSPARENT; } @@ -416,23 +449,26 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, return; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); float3 ao_N; - const float3 ao_weight = shader_bsdf_ao( + const Spectrum ao_weight = surface_shader_ao( kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N); float3 ao_D; float ao_pdf; - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf); bool skip_self = true; Ray ray ccl_optional_struct_init; ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self); ray.D = ao_D; - ray.t = kernel_data.integrator.ao_bounces_distance; + if (skip_self) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); + } + ray.tmin = 0.0f; + ray.tmax = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; ray.self.object = (skip_self) ? sd->object : OBJECT_NONE; ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE; @@ -442,7 +478,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, ray.dD = differential_zero_compact(); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -458,7 +495,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) * + surface_shader_alpha(kg, sd); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( state, path, render_pixel_index); @@ -507,7 +545,7 @@ ccl_device bool integrate_surface(KernelGlobals kg, { /* Evaluate shader. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); - shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); + surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); /* Initialize additional RNG for BSDFs. */ if (sd.flag & SD_BSDF_NEEDS_LCG) { @@ -529,21 +567,17 @@ ccl_device bool integrate_surface(KernelGlobals kg, #endif { /* Filter closures. */ - shader_prepare_surface_closures(kg, state, &sd, path_flag); + surface_shader_prepare_closures(kg, state, &sd, path_flag); -#ifdef __HOLDOUT__ /* Evaluate holdout. */ if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { return false; } -#endif -#ifdef __EMISSION__ /* Write emission. */ if (sd.flag & SD_EMISSION) { integrate_surface_emission(kg, state, &sd, render_buffer); } -#endif /* Perform path termination. Most paths have already been terminated in * the intersect_closest kernel, this is just for emission and for dividing @@ -557,11 +591,11 @@ ccl_device bool integrate_surface(KernelGlobals kg, /* Write render passes. */ #ifdef __PASSES__ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(kg, state, &sd, render_buffer); + film_write_data_passes(kg, state, &sd, render_buffer); #endif #ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); + film_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif } @@ -604,22 +638,23 @@ ccl_device bool integrate_surface(KernelGlobals kg, } template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE, - int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> + DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) { if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) { if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } else { - kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 4a5015946aa..aaef92729d6 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -3,12 +3,13 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/intersect_closest.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -29,13 +30,13 @@ typedef enum VolumeIntegrateEvent { typedef struct VolumeIntegrateResult { /* Throughput and offset for direct light scattering. */ bool direct_scatter; - float3 direct_throughput; + Spectrum direct_throughput; float direct_t; ShaderVolumePhases direct_phases; /* Throughput and offset for indirect light scattering. */ bool indirect_scatter; - float3 indirect_throughput; + Spectrum indirect_throughput; float indirect_t; ShaderVolumePhases indirect_phases; } VolumeIntegrateResult; @@ -52,19 +53,19 @@ typedef struct VolumeIntegrateResult { * sigma_t = sigma_a + sigma_s */ typedef struct VolumeShaderCoefficients { - float3 sigma_t; - float3 sigma_s; - float3 emission; + Spectrum sigma_t; + Spectrum sigma_s; + Spectrum emission; } VolumeShaderCoefficients; /* Evaluate shader to get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, IntegratorShadowState state, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict extinction) + ccl_private Spectrum *ccl_restrict extinction) { VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)) - shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); + volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); if (!(sd->flag & SD_EXTINCTION)) { return false; @@ -83,15 +84,16 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - shader_eval_volume<false>(kg, state, sd, path_flag, volume_read_lambda_pass); + volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass); if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { return false; } - coeff->sigma_s = zero_float3(); - coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); - coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3(); + coeff->sigma_s = zero_spectrum(); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : + zero_spectrum(); + coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_spectrum(); if (sd->flag & SD_SCATTER) { for (int i = 0; i < sd->num_closure; i++) { @@ -114,7 +116,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_private const RNGState *rng_state, const float object_step_size, - float t, + const float tmin, + const float tmax, ccl_private float *step_size, ccl_private float *step_shade_offset, ccl_private float *steps_offset, @@ -122,7 +125,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, { if (object_step_size == FLT_MAX) { /* Homogeneous volume. */ - *step_size = t; + *step_size = tmax - tmin; *step_shade_offset = 0.0f; *steps_offset = 1.0f; *max_steps = 1; @@ -130,6 +133,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, else { /* Heterogeneous volume. */ *max_steps = kernel_data.integrator.volume_max_steps; + const float t = tmax - tmin; float step = min(object_step_size, t); /* compute exact steps in advance for malloc */ @@ -141,11 +145,11 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, /* Perform shading at this offset within a step, to integrate over * over the entire step segment. */ - *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); + *step_shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET); /* Shift starting point of all segment by this random amount to avoid * banding artifacts from the volume bounding shape. */ - *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); + *steps_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET); } } @@ -160,12 +164,12 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_global float3 *ccl_restrict throughput) + ccl_global Spectrum *ccl_restrict throughput) { - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { - *throughput *= volume_color_transmittance(sigma_t, ray->t); + *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin); } } # endif @@ -176,14 +180,14 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, IntegratorShadowState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict throughput, + ccl_private Spectrum *ccl_restrict throughput, const float object_step_size) { /* Load random number state. */ RNGState rng_state; shadow_path_state_rng_load(state, &rng_state); - float3 tp = *throughput; + Spectrum tp = *throughput; /* Prepare for stepping. * For shadows we do not offset all segments, since the starting point is @@ -194,7 +198,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, volume_step_init(kg, &rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &unused, @@ -202,17 +207,17 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, const float steps_offset = 1.0f; /* compute extinction at the start */ - float t = 0.0f; + float t = ray->tmin; - float3 sum = zero_float3(); + Spectrum sum = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* advance to new position */ - float new_t = min(ray->t, (i + steps_offset) * step_size); + float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); float dt = new_t - t; float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); /* compute attenuation over segment */ sd->P = new_P; @@ -222,20 +227,19 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, * check then. */ sum += (-sigma_t * dt); if ((i & 0x07) == 0) { /* TODO: Other interval? */ - tp = *throughput * exp3(sum); + tp = *throughput * exp(sum); /* stop if nearly all light is blocked */ - if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && - tp.z < VOLUME_THROUGHPUT_EPSILON) + if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON) break; } } /* stop if at the end of the volume */ t = new_t; - if (t == ray->t) { + if (t == ray->tmax) { /* Update throughput in case we haven't done it above */ - tp = *throughput * exp3(sum); + tp = *throughput * exp(sum); break; } } @@ -257,15 +261,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r const float xi, ccl_private float *pdf) { - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float delta = dot((light_P - ray->P), ray->D); const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { *pdf = 0.0f; return 0.0f; } - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); if (UNLIKELY(theta_b == theta_a)) { *pdf = 0.0f; @@ -273,7 +278,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r } *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - return min(t, delta + t_); /* min is only for float precision errors */ + return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */ } ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, @@ -286,11 +291,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -310,11 +316,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -328,22 +335,22 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, /* Distance sampling */ ccl_device float volume_distance_sample(float max_t, - float3 sigma_t, + Spectrum sigma_t, int channel, float xi, - ccl_private float3 *transmittance, - ccl_private float3 *pdf) + ccl_private Spectrum *transmittance, + ccl_private Spectrum *pdf) { /* xi is [0, 1[ so log(0) should never happen, division by zero is * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ float sample_sigma_t = volume_channel_get(sigma_t, channel); - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); float sample_transmittance = volume_channel_get(full_transmittance, channel); float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); *transmittance = volume_color_transmittance(sigma_t, sample_t); - *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance); + *pdf = safe_divide_color(sigma_t * *transmittance, one_spectrum() - full_transmittance); /* todo: optimization: when taken together with hit/miss decision, * the full_transmittance cancels out drops out and xi does not @@ -352,33 +359,36 @@ ccl_device float volume_distance_sample(float max_t, return sample_t; } -ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +ccl_device Spectrum volume_distance_pdf(float max_t, Spectrum sigma_t, float sample_t) { - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum transmittance = volume_color_transmittance(sigma_t, sample_t); - return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); + return safe_divide_color(sigma_t * transmittance, one_spectrum() - full_transmittance); } /* Emission */ -ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, - int closure_flag, - float3 transmittance, - float t) +ccl_device Spectrum volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, + int closure_flag, + Spectrum transmittance, + float t) { /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t * this goes to E * t as sigma_t goes to zero * * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ - float3 emission = coeff->emission; + Spectrum emission = coeff->emission; if (closure_flag & SD_EXTINCTION) { - float3 sigma_t = coeff->sigma_t; + Spectrum sigma_t = coeff->sigma_t; - emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; - emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; - emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(emission, i) *= (GET_SPECTRUM_CHANNEL(sigma_t, i) > 0.0f) ? + (1.0f - GET_SPECTRUM_CHANNEL(transmittance, i)) / + GET_SPECTRUM_CHANNEL(sigma_t, i) : + t; + } } else emission *= t; @@ -390,8 +400,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients typedef struct VolumeIntegrateState { /* Volume segment extents. */ - float start_t; - float end_t; + float tmin; + float tmax; /* If volume is absorption-only up to this point, and no probabilistic * scattering or termination has been used yet. */ @@ -413,27 +423,27 @@ ccl_device_forceinline void volume_integrate_step_scattering( ccl_private const Ray *ray, const float3 equiangular_light_P, ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, - const float3 transmittance, + const Spectrum transmittance, ccl_private VolumeIntegrateState &ccl_restrict vstate, ccl_private VolumeIntegrateResult &ccl_restrict result) { /* Pick random color channel, we use the Veach one-sample * model with balance heuristic for the channels. */ - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + Spectrum channel_pdf; const int channel = volume_sample_channel( albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); /* Equiangular sampling for direct lighting. */ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { - if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t && + if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax && vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { - const float new_dt = result.direct_t - vstate.start_t; - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const float new_dt = result.direct_t - vstate.tmin; + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); result.direct_scatter = true; result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. */ if (vstate.use_mis) { @@ -458,10 +468,10 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* compute sampling distance */ const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; - const float new_t = vstate.start_t + new_dt; + const float new_t = vstate.tmin + new_dt; /* transmittance and pdf */ - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { @@ -469,7 +479,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.indirect_scatter = true; result.indirect_t = new_t; result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; - shader_copy_volume_phases(&result.indirect_phases, sd); + volume_shader_copy_phases(&result.indirect_phases, sd); if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { /* If using distance sampling for direct light, just copy parameters @@ -477,7 +487,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.direct_scatter = true; result.direct_t = result.indirect_t; result.direct_throughput = result.indirect_throughput; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. */ if (vstate.use_mis) { @@ -528,7 +538,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( volume_step_init(kg, rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &steps_offset, @@ -536,11 +547,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Initialize volume integration state. */ VolumeIntegrateState vstate ccl_optional_struct_init; - vstate.start_t = 0.0f; - vstate.end_t = 0.0f; + vstate.tmin = ray->tmin; + vstate.tmax = ray->tmin; vstate.absorption_only = true; - vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); - vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); + vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_CHANNEL); /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */ vstate.direct_sample_method = direct_sample_method; @@ -559,7 +570,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( vstate.distance_pdf = 1.0f; /* Initialize volume integration result. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); result.direct_throughput = throughput; result.indirect_throughput = throughput; @@ -572,14 +583,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES); - float3 accum_albedo = zero_float3(); + Spectrum accum_albedo = zero_spectrum(); # endif - float3 accum_emission = zero_float3(); + Spectrum accum_emission = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* Advance to new position */ - vstate.end_t = min(ray->t, (i + steps_offset) * step_size); - const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; + vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); + const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset; sd->P = ray->P + ray->D * shade_t; /* compute segment */ @@ -588,17 +599,17 @@ ccl_device_forceinline void volume_integrate_heterogeneous( const int closure_flag = sd->flag; /* Evaluate transmittance over segment. */ - const float dt = (vstate.end_t - vstate.start_t); - const float3 transmittance = (closure_flag & SD_EXTINCTION) ? - volume_color_transmittance(coeff.sigma_t, dt) : - one_float3(); + const float dt = (vstate.tmax - vstate.tmin); + const Spectrum transmittance = (closure_flag & SD_EXTINCTION) ? + volume_color_transmittance(coeff.sigma_t, dt) : + one_spectrum(); /* Emission. */ if (closure_flag & SD_EMISSION) { /* Only write emission before indirect light scatter position, since we terminate * stepping at that point if we have already found a direct light scatter position. */ if (!result.indirect_scatter) { - const float3 emission = volume_emission_integrate( + const Spectrum emission = volume_emission_integrate( &coeff, closure_flag, transmittance, dt); accum_emission += result.indirect_throughput * emission; } @@ -609,8 +620,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ /* Accumulate albedo for denoising features. */ if (write_denoising_features && (closure_flag & SD_SCATTER)) { - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + accum_albedo += result.indirect_throughput * albedo * (one_spectrum() - transmittance); } # endif @@ -626,13 +637,13 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Stop if nearly all light blocked. */ if (!result.indirect_scatter) { - if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { - result.indirect_throughput = zero_float3(); + if (reduce_max(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { + result.indirect_throughput = zero_spectrum(); break; } } else if (!result.direct_scatter) { - if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { + if (reduce_max(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { break; } } @@ -645,28 +656,27 @@ ccl_device_forceinline void volume_integrate_heterogeneous( } /* Stop if at the end of the volume. */ - vstate.start_t = vstate.end_t; - if (vstate.start_t == ray->t) { + vstate.tmin = vstate.tmax; + if (vstate.tmin == ray->tmax) { break; } } /* Write accumulated emission. */ if (!is_zero(accum_emission)) { - kernel_accum_emission( + film_write_volume_emission( kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object)); } # ifdef __DENOISING_FEATURES__ /* Write denoising features. */ if (write_denoising_features) { - kernel_write_denoising_features_volume( + film_write_denoising_features_volume( kg, state, accum_albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ } -# ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ ccl_device_forceinline bool integrate_volume_sample_light( @@ -684,11 +694,10 @@ ccl_device_forceinline bool integrate_volume_sample_light( /* Sample position on a light. */ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_volume_segment( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) { return false; } @@ -708,7 +717,7 @@ ccl_device_forceinline void integrate_volume_direct_light( ccl_private const RNGState *ccl_restrict rng_state, const float3 P, ccl_private const ShaderVolumePhases *ccl_restrict phases, - ccl_private const float3 throughput, + ccl_private const Spectrum throughput, ccl_private LightSample *ccl_restrict ls) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); @@ -725,11 +734,10 @@ ccl_device_forceinline void integrate_volume_direct_light( { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) { return; } } @@ -746,21 +754,21 @@ ccl_device_forceinline void integrate_volume_direct_light( * non-constant light sources. */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. */ BsdfEval phase_eval ccl_optional_struct_init; - const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); + const float phase_pdf = volume_shader_phase_eval(kg, sd, phases, ls->D, &phase_eval); if (ls->shader & SHADER_USE_MIS) { float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf); bsdf_eval_mul(&phase_eval, mis_weight); } - bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf); + bsdf_eval_mul(&phase_eval, light_eval / ls->pdf); /* Path termination. */ const float terminate = path_state_rng_light_termination(kg, rng_state); @@ -774,8 +782,8 @@ ccl_device_forceinline void integrate_volume_direct_light( const bool is_light = light_sample_is_light(ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); @@ -789,11 +797,11 @@ ccl_device_forceinline void integrate_volume_direct_light( const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); + const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -803,8 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light( else { /* Direct light, no diffuse/glossy distinction needed for volumes. */ shadow_flag |= PATH_RAY_VOLUME_PASS; - pass_diffuse_weight = packed_float3(one_float3()); - pass_glossy_weight = packed_float3(zero_float3()); + pass_diffuse_weight = one_spectrum(); + pass_glossy_weight = zero_spectrum(); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -842,7 +850,6 @@ ccl_device_forceinline void integrate_volume_direct_light( integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); } -# endif /* Path tracing: scatter in new direction using phase function */ ccl_device_forceinline bool integrate_volume_phase_scatter( @@ -854,24 +861,15 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); - float phase_u, phase_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); + const float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE); /* Phase closure, sample direction. */ float phase_pdf; BsdfEval phase_eval ccl_optional_struct_init; float3 phase_omega_in ccl_optional_struct_init; - differential3 phase_domega_in ccl_optional_struct_init; - - const int label = shader_volume_phase_sample(kg, - sd, - phases, - phase_u, - phase_v, - &phase_eval, - &phase_omega_in, - &phase_domega_in, - &phase_pdf); + + const int label = volume_shader_phase_sample( + kg, sd, phases, rand_phase, &phase_eval, &phase_omega_in, &phase_pdf); if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { return false; @@ -880,28 +878,27 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( /* Setup ray. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); # endif // Save memory by storing last hit prim and object in isect INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim; INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; /* Update throughput. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } /* Update path state */ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); @@ -1021,7 +1018,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, integrator_state_read_isect(kg, state, &isect); /* Set ray length to current segment. */ - ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; + ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; /* Clean volume stack for background rays. */ if (isect.prim == PRIM_NONE) { @@ -1032,13 +1029,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, if (event == VOLUME_PATH_SCATTERED) { /* Queue intersect_closest kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } else if (event == VOLUME_PATH_MISSED) { /* End path. */ - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); return; } else { diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h deleted file mode 100644 index 4da92929366..00000000000 --- a/intern/cycles/kernel/integrator/shader_eval.h +++ /dev/null @@ -1,952 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -/* Functions to evaluate shaders and use the resulting shader closures. */ - -#pragma once - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/emissive.h" - -#include "kernel/film/accumulate.h" - -#include "kernel/svm/svm.h" - -#ifdef __OSL__ -# include "kernel/osl/shader.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Merging */ - -#if defined(__VOLUME__) -ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) -{ - /* Merge identical closures to save closure space with stacked volumes. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sci = &sd->closure[i]; - - if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - continue; - } - - for (int j = i + 1; j < sd->num_closure; j++) { - ccl_private ShaderClosure *scj = &sd->closure[j]; - if (sci->type != scj->type) { - continue; - } - - ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) - sci; - ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) - scj; - if (!(hgi->g == hgj->g)) { - continue; - } - - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j + 1); - if (size > 0) { - for (int k = 0; k < size; k++) { - scj[k] = scj[k + 1]; - } - } - - sd->num_closure--; - kernel_assert(sd->num_closure >= 0); - j--; - } - } -} - -ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict - phases, - ccl_private const ShaderData *ccl_restrict sd) -{ - phases->num_closure = 0; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *from_sc = &sd->closure[i]; - ccl_private const HenyeyGreensteinVolume *from_hg = - (ccl_private const HenyeyGreensteinVolume *)from_sc; - - if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; - - to_sc->weight = from_sc->weight; - to_sc->sample_weight = from_sc->sample_weight; - to_sc->g = from_hg->g; - phases->num_closure++; - if (phases->num_closure >= MAX_VOLUME_CLOSURE) { - break; - } - } - } -} -#endif /* __VOLUME__ */ - -ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - const uint32_t path_flag) -{ - /* Filter out closures. */ - if (kernel_data.integrator.filter_closures) { - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { - sd->closure_emission_background = zero_float3(); - } - - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { - sd->flag &= ~SD_BSDF_HAS_EVAL; - } - - if (path_flag & PATH_RAY_CAMERA) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || - (CLOSURE_IS_BSDF_GLOSSY(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || - (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { - sc->type = CLOSURE_NONE_ID; - sc->sample_weight = 0.0f; - } - else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { - sc->type = CLOSURE_HOLDOUT_ID; - sc->sample_weight = 0.0f; - sd->flag |= SD_HOLDOUT; - } - } - } - } - - /* Defensive sampling. - * - * We can likely also do defensive sampling at deeper bounces, particularly - * for cases like a perfect mirror but possibly also others. This will need - * a good heuristic. */ - if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == - 0 && - sd->num_closure > 1) { - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sc->sample_weight = max(sc->sample_weight, 0.125f * sum); - } - } - } - - /* Filter glossy. - * - * Blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy) */ - if (kernel_data.integrator.filter_glossy != FLT_MAX -#ifdef __MNEE__ - && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) -#endif - ) { - float blur_pdf = kernel_data.integrator.filter_glossy * - INTEGRATOR_STATE(state, path, min_ray_pdf); - - if (blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF(sc->type)) { - bsdf_blur(kg, sc, blur_roughness); - } - } - } - } -} - -/* BSDF */ - -ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, - const float3 omega_in) -{ - return dot(sd->N, omega_in) < 0.0f; -} - -ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) -{ - if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { - return false; - } - if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { - if (CLOSURE_IS_BSDF_DIFFUSE(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { - if (CLOSURE_IS_BSDF_GLOSSY(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { - if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { - return true; - } - } - return false; -} - -ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private const ShaderClosure *skip_sc, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight, - const uint light_shader_flags) -{ - /* This is the veach one-sample model with balance heuristic, - * some PDF factors drop out when using balance heuristic weighting. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (sc == skip_sc) { - continue; - } - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); - - if (bsdf_pdf != 0.0f) { - bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); - sum_pdf += bsdf_pdf * sc->sample_weight; - } - } - - sum_sample_weight += sc->sample_weight; - } - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_inline -#endif - float - shader_bsdf_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private BsdfEval *bsdf_eval, - const uint light_shader_flags) -{ - bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3()); - - return _shader_bsdf_multi_eval( - kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); -} - -/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ -ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( - ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) -{ - int sampled = 0; - - if (sd->num_closure > 1) { - /* Pick a BSDF or based on sample weights. */ - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - float r = (*randu) * sum; - float partial_sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; - - if (r < next_sum) { - sampled = i; - - /* Rescale to reuse for direction sample, to better preserve stratification. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } - - partial_sum = next_sum; - } - } - } - - return &sd->closure[sampled]; -} - -/* Return weight for picked BSSRDF. */ -ccl_device_inline float3 -shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, - ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) -{ - float3 weight = bssrdf_sc->weight; - - if (sd->num_closure > 1) { - float sum = 0.0f; - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - weight *= sum / bssrdf_sc->sample_weight; - } - - return weight; -} - -/* Sample direction for picked BSDF, and return evaluation and pdf for all - * BSDFs combined using MIS. */ -ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private const ShaderClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *bsdf_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - /* BSSRDF should already have been handled elsewhere. */ - kernel_assert(CLOSURE_IS_BSDF(sc->type)); - - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); - - if (sd->num_closure > 1) { - const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); - float sweight = sc->sample_weight; - *pdf = _shader_bsdf_multi_eval( - kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); - } - } - - return label; -} - -ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) -{ - float roughness = 0.0f; - float sum_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF(sc->type)) { - /* sqrt once to undo the squaring from multiplying roughness on the - * two axes, and once for the squared roughness convention. */ - float weight = fabsf(average(sc->weight)); - roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); - sum_weight += weight; - } - } - - return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; -} - -ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return one_float3(); - } - else if (sd->flag & SD_TRANSPARENT) { - return sd->closure_transparent_extinction; - } - else { - return zero_float3(); - } -} - -ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) -{ - if (sd->flag & SD_TRANSPARENT) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->sample_weight = 0.0f; - sc->weight = zero_float3(); - } - } - - sd->flag &= ~SD_TRANSPARENT; - } -} - -ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); - - alpha = max(alpha, zero_float3()); - alpha = min(alpha, one_float3()); - - return alpha; -} - -ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - N += sc->N * fabsf(average(sc->weight)); - } - - return (is_zero(N)) ? sd->N : normalize(N); -} - -ccl_device float3 shader_bsdf_ao(KernelGlobals kg, - ccl_private const ShaderData *sd, - const float ao_factor, - ccl_private float3 *N_) -{ - float3 eval = zero_float3(); - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; - eval += sc->weight * ao_factor; - N += bsdf->N * fabsf(average(sc->weight)); - } - } - - *N_ = (is_zero(N)) ? sd->N : normalize(N); - return eval; -} - -#ifdef __SUBSURFACE__ -ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSSRDF(sc->type)) { - ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; - float avg_weight = fabsf(average(sc->weight)); - - N += bssrdf->N * avg_weight; - } - } - - return (is_zero(N)) ? sd->N : normalize(N); -} -#endif /* __SUBSURFACE__ */ - -/* Constant emission optimization */ - -ccl_device bool shader_constant_emission_eval(KernelGlobals kg, - int shader, - ccl_private float3 *eval) -{ - int shader_index = shader & SHADER_MASK; - int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; - - if (shader_flag & SD_HAS_CONSTANT_EMISSION) { - *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], - kernel_tex_fetch(__shaders, shader_index).constant_emission[1], - kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); - - return true; - } - - return false; -} - -/* Background */ - -ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Emission */ - -ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Holdout */ - -ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) -{ - float3 weight = zero_float3(); - - /* For objects marked as holdout, preserve transparency and remove all other - * closures, replacing them with a holdout weight. */ - if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { - weight = one_float3() - sd->closure_transparent_extinction; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - sc->type = NBUILTIN_CLOSURES; - } - } - - sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); - } - else { - weight = one_float3(); - } - } - else { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_HOLDOUT(sc->type)) { - weight += sc->weight; - } - } - } - - return weight; -} - -/* Surface Evaluation */ - -template<uint node_feature_mask, typename ConstIntegratorGenericState> -ccl_device void shader_eval_surface(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_global float *ccl_restrict buffer, - uint32_t path_flag, - bool use_caustics_storage = false) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; - } - - sd->num_closure = 0; - sd->num_closure_left = max_closures; - -#ifdef __OSL__ - if (kg->osl) { - if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { - OSLShader::eval_background(kg, state, sd, path_flag); - } - else { - OSLShader::eval_surface(kg, state, sd, path_flag); - } - } - else -#endif - { -#ifdef __SVM__ - svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); -#else - if (sd->object == OBJECT_NONE) { - sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); - sd->flag |= SD_EMISSION; - } - else { - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - if (bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } -#endif - } -} - -/* Volume */ - -#ifdef __VOLUME__ - -ccl_device_inline float _shader_volume_phase_multi_eval( - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - int skip_phase, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight) -{ - for (int i = 0; i < phases->num_closure; i++) { - if (i == skip_phase) - continue; - - ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; - float phase_pdf = 0.0f; - float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); - - if (phase_pdf != 0.0f) { - bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - sum_pdf += phase_pdf * svc->sample_weight; - } - - sum_sample_weight += svc->sample_weight; - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -ccl_device float shader_volume_phase_eval(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - ccl_private BsdfEval *phase_eval) -{ - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3()); - - return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); -} - -ccl_device int shader_volume_phase_sample(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int sampled = 0; - - if (phases->num_closure > 1) { - /* pick a phase closure based on sample weights */ - float sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - sum += svc->sample_weight; - } - - float r = randu * sum; - float partial_sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - float next_sum = partial_sum + svc->sample_weight; - - if (r <= next_sum) { - /* Rescale to reuse for BSDF direction sample. */ - randu = (r - partial_sum) / svc->sample_weight; - break; - } - - partial_sum = next_sum; - } - - if (sampled == phases->num_closure) { - *pdf = 0.0f; - return LABEL_NONE; - } - } - - /* todo: this isn't quite correct, we don't weight anisotropy properly - * depending on color channels, even if this is perhaps not a common case */ - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - } - - return label; -} - -ccl_device int shader_phase_sample_closure(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumeClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - - return label; -} - -/* Volume Evaluation */ - -template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> -ccl_device_inline void shader_eval_volume(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - const uint32_t path_flag, - StackReadOp stack_read) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.max_closures; - } - - /* reset closures once at the start, we will be accumulating the closures - * for all volumes in the stack into a single array of closures */ - sd->num_closure = 0; - sd->num_closure_left = max_closures; - sd->flag = 0; - sd->object_flag = 0; - - for (int i = 0;; i++) { - const VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - /* Setup shader-data from stack. it's mostly setup already in - * shader_setup_from_volume, this switching should be quick. */ - sd->object = entry.object; - sd->lamp = LAMP_NONE; - sd->shader = entry.shader; - - sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag &= ~SD_OBJECT_FLAGS; - - if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); - -# ifdef __OBJECT_MOTION__ - /* todo: this is inefficient for motion blur, we should be - * caching matrices instead of recomputing them each step */ - shader_setup_object_transforms(kg, sd, sd->time); - - if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) != 0) { - AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); - kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); - - const float3 P = sd->P; - const float velocity_scale = kernel_tex_fetch(__objects, sd->object).velocity_scale; - const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? - 0.5f : - 0.0f; - const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? - (1.0f - kernel_data.cam.shuttertime) + sd->time : - sd->time; - - /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity - * existed, or will exist, at the given time: - * - * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` - * - * where - * - * x : position - * T : super-sampled time (or ray time) - * t : current time of the simulation (in rendering we assume this is center frame with - * relative time = 0) - * phi : the volume quantity - * u : the velocity field - * - * But first we need to determine the velocity field `u(x, T)`, which we can estimate also - * using semi-lagrangian advection. - * - * `u(x, T) = u(x - (T - t) * u(x, T), t)` - * - * This is the typical way to model self-advection in fluid dynamics, however, we do not - * account for other forces affecting the velocity during simulation (pressure, buoyancy, - * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better - * results, a higher order interpolation scheme can be used (at the cost of more lookups), - * or an interpolation of the velocity fields for the previous and next frames could also - * be used to estimate `u(x, T)` (which will cost more memory and lookups). - * - * References: - * "Eulerian Motion Blur", Kim and Ko, 2007 - * "Production Volume Rendering", Wreninge et al., 2012 - */ - - /* Find velocity. */ - float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. */ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - - /* Find advected velocity. */ - velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. */ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - } -# endif - } - - /* evaluate shader */ -# ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) { - OSLShader::eval_volume(kg, state, sd, path_flag); - } - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( - kg, state, sd, NULL, path_flag); - } -# endif - - /* Merge closures to avoid exceeding number of closures limit. */ - if (!shadow) { - if (i > 0) { - shader_merge_volume_closures(sd); - } - } - } -} - -#endif /* __VOLUME__ */ - -/* Displacement Evaluation */ - -template<typename ConstIntegratorGenericState> -ccl_device void shader_eval_displacement(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd) -{ - sd->num_closure = 0; - sd->num_closure_left = 0; - - /* this will modify sd->P */ -#ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) - OSLShader::eval_displacement(kg, state, sd); - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( - kg, state, sd, NULL, 0); - } -#endif -} - -/* Cryptomatte */ - -ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) -{ - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 42d44580f80..a620853faea 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -3,7 +3,6 @@ #pragma once -#include "kernel/film/write_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/state_util.h" @@ -50,7 +49,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, ConstIntegratorState state) { - if (INTEGRATOR_PATH_IS_TERMINATED) { + if (integrator_path_is_terminated(state)) { return false; } @@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; } -/* Write shadow catcher passes on a bounce from the shadow catcher object. */ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - #endif /* __SHADOW_CATCHER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index eaee65ada40..3b490ecffdd 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -27,15 +27,15 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) /* Throughput for shadow pass. */ KERNEL_STRUCT_MEMBER(shadow_path, - packed_float3, + PackedSpectrum, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Number of intersections found by ray-tracing. */ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) /* Light group. */ @@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path) KERNEL_STRUCT_BEGIN(shadow_ray) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING) diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h index d6fef27f344..d1907bd6e16 100644 --- a/intern/cycles/kernel/integrator/state.h +++ b/intern/cycles/kernel/integrator/state.h @@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU { /* Index of main path which will be used by a next shadow catcher split. */ ccl_global int *next_main_path_index; + + /* Divisor used to partition active indices by locality when sorting by material. */ + uint sort_partition_divisor; } IntegratorStateGPU; /* Abstraction @@ -137,7 +140,7 @@ typedef struct IntegratorStateGPU { * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors * from a kernel which operates on a shadow catcher state will cause bad memory access. */ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ /* Scalar access on CPU. */ @@ -156,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ ((state)->nested_struct[array_index].member) -#else /* __KERNEL_CPU__ */ +#else /* !__KERNEL_GPU__ */ /* Array access on GPU with Structure-of-Arrays. */ @@ -177,6 +180,6 @@ typedef int ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index fed74d49434..4b03c665e17 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN /* Control Flow * - * Utilities for control flow between kernels. The implementation may differ per device - * or even be handled on the host side. To abstract such differences, experiment with - * different implementations and for debugging, this is abstracted using macros. + * Utilities for control flow between kernels. The implementation is different between CPU and + * GPU devices. For the latter part of the logic is handled on the host side with wavefronts. * * There is a main path for regular path tracing camera for path tracing. Shadows for next * event estimation branch off from this into their own path, that may be computed in - * parallel while the main path continues. + * parallel while the main path continues. Additionally, shading kernels are sorted using + * a key for coherence. * * Each kernel on the main path must call one of these functions. These may not be called * multiple times from the same kernel. * - * INTEGRATOR_PATH_INIT(next_kernel) - * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) - * INTEGRATOR_PATH_TERMINATE(current_kernel) + * integrator_path_init(kg, state, next_kernel) + * integrator_path_next(kg, state, current_kernel, next_kernel) + * integrator_path_terminate(kg, state, current_kernel) * * For the shadow path similar functions are used, and again each shadow kernel must call * one of them, and only once. */ -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) +ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state) +{ + return INTEGRATOR_STATE(state, path, queued_kernel) == 0; +} + +ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state) +{ + return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0; +} #ifdef __KERNEL_GPU__ -# define INTEGRATOR_PATH_INIT(next_kernel) \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.next_shadow_path_index[0], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; - -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_shadow_path_index[0], 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +} + +/* Sort first by truncated state index (for good locality), then by key (for good coherence). */ +# define INTEGRATOR_SORT_KEY(key, state) \ + (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor)) + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} #else -# define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - } -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - (void)current_kernel; \ - } - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = &state->shadow_type; \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ - (void)current_kernel; \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; + (void)current_kernel; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; + (void)current_kernel; +} #endif diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index e7e6db037b0..f4e280e4cb2 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -37,22 +37,21 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayMNEE */ KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling - * The PDF of BSDF sampling at the last scatter point, and distance to the - * last scatter point minus the last ray segment. This distance lets us - * compute the complete distance through transparent surfaces and volumes. */ + * The PDF of BSDF sampling at the last scatter point, which is at ray distance + * zero and distance. Note that transparency and volume attenuation increase + * the ray tmin but keep P unmodified so that this works. */ KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) /* Filter glossy. */ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) /* Continuation probability for path termination. */ KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Denoising. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) /* Shader sorting. */ /* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) @@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path) KERNEL_STRUCT_BEGIN(ray) KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) @@ -84,8 +84,8 @@ KERNEL_STRUCT_END(isect) /*************** Subsurface closure state for subsurface kernel ***************/ KERNEL_STRUCT_BEGIN(subsurface) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, radius, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_END(subsurface) diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h index 280db2d1aac..168122d3a78 100644 --- a/intern/cycles/kernel/integrator/state_util.h +++ b/intern/cycles/kernel/integrator/state_util.h @@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, { INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; @@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, ray, P); ray->D = INTEGRATOR_STATE(state, ray, D); - ray->t = INTEGRATOR_STATE(state, ray, t); + ray->tmin = INTEGRATOR_STATE(state, ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, ray, tmax); ray->time = INTEGRATOR_STATE(state, ray, time); ray->dP = INTEGRATOR_STATE(state, ray, dP); ray->dD = INTEGRATOR_STATE(state, ray, dD); @@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray( { INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; } @@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, shadow_ray, P); ray->D = INTEGRATOR_STATE(state, shadow_ray, D); - ray->t = INTEGRATOR_STATE(state, shadow_ray, t); + ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax); ray->time = INTEGRATOR_STATE(state, shadow_ray, time); ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); ray->dD = differential_zero_compact(); @@ -334,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl return to_state; } -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) { return INTEGRATOR_STATE(state, path, bounce); diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index b449f807290..15c2cb1c708 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -15,9 +15,9 @@ #include "kernel/integrator/intersect_volume_stack.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface_disk.h" #include "kernel/integrator/subsurface_random_walk.h" +#include "kernel/integrator/surface_shader.h" CCL_NAMESPACE_BEGIN @@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, /* Setup ray into surface. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); @@ -50,12 +51,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg, PATH_RAY_SUBSURFACE_RANDOM_WALK); /* Compute weight, optionally including Fresnel from entry point. */ - float3 weight = shader_bssrdf_sample_weight(sd, sc); -# ifdef __PRINCIPLED__ + Spectrum weight = surface_shader_bssrdf_sample_weight(sd, sc); if (bssrdf->roughness != FLT_MAX) { path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL; } -# endif if (sd->flag & SD_BACKFACING) { path_flag |= PATH_RAY_SUBSURFACE_BACKFACING; @@ -69,8 +68,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { if (INTEGRATOR_STATE(state, path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } } @@ -90,7 +89,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, /* Get bump mapped normal from shader evaluation at exit point. */ float3 N = sd->N; if (sd->flag & SD_HAS_BSSRDF_BUMP) { - N = shader_bssrdf_normal(sd); + N = surface_shader_bssrdf_normal(sd); } /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */ @@ -98,9 +97,8 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->num_closure = 0; sd->num_closure_left = kernel_data.max_closures; - const float3 weight = one_float3(); + const Spectrum weight = one_spectrum(); -# ifdef __PRINCIPLED__ if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) { ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( sd, sizeof(PrincipledDiffuseBsdf), weight); @@ -111,9 +109,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT); } } - else -# endif /* __PRINCIPLED__ */ - { + else { ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( sd, sizeof(DiffuseBsdf), weight); @@ -147,7 +143,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat /* Update volume stack if needed. */ if (kernel_data.integrator.use_volumes) { const int object = ss_isect.hits[0].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { float3 P = INTEGRATOR_STATE(state, ray, P); @@ -160,7 +156,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat /* Pretend ray is coming from the outside towards the exit point. This ensures * correct front/back facing normals. * TODO: find a more elegant solution? */ - ray.P += ray.D * ray.t * 2.0f; + ray.P += ray.D * ray.tmax * 2.0f; ray.D = -ray.D; integrator_state_write_isect(kg, state, &ss_isect.hits[0]); @@ -170,24 +166,30 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); - const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + const int shader_flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index 34330671748..a44b6a74d7b 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -9,11 +9,11 @@ CCL_NAMESPACE_BEGIN * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) +ccl_device_inline Spectrum subsurface_disk_eval(const Spectrum radius, float disk_r, float r) { - const float3 eval = bssrdf_eval(radius, r); + const Spectrum eval = bssrdf_eval(radius, r); const float pdf = bssrdf_pdf(radius, disk_r); - return (pdf > 0.0f) ? eval / pdf : zero_float3(); + return (pdf > 0.0f) ? eval / pdf : zero_spectrum(); } /* Subsurface scattering step, from a point on the surface to other @@ -25,8 +25,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ccl_private LocalIntersection &ss_isect) { - float disk_u, disk_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); + float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK); /* Read shading point info from integrator state. */ const float3 P = INTEGRATOR_STATE(state, ray, P); @@ -37,7 +36,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Read subsurface scattering parameters. */ - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -46,20 +45,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, disk_N = Ng; make_orthonormals(disk_N, &disk_T, &disk_B); - if (disk_v < 0.5f) { + if (rand_disk.y < 0.5f) { pick_pdf_N = 0.5f; pick_pdf_T = 0.25f; pick_pdf_B = 0.25f; - disk_v *= 2.0f; + rand_disk.y *= 2.0f; } - else if (disk_v < 0.75f) { + else if (rand_disk.y < 0.75f) { float3 tmp = disk_N; disk_N = disk_T; disk_T = tmp; pick_pdf_N = 0.25f; pick_pdf_T = 0.5f; pick_pdf_B = 0.25f; - disk_v = (disk_v - 0.5f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.5f) * 4.0f; } else { float3 tmp = disk_N; @@ -68,21 +67,22 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, pick_pdf_N = 0.25f; pick_pdf_T = 0.25f; pick_pdf_B = 0.5f; - disk_v = (disk_v - 0.75f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.75f) * 4.0f; } /* Sample point on disk. */ - float phi = M_2PI_F * disk_v; + float phi = M_2PI_F * rand_disk.y; float disk_height, disk_r; - bssrdf_sample(radius, disk_u, &disk_r, &disk_height); + bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height); float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; /* Create ray. */ ray.P = P + disk_N * disk_height + disk_P; ray.D = -disk_N; - ray.t = 2.0f * disk_height; + ray.tmin = 0.0f; + ray.tmax = 2.0f * disk_height; ray.dP = ray_dP; ray.dD = differential_zero_compact(); ray.time = time; @@ -107,13 +107,13 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, * traversal algorithm. */ sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); - float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ + Spectrum weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ float sum_weights = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { /* Get geometric normal. */ const int object = ss_isect.hits[hit].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); float3 hit_Ng = ss_isect.Ng[hit]; if (path_flag & PATH_RAY_SUBSURFACE_BACKFACING) { hit_Ng = -hit_Ng; @@ -125,17 +125,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { /* Transform normal to world space. */ Transform itfm; - Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); + object_fetch_transform_motion_test(kg, object, time, &itfm); hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - - /* Transform t to world space, except for OptiX and MetalRT where it already is. */ -#ifdef __KERNEL_GPU_RAYTRACING__ - (void)tfm; -#else - float3 D = transform_direction(&itfm, ray.D); - D = normalize(D) * ss_isect.hits[hit].t; - ss_isect.hits[hit].t = len(transform_direction(&tfm, D)); -#endif } /* Quickly retrieve P and Ng without setting up ShaderData. */ @@ -158,7 +149,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const float r = len(hit_P - P); /* Evaluate profiles. */ - const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; + const Spectrum weight = subsurface_disk_eval(radius, disk_r, r) * w; /* Store result. */ ss_isect.Ng[hit] = hit_Ng; @@ -171,11 +162,12 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, } /* Use importance resampling, sampling one of the hits proportional to weight. */ - const float r = lcg_step_float(&lcg_state) * sum_weights; + const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE); + const float r = rand_resample * sum_weights; float partial_sum = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { - const float3 weight = weights[hit]; + const Spectrum weight = weights[hit]; const float sample_weight = average(fabs(weight)); float next_sum = partial_sum + sample_weight; @@ -188,7 +180,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ray.P = ray.P + ray.D * ss_isect.hits[hit].t; ray.D = ss_isect.Ng[hit]; - ray.t = 1.0f; + ray.tmin = 0.0f; + ray.tmax = 1.0f; return true; } diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index b6cd4aae195..a6a59e286c9 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -65,19 +65,20 @@ ccl_device void subsurface_random_walk_remap(const float albedo, *sigma_t = sigma_t_prime / (1.0f - g); } -ccl_device void subsurface_random_walk_coefficients(const float3 albedo, - const float3 radius, +ccl_device void subsurface_random_walk_coefficients(const Spectrum albedo, + const Spectrum radius, const float anisotropy, - ccl_private float3 *sigma_t, - ccl_private float3 *alpha, - ccl_private float3 *throughput) + ccl_private Spectrum *sigma_t, + ccl_private Spectrum *alpha, + ccl_private Spectrum *throughput) { - float sigma_t_x, sigma_t_y, sigma_t_z; - float alpha_x, alpha_y, alpha_z; - - subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); - subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); - subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + FOREACH_SPECTRUM_CHANNEL (i) { + subsurface_random_walk_remap(GET_SPECTRUM_CHANNEL(albedo, i), + GET_SPECTRUM_CHANNEL(radius, i), + anisotropy, + &GET_SPECTRUM_CHANNEL(*sigma_t, i), + &GET_SPECTRUM_CHANNEL(*alpha, i)); + } /* Throughput already contains closure weight at this point, which includes the * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo @@ -88,21 +89,12 @@ ccl_device void subsurface_random_walk_coefficients(const float3 albedo, * infinite phase functions. To avoid a sharp discontinuity as we go from * such values to 0.0, increase alpha and reduce the throughput to compensate. */ const float min_alpha = 0.2f; - if (alpha_x < min_alpha) { - (*throughput).x *= alpha_x / min_alpha; - alpha_x = min_alpha; - } - if (alpha_y < min_alpha) { - (*throughput).y *= alpha_y / min_alpha; - alpha_y = min_alpha; - } - if (alpha_z < min_alpha) { - (*throughput).z *= alpha_z / min_alpha; - alpha_z = min_alpha; + FOREACH_SPECTRUM_CHANNEL (i) { + if (GET_SPECTRUM_CHANNEL(*alpha, i) < min_alpha) { + GET_SPECTRUM_CHANNEL(*throughput, i) *= GET_SPECTRUM_CHANNEL(*alpha, i) / min_alpha; + GET_SPECTRUM_CHANNEL(*alpha, i) = min_alpha; + } } - - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *alpha = make_float3(alpha_x, alpha_y, alpha_z); } /* References for Dwivedi sampling: @@ -151,12 +143,12 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f return dir.x * T + dir.y * B + dir.z * D; } -ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, - float t, - bool hit, - ccl_private float3 *transmittance) +ccl_device_forceinline Spectrum subsurface_random_walk_pdf(Spectrum sigma_t, + float t, + bool hit, + ccl_private Spectrum *transmittance) { - float3 T = volume_color_transmittance(sigma_t, t); + Spectrum T = volume_color_transmittance(sigma_t, t); if (transmittance) { *transmittance = T; } @@ -173,8 +165,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ccl_private Ray &ray, ccl_private LocalIntersection &ss_isect) { - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); const float3 P = INTEGRATOR_STATE(state, ray, P); const float3 N = INTEGRATOR_STATE(state, ray, D); @@ -187,7 +178,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Sample diffuse surface scatter into the object. */ float3 D; float pdf; - sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf); if (dot(-Ng, D) <= 0.0f) { return false; } @@ -195,7 +186,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Setup ray. */ ray.P = P; ray.D = D; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = time; ray.dP = ray_dP; ray.dD = differential_zero_compact(); @@ -204,22 +196,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; -#ifndef __KERNEL_GPU_RAYTRACING__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -#endif - /* Convert subsurface to volume coefficients. * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum albedo = INTEGRATOR_STATE(state, subsurface, albedo); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); - float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); + Spectrum sigma_t, alpha; + Spectrum throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); - float3 sigma_s = sigma_t * alpha; + Spectrum sigma_s = sigma_t * alpha; /* Theoretically it should be better to use the exact alpha for the channel we're sampling at * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange @@ -229,7 +215,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * Since the strength of the guided sampling increases as alpha gets lower, using a value that * is too low results in fireflies while one that's too high just gives a bit more noise. * Therefore, the code here uses the highest of the three albedos to be safe. */ - const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); + const float diffusion_length = diffusion_length_dwivedi(reduce_max(alpha)); if (diffusion_length == 1.0f) { /* With specific values of alpha the length might become 1, which in asymptotic makes phase to @@ -242,7 +228,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); /* Random walk until we hit the surface again. */ bool hit = false; @@ -254,10 +240,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - float3 sigma_s_star = sigma_s * (1.0f - anisotropy); - float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; - float3 sigma_t_org = sigma_t; - float3 sigma_s_org = sigma_s; + Spectrum sigma_s_star = sigma_s * (1.0f - anisotropy); + Spectrum sigma_t_star = sigma_t - sigma_s + sigma_s_star; + Spectrum sigma_t_org = sigma_t; + Spectrum sigma_s_org = sigma_s; const float anisotropy_org = anisotropy; const float guided_fraction_org = guided_fraction; #endif @@ -269,7 +255,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL // shadow with local variables according to depth float anisotropy, guided_fraction; - float3 sigma_s, sigma_t; + Spectrum sigma_s, sigma_t; if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { anisotropy = anisotropy_org; guided_fraction = guided_fraction_org; @@ -285,11 +271,11 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #endif /* Sample color channel, use MIS with balance heuristic. */ - float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); - float3 channel_pdf; + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_PHASE_CHANNEL); + Spectrum channel_pdf; int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); float sample_sigma_t = volume_channel_get(sigma_t, channel); - float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_SCATTER_DISTANCE); /* We need the result of the ray-cast to compute the full guided PDF, so just remember the * relevant terms to avoid recomputing them later. */ @@ -302,7 +288,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* For the initial ray, we already know the direction, so just do classic distance sampling. */ if (bounce > 0) { /* Decide whether we should use guided or classic sampling. */ - bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_STRATEGY) < + guided_fraction); /* Determine if we want to sample away from the incoming interface. * This only happens if we found a nearby opposite interface, and the probability for it @@ -316,27 +303,28 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); backward_fraction = 1.0f / (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); - guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_DIRECTION) < + backward_fraction; } /* Sample scattering direction. */ - float scatter_u, scatter_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); float cos_theta; float hg_pdf; if (guided) { - cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x); /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the * sign here is enough to sample from that instead. */ if (guide_backward) { cos_theta = -cos_theta; } - float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y); hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); ray.D = newD; } else { - float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + float3 newD = henyey_greenstrein_sample( + ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf); cos_theta = dot(newD, N); ray.D = newD; } @@ -370,10 +358,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * chance of connecting to it. * TODO: Maybe use less than 10 times the mean free path? */ if (bounce == 0) { - ray.t = max(t, 10.0f / (min3(sigma_t))); + ray.tmax = max(t, 10.0f / (reduce_min(sigma_t))); } else { - ray.t = t; + ray.tmax = t; /* After the first bounce the object can intersect the same surface again */ ray.self.object = OBJECT_NONE; ray.self.prim = PRIM_NONE; @@ -382,46 +370,39 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, hit = (ss_isect.num_hits > 0); if (hit) { -#ifdef __KERNEL_GPU_RAYTRACING__ - /* t is always in world space with OptiX and MetalRT. */ - ray.t = ss_isect.hits[0].t; -#else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.t = len(transform_direction(&ob_tfm, D)); -#endif + ray.tmax = ss_isect.hits[0].t; } if (bounce == 0) { /* Check if we hit the opposite side. */ if (hit) { have_opposite_interface = true; - opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N); } /* Apart from the opposite side check, we were supposed to only trace up to distance t, * so check if there would have been a hit in that case. */ - hit = ray.t < t; + hit = ray.tmax < t; } /* Use the distance to the exit point for the throughput update if we found one. */ if (hit) { - t = ray.t; + t = ray.tmax; } /* Advance to new scatter location. */ ray.P += t * ray.D; - float3 transmittance; - float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + Spectrum transmittance; + Spectrum pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); if (bounce > 0) { /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ - float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + Spectrum guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); if (have_opposite_interface) { /* First step of MIS: Depending on geometry we might have two methods for guided * sampling, so perform MIS between them. */ - float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + Spectrum back_pdf = subsurface_random_walk_pdf( + backward_stretching * sigma_t, t, hit, NULL); guided_pdf = mix( guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); } @@ -443,16 +424,14 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* If we hit the surface, we are done. */ break; } - else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && - throughput.y < VOLUME_THROUGHPUT_EPSILON && - throughput.z < VOLUME_THROUGHPUT_EPSILON) { + else if (reduce_max(throughput) < VOLUME_THROUGHPUT_EPSILON) { /* Avoid unnecessary work and precision issue when throughput gets really small. */ break; } } if (hit) { - kernel_assert(isfinite3_safe(throughput)); + kernel_assert(isfinite_safe(throughput)); INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; } diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h new file mode 100644 index 00000000000..64b5556f7e9 --- /dev/null +++ b/intern/cycles/kernel/integrator/surface_shader.h @@ -0,0 +1,588 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate shaders. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + const uint32_t path_flag) +{ + /* Filter out closures. */ + if (kernel_data.integrator.filter_closures) { + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { + sd->closure_emission_background = zero_spectrum(); + } + + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { + sd->flag &= ~SD_BSDF_HAS_EVAL; + } + + if (path_flag & PATH_RAY_CAMERA) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || + (CLOSURE_IS_BSDF_GLOSSY(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || + (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { + sc->type = CLOSURE_NONE_ID; + sc->sample_weight = 0.0f; + } + else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { + sc->type = CLOSURE_HOLDOUT_ID; + sc->sample_weight = 0.0f; + sd->flag |= SD_HOLDOUT; + } + } + } + } + + /* Defensive sampling. + * + * We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. */ + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && + sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } + + /* Filter glossy. + * + * Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX +#ifdef __MNEE__ + && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) +#endif + ) { + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + bsdf_blur(kg, sc, blur_roughness); + } + } + } + } +} + +/* BSDF */ + +ccl_device_inline bool surface_shader_is_transmission(ccl_private const ShaderData *sd, + const float3 omega_in) +{ + return dot(sd->N, omega_in) < 0.0f; +} + +ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light_shader_flags) +{ + if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { + return false; + } + if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { + if (CLOSURE_IS_BSDF_GLOSSY(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { + if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { + return true; + } + } + return false; +} + +ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private const ShaderClosure *skip_sc, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, + * some PDF factors drop out when using balance heuristic weighting. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (sc == skip_sc) { + continue; + } + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); + + if (bsdf_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); + sum_pdf += bsdf_pdf * sc->sample_weight; + } + } + + sum_sample_weight += sc->sample_weight; + } + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif + float + surface_shader_bsdf_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private BsdfEval *bsdf_eval, + const uint light_shader_flags) +{ + bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum()); + + return _surface_shader_bsdf_eval_mis( + kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); +} + +/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ +ccl_device_inline ccl_private const ShaderClosure *surface_shader_bsdf_bssrdf_pick( + ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf) +{ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. */ + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + float r = (*rand_bsdf).x * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + sampled = i; + + /* Rescale to reuse for direction sample, to better preserve stratification. */ + (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + } + + return &sd->closure[sampled]; +} + +/* Return weight for picked BSSRDF. */ +ccl_device_inline Spectrum +surface_shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) +{ + Spectrum weight = bssrdf_sc->weight; + + if (sd->num_closure > 1) { + float sum = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + weight *= sum / bssrdf_sc->sample_weight; + } + + return weight; +} + +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ +ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + const float2 rand_bsdf, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + /* BSSRDF should already have been handled elsewhere. */ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); + + if (sd->num_closure > 1) { + const bool is_transmission = surface_shader_is_transmission(sd, *omega_in); + float sweight = sc->sample_weight; + *pdf = _surface_shader_bsdf_eval_mis( + kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); + } + } + + return label; +} + +ccl_device float surface_shader_average_roughness(ccl_private const ShaderData *sd) +{ + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; +} + +ccl_device Spectrum surface_shader_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return one_spectrum(); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return zero_spectrum(); + } +} + +ccl_device void surface_shader_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) +{ + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = zero_spectrum(); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } +} + +ccl_device Spectrum surface_shader_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum alpha = one_spectrum() - surface_shader_transparency(kg, sd); + + alpha = saturate(alpha); + + return alpha; +} + +ccl_device Spectrum surface_shader_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 surface_shader_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } + + return (is_zero(N)) ? sd->N : normalize(N); +} + +ccl_device Spectrum surface_shader_ao(KernelGlobals kg, + ccl_private const ShaderData *sd, + const float ao_factor, + ccl_private float3 *N_) +{ + Spectrum eval = zero_spectrum(); + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } + + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; +} + +#ifdef __SUBSURFACE__ +ccl_device float3 surface_shader_bssrdf_normal(ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); + + N += bssrdf->N * avg_weight; + } + } + + return (is_zero(N)) ? sd->N : normalize(N); +} +#endif /* __SUBSURFACE__ */ + +/* Constant emission optimization */ + +ccl_device bool surface_shader_constant_emission(KernelGlobals kg, + int shader, + ccl_private Spectrum *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_data_fetch(shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + const float3 emission_rgb = make_float3( + kernel_data_fetch(shaders, shader_index).constant_emission[0], + kernel_data_fetch(shaders, shader_index).constant_emission[1], + kernel_data_fetch(shaders, shader_index).constant_emission[2]); + *eval = rgb_to_spectrum(emission_rgb); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Emission */ + +ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Holdout */ + +ccl_device Spectrum surface_shader_apply_holdout(KernelGlobals kg, ccl_private ShaderData *sd) +{ + Spectrum weight = zero_spectrum(); + + /* For objects marked as holdout, preserve transparency and remove all other + * closures, replacing them with a holdout weight. */ + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { + weight = one_spectrum() - sd->closure_transparent_extinction; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + sc->type = NBUILTIN_CLOSURES; + } + } + + sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); + } + else { + weight = one_spectrum(); + } + } + else { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_HOLDOUT(sc->type)) { + weight += sc->weight; + } + } + } + + return weight; +} + +/* Surface Evaluation */ + +template<uint node_feature_mask, typename ConstIntegratorGenericState> +ccl_device void surface_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float *ccl_restrict buffer, + uint32_t path_flag, + bool use_caustics_storage = false) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; + +#ifdef __OSL__ + if (kg->osl) { + if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { + OSLShader::eval_background(kg, state, sd, path_flag); + } + else { + OSLShader::eval_surface(kg, state, sd, path_flag); + } + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); +#else + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_spectrum(0.8f); + sd->flag |= SD_EMISSION; + } + else { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_spectrum(0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +#endif + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h new file mode 100644 index 00000000000..31039bfdcf5 --- /dev/null +++ b/intern/cycles/kernel/integrator/volume_shader.h @@ -0,0 +1,354 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Volume shader evaluation and sampling. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Merging */ +ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd) +{ + /* Merge identical closures to save closure space with stacked volumes. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sci = &sd->closure[i]; + + if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + continue; + } + + for (int j = i + 1; j < sd->num_closure; j++) { + ccl_private ShaderClosure *scj = &sd->closure[j]; + if (sci->type != scj->type) { + continue; + } + + ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) + sci; + ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) + scj; + if (!(hgi->g == hgj->g)) { + continue; + } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } +} + +ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict + phases, + ccl_private const ShaderData *ccl_restrict sd) +{ + phases->num_closure = 0; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *from_sc = &sd->closure[i]; + ccl_private const HenyeyGreensteinVolume *from_hg = + (ccl_private const HenyeyGreensteinVolume *)from_sc; + + if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; + + to_sc->weight = from_sc->weight; + to_sc->sample_weight = from_sc->sample_weight; + to_sc->g = from_hg->g; + phases->num_closure++; + if (phases->num_closure >= MAX_VOLUME_CLOSURE) { + break; + } + } + } +} + +ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + int skip_phase, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) +{ + for (int i = 0; i < phases->num_closure; i++) { + if (i == skip_phase) + continue; + + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float phase_pdf = 0.0f; + Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + sum_pdf += phase_pdf * svc->sample_weight; + } + + sum_sample_weight += svc->sample_weight; + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device float volume_shader_phase_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum()); + + return _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); +} + +ccl_device int volume_shader_phase_sample(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + int sampled = 0; + + if (phases->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + sum += svc->sample_weight; + } + + float r = rand_phase.x * sum; + float partial_sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse for BSDF direction sample. */ + rand_phase.x = (r - partial_sum) / svc->sample_weight; + break; + } + + partial_sum = next_sum; + } + + if (sampled == phases->num_closure) { + *pdf = 0.0f; + return LABEL_NONE; + } + } + + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + } + + return label; +} + +ccl_device int volume_shader_phase_sample_closure(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *sc, + const float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + + return label; +} + +/* Motion Blur */ + +# ifdef __OBJECT_MOTION__ +ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd) +{ + if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) { + return; + } + + AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); + kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); + + const float3 P = sd->P; + const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale; + const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 0.5f : + 0.0f; + const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? + (1.0f - kernel_data.cam.shuttertime) + sd->time : + sd->time; + + /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity + * existed, or will exist, at the given time: + * + * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` + * + * where + * + * x : position + * T : super-sampled time (or ray time) + * t : current time of the simulation (in rendering we assume this is center frame with + * relative time = 0) + * phi : the volume quantity + * u : the velocity field + * + * But first we need to determine the velocity field `u(x, T)`, which we can estimate also + * using semi-lagrangian advection. + * + * `u(x, T) = u(x - (T - t) * u(x, T), t)` + * + * This is the typical way to model self-advection in fluid dynamics, however, we do not + * account for other forces affecting the velocity during simulation (pressure, buoyancy, + * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better + * results, a higher order interpolation scheme can be used (at the cost of more lookups), + * or an interpolation of the velocity fields for the previous and next frames could also + * be used to estimate `u(x, T)` (which will cost more memory and lookups). + * + * References: + * "Eulerian Motion Blur", Kim and Ko, 2007 + * "Production Volume Rendering", Wreninge et al., 2012 + */ + + /* Find velocity. */ + float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P. */ + sd->P = P - (time - time_offset) * velocity_scale * velocity; + + /* Find advected velocity. */ + velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P. */ + sd->P = P - (time - time_offset) * velocity_scale * velocity; +} +# endif + +/* Volume Evaluation */ + +template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> +ccl_device_inline void volume_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + const uint32_t path_flag, + StackReadOp stack_read) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + /* reset closures once at the start, we will be accumulating the closures + * for all volumes in the stack into a single array of closures */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0;; i++) { + const VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Setup shader-data from stack. it's mostly setup already in + * shader_setup_from_volume, this switching should be quick. */ + sd->object = entry.object; + sd->lamp = LAMP_NONE; + sd->shader = entry.shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_data_fetch(object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* todo: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step */ + shader_setup_object_transforms(kg, sd, sd->time); + + volume_shader_motion_blur(kg, sd); +# endif + } + + /* evaluate shader */ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, state, sd, path_flag); + } + else +# endif + { +# ifdef __SVM__ + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( + kg, state, sd, NULL, path_flag); +# endif + } + + /* Merge closures to avoid exceeding number of closures limit. */ + if (!shadow) { + if (i > 0) { + volume_shader_merge_closures(sd); + } + } + } +} + +#endif /* __VOLUME__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h index 5256349a0cc..675e1927fc0 100644 --- a/intern/cycles/kernel/integrator/volume_stack.h +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -39,7 +39,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, break; } - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { /* Shift back next stack entries. */ do { entry = stack_read(i + 1); @@ -61,7 +61,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, } /* Already in the stack? then we have nothing to do. */ - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { return; } } @@ -133,7 +133,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read break; } - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags; bool heterogeneous = false; @@ -146,7 +146,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read * heterogeneous volume objects may be using the same shader. */ int object = entry.object; if (object != OBJECT_NONE) { - int object_flag = kernel_tex_fetch(__object_flag, object); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { heterogeneous = true; } @@ -180,7 +180,7 @@ ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, Integ break; } - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags; if (shader_flag & SD_VOLUME_MIS) { /* Multiple importance sampling. */ diff --git a/intern/cycles/kernel/light/background.h b/intern/cycles/kernel/light/background.h index 0cbf7fb76fe..951620ff1cb 100644 --- a/intern/cycles/kernel/light/background.h +++ b/intern/cycles/kernel/light/background.h @@ -9,8 +9,6 @@ CCL_NAMESPACE_BEGIN /* Background Light */ -#ifdef __BACKGROUND_MIS__ - ccl_device float3 background_map_sample(KernelGlobals kg, float randu, float randv, @@ -31,7 +29,7 @@ ccl_device float3 background_map_sample(KernelGlobals kg, int step = count >> 1; int middle = first + step; - if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) { + if (kernel_data_fetch(light_background_marginal_cdf, middle).y < randv) { first = middle + 1; count -= step + 1; } @@ -42,9 +40,9 @@ ccl_device float3 background_map_sample(KernelGlobals kg, int index_v = max(0, first - 1); kernel_assert(index_v >= 0 && index_v < res_y); - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + float2 cdf_v = kernel_data_fetch(light_background_marginal_cdf, index_v); + float2 cdf_next_v = kernel_data_fetch(light_background_marginal_cdf, index_v + 1); + float2 cdf_last_v = kernel_data_fetch(light_background_marginal_cdf, res_y); /* importance-sampled V direction */ float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv); @@ -57,7 +55,7 @@ ccl_device float3 background_map_sample(KernelGlobals kg, int step = count >> 1; int middle = first + step; - if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < + if (kernel_data_fetch(light_background_conditional_cdf, index_v * cdf_width + middle).y < randu) { first = middle + 1; count -= step + 1; @@ -69,12 +67,12 @@ ccl_device float3 background_map_sample(KernelGlobals kg, int index_u = max(0, first - 1); kernel_assert(index_u >= 0 && index_u < res_x); - float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u); - float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u + 1); - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + res_x); + float2 cdf_u = kernel_data_fetch(light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_next_u = kernel_data_fetch(light_background_conditional_cdf, + index_v * cdf_width + index_u + 1); + float2 cdf_last_u = kernel_data_fetch(light_background_conditional_cdf, + index_v * cdf_width + res_x); /* importance-sampled U direction */ float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu); @@ -112,9 +110,9 @@ ccl_device float background_map_pdf(KernelGlobals kg, float3 direction) int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1); /* pdfs in V direction */ - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + res_x); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + float2 cdf_last_u = kernel_data_fetch(light_background_conditional_cdf, + index_v * cdf_width + res_x); + float2 cdf_last_v = kernel_data_fetch(light_background_marginal_cdf, res_y); float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x; @@ -122,9 +120,9 @@ ccl_device float background_map_pdf(KernelGlobals kg, float3 direction) return 0.0f; /* pdfs in U direction */ - float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u); - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); + float2 cdf_u = kernel_data_fetch(light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_v = kernel_data_fetch(light_background_marginal_cdf, index_v); return (cdf_u.x * cdf_v.x) / denom; } @@ -133,7 +131,7 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side( KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir) { int portal = kernel_data.background.portal_offset + index; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal); *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); @@ -166,7 +164,7 @@ ccl_device_inline float background_portal_pdf( num_possible++; int portal = kernel_data.background.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal); float3 axisu = make_float3( klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); float3 axisv = make_float3( @@ -242,7 +240,7 @@ ccl_device float3 background_portal_sample(KernelGlobals kg, if (portal == 0) { /* p is the portal to be sampled. */ int portal = kernel_data.background.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal); float3 axisu = make_float3( klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); float3 axisv = make_float3( @@ -435,6 +433,4 @@ ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 directi return pdf * kernel_data.integrator.pdf_lights; } -#endif - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index 1df1615ed99..12a6f21b58d 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -38,7 +38,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg, const uint32_t path_flag, ccl_private LightSample *ls) { - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { if (klight->shader_id & SHADER_EXCLUDE_SHADOW_CATCHER) { return false; @@ -86,7 +86,6 @@ ccl_device_inline bool light_sample(KernelGlobals kg, ls->pdf = invarea / (costheta * costheta * costheta); ls->eval_fac = ls->pdf; } -#ifdef __BACKGROUND_MIS__ else if (type == LIGHT_BACKGROUND) { /* infinite area light (e.g. light dome or env light) */ float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf); @@ -97,7 +96,6 @@ ccl_device_inline bool light_sample(KernelGlobals kg, ls->t = FLT_MAX; ls->eval_fac = 1.0f; } -#endif else { ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]); @@ -202,8 +200,12 @@ ccl_device_inline bool light_sample(KernelGlobals kg, inplane = ls->P - inplane; } - ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f; - ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f; + const float light_u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)); + const float light_v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)); + + /* NOTE: Return barycentric coordinates in the same notation as Embree and OptiX. */ + ls->u = light_v + 0.5f; + ls->v = -light_u - light_v; ls->Ng = Ng; ls->D = normalize_len(ls->P - P, &ls->t); @@ -237,7 +239,7 @@ ccl_device bool lights_intersect(KernelGlobals kg, const uint32_t path_flag) { for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (path_flag & PATH_RAY_CAMERA) { if (klight->shader_id & SHADER_EXCLUDE_CAMERA) { @@ -270,31 +272,26 @@ ccl_device bool lights_intersect(KernelGlobals kg, if (type == LIGHT_SPOT) { /* Spot/Disk light. */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - const float3 ray_P = ray->P - ray->D * mis_ray_t; - const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); const float radius = klight->spot.radius; if (radius == 0.0f) { continue; } /* disk oriented normal */ - const float3 lightN = normalize(ray_P - lightP); + const float3 lightN = normalize(ray->P - lightP); /* One sided. */ if (dot(ray->D, lightN) >= 0.0f) { continue; } float3 P; - if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) { + if (!ray_disk_intersect( + ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) { continue; } } else if (type == LIGHT_POINT) { /* Sphere light (aka, aligned disk light). */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - const float3 ray_P = ray->P - ray->D * mis_ray_t; - const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); const float radius = klight->spot.radius; if (radius == 0.0f) { @@ -302,9 +299,10 @@ ccl_device bool lights_intersect(KernelGlobals kg, } /* disk oriented normal */ - const float3 lightN = normalize(ray_P - lightP); + const float3 lightN = normalize(ray->P - lightP); float3 P; - if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) { + if (!ray_disk_intersect( + ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) { continue; } } @@ -330,8 +328,19 @@ ccl_device bool lights_intersect(KernelGlobals kg, const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]); float3 P; - if (!ray_quad_intersect( - ray->P, ray->D, 0.0f, ray->t, light_P, axisu, axisv, Ng, &P, &t, &u, &v, is_round)) { + if (!ray_quad_intersect(ray->P, + ray->D, + ray->tmin, + ray->tmax, + light_P, + axisu, + axisv, + Ng, + &P, + &t, + &u, + &v, + is_round)) { continue; } } @@ -358,7 +367,7 @@ ccl_device bool light_sample_from_distant_ray(KernelGlobals kg, const int lamp, ccl_private LightSample *ccl_restrict ls) { - ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, lamp); const int shader = klight->shader_id; const float radius = klight->distant.radius; const LightType type = (LightType)klight->type; @@ -433,7 +442,7 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg, ccl_private LightSample *ccl_restrict ls) { const int lamp = isect->prim; - ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, lamp); LightType type = (LightType)klight->type; ls->type = type; ls->shader = klight->shader_id; @@ -562,7 +571,7 @@ ccl_device_inline bool triangle_world_space_vertices( KernelGlobals kg, int object, int prim, float time, float3 V[3]) { bool has_motion = false; - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) { motion_triangle_vertices(kg, object, prim, time, V); @@ -699,12 +708,12 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg, float area = 0.5f * Nl; /* flip normal if necessary */ - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { ls->Ng = -ls->Ng; } ls->eval_fac = 1.0f; - ls->shader = kernel_tex_fetch(__tri_shader, prim); + ls->shader = kernel_data_fetch(tri_shader, prim); ls->object = object; ls->prim = prim; ls->lamp = LAMP_NONE; @@ -775,7 +784,8 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg, ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B); /* calculate intersection with the planar triangle */ - if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) { + if (!ray_triangle_intersect( + P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) { ls->pdf = 0.0f; return; } @@ -845,7 +855,7 @@ ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *ra int half_len = len >> 1; int middle = first + half_len; - if (r < kernel_tex_fetch(__light_distribution, middle).totarea) { + if (r < kernel_data_fetch(light_distribution, middle).totarea) { len = half_len; } else { @@ -860,8 +870,8 @@ ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *ra /* Rescale to reuse random number. this helps the 2D samples within * each area light be stratified as well. */ - float distr_min = kernel_tex_fetch(__light_distribution, index).totarea; - float distr_max = kernel_tex_fetch(__light_distribution, index + 1).totarea; + float distr_min = kernel_data_fetch(light_distribution, index).totarea; + float distr_max = kernel_data_fetch(light_distribution, index + 1).totarea; *randu = (r - distr_min) / (distr_max - distr_min); return index; @@ -871,7 +881,7 @@ ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *ra ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals kg, int index, int bounce) { - return (bounce > kernel_tex_fetch(__lights, index).max_bounces); + return (bounce > kernel_data_fetch(lights, index).max_bounces); } template<bool in_volume_segment> @@ -886,8 +896,8 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg, { /* Sample light index from distribution. */ const int index = light_distribution_sample(kg, &randu); - ccl_global const KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, - index); + ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(light_distribution, + index); const int prim = kdistribution->prim; if (prim >= 0) { @@ -896,7 +906,7 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg, /* Exclude synthetic meshes from shadow catcher pass. */ if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) && - !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_SHADOW_CATCHER)) { + !(kernel_data_fetch(object_flag, object) & SD_OBJECT_SHADOW_CATCHER)) { return false; } diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h index 9bbbd5b0d10..e0d4f221bef 100644 --- a/intern/cycles/kernel/light/sample.h +++ b/intern/cycles/kernel/light/sample.h @@ -4,7 +4,7 @@ #pragma once #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/light/light.h" @@ -14,7 +14,7 @@ CCL_NAMESPACE_BEGIN /* Evaluate shader on light. */ -ccl_device_noinline_cpu float3 +ccl_device_noinline_cpu Spectrum light_sample_shader_eval(KernelGlobals kg, IntegratorState state, ccl_private ShaderData *ccl_restrict emission_sd, @@ -22,24 +22,21 @@ light_sample_shader_eval(KernelGlobals kg, float time) { /* setup shading at emitter */ - float3 eval = zero_float3(); + Spectrum eval = zero_spectrum(); - if (shader_constant_emission_eval(kg, ls->shader, &eval)) { + if (surface_shader_constant_emission(kg, ls->shader, &eval)) { if ((ls->prim != PRIM_NONE) && dot(ls->Ng, ls->D) > 0.0f) { ls->Ng = -ls->Ng; } } else { - /* Setup shader data and call shader_eval_surface once, better + /* Setup shader data and call surface_shader_eval once, better * for GPU coherence and compile times. */ PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); -#ifdef __BACKGROUND_MIS__ if (ls->type == LIGHT_BACKGROUND) { shader_setup_from_background(kg, emission_sd, ls->P, ls->D, time); } - else -#endif - { + else { shader_setup_from_sample(kg, emission_sd, ls->P, @@ -63,26 +60,24 @@ light_sample_shader_eval(KernelGlobals kg, /* No proper path flag, we're evaluating this for all closures. that's * weak but we'd have to do multiple evaluations otherwise. */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>( kg, state, emission_sd, NULL, PATH_RAY_EMISSION); /* Evaluate closures. */ -#ifdef __BACKGROUND_MIS__ if (ls->type == LIGHT_BACKGROUND) { - eval = shader_background_eval(emission_sd); + eval = surface_shader_background(emission_sd); } - else -#endif - { - eval = shader_emissive_eval(emission_sd); + else { + eval = surface_shader_emission(emission_sd); } } eval *= ls->eval_fac; if (ls->lamp != LAMP_NONE) { - ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); - eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]); + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); + eval *= rgb_to_spectrum( + make_float3(klight->strength[0], klight->strength[1], klight->strength[2])); } return eval; @@ -106,7 +101,7 @@ ccl_device_inline bool light_sample_terminate(KernelGlobals kg, } if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - float probability = max3(fabs(bsdf_eval_sum(eval))) * + float probability = reduce_max(fabs(bsdf_eval_sum(eval))) * kernel_data.integrator.light_inv_rr_threshold; if (probability < 1.0f) { if (rand_terminate >= probability) { @@ -137,8 +132,9 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset( triangle_vertices_and_normals(kg, sd->prim, V, N); } - const float u = sd->u, v = sd->v; - const float w = 1 - u - v; + const float u = 1.0f - sd->u - sd->v; + const float v = sd->u; + const float w = sd->v; float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */ float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */ @@ -187,7 +183,7 @@ ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg, if ((sd->type & PRIMITIVE_TRIANGLE) && (sd->shader & SHADER_SMOOTH_NORMAL)) { const float offset_cutoff = - kernel_tex_fetch(__objects, sd->object).shadow_terminator_geometry_offset; + kernel_data_fetch(objects, sd->object).shadow_terminator_geometry_offset; /* Do ray offset (heavy stuff) only for close to be terminated triangles: * offset_cutoff = 0.1f means that 10-20% of rays will be affected. Also * make a smooth transition near the threshold. */ @@ -227,23 +223,24 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri if (ls->shader & SHADER_CAST_SHADOW) { /* setup ray */ ray->P = P; + ray->tmin = 0.0f; if (ls->t == FLT_MAX) { /* distant light */ ray->D = ls->D; - ray->t = ls->t; + ray->tmax = ls->t; } else { /* other lights, avoid self-intersection */ ray->D = ls->P - P; - ray->D = normalize_len(ray->D, &ray->t); + ray->D = normalize_len(ray->D, &ray->tmax); } } else { /* signal to not cast shadow ray */ ray->P = zero_float3(); ray->D = zero_float3(); - ray->t = 0.0f; + ray->tmax = 0.0f; } ray->dP = differential_make_compact(sd->dP); diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 7570490be7c..5075e4e1528 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -10,21 +10,18 @@ set(INC_SYS ) set(SRC - background.cpp - bsdf_diffuse_ramp.cpp - bsdf_phong_ramp.cpp - emissive.cpp - bssrdf.cpp closures.cpp + globals.cpp services.cpp - shader.cpp ) set(HEADER_SRC - closures.h + closures_setup.h + closures_template.h globals.h + osl.h services.h - shader.h + types.h ) set(LIB diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp deleted file mode 100644 index 865ff4ddc6d..00000000000 --- a/intern/cycles/kernel/osl/background.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include <OpenImageIO/fmath.h> - -#include <OSL/genclosure.h> - -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/closure/alloc.h" -#include "kernel/closure/emissive.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -/// Generic background closure -/// -/// We only have a background closure for the shaders -/// to return a color in background shaders. No methods, -/// only the weight is taking into account -/// -class GenericBackgroundClosure : public CClosurePrimitive { - public: - void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight) - { - background_setup(sd, weight); - } -}; - -/// Holdout closure -/// -/// This will be used by the shader to mark the -/// amount of holdout for the current shading -/// point. No parameters, only the weight will be -/// used -/// -class HoldoutClosure : CClosurePrimitive { - public: - void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight) - { - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight); - sd->flag |= SD_HOLDOUT; - } -}; - -ClosureParam *closure_background_params() -{ - static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"), - CLOSURE_FINISH_PARAM(GenericBackgroundClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_background_prepare, GenericBackgroundClosure) - -ClosureParam *closure_holdout_params() -{ - static ClosureParam params[] = {CLOSURE_FINISH_PARAM(HoldoutClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp deleted file mode 100644 index 39fcee1ac0d..00000000000 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include <OpenImageIO/fmath.h> - -#include <OSL/genclosure.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/types.h" -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_diffuse_ramp.h" -#include "kernel/closure/bsdf_util.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -class DiffuseRampClosure : public CBSDFClosure { - public: - DiffuseRampBsdf params; - Color3 colors[8]; - - void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl( - sd, sizeof(DiffuseRampBsdf), weight, ¶ms); - - if (bsdf) { - bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); - - if (bsdf->colors) { - for (int i = 0; i < 8; i++) - bsdf->colors[i] = TO_FLOAT3(colors[i]); - - sd->flag |= bsdf_diffuse_ramp_setup(bsdf); - } - } - } -}; - -ClosureParam *closure_bsdf_diffuse_ramp_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N), - CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"), - CLOSURE_FINISH_PARAM(DiffuseRampClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bsdf_diffuse_ramp_prepare, DiffuseRampClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp deleted file mode 100644 index 972ed7e4a6d..00000000000 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include <OpenImageIO/fmath.h> - -#include <OSL/genclosure.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/types.h" -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_phong_ramp.h" -#include "kernel/closure/bsdf_util.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -class PhongRampClosure : public CBSDFClosure { - public: - PhongRampBsdf params; - Color3 colors[8]; - - void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl( - sd, sizeof(PhongRampBsdf), weight, ¶ms); - - if (bsdf) { - bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); - - if (bsdf->colors) { - for (int i = 0; i < 8; i++) - bsdf->colors[i] = TO_FLOAT3(colors[i]); - - sd->flag |= bsdf_phong_ramp_setup(bsdf); - } - } - } -}; - -ClosureParam *closure_bsdf_phong_ramp_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N), - CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent), - CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"), - CLOSURE_FINISH_PARAM(PhongRampClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bsdf_phong_ramp_prepare, PhongRampClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/bssrdf.cpp b/intern/cycles/kernel/osl/bssrdf.cpp deleted file mode 100644 index 4b282fddad3..00000000000 --- a/intern/cycles/kernel/osl/bssrdf.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include <OSL/genclosure.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/types.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/bsdf_diffuse.h" -#include "kernel/closure/bsdf_principled_diffuse.h" -#include "kernel/closure/bssrdf.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -static ustring u_burley("burley"); -static ustring u_random_walk_fixed_radius("random_walk_fixed_radius"); -static ustring u_random_walk("random_walk"); - -class CBSSRDFClosure : public CClosurePrimitive { - public: - Bssrdf params; - float ior; - ustring method; - - CBSSRDFClosure() - { - params.roughness = FLT_MAX; - params.anisotropy = 1.0f; - ior = 1.4f; - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - if (method == u_burley) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); - } - else if (method == u_random_walk_fixed_radius) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); - } - else if (method == u_random_walk) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); - } - } - - void alloc(ShaderData *sd, uint32_t path_flag, float3 weight, ClosureType type) - { - Bssrdf *bssrdf = bssrdf_alloc(sd, weight); - - if (bssrdf) { - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { - params.radius = make_float3(0.0f, 0.0f, 0.0f); - } - - /* create one closure per color channel */ - bssrdf->radius = params.radius; - bssrdf->albedo = params.albedo; - bssrdf->N = params.N; - bssrdf->roughness = params.roughness; - bssrdf->anisotropy = clamp(params.anisotropy, 0.0f, 0.9f); - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, clamp(ior, 1.01f, 3.8f)); - } - } -}; - -ClosureParam *closure_bssrdf_params() -{ - static ClosureParam params[] = { - CLOSURE_STRING_PARAM(CBSSRDFClosure, method), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, ior, "ior"), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.anisotropy, "anisotropy"), - CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"), - CLOSURE_FINISH_PARAM(CBSSRDFClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp index 7c6b48154e4..d56e0551a91 100644 --- a/intern/cycles/kernel/osl/closures.cpp +++ b/intern/cycles/kernel/osl/closures.cpp @@ -9,997 +9,304 @@ #include <OSL/genclosure.h> #include <OSL/oslclosure.h> -#include "kernel/osl/closures.h" -#include "kernel/osl/shader.h" +#include "kernel/types.h" + +#include "kernel/osl/globals.h" +#include "kernel/osl/services.h" #include "util/math.h" #include "util/param.h" -// clang-format off #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/globals.h" -#include "kernel/types.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/bsdf_ashikhmin_velvet.h" -#include "kernel/closure/bsdf_diffuse.h" -#include "kernel/closure/bsdf_microfacet.h" -#include "kernel/closure/bsdf_microfacet_multi.h" -#include "kernel/closure/bsdf_oren_nayar.h" -#include "kernel/closure/bsdf_reflection.h" -#include "kernel/closure/bsdf_refraction.h" -#include "kernel/closure/bsdf_transparent.h" -#include "kernel/closure/bsdf_ashikhmin_shirley.h" -#include "kernel/closure/bsdf_toon.h" -#include "kernel/closure/bsdf_hair.h" -#include "kernel/closure/bsdf_hair_principled.h" -#include "kernel/closure/bsdf_principled_diffuse.h" -#include "kernel/closure/bsdf_principled_sheen.h" -#include "kernel/closure/volume.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -/* BSDF class definitions */ - -BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N) -BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) - -BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N) -BSDF_CLOSURE_CLASS_END(Translucent, translucent) - -BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness) -BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) - -BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) - BSDF_CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N) -BSDF_CLOSURE_CLASS_END(Reflection, reflection) - -BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) - BSDF_CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(Refraction, refraction) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma) -BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, - ashikhmin_shirley, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley) - -BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size) - BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth) -BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon) - -BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size) - BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth) -BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXIsotropic, - microfacet_ggx_isotropic, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXIsotropicClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXIsotropicClosure, params.alpha_x) -BSDF_CLOSURE_CLASS_END(MicrofacetGGXIsotropic, microfacet_ggx_isotropic) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, - microfacet_ggx, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannIsotropic, - microfacet_beckmann_isotropic, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannIsotropicClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannIsotropicClosure, params.alpha_x) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannIsotropic, microfacet_beckmann_isotropic) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, - microfacet_beckmann, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, - microfacet_ggx_refraction, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_TRANSMIT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, - microfacet_beckmann_refraction, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_TRANSMIT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) -BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) - -BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1) - BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) -BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) - -BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, - principled_diffuse, - PrincipledDiffuseBsdf, - LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness) -BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse) - -class PrincipledSheenClosure : public CBSDFClosure { - public: - PrincipledSheenBsdf params; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - if (!skip(sd, path_flag, LABEL_DIFFUSE)) { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl( - sd, sizeof(PrincipledSheenBsdf), weight, ¶ms); - sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0; - } - } -}; - -static ClosureParam *bsdf_principled_sheen_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N), - CLOSURE_STRING_KEYPARAM(PrincipledSheenClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledSheenClosure)}; - return params; -} - -CCLOSURE_PREPARE_STATIC(closure_bsdf_principled_sheen_prepare, PrincipledSheenClosure) - -/* PRINCIPLED HAIR BSDF */ -class PrincipledHairClosure : public CBSDFClosure { - public: - PrincipledHairBSDF params; - - PrincipledHairBSDF *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl( - sd, sizeof(PrincipledHairBSDF), weight, ¶ms); - if (!bsdf) { - return NULL; - } - - PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra( - sd, sizeof(PrincipledHairExtra)); - if (!extra) { - return NULL; - } - - bsdf->extra = extra; - return bsdf; - } +#include "kernel/geom/object.h" +#include "kernel/util/differential.h" - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - if (!skip(sd, path_flag, LABEL_GLOSSY)) { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); +#include "kernel/osl/osl.h" - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0; - } - } -}; - -static ClosureParam *closure_bsdf_principled_hair_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N), - CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta), - CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledHairClosure)}; - - return params; -} +#include "kernel/osl/closures_setup.h" -CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure) +#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z) +#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2]) -/* DISNEY PRINCIPLED CLEARCOAT */ -class PrincipledClearcoatClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float clearcoat, clearcoat_roughness; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, ¶ms); - if (!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = extra; - bsdf->ior = 1.5f; - bsdf->alpha_x = clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness; - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; - return bsdf; - } +CCL_NAMESPACE_BEGIN - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } +/* Registration */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); +#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \ + static OSL::ClosureParam *osl_closure_##lower##_params() \ + { \ + static OSL::ClosureParam params[] = { +#define OSL_CLOSURE_STRUCT_END(Upper, lower) \ + CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \ + } \ + ; \ + return params; \ } -}; +#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) \ + CLOSURE_##TYPE##_KEYPARAM(Upper##Closure, name, key), +#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) \ + CLOSURE_##TYPE##_ARRAY_PARAM(Upper##Closure, name, size), -ClosureParam *closure_bsdf_principled_clearcoat_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), - CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure) - -/* Registration */ +#include "closures_template.h" -static void register_closure(OSL::ShadingSystem *ss, - const char *name, - int id, - OSL::ClosureParam *params, - OSL::PrepareClosureFunc prepare) +void OSLRenderServices::register_closures(OSL::ShadingSystem *ss) { - /* optimization: it's possible to not use a prepare function at all and - * only initialize the actual class when accessing the closure component - * data, but then we need to map the id to the class somehow */ -#if OSL_LIBRARY_VERSION_CODE >= 10900 - ss->register_closure(name, id, params, prepare, NULL); -#else - ss->register_closure(name, id, params, prepare, NULL, 16); -#endif -} +#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \ + ss->register_closure( \ + #lower, OSL_CLOSURE_##Upper##_ID, osl_closure_##lower##_params(), nullptr, nullptr); -void OSLShader::register_closures(OSLShadingSystem *ss_) -{ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_; - int id = 0; - - register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare); - register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare); - register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare); - register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare); - register_closure(ss, "refraction", id++, bsdf_refraction_params(), bsdf_refraction_prepare); - register_closure(ss, - "transparent", - id++, - closure_bsdf_transparent_params(), - closure_bsdf_transparent_prepare); - - register_closure( - ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare); - register_closure(ss, - "microfacet_ggx", - id++, - bsdf_microfacet_ggx_isotropic_params(), - bsdf_microfacet_ggx_isotropic_prepare); - register_closure( - ss, "microfacet_ggx_aniso", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare); - register_closure(ss, - "microfacet_ggx_refraction", - id++, - bsdf_microfacet_ggx_refraction_params(), - bsdf_microfacet_ggx_refraction_prepare); - register_closure(ss, - "microfacet_multi_ggx", - id++, - closure_bsdf_microfacet_multi_ggx_params(), - closure_bsdf_microfacet_multi_ggx_prepare); - register_closure(ss, - "microfacet_multi_ggx_glass", - id++, - closure_bsdf_microfacet_multi_ggx_glass_params(), - closure_bsdf_microfacet_multi_ggx_glass_prepare); - register_closure(ss, - "microfacet_multi_ggx_aniso", - id++, - closure_bsdf_microfacet_multi_ggx_aniso_params(), - closure_bsdf_microfacet_multi_ggx_aniso_prepare); - register_closure(ss, - "microfacet_ggx_fresnel", - id++, - closure_bsdf_microfacet_ggx_fresnel_params(), - closure_bsdf_microfacet_ggx_fresnel_prepare); - register_closure(ss, - "microfacet_ggx_aniso_fresnel", - id++, - closure_bsdf_microfacet_ggx_aniso_fresnel_params(), - closure_bsdf_microfacet_ggx_aniso_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_glass_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_aniso_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare); - register_closure(ss, - "microfacet_beckmann", - id++, - bsdf_microfacet_beckmann_isotropic_params(), - bsdf_microfacet_beckmann_isotropic_prepare); - register_closure(ss, - "microfacet_beckmann_aniso", - id++, - bsdf_microfacet_beckmann_params(), - bsdf_microfacet_beckmann_prepare); - register_closure(ss, - "microfacet_beckmann_refraction", - id++, - bsdf_microfacet_beckmann_refraction_params(), - bsdf_microfacet_beckmann_refraction_prepare); - register_closure(ss, - "ashikhmin_shirley", - id++, - bsdf_ashikhmin_shirley_params(), - bsdf_ashikhmin_shirley_prepare); - register_closure( - ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare); - register_closure( - ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); - register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); - register_closure(ss, - "principled_diffuse", - id++, - bsdf_principled_diffuse_params(), - bsdf_principled_diffuse_prepare); - register_closure(ss, - "principled_sheen", - id++, - bsdf_principled_sheen_params(), - closure_bsdf_principled_sheen_prepare); - register_closure(ss, - "principled_clearcoat", - id++, - closure_bsdf_principled_clearcoat_params(), - closure_bsdf_principled_clearcoat_prepare); - - register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare); - register_closure( - ss, "background", id++, closure_background_params(), closure_background_prepare); - register_closure(ss, "holdout", id++, closure_holdout_params(), closure_holdout_prepare); - register_closure(ss, - "diffuse_ramp", - id++, - closure_bsdf_diffuse_ramp_params(), - closure_bsdf_diffuse_ramp_prepare); - register_closure( - ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); - register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare); - - register_closure( - ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); - register_closure(ss, - "hair_transmission", - id++, - bsdf_hair_transmission_params(), - bsdf_hair_transmission_prepare); - - register_closure(ss, - "principled_hair", - id++, - closure_bsdf_principled_hair_params(), - closure_bsdf_principled_hair_prepare); - - register_closure(ss, - "henyey_greenstein", - id++, - closure_henyey_greenstein_params(), - closure_henyey_greenstein_prepare); - register_closure( - ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare); +#include "closures_template.h" } -/* BSDF Closure */ +/* Globals */ -bool CBSDFClosure::skip(const ShaderData *sd, uint32_t path_flag, int scattering) +static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, + ShaderData *sd, + const void *state, + uint32_t path_flag, + OSLThreadData *tdata) { - /* caustic options */ - if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - const KernelGlobalsCPU *kg = sd->osl_globals; - - if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || - (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { - return true; - } + OSL::ShaderGlobals *globals = &tdata->globals; + + const differential3 dP = differential_from_compact(sd->Ng, sd->dP); + const differential3 dI = differential_from_compact(sd->I, sd->dI); + + /* copy from shader data to shader globals */ + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(dP.dx); + globals->dPdy = TO_VEC3(dP.dy); + globals->I = TO_VEC3(sd->I); + globals->dIdx = TO_VEC3(dI.dx); + globals->dIdy = TO_VEC3(dI.dy); + globals->N = TO_VEC3(sd->N); + globals->Ng = TO_VEC3(sd->Ng); + globals->u = sd->u; + globals->dudx = sd->du.dx; + globals->dudy = sd->du.dy; + globals->v = sd->v; + globals->dvdx = sd->dv.dx; + globals->dvdy = sd->dv.dy; + globals->dPdu = TO_VEC3(sd->dPdu); + globals->dPdv = TO_VEC3(sd->dPdv); + globals->surfacearea = 1.0f; + globals->time = sd->time; + + /* booleans */ + globals->raytype = path_flag; + globals->flipHandedness = 0; + globals->backfacing = (sd->flag & SD_BACKFACING); + + /* shader data to be used in services callbacks */ + globals->renderstate = sd; + + /* hacky, we leave it to services to fetch actual object matrix */ + globals->shader2common = sd; + globals->object2common = sd; + + /* must be set to NULL before execute */ + globals->Ci = NULL; + + /* clear trace data */ + tdata->tracedata.init = false; + + /* Used by render-services. */ + sd->osl_globals = kg; + if (path_flag & PATH_RAY_SHADOW) { + sd->osl_path_state = nullptr; + sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state; } - - return false; -} - -/* Standard Microfacet Closure */ - -class MicrofacetClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - ustring distribution; - int refract; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - static ustring u_ggx("ggx"); - static ustring u_default("default"); - - const int label = (refract) ? LABEL_TRANSMIT : LABEL_REFLECT; - if (skip(sd, path_flag, LABEL_GLOSSY | label)) { - return; - } - - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, ¶ms); - - if (!bsdf) { - return; - } - - /* GGX */ - if (distribution == u_ggx || distribution == u_default) { - if (!refract) { - if (params.alpha_x == params.alpha_y) { - /* Isotropic */ - sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf); - } - else { - /* Anisotropic */ - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - } - } - else { - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - /* Beckmann */ - else { - if (!refract) { - if (params.alpha_x == params.alpha_y) { - /* Isotropic */ - sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf); - } - else { - /* Anisotropic */ - sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - } - } - else { - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - } - } + else { + sd->osl_path_state = (const IntegratorStateCPU *)state; + sd->osl_shadow_path_state = nullptr; } -}; - -ClosureParam *closure_bsdf_microfacet_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution), - CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior), - CLOSURE_INT_PARAM(MicrofacetClosure, refract), - CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetClosure)}; - - return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure) - -/* GGX closures with Fresnel */ - -class MicrofacetFresnelClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX Glass closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, ¶ms); - if (!bsdf) { - return NULL; +static void flatten_closure_tree(const KernelGlobalsCPU *kg, + ShaderData *sd, + uint32_t path_flag, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) +{ + /* OSL gives us a closure tree, we flatten it into arrays per + * closure type, for evaluation, sampling, etc later on. */ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_closure_tree(kg, sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_closure_tree(kg, sd, path_flag, add->closureA, weight); + flatten_closure_tree(kg, sd, path_flag, add->closureB, weight); + break; } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; +#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \ + case OSL_CLOSURE_##Upper##_ID: { \ + const OSL::ClosureComponent *comp = reinterpret_cast<const OSL::ClosureComponent *>(closure); \ + weight *= TO_FLOAT3(comp->w); \ + osl_closure_##lower##_setup( \ + kg, sd, path_flag, weight, reinterpret_cast<const Upper##Closure *>(comp + 1)); \ + break; \ } -}; - -class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); +#include "closures_template.h" + default: + break; } -}; - -ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; - return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure); -class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); +/* Surface */ - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params() +void OSLShader::eval_surface(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, - MicrofacetGGXAnisoFresnelClosure); - -/* Multiscattering GGX closures */ - -class MicrofacetMultiClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + /* automatic bump shader */ + if (kg->osl->bump_state[shader]) { + /* save state */ + const float3 P = sd->P; + const float dP = sd->dP; + const OSL::Vec3 dPdx = globals->dPdx; + const OSL::Vec3 dPdy = globals->dPdy; + + /* set state as if undisplaced */ + if (sd->flag & SD_HAS_DISPLACEMENT) { + float data[9]; + bool found = kg->osl->services->get_attribute(sd, + true, + OSLRenderServices::u_empty, + TypeDesc::TypeVector, + OSLRenderServices::u_geom_undisplaced, + data); + (void)found; + assert(found); + + differential3 tmp_dP; + memcpy(&sd->P, data, sizeof(float) * 3); + memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3); + memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3); + + object_position_transform(kg, sd, &sd->P); + object_dir_transform(kg, sd, &tmp_dP.dx); + object_dir_transform(kg, sd, &tmp_dP.dy); + + sd->dP = differential_make_compact(tmp_dP); + + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(tmp_dP.dx); + globals->dPdy = TO_VEC3(tmp_dP.dy); } - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, ¶ms); - if (!bsdf) { - return NULL; - } + /* execute bump shader */ + ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } + /* reset state */ + sd->P = P; + sd->dP = dP; - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - return bsdf; + globals->P = TO_VEC3(P); + globals->dPdx = TO_VEC3(dPdx); + globals->dPdy = TO_VEC3(dPdy); } -}; -class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->ior = 0.0f; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + /* surface shader */ + if (kg->osl->surface_state[shader]) { + ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure); -class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->ior = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + /* flatten closure tree */ + if (globals->Ci) { + flatten_closure_tree(kg, sd, path_flag, globals->Ci); } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure); - -class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure { - public: - MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() - { - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - } -}; +/* Background */ -ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params() +void OSLShader::eval_background(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure); - -/* Multiscattering GGX closures with Fresnel */ - -class MicrofacetMultiFresnelClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, ¶ms); - if (!bsdf) { - return NULL; - } + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; + if (kg->osl->background_state) { + ss->execute(octx, *(kg->osl->background_state), *globals); } -}; - -class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); + /* return background color immediately */ + if (globals->Ci) { + flatten_closure_tree(kg, sd, path_flag, globals->Ci); } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, - MicrofacetMultiGGXFresnelClosure); - -class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); - } -}; +/* Volume */ -ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params() +void OSLShader::eval_volume(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, - MicrofacetMultiGGXAnisoFresnelClosure); - -class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() - { + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + if (kg->osl->volume_state[shader]) { + ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); } - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); - - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); + /* flatten closure tree */ + if (globals->Ci) { + flatten_closure_tree(kg, sd, path_flag, globals->Ci); } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, - MicrofacetMultiGGXGlassFresnelClosure); -/* Transparent */ +/* Displacement */ -class TransparentClosure : public CBSDFClosure { - public: - ShaderClosure params; - float3 unused; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - bsdf_transparent_setup(sd, weight, path_flag); - } -}; - -ClosureParam *closure_bsdf_transparent_params() +void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd) { - static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"), - CLOSURE_FINISH_PARAM(TransparentClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure) - -/* Volume */ - -class VolumeAbsorptionClosure : public CBSDFClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); - } -}; - -ClosureParam *closure_absorption_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure) - -class VolumeHenyeyGreensteinClosure : public CBSDFClosure { - public: - HenyeyGreensteinVolume params; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); - - HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl( - sd, sizeof(HenyeyGreensteinVolume), weight, ¶ms); - if (!volume) { - return; - } - - sd->flag |= volume_henyey_greenstein_setup(volume); + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); + + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + if (kg->osl->displacement_state[shader]) { + ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); } -}; -ClosureParam *closure_henyey_greenstein_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), - CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)}; - return params; + /* get back position */ + sd->P = TO_FLOAT3(globals->P); } -CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure) - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/closures.h b/intern/cycles/kernel/osl/closures.h deleted file mode 100644 index e10a3d88a04..00000000000 --- a/intern/cycles/kernel/osl/closures.h +++ /dev/null @@ -1,141 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#ifndef __OSL_CLOSURES_H__ -#define __OSL_CLOSURES_H__ - -#include "kernel/types.h" -#include "util/types.h" - -#include <OSL/genclosure.h> -#include <OSL/oslclosure.h> -#include <OSL/oslexec.h> - -CCL_NAMESPACE_BEGIN - -OSL::ClosureParam *closure_emission_params(); -OSL::ClosureParam *closure_background_params(); -OSL::ClosureParam *closure_holdout_params(); -OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); -OSL::ClosureParam *closure_bsdf_phong_ramp_params(); -OSL::ClosureParam *closure_bsdf_transparent_params(); -OSL::ClosureParam *closure_bssrdf_params(); -OSL::ClosureParam *closure_absorption_params(); -OSL::ClosureParam *closure_henyey_greenstein_params(); -OSL::ClosureParam *closure_bsdf_microfacet_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params(); -OSL::ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(); -OSL::ClosureParam *closure_bsdf_principled_clearcoat_params(); - -void closure_emission_prepare(OSL::RendererServices *, int id, void *data); -void closure_background_prepare(OSL::RendererServices *, int id, void *data); -void closure_holdout_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *data); -void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data); -void closure_absorption_prepare(OSL::RendererServices *, int id, void *data); -void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data); - -#define CCLOSURE_PREPARE(name, classname) \ - void name(RendererServices *, int id, void *data) \ - { \ - memset(data, 0, sizeof(classname)); \ - new (data) classname(); \ - } - -#define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname) - -#define CLOSURE_FLOAT3_PARAM(st, fld) \ - { \ - TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \ - } - -#define BSDF_CLOSURE_FLOAT_PARAM(st, fld) CLOSURE_FLOAT_PARAM(st, fld), -#define BSDF_CLOSURE_FLOAT3_PARAM(st, fld) CLOSURE_FLOAT3_PARAM(st, fld), - -#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z) -#define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z) -#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2]) - -/* Closure */ - -class CClosurePrimitive { - public: - virtual void setup(ShaderData *sd, uint32_t path_flag, float3 weight) = 0; - - OSL::ustring label; -}; - -/* BSDF */ - -class CBSDFClosure : public CClosurePrimitive { - public: - bool skip(const ShaderData *sd, uint32_t path_flag, int scattering); -}; - -#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ -\ - class Upper##Closure : public CBSDFClosure { \ - public: \ - structname params; \ - float3 unused; \ -\ - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) \ - { \ - if (!skip(sd, path_flag, TYPE)) { \ - params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); \ - structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, ¶ms); \ - sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \ - } \ - } \ - }; \ -\ - static ClosureParam *bsdf_##lower##_params() \ - { \ - static ClosureParam params[] = { - -/* parameters */ - -#define BSDF_CLOSURE_CLASS_END(Upper, lower) \ - CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \ - } \ - ; \ - return params; \ - } \ -\ - CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) - -CCL_NAMESPACE_END - -#endif /* __OSL_CLOSURES_H__ */ diff --git a/intern/cycles/kernel/osl/closures_setup.h b/intern/cycles/kernel/osl/closures_setup.h new file mode 100644 index 00000000000..f8d68444f90 --- /dev/null +++ b/intern/cycles/kernel/osl/closures_setup.h @@ -0,0 +1,1166 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * + * Adapted from Open Shading Language + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011-2022 Blender Foundation. */ + +#pragma once + +// clang-format off +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/bsdf_ashikhmin_velvet.h" +#include "kernel/closure/bsdf_diffuse.h" +#include "kernel/closure/bsdf_microfacet.h" +#include "kernel/closure/bsdf_microfacet_multi.h" +#include "kernel/closure/bsdf_oren_nayar.h" +#include "kernel/closure/bsdf_reflection.h" +#include "kernel/closure/bsdf_refraction.h" +#include "kernel/closure/bsdf_transparent.h" +#include "kernel/closure/bsdf_ashikhmin_shirley.h" +#include "kernel/closure/bsdf_toon.h" +#include "kernel/closure/bsdf_hair.h" +#include "kernel/closure/bsdf_hair_principled.h" +#include "kernel/closure/bsdf_principled_diffuse.h" +#include "kernel/closure/bsdf_principled_sheen.h" +#include "kernel/closure/volume.h" +#include "kernel/closure/bsdf_diffuse_ramp.h" +#include "kernel/closure/bsdf_phong_ramp.h" +#include "kernel/closure/bssrdf.h" +#include "kernel/closure/emissive.h" +// clang-format on + +CCL_NAMESPACE_BEGIN + +#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \ + struct ccl_align(8) Upper##Closure \ + { \ + const char *label; +#define OSL_CLOSURE_STRUCT_END(Upper, lower) \ + } \ + ; \ + ccl_device void osl_closure_##lower##_setup(KernelGlobals kg, \ + ccl_private ShaderData *sd, \ + uint32_t path_flag, \ + float3 weight, \ + ccl_private Upper##Closure *closure); +#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) type name; +#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) type name[size]; + +#include "closures_template.h" + +ccl_device_forceinline bool osl_closure_skip(KernelGlobals kg, + ccl_private const ShaderData *sd, + uint32_t path_flag, + int scattering) +{ + /* caustic options */ + if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { + if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { + return true; + } + } + + return false; +} + +/* Diffuse */ + +ccl_device void osl_closure_diffuse_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const DiffuseClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + + sd->flag |= bsdf_diffuse_setup(bsdf); +} + +ccl_device void osl_closure_oren_nayar_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const OrenNayarClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc( + sd, sizeof(OrenNayarBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->roughness = closure->roughness; + + sd->flag |= bsdf_oren_nayar_setup(bsdf); +} + +ccl_device void osl_closure_translucent_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const TranslucentClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + + sd->flag |= bsdf_translucent_setup(bsdf); +} + +ccl_device void osl_closure_reflection_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const ReflectionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_SINGULAR)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + + sd->flag |= bsdf_reflection_setup(bsdf); +} + +ccl_device void osl_closure_refraction_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const RefractionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_SINGULAR)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->ior = closure->ior; + + sd->flag |= bsdf_refraction_setup(bsdf); +} + +ccl_device void osl_closure_transparent_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const TransparentClosure *closure) +{ + bsdf_transparent_setup(sd, rgb_to_spectrum(weight), path_flag); +} + +/* Standard microfacet closures */ + +ccl_device void osl_closure_microfacet_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetClosure *closure) +{ + const int label = (closure->refract) ? LABEL_TRANSMIT : LABEL_REFLECT; + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | label)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->ior = closure->ior; + bsdf->T = closure->T; + + static OSL::ustring u_ggx("ggx"); + static OSL::ustring u_default("default"); + + /* GGX */ + if (closure->distribution == u_ggx || closure->distribution == u_default) { + if (!closure->refract) { + if (closure->alpha_x == closure->alpha_y) { + /* Isotropic */ + sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf); + } + else { + /* Anisotropic */ + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + } + } + else { + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + /* Beckmann */ + else { + if (!closure->refract) { + if (closure->alpha_x == closure->alpha_y) { + /* Isotropic */ + sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf); + } + else { + /* Anisotropic */ + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + } + } + else { + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + } + } +} + +ccl_device void osl_closure_microfacet_ggx_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetGGXIsotropicClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + + sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_ggx_aniso_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetGGXClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->T = closure->T; + + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_ggx_refraction_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetGGXRefractionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_TRANSMIT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->ior = closure->ior; + + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); +} + +/* GGX closures with Fresnel */ + +ccl_device void osl_closure_microfacet_ggx_fresnel_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetGGXFresnelClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); +} + +ccl_device void osl_closure_microfacet_ggx_aniso_fresnel_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetGGXAnisoFresnelClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = closure->T; + + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); +} + +/* Multi-scattering GGX closures */ + +ccl_device void osl_closure_microfacet_multi_ggx_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = 0.0f; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = zero_spectrum(); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_multi_ggx_glass_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXGlassClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = zero_spectrum(); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_multi_ggx_aniso_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXAnisoClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->ior = 0.0f; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = zero_spectrum(); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = closure->T; + + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); +} + +/* Multi-scattering GGX closures with Fresnel */ + +ccl_device void osl_closure_microfacet_multi_ggx_fresnel_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXFresnelClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); +} + +ccl_device void osl_closure_microfacet_multi_ggx_glass_fresnel_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXGlassFresnelClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); +} + +ccl_device void osl_closure_microfacet_multi_ggx_aniso_fresnel_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetMultiGGXAnisoFresnelClosure *closure) +{ + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->ior = closure->ior; + + bsdf->extra = extra; + bsdf->extra->color = rgb_to_spectrum(closure->color); + bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0); + bsdf->extra->clearcoat = 0.0f; + + bsdf->T = closure->T; + + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); +} + +/* Beckmann closures */ + +ccl_device void osl_closure_microfacet_beckmann_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetBeckmannIsotropicClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + + sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_beckmann_aniso_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetBeckmannClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->T = closure->T; + + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); +} + +ccl_device void osl_closure_microfacet_beckmann_refraction_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const MicrofacetBeckmannRefractionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_TRANSMIT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->ior = closure->ior; + + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); +} + +/* Ashikhmin closures */ + +ccl_device void osl_closure_ashikhmin_velvet_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const AshikhminVelvetClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc( + sd, sizeof(VelvetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->sigma = closure->sigma; + + sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); +} + +ccl_device void osl_closure_ashikhmin_shirley_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const AshikhminShirleyClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return; + } + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->alpha_x; + bsdf->alpha_y = closure->alpha_y; + bsdf->T = closure->T; + + sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); +} + +ccl_device void osl_closure_diffuse_toon_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const DiffuseToonClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc( + sd, sizeof(ToonBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->size = closure->size; + bsdf->smooth = closure->smooth; + + sd->flag |= bsdf_diffuse_toon_setup(bsdf); +} + +ccl_device void osl_closure_glossy_toon_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const GlossyToonClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) { + return; + } + + ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc( + sd, sizeof(ToonBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->size = closure->size; + bsdf->smooth = closure->smooth; + + sd->flag |= bsdf_glossy_toon_setup(bsdf); +} + +/* Disney principled closures */ + +ccl_device void osl_closure_principled_diffuse_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const PrincipledDiffuseClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->roughness = closure->roughness; + + sd->flag |= bsdf_principled_diffuse_setup(bsdf); +} + +ccl_device void osl_closure_principled_sheen_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const PrincipledSheenClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) { + return; + } + + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc( + sd, sizeof(PrincipledSheenBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->avg_value = 0.0f; + + sd->flag |= bsdf_principled_sheen_setup(sd, bsdf); +} + +ccl_device void osl_closure_principled_clearcoat_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const PrincipledClearcoatClosure *closure) +{ + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->alpha_x = closure->clearcoat_roughness; + bsdf->alpha_y = closure->clearcoat_roughness; + bsdf->ior = 1.5f; + + bsdf->extra = extra; + bsdf->extra->color = zero_spectrum(); + bsdf->extra->cspec0 = make_spectrum(0.04f); + bsdf->extra->clearcoat = closure->clearcoat; + + bsdf->T = zero_float3(); + + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); +} + +/* Variable cone emissive closure + * + * This primitive emits in a cone having a configurable penumbra area where the light decays to 0 + * reaching the outer_angle limit. It can also behave as a lambertian emitter if the provided + * angles are PI/2, which is the default + */ +ccl_device void osl_closure_emission_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t /* path_flag */, + float3 weight, + ccl_private const GenericEmissiveClosure *closure) +{ + emission_setup(sd, rgb_to_spectrum(weight)); +} + +/* Generic background closure + * + * We only have a background closure for the shaders to return a color in background shaders. No + * methods, only the weight is taking into account + */ +ccl_device void osl_closure_background_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t /* path_flag */, + float3 weight, + ccl_private const GenericBackgroundClosure *closure) +{ + background_setup(sd, rgb_to_spectrum(weight)); +} + +/* Holdout closure + * + * This will be used by the shader to mark the amount of holdout for the current shading point. No + * parameters, only the weight will be used + */ +ccl_device void osl_closure_holdout_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t /* path_flag */, + float3 weight, + ccl_private const HoldoutClosure *closure) +{ + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, rgb_to_spectrum(weight)); + sd->flag |= SD_HOLDOUT; +} + +ccl_device void osl_closure_diffuse_ramp_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t /* path_flag */, + float3 weight, + ccl_private const DiffuseRampClosure *closure) +{ + ccl_private DiffuseRampBsdf *bsdf = (ccl_private DiffuseRampBsdf *)bsdf_alloc( + sd, sizeof(DiffuseRampBsdf), rgb_to_spectrum(weight)); + + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + + bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); + if (!bsdf->colors) { + return; + } + + for (int i = 0; i < 8; i++) + bsdf->colors[i] = closure->colors[i]; + + sd->flag |= bsdf_diffuse_ramp_setup(bsdf); +} + +ccl_device void osl_closure_phong_ramp_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t /* path_flag */, + float3 weight, + ccl_private const PhongRampClosure *closure) +{ + ccl_private PhongRampBsdf *bsdf = (ccl_private PhongRampBsdf *)bsdf_alloc( + sd, sizeof(PhongRampBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->exponent = closure->exponent; + + bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); + if (!bsdf->colors) { + return; + } + + for (int i = 0; i < 8; i++) + bsdf->colors[i] = closure->colors[i]; + + sd->flag |= bsdf_phong_ramp_setup(bsdf); +} + +ccl_device void osl_closure_bssrdf_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const BSSRDFClosure *closure) +{ + static ustring u_burley("burley"); + static ustring u_random_walk_fixed_radius("random_walk_fixed_radius"); + static ustring u_random_walk("random_walk"); + + ClosureType type; + if (closure->method == u_burley) { + type = CLOSURE_BSSRDF_BURLEY_ID; + } + else if (closure->method == u_random_walk_fixed_radius) { + type = CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID; + } + else if (closure->method == u_random_walk) { + type = CLOSURE_BSSRDF_RANDOM_WALK_ID; + } + else { + return; + } + + ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, rgb_to_spectrum(weight)); + if (!bssrdf) { + return; + } + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + bssrdf->radius = zero_spectrum(); + } + else { + bssrdf->radius = closure->radius; + } + + /* create one closure per color channel */ + bssrdf->albedo = closure->albedo; + bssrdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bssrdf->roughness = closure->roughness; + bssrdf->anisotropy = clamp(closure->anisotropy, 0.0f, 0.9f); + + sd->flag |= bssrdf_setup(sd, bssrdf, type, clamp(closure->ior, 1.01f, 3.8f)); +} + +/* Hair */ + +ccl_device void osl_closure_hair_reflection_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const HairReflectionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) { + return; + } + + ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc( + sd, sizeof(HairBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->T = closure->T; + bsdf->roughness1 = closure->roughness1; + bsdf->roughness2 = closure->roughness2; + bsdf->offset = closure->offset; + + sd->flag |= bsdf_hair_reflection_setup(bsdf); +} + +ccl_device void osl_closure_hair_transmission_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const HairTransmissionClosure *closure) +{ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) { + return; + } + + ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc( + sd, sizeof(HairBsdf), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->T = closure->T; + bsdf->roughness1 = closure->roughness1; + bsdf->roughness2 = closure->roughness2; + bsdf->offset = closure->offset; + + sd->flag |= bsdf_hair_transmission_setup(bsdf); +} + +ccl_device void osl_closure_principled_hair_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const PrincipledHairClosure *closure) +{ +#ifdef __HAIR__ + if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) { + return; + } + + ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc( + sd, sizeof(PrincipledHairBSDF), rgb_to_spectrum(weight)); + if (!bsdf) { + return; + } + + ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *)closure_alloc_extra( + sd, sizeof(PrincipledHairExtra)); + if (!extra) { + return; + } + + bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N); + bsdf->sigma = closure->sigma; + bsdf->v = closure->v; + bsdf->s = closure->s; + bsdf->alpha = closure->alpha; + bsdf->eta = closure->eta; + bsdf->m0_roughness = closure->m0_roughness; + + bsdf->extra = extra; + + sd->flag |= bsdf_principled_hair_setup(sd, bsdf); +#endif +} + +/* Volume */ + +ccl_device void osl_closure_absorption_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const VolumeAbsorptionClosure *closure) +{ + volume_extinction_setup(sd, rgb_to_spectrum(weight)); +} + +ccl_device void osl_closure_henyey_greenstein_setup( + KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + float3 weight, + ccl_private const VolumeHenyeyGreensteinClosure *closure) +{ + volume_extinction_setup(sd, rgb_to_spectrum(weight)); + + ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc( + sd, sizeof(HenyeyGreensteinVolume), rgb_to_spectrum(weight)); + if (!volume) { + return; + } + + volume->g = closure->g; + + sd->flag |= volume_henyey_greenstein_setup(volume); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/closures_template.h b/intern/cycles/kernel/osl/closures_template.h new file mode 100644 index 00000000000..c808b275966 --- /dev/null +++ b/intern/cycles/kernel/osl/closures_template.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef OSL_CLOSURE_STRUCT_BEGIN +# define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) +#endif +#ifndef OSL_CLOSURE_STRUCT_END +# define OSL_CLOSURE_STRUCT_END(Upper, lower) +#endif +#ifndef OSL_CLOSURE_STRUCT_MEMBER +# define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) +#endif +#ifndef OSL_CLOSURE_STRUCT_ARRAY_MEMBER +# define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) +#endif + +OSL_CLOSURE_STRUCT_BEGIN(Diffuse, diffuse) + OSL_CLOSURE_STRUCT_MEMBER(Diffuse, VECTOR, packed_float3, N, NULL) +OSL_CLOSURE_STRUCT_END(Diffuse, diffuse) + +OSL_CLOSURE_STRUCT_BEGIN(OrenNayar, oren_nayar) + OSL_CLOSURE_STRUCT_MEMBER(OrenNayar, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(OrenNayar, FLOAT, float, roughness, NULL) +OSL_CLOSURE_STRUCT_END(OrenNayar, oren_nayar) + +OSL_CLOSURE_STRUCT_BEGIN(Translucent, translucent) + OSL_CLOSURE_STRUCT_MEMBER(Translucent, VECTOR, packed_float3, N, NULL) +OSL_CLOSURE_STRUCT_END(Translucent, translucent) + +OSL_CLOSURE_STRUCT_BEGIN(Reflection, reflection) + OSL_CLOSURE_STRUCT_MEMBER(Reflection, VECTOR, packed_float3, N, NULL) +OSL_CLOSURE_STRUCT_END(Reflection, reflection) + +OSL_CLOSURE_STRUCT_BEGIN(Refraction, refraction) + OSL_CLOSURE_STRUCT_MEMBER(Refraction, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Refraction, FLOAT, float, ior, NULL) +OSL_CLOSURE_STRUCT_END(Refraction, refraction) + +OSL_CLOSURE_STRUCT_BEGIN(Transparent, transparent) +OSL_CLOSURE_STRUCT_END(Transparent, transparent) + +OSL_CLOSURE_STRUCT_BEGIN(Microfacet, microfacet) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, ustring, distribution, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_y, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(Microfacet, INT, int, refract, NULL) +OSL_CLOSURE_STRUCT_END(Microfacet, microfacet) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXIsotropic, microfacet_ggx) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXIsotropic, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXIsotropic, FLOAT, float, alpha_x, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetGGXIsotropic, microfacet_ggx) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGX, microfacet_ggx_aniso) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, FLOAT, float, alpha_y, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetGGX, microfacet_ggx_aniso) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, FLOAT, float, ior, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGX, microfacet_multi_ggx) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, VECTOR, packed_float3, color, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGX, microfacet_multi_ggx) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXGlass, microfacet_multi_ggx_glass) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, VECTOR, packed_float3, color, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXGlass, microfacet_multi_ggx_glass) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXAniso, microfacet_multi_ggx_aniso) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, FLOAT, float, alpha_y, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, color, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXAniso, microfacet_multi_ggx_aniso) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXFresnel, microfacet_ggx_fresnel) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, color, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, cspec0, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetGGXFresnel, microfacet_ggx_fresnel) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXAnisoFresnel, microfacet_ggx_aniso_fresnel) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, alpha_y, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, color, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, cspec0, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetGGXAnisoFresnel, microfacet_ggx_aniso_fresnel) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXFresnel, microfacet_multi_ggx_fresnel) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, color, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, cspec0, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXFresnel, microfacet_multi_ggx_fresnel) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXGlassFresnel, microfacet_multi_ggx_glass_fresnel) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, color, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, cspec0, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXGlassFresnel, microfacet_multi_ggx_glass_fresnel) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXAnisoFresnel, microfacet_multi_ggx_aniso_fresnel) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, alpha_y, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, ior, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, color, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, cspec0, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXAnisoFresnel, microfacet_multi_ggx_aniso_fresnel) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmannIsotropic, microfacet_beckmann) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannIsotropic, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannIsotropic, FLOAT, float, alpha_x, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetBeckmannIsotropic, microfacet_beckmann) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmann, microfacet_beckmann_aniso) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, FLOAT, float, alpha_y, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetBeckmann, microfacet_beckmann_aniso) + +OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, FLOAT, float, ior, NULL) +OSL_CLOSURE_STRUCT_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) + +OSL_CLOSURE_STRUCT_BEGIN(AshikhminShirley, ashikhmin_shirley) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, FLOAT, float, alpha_x, NULL) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, FLOAT, float, alpha_y, NULL) +OSL_CLOSURE_STRUCT_END(AshikhminShirley, ashikhmin_shirley) + +OSL_CLOSURE_STRUCT_BEGIN(AshikhminVelvet, ashikhmin_velvet) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminVelvet, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(AshikhminVelvet, FLOAT, float, sigma, NULL) +OSL_CLOSURE_STRUCT_END(AshikhminVelvet, ashikhmin_velvet) + +OSL_CLOSURE_STRUCT_BEGIN(DiffuseToon, diffuse_toon) + OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, FLOAT, float, size, NULL) + OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, FLOAT, float, smooth, NULL) +OSL_CLOSURE_STRUCT_END(DiffuseToon, diffuse_toon) + +OSL_CLOSURE_STRUCT_BEGIN(GlossyToon, glossy_toon) + OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, FLOAT, float, size, NULL) + OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, FLOAT, float, smooth, NULL) +OSL_CLOSURE_STRUCT_END(GlossyToon, glossy_toon) + +OSL_CLOSURE_STRUCT_BEGIN(PrincipledDiffuse, principled_diffuse) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledDiffuse, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledDiffuse, FLOAT, float, roughness, NULL) +OSL_CLOSURE_STRUCT_END(PrincipledDiffuse, principled_diffuse) + +OSL_CLOSURE_STRUCT_BEGIN(PrincipledSheen, principled_sheen) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledSheen, VECTOR, packed_float3, N, NULL) +OSL_CLOSURE_STRUCT_END(PrincipledSheen, principled_sheen) + +OSL_CLOSURE_STRUCT_BEGIN(PrincipledClearcoat, principled_clearcoat) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, FLOAT, float, clearcoat, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, FLOAT, float, clearcoat_roughness, NULL) +OSL_CLOSURE_STRUCT_END(PrincipledClearcoat, principled_clearcoat) + +OSL_CLOSURE_STRUCT_BEGIN(GenericEmissive, emission) +OSL_CLOSURE_STRUCT_END(GenericEmissive, emission) + +OSL_CLOSURE_STRUCT_BEGIN(GenericBackground, background) +OSL_CLOSURE_STRUCT_END(GenericBackground, background) + +OSL_CLOSURE_STRUCT_BEGIN(Holdout, holdout) +OSL_CLOSURE_STRUCT_END(Holdout, holdout) + +OSL_CLOSURE_STRUCT_BEGIN(DiffuseRamp, diffuse_ramp) + OSL_CLOSURE_STRUCT_MEMBER(DiffuseRamp, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_ARRAY_MEMBER(DiffuseRamp, COLOR, packed_float3, colors, NULL, 8) +OSL_CLOSURE_STRUCT_END(DiffuseRamp, diffuse_ramp) + +OSL_CLOSURE_STRUCT_BEGIN(PhongRamp, phong_ramp) + OSL_CLOSURE_STRUCT_MEMBER(PhongRamp, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PhongRamp, FLOAT, float, exponent, NULL) + OSL_CLOSURE_STRUCT_ARRAY_MEMBER(PhongRamp, COLOR, packed_float3, colors, NULL, 8) +OSL_CLOSURE_STRUCT_END(PhongRamp, phong_ramp) + +OSL_CLOSURE_STRUCT_BEGIN(BSSRDF, bssrdf) + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, ustring, method, NULL) + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, radius, NULL) + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, albedo, NULL) + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, roughness, "roughness") + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, ior, "ior") + OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, anisotropy, "anisotropy") +OSL_CLOSURE_STRUCT_END(BSSRDF, bssrdf) + +OSL_CLOSURE_STRUCT_BEGIN(HairReflection, hair_reflection) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, roughness1, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, roughness2, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, offset, NULL) +OSL_CLOSURE_STRUCT_END(HairReflection, hair_reflection) + +OSL_CLOSURE_STRUCT_BEGIN(HairTransmission, hair_transmission) + OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, FLOAT, float, roughness1, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, FLOAT, float, roughness2, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, T, NULL) + OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, offset, NULL) +OSL_CLOSURE_STRUCT_END(HairTransmission, hair_transmission) + +OSL_CLOSURE_STRUCT_BEGIN(PrincipledHair, principled_hair) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, VECTOR, packed_float3, N, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, VECTOR, packed_float3, sigma, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, v, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, s, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, m0_roughness, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, alpha, NULL) + OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, eta, NULL) +OSL_CLOSURE_STRUCT_END(PrincipledHair, principled_hair) + +OSL_CLOSURE_STRUCT_BEGIN(VolumeAbsorption, absorption) +OSL_CLOSURE_STRUCT_END(VolumeAbsorption, absorption) + +OSL_CLOSURE_STRUCT_BEGIN(VolumeHenyeyGreenstein, henyey_greenstein) + OSL_CLOSURE_STRUCT_MEMBER(VolumeHenyeyGreenstein, FLOAT, float, g, NULL) +OSL_CLOSURE_STRUCT_END(VolumeHenyeyGreenstein, henyey_greenstein) + +#undef OSL_CLOSURE_STRUCT_BEGIN +#undef OSL_CLOSURE_STRUCT_END +#undef OSL_CLOSURE_STRUCT_MEMBER +#undef OSL_CLOSURE_STRUCT_ARRAY_MEMBER diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp deleted file mode 100644 index 1a01b215836..00000000000 --- a/intern/cycles/kernel/osl/emissive.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * - * Adapted from Open Shading Language - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2022 Blender Foundation. */ - -#include <OpenImageIO/fmath.h> - -#include <OSL/genclosure.h> - -#include "kernel/osl/closures.h" - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/types.h" -#include "kernel/closure/alloc.h" -#include "kernel/closure/emissive.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -/// Variable cone emissive closure -/// -/// This primitive emits in a cone having a configurable -/// penumbra area where the light decays to 0 reaching the -/// outer_angle limit. It can also behave as a lambertian emitter -/// if the provided angles are PI/2, which is the default -/// -class GenericEmissiveClosure : public CClosurePrimitive { - public: - void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight) - { - emission_setup(sd, weight); - } -}; - -ClosureParam *closure_emission_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"), - CLOSURE_FINISH_PARAM(GenericEmissiveClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_emission_prepare, GenericEmissiveClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/globals.cpp b/intern/cycles/kernel/osl/globals.cpp new file mode 100644 index 00000000000..92b91182178 --- /dev/null +++ b/intern/cycles/kernel/osl/globals.cpp @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include <OSL/oslexec.h> + +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "kernel/types.h" + +#include "kernel/osl/globals.h" +#include "kernel/osl/services.h" + +CCL_NAMESPACE_BEGIN + +void OSLGlobals::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals) +{ + /* no osl used? */ + if (!osl_globals->use) { + kg->osl = NULL; + return; + } + + /* Per thread kernel data init. */ + kg->osl = osl_globals; + + OSL::ShadingSystem *ss = kg->osl->ss; + OSLThreadData *tdata = new OSLThreadData(); + + memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); + tdata->globals.tracedata = &tdata->tracedata; + tdata->osl_thread_info = ss->create_thread_info(); + tdata->context = ss->get_context(tdata->osl_thread_info); + + tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); + + kg->osl_ss = (OSLShadingSystem *)ss; + kg->osl_tdata = tdata; +} + +void OSLGlobals::thread_free(KernelGlobalsCPU *kg) +{ + if (!kg->osl) + return; + + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSLThreadData *tdata = kg->osl_tdata; + ss->release_context(tdata->context); + + ss->destroy_thread_info(tdata->osl_thread_info); + + delete tdata; + + kg->osl = NULL; + kg->osl_ss = NULL; + kg->osl_tdata = NULL; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/globals.h b/intern/cycles/kernel/osl/globals.h index 172091c55f5..2b002a0033e 100644 --- a/intern/cycles/kernel/osl/globals.h +++ b/intern/cycles/kernel/osl/globals.h @@ -41,6 +41,10 @@ struct OSLGlobals { use = false; } + /* per thread data */ + static void thread_init(struct KernelGlobalsCPU *kg, OSLGlobals *osl_globals); + static void thread_free(struct KernelGlobalsCPU *kg); + bool use; /* shading system */ @@ -56,16 +60,8 @@ struct OSLGlobals { OSL::ShaderGroupRef background_state; /* attributes */ - struct Attribute { - TypeDesc type; - AttributeDescriptor desc; - ParamValue value; - }; - - typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap; typedef unordered_map<ustring, int, ustringHash> ObjectNameMap; - vector<AttributeMap> attribute_map; ObjectNameMap object_name_map; vector<ustring> object_names; }; diff --git a/intern/cycles/kernel/osl/shader.h b/intern/cycles/kernel/osl/osl.h index f0ab49dd6a8..bef23f3eea1 100644 --- a/intern/cycles/kernel/osl/shader.h +++ b/intern/cycles/kernel/osl/osl.h @@ -1,10 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ -#ifndef __OSL_SHADER_H__ -#define __OSL_SHADER_H__ - -#ifdef WITH_OSL +#pragma once /* OSL Shader Engine * @@ -16,30 +13,12 @@ * This means no thread state must be passed along in the kernel itself. */ -# include "kernel/types.h" +#include "kernel/osl/types.h" CCL_NAMESPACE_BEGIN -class Scene; - -struct ShaderClosure; -struct ShaderData; -struct IntegratorStateCPU; -struct differential3; -struct KernelGlobalsCPU; - -struct OSLGlobals; -struct OSLShadingSystem; - class OSLShader { public: - /* init */ - static void register_closures(OSLShadingSystem *ss); - - /* per thread data */ - static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals); - static void thread_free(KernelGlobalsCPU *kg); - /* eval */ static void eval_surface(const KernelGlobalsCPU *kg, const void *state, @@ -54,16 +33,6 @@ class OSLShader { ShaderData *sd, uint32_t path_flag); static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd); - - /* attributes */ - static int find_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - uint id, - AttributeDescriptor *desc); }; CCL_NAMESPACE_END - -#endif - -#endif /* __OSL_SHADER_H__ */ diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 6e75ae54f33..b744422ee78 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -18,22 +18,17 @@ #include "scene/pointcloud.h" #include "scene/scene.h" -#include "kernel/osl/closures.h" #include "kernel/osl/globals.h" #include "kernel/osl/services.h" -#include "kernel/osl/shader.h" #include "util/foreach.h" #include "util/log.h" #include "util/string.h" -// clang-format off #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/globals.h" #include "kernel/device/cpu/image.h" -#include "kernel/util/differential.h" - #include "kernel/integrator/state.h" #include "kernel/integrator/state_flow.h" @@ -45,10 +40,10 @@ #include "kernel/camera/projection.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" + +#include "kernel/svm/svm.h" #include "kernel/util/color.h" -// clang-format on CCL_NAMESPACE_BEGIN @@ -125,14 +120,14 @@ ustring OSLRenderServices::u_v("v"); ustring OSLRenderServices::u_empty; OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system) - : texture_system(texture_system) + : OSL::RendererServices(texture_system) { } OSLRenderServices::~OSLRenderServices() { - if (texture_system) { - VLOG_INFO << "OSL texture system stats:\n" << texture_system->getstats(); + if (m_texturesys) { + VLOG_INFO << "OSL texture system stats:\n" << m_texturesys->getstats(); } } @@ -452,6 +447,7 @@ static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, v return false; } +#if 0 static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void *val) { float2 fv[3]; @@ -462,6 +458,7 @@ static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void return set_attribute_float2(fv, type, derivatives, val); } +#endif static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val) { @@ -590,6 +587,7 @@ static bool set_attribute_float4(float4 f[3], TypeDesc type, bool derivatives, v return false; } +#if 0 static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void *val) { float4 fv[3]; @@ -600,6 +598,7 @@ static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void return set_attribute_float4(fv, type, derivatives, val); } +#endif static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val) { @@ -741,76 +740,75 @@ static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val) return false; } -static bool get_primitive_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - const OSLGlobals::Attribute &attr, - const TypeDesc &type, - bool derivatives, - void *val) +static bool get_object_attribute(const KernelGlobalsCPU *kg, + ShaderData *sd, + const AttributeDescriptor &desc, + const TypeDesc &type, + bool derivatives, + void *val) { - if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { + if (desc.type == NODE_ATTR_FLOAT3) { float3 fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { - fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc); +#ifdef __VOLUME__ + if (primitive_is_volume_attribute(sd, desc)) { + fval[0] = primitive_volume_attribute_float3(kg, sd, desc); } - else { + else +#endif + { memset(fval, 0, sizeof(fval)); fval[0] = primitive_surface_attribute_float3( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); } return set_attribute_float3(fval, type, derivatives, val); } - else if (attr.type == TypeFloat2) { - if (primitive_is_volume_attribute(sd, attr.desc)) { + else if (desc.type == NODE_ATTR_FLOAT2) { +#ifdef __VOLUME__ + if (primitive_is_volume_attribute(sd, desc)) { assert(!"Float2 attribute not support for volumes"); return false; } - else { + else +#endif + { float2 fval[3]; fval[0] = primitive_surface_attribute_float2( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); return set_attribute_float2(fval, type, derivatives, val); } } - else if (attr.type == TypeDesc::TypeFloat) { + else if (desc.type == NODE_ATTR_FLOAT) { float fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { +#ifdef __VOLUME__ + if (primitive_is_volume_attribute(sd, desc)) { memset(fval, 0, sizeof(fval)); - fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc); + fval[0] = primitive_volume_attribute_float(kg, sd, desc); } - else { + else +#endif + { fval[0] = primitive_surface_attribute_float( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); } return set_attribute_float(fval, type, derivatives, val); } - else if (attr.type == TypeDesc::TypeFloat4 || attr.type == TypeRGBA) { + else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { float4 fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { +#ifdef __VOLUME__ + if (primitive_is_volume_attribute(sd, desc)) { memset(fval, 0, sizeof(fval)); - fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc); + fval[0] = primitive_volume_attribute_float4(kg, sd, desc); } - else { + else +#endif + { fval[0] = primitive_surface_attribute_float4( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); } return set_attribute_float4(fval, type, derivatives, val); } - else { - return false; - } -} - -static bool get_mesh_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - const OSLGlobals::Attribute &attr, - const TypeDesc &type, - bool derivatives, - void *val) -{ - if (attr.type == TypeDesc::TypeMatrix) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); + else if (desc.type == NODE_ATTR_MATRIX) { + Transform tfm = primitive_attribute_matrix(kg, desc); return set_attribute_matrix(tfm, type, val); } else { @@ -818,44 +816,6 @@ static bool get_mesh_attribute(const KernelGlobalsCPU *kg, } } -static bool get_object_attribute(const OSLGlobals::Attribute &attr, - TypeDesc type, - bool derivatives, - void *val) -{ - if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { - const float *data = (const float *)attr.value.data(); - return set_attribute_float3(make_float3(data[0], data[1], data[2]), type, derivatives, val); - } - else if (attr.type == TypeFloat2) { - const float *data = (const float *)attr.value.data(); - return set_attribute_float2(make_float2(data[0], data[1]), type, derivatives, val); - } - else if (attr.type == TypeDesc::TypeFloat) { - const float *data = (const float *)attr.value.data(); - return set_attribute_float(data[0], type, derivatives, val); - } - else if (attr.type == TypeRGBA || attr.type == TypeDesc::TypeFloat4) { - const float *data = (const float *)attr.value.data(); - return set_attribute_float4( - make_float4(data[0], data[1], data[2], data[3]), type, derivatives, val); - } - else if (attr.type == type) { - size_t datasize = attr.value.datasize(); - - memcpy(val, attr.value.data(), datasize); - if (derivatives) { - memset((char *)val + datasize, 0, datasize * 2); - } - - return true; - } - else { - return false; - } -} - bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg, ShaderData *sd, ustring name, @@ -980,6 +940,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); return set_attribute_float(f, type, derivatives, val); } +#ifdef __HAIR__ /* Hair Attributes */ else if (name == u_is_curve) { float f = (sd->type & PRIMITIVE_CURVE) != 0; @@ -997,6 +958,8 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg float f = curve_random(kg, sd); return set_attribute_float(f, type, derivatives, val); } +#endif +#ifdef __POINTCLOUD__ /* point attributes */ else if (name == u_is_point) { float f = (sd->type & PRIMITIVE_POINT) != 0; @@ -1014,6 +977,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg float f = point_random(kg, sd); return set_attribute_float(f, type, derivatives, val); } +#endif else if (name == u_normal_map_normal) { if (sd->type & PRIMITIVE_TRIANGLE) { float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); @@ -1024,7 +988,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg } } else { - return false; + return get_background_attribute(kg, sd, name, type, derivatives, val); } } @@ -1094,18 +1058,17 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); if (derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) - - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) - - ndc[0]; + ndc[1] = zero_float3(); + ndc[2] = zero_float3(); } } else { ndc[0] = camera_world_to_ndc(kg, sd, sd->P); if (derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0]; + const differential3 dP = differential_from_compact(sd->Ng, sd->dP); + ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0]; + ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0]; } } @@ -1133,7 +1096,6 @@ bool OSLRenderServices::get_attribute( ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) { const KernelGlobalsCPU *kg = sd->osl_globals; - int prim_type = 0; int object; /* lookup of attribute on another object */ @@ -1147,44 +1109,18 @@ bool OSLRenderServices::get_attribute( } else { object = sd->object; - prim_type = attribute_primitive_type(kg, sd); - - if (object == OBJECT_NONE) - return get_background_attribute(kg, sd, name, type, derivatives, val); } /* find attribute on object */ - object = object * ATTR_PRIM_TYPES + prim_type; - OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object]; - OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); - - if (it != attribute_map.end()) { - const OSLGlobals::Attribute &attr = it->second; - - if (attr.desc.element != ATTR_ELEMENT_OBJECT) { - /* triangle and vertex attributes */ - if (get_primitive_attribute(kg, sd, attr, type, derivatives, val)) - return true; - else - return get_mesh_attribute(kg, sd, attr, type, derivatives, val); - } - else { - /* object attribute */ - return get_object_attribute(attr, type, derivatives, val); - } + const AttributeDescriptor desc = find_attribute( + kg, object, sd->prim, object == sd->object ? sd->type : PRIMITIVE_NONE, name.hash()); + if (desc.offset != ATTR_STD_NOT_FOUND) { + return get_object_attribute(kg, sd, desc, type, derivatives, val); } else { /* not found in attribute, check standard object info */ - bool is_std_object_attribute = get_object_standard_attribute( - kg, sd, name, type, derivatives, val); - - if (is_std_object_attribute) - return true; - - return get_background_attribute(kg, sd, name, type, derivatives, val); + return get_object_standard_attribute(kg, sd, name, type, derivatives, val); } - - return false; } bool OSLRenderServices::get_userdata( @@ -1211,7 +1147,7 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file } /* Get handle from OpenImageIO. */ - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename); if (handle == NULL) { return NULL; @@ -1233,7 +1169,7 @@ bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; if (handle->oiio_handle) { - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; return ts->good(handle->oiio_handle); } else { @@ -1355,7 +1291,7 @@ bool OSLRenderServices::texture(ustring filename, } case OSLTextureHandle::OIIO: { /* OpenImageIO texture cache. */ - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; if (handle && handle->oiio_handle) { if (texture_thread_info == NULL) { @@ -1459,7 +1395,7 @@ bool OSLRenderServices::texture3d(ustring filename, } case OSLTextureHandle::OIIO: { /* OpenImageIO texture cache. */ - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; if (handle && handle->oiio_handle) { if (texture_thread_info == NULL) { @@ -1543,7 +1479,7 @@ bool OSLRenderServices::environment(ustring filename, ustring *errormessage) { OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; bool status = false; if (handle && handle->oiio_handle) { @@ -1615,7 +1551,7 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, } /* Get texture info from OpenImageIO. */ - OSL::TextureSystem *ts = texture_system; + OSL::TextureSystem *ts = m_texturesys; return ts->get_texture_info(filename, subimage, dataname, datatype, data); } @@ -1669,9 +1605,10 @@ bool OSLRenderServices::trace(TraceOpt &options, /* setup ray */ Ray ray; - ray.P = TO_FLOAT3(P); - ray.D = TO_FLOAT3(R); - ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; + ray.P = make_float3(P.x, P.y, P.z); + ray.D = make_float3(R.x, R.y, R.z); + ray.tmin = 0.0f; + ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; ray.time = sd->time; ray.self.object = OBJECT_NONE; ray.self.prim = PRIM_NONE; @@ -1692,12 +1629,12 @@ bool OSLRenderServices::trace(TraceOpt &options, /* ray differentials */ differential3 dP; - dP.dx = TO_FLOAT3(dPdx); - dP.dy = TO_FLOAT3(dPdy); + dP.dx = make_float3(dPdx.x, dPdx.y, dPdx.z); + dP.dy = make_float3(dPdy.x, dPdy.y, dPdy.z); ray.dP = differential_make_compact(dP); differential3 dD; - dD.dx = TO_FLOAT3(dRdx); - dD.dy = TO_FLOAT3(dRdy); + dD.dx = make_float3(dRdx.x, dRdx.y, dRdx.z); + dD.dy = make_float3(dRdy.x, dRdy.y, dRdy.z); ray.dD = differential_make_compact(dD); /* allocate trace data */ @@ -1710,12 +1647,12 @@ bool OSLRenderServices::trace(TraceOpt &options, const KernelGlobalsCPU *kg = sd->osl_globals; - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return false; } - /* Raytrace, leaving out shadow opaque to avoid early exit. */ + /* Ray-trace, leaving out shadow opaque to avoid early exit. */ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; tracedata->hit = scene_intersect(kg, &ray, visibility, &tracedata->isect); return tracedata->hit; @@ -1756,11 +1693,13 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, return set_attribute_float3(sd->Ng, type, derivatives, val); } else if (name == u_P) { - float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy}; + const differential3 dP = differential_from_compact(sd->Ng, sd->dP); + float3 f[3] = {sd->P, dP.dx, dP.dy}; return set_attribute_float3(f, type, derivatives, val); } else if (name == u_I) { - float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy}; + const differential3 dI = differential_from_compact(sd->I, sd->dI); + float3 f[3] = {sd->I, dI.dx, dI.dy}; return set_attribute_float3(f, type, derivatives, val); } else if (name == u_u) { diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index edffd912bad..334b6682e34 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -76,6 +76,8 @@ class OSLRenderServices : public OSL::RendererServices { OSLRenderServices(OSL::TextureSystem *texture_system); ~OSLRenderServices(); + static void register_closures(OSL::ShadingSystem *ss); + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, @@ -321,7 +323,6 @@ class OSLRenderServices : public OSL::RendererServices { * globals to be shared between different render sessions. This saves memory, * and is required because texture handles are cached as part of the shared * shading system. */ - OSL::TextureSystem *texture_system; OSLTextureHandleMap textures; }; diff --git a/intern/cycles/kernel/osl/shader.cpp b/intern/cycles/kernel/osl/shader.cpp deleted file mode 100644 index af96c0070e3..00000000000 --- a/intern/cycles/kernel/osl/shader.cpp +++ /dev/null @@ -1,417 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#include <OSL/oslexec.h> - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/types.h" - -#include "kernel/geom/object.h" - -#include "kernel/integrator/state.h" - -#include "kernel/osl/closures.h" -#include "kernel/osl/globals.h" -#include "kernel/osl/services.h" -#include "kernel/osl/shader.h" -// clang-format on - -#include "scene/attribute.h" - -CCL_NAMESPACE_BEGIN - -/* Threads */ - -void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals) -{ - /* no osl used? */ - if (!osl_globals->use) { - kg->osl = NULL; - return; - } - - /* Per thread kernel data init. */ - kg->osl = osl_globals; - - OSL::ShadingSystem *ss = kg->osl->ss; - OSLThreadData *tdata = new OSLThreadData(); - - memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); - tdata->globals.tracedata = &tdata->tracedata; - tdata->globals.flipHandedness = false; - tdata->osl_thread_info = ss->create_thread_info(); - tdata->context = ss->get_context(tdata->osl_thread_info); - - tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); - - kg->osl_ss = (OSLShadingSystem *)ss; - kg->osl_tdata = tdata; -} - -void OSLShader::thread_free(KernelGlobalsCPU *kg) -{ - if (!kg->osl) - return; - - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSLThreadData *tdata = kg->osl_tdata; - ss->release_context(tdata->context); - - ss->destroy_thread_info(tdata->osl_thread_info); - - delete tdata; - - kg->osl = NULL; - kg->osl_ss = NULL; - kg->osl_tdata = NULL; -} - -/* Globals */ - -static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, - ShaderData *sd, - const void *state, - uint32_t path_flag, - OSLThreadData *tdata) -{ - OSL::ShaderGlobals *globals = &tdata->globals; - - /* copy from shader data to shader globals */ - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - globals->I = TO_VEC3(sd->I); - globals->dIdx = TO_VEC3(sd->dI.dx); - globals->dIdy = TO_VEC3(sd->dI.dy); - globals->N = TO_VEC3(sd->N); - globals->Ng = TO_VEC3(sd->Ng); - globals->u = sd->u; - globals->dudx = sd->du.dx; - globals->dudy = sd->du.dy; - globals->v = sd->v; - globals->dvdx = sd->dv.dx; - globals->dvdy = sd->dv.dy; - globals->dPdu = TO_VEC3(sd->dPdu); - globals->dPdv = TO_VEC3(sd->dPdv); - globals->surfacearea = 1.0f; - globals->time = sd->time; - - /* booleans */ - globals->raytype = path_flag; - globals->backfacing = (sd->flag & SD_BACKFACING); - - /* shader data to be used in services callbacks */ - globals->renderstate = sd; - - /* hacky, we leave it to services to fetch actual object matrix */ - globals->shader2common = sd; - globals->object2common = sd; - - /* must be set to NULL before execute */ - globals->Ci = NULL; - - /* clear trace data */ - tdata->tracedata.init = false; - - /* Used by render-services. */ - sd->osl_globals = kg; - if (path_flag & PATH_RAY_SHADOW) { - sd->osl_path_state = nullptr; - sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state; - } - else { - sd->osl_path_state = (const IntegratorStateCPU *)state; - sd->osl_shadow_path_state = nullptr; - } -} - -/* Surface */ - -static void flatten_surface_closure_tree(ShaderData *sd, - uint32_t path_flag, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. */ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); - flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, path_flag, weight); - } - break; - } - } -} - -void OSLShader::eval_surface(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - /* automatic bump shader */ - if (kg->osl->bump_state[shader]) { - /* save state */ - float3 P = sd->P; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; - - /* set state as if undisplaced */ - if (sd->flag & SD_HAS_DISPLACEMENT) { - float data[9]; - bool found = kg->osl->services->get_attribute(sd, - true, - OSLRenderServices::u_empty, - TypeDesc::TypeVector, - OSLRenderServices::u_geom_undisplaced, - data); - (void)found; - assert(found); - - memcpy(&sd->P, data, sizeof(float) * 3); - memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3); - memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3); - - object_position_transform(kg, sd, &sd->P); - object_dir_transform(kg, sd, &sd->dP.dx); - object_dir_transform(kg, sd, &sd->dP.dy); - - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - } - - /* execute bump shader */ - ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); - - /* reset state */ - sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; - - globals->P = TO_VEC3(P); - globals->dPdx = TO_VEC3(dPdx); - globals->dPdy = TO_VEC3(dPdy); - } - - /* surface shader */ - if (kg->osl->surface_state[shader]) { - ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); - } - - /* flatten closure tree */ - if (globals->Ci) - flatten_surface_closure_tree(sd, path_flag, globals->Ci); -} - -/* Background */ - -static void flatten_background_closure_tree(ShaderData *sd, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, if we are shading for background there - * is only one supported closure type at the moment, which has no evaluation - * functions, so we just sum the weights */ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - - flatten_background_closure_tree(sd, add->closureA, weight); - flatten_background_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, 0, weight); - } - break; - } - } -} - -void OSLShader::eval_background(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - - if (kg->osl->background_state) { - ss->execute(octx, *(kg->osl->background_state), *globals); - } - - /* return background color immediately */ - if (globals->Ci) - flatten_background_closure_tree(sd, globals->Ci); -} - -/* Volume */ - -static void flatten_volume_closure_tree(ShaderData *sd, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. */ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_volume_closure_tree(sd, add->closureA, weight); - flatten_volume_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, 0, weight); - } - } - } -} - -void OSLShader::eval_volume(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - if (kg->osl->volume_state[shader]) { - ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); - } - - /* flatten closure tree */ - if (globals->Ci) - flatten_volume_closure_tree(sd, globals->Ci); -} - -/* Displacement */ - -void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - - shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); - - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - if (kg->osl->displacement_state[shader]) { - ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); - } - - /* get back position */ - sd->P = TO_FLOAT3(globals->P); -} - -/* Attributes */ - -int OSLShader::find_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - uint id, - AttributeDescriptor *desc) -{ - /* for OSL, a hash map is used to lookup the attribute by name. */ - int object = sd->object * ATTR_PRIM_TYPES; - - OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; - ustring stdname(std::string("geom:") + - std::string(Attribute::standard_name((AttributeStandard)id))); - OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); - - if (it != attr_map.end()) { - const OSLGlobals::Attribute &osl_attr = it->second; - *desc = osl_attr.desc; - - if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { - desc->offset = ATTR_STD_NOT_FOUND; - return ATTR_STD_NOT_FOUND; - } - - /* return result */ - if (osl_attr.desc.element == ATTR_ELEMENT_NONE) { - desc->offset = ATTR_STD_NOT_FOUND; - } - return desc->offset; - } - else { - desc->offset = ATTR_STD_NOT_FOUND; - return (int)ATTR_STD_NOT_FOUND; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt index 741bce7c399..c79af3f6112 100644 --- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt @@ -57,6 +57,10 @@ set(SRC_OSL node_math.osl node_mix.osl node_mix_closure.osl + node_mix_color.osl + node_mix_float.osl + node_mix_vector.osl + node_mix_vector_non_uniform.osl node_musgrave_texture.osl node_noise_texture.osl node_normal.osl @@ -109,6 +113,7 @@ file(GLOB SRC_OSL_HEADER_DIST ${OSL_SHADER_DIR}/*.h) set(SRC_OSL_HEADERS node_color.h + node_color_blend.h node_fresnel.h node_hash.h node_math.h diff --git a/intern/cycles/kernel/osl/shaders/node_color_blend.h b/intern/cycles/kernel/osl/shaders/node_color_blend.h new file mode 100644 index 00000000000..ab4b4809a97 --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_color_blend.h @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +color node_mix_blend(float t, color col1, color col2) +{ + return mix(col1, col2, t); +} + +color node_mix_add(float t, color col1, color col2) +{ + return mix(col1, col1 + col2, t); +} + +color node_mix_mul(float t, color col1, color col2) +{ + return mix(col1, col1 * col2, t); +} + +color node_mix_screen(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1); +} + +color node_mix_overlay(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + color outcol = col1; + + if (outcol[0] < 0.5) + outcol[0] *= tm + 2.0 * t * col2[0]; + else + outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]); + + if (outcol[1] < 0.5) + outcol[1] *= tm + 2.0 * t * col2[1]; + else + outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]); + + if (outcol[2] < 0.5) + outcol[2] *= tm + 2.0 * t * col2[2]; + else + outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]); + + return outcol; +} + +color node_mix_sub(float t, color col1, color col2) +{ + return mix(col1, col1 - col2, t); +} + +color node_mix_div(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + color outcol = col1; + + if (col2[0] != 0.0) + outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0]; + if (col2[1] != 0.0) + outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1]; + if (col2[2] != 0.0) + outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2]; + + return outcol; +} + +color node_mix_diff(float t, color col1, color col2) +{ + return mix(col1, abs(col1 - col2), t); +} + +color node_mix_dark(float t, color col1, color col2) +{ + return mix(col1, min(col1, col2), t); +} + +color node_mix_light(float t, color col1, color col2) +{ + return mix(col1, max(col1, col2), t); +} + +color node_mix_dodge(float t, color col1, color col2) +{ + color outcol = col1; + + if (outcol[0] != 0.0) { + float tmp = 1.0 - t * col2[0]; + if (tmp <= 0.0) + outcol[0] = 1.0; + else if ((tmp = outcol[0] / tmp) > 1.0) + outcol[0] = 1.0; + else + outcol[0] = tmp; + } + if (outcol[1] != 0.0) { + float tmp = 1.0 - t * col2[1]; + if (tmp <= 0.0) + outcol[1] = 1.0; + else if ((tmp = outcol[1] / tmp) > 1.0) + outcol[1] = 1.0; + else + outcol[1] = tmp; + } + if (outcol[2] != 0.0) { + float tmp = 1.0 - t * col2[2]; + if (tmp <= 0.0) + outcol[2] = 1.0; + else if ((tmp = outcol[2] / tmp) > 1.0) + outcol[2] = 1.0; + else + outcol[2] = tmp; + } + + return outcol; +} + +color node_mix_burn(float t, color col1, color col2) +{ + float tmp, tm = 1.0 - t; + + color outcol = col1; + + tmp = tm + t * col2[0]; + if (tmp <= 0.0) + outcol[0] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0) + outcol[0] = 0.0; + else if (tmp > 1.0) + outcol[0] = 1.0; + else + outcol[0] = tmp; + + tmp = tm + t * col2[1]; + if (tmp <= 0.0) + outcol[1] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0) + outcol[1] = 0.0; + else if (tmp > 1.0) + outcol[1] = 1.0; + else + outcol[1] = tmp; + + tmp = tm + t * col2[2]; + if (tmp <= 0.0) + outcol[2] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0) + outcol[2] = 0.0; + else if (tmp > 1.0) + outcol[2] = 1.0; + else + outcol[2] = tmp; + + return outcol; +} + +color node_mix_hue(float t, color col1, color col2) +{ + color outcol = col1; + color hsv2 = rgb_to_hsv(col2); + + if (hsv2[1] != 0.0) { + color hsv = rgb_to_hsv(outcol); + hsv[0] = hsv2[0]; + color tmp = hsv_to_rgb(hsv); + + outcol = mix(outcol, tmp, t); + } + + return outcol; +} + +color node_mix_sat(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + color outcol = col1; + + color hsv = rgb_to_hsv(outcol); + + if (hsv[1] != 0.0) { + color hsv2 = rgb_to_hsv(col2); + + hsv[1] = tm * hsv[1] + t * hsv2[1]; + outcol = hsv_to_rgb(hsv); + } + + return outcol; +} + +color node_mix_val(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + color hsv = rgb_to_hsv(col1); + color hsv2 = rgb_to_hsv(col2); + + hsv[2] = tm * hsv[2] + t * hsv2[2]; + + return hsv_to_rgb(hsv); +} + +color node_mix_color(float t, color col1, color col2) +{ + color outcol = col1; + color hsv2 = rgb_to_hsv(col2); + + if (hsv2[1] != 0.0) { + color hsv = rgb_to_hsv(outcol); + hsv[0] = hsv2[0]; + hsv[1] = hsv2[1]; + color tmp = hsv_to_rgb(hsv); + + outcol = mix(outcol, tmp, t); + } + + return outcol; +} + +color node_mix_soft(float t, color col1, color col2) +{ + float tm = 1.0 - t; + + color one = color(1.0); + color scr = one - (one - col2) * (one - col1); + + return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); +} + +color node_mix_linear(float t, color col1, color col2) +{ + color outcol = col1; + + if (col2[0] > 0.5) + outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5)); + else + outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0); + + if (col2[1] > 0.5) + outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5)); + else + outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0); + + if (col2[2] > 0.5) + outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5)); + else + outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0); + + return outcol; +} + +color node_mix_clamp(color col) +{ + color outcol = col; + + outcol[0] = clamp(col[0], 0.0, 1.0); + outcol[1] = clamp(col[1], 0.0, 1.0); + outcol[2] = clamp(col[2], 0.0, 1.0); + + return outcol; +} diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl b/intern/cycles/kernel/osl/shaders/node_geometry.osl index 23d4c2ee66f..cc891abd6e3 100644 --- a/intern/cycles/kernel/osl/shaders/node_geometry.osl +++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl @@ -20,7 +20,7 @@ shader node_geometry(normal NormalIn = N, Normal = NormalIn; TrueNormal = Ng; Incoming = I; - Parametric = point(u, v, 0.0); + Parametric = point(1.0 - u - v, u, 0.0); Backfacing = backfacing(); if (bump_offset == "dx") { diff --git a/intern/cycles/kernel/osl/shaders/node_mix_color.osl b/intern/cycles/kernel/osl/shaders/node_mix_color.osl new file mode 100644 index 00000000000..3ddd89ed306 --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_mix_color.osl @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "node_color.h" +#include "node_color_blend.h" +#include "stdcycles.h" + +shader node_mix_color(string blend_type = "mix", + int use_clamp = 0, + int use_clamp_result = 0, + float Factor = 0.5, + color A = 0.0, + color B = 0.0, + output color Result = 0.0) +{ + float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor; + + if (blend_type == "mix") + Result = mix(A, B, t); + if (blend_type == "add") + Result = node_mix_add(t, A, B); + if (blend_type == "multiply") + Result = node_mix_mul(t, A, B); + if (blend_type == "screen") + Result = node_mix_screen(t, A, B); + if (blend_type == "overlay") + Result = node_mix_overlay(t, A, B); + if (blend_type == "subtract") + Result = node_mix_sub(t, A, B); + if (blend_type == "divide") + Result = node_mix_div(t, A, B); + if (blend_type == "difference") + Result = node_mix_diff(t, A, B); + if (blend_type == "darken") + Result = node_mix_dark(t, A, B); + if (blend_type == "lighten") + Result = node_mix_light(t, A, B); + if (blend_type == "dodge") + Result = node_mix_dodge(t, A, B); + if (blend_type == "burn") + Result = node_mix_burn(t, A, B); + if (blend_type == "hue") + Result = node_mix_hue(t, A, B); + if (blend_type == "saturation") + Result = node_mix_sat(t, A, B); + if (blend_type == "value") + Result = node_mix_val(t, A, B); + if (blend_type == "color") + Result = node_mix_color(t, A, B); + if (blend_type == "soft_light") + Result = node_mix_soft(t, A, B); + if (blend_type == "linear_light") + Result = node_mix_linear(t, A, B); + + if (use_clamp_result) + Result = clamp(Result, 0.0, 1.0); +} diff --git a/intern/cycles/kernel/osl/shaders/node_mix_float.osl b/intern/cycles/kernel/osl/shaders/node_mix_float.osl new file mode 100644 index 00000000000..fdc7b4eff6e --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_mix_float.osl @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_mix_float( + int use_clamp = 0, float Factor = 0.5, float A = 0.0, float B = 0.0, output float Result = 0.0) +{ + float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor; + Result = mix(A, B, t); +} diff --git a/intern/cycles/kernel/osl/shaders/node_mix_vector.osl b/intern/cycles/kernel/osl/shaders/node_mix_vector.osl new file mode 100644 index 00000000000..d76396afb0d --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_mix_vector.osl @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_mix_vector(int use_clamp = 0, + float Factor = 0.5, + vector A = 0.0, + vector B = 0.0, + output vector Result = 0.0) +{ + float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor; + Result = mix(A, B, t); +} diff --git a/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl b/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl new file mode 100644 index 00000000000..217856bcf2a --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_mix_vector_non_uniform(int use_clamp = 0, + vector Factor = 0.5, + vector A = 0.0, + vector B = 0.0, + output vector Result = 0.0) +{ + vector t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor; + Result = mix(A, B, t); +} diff --git a/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl index 391be8c14d7..fdda1ba9cd1 100644 --- a/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl +++ b/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl @@ -114,13 +114,12 @@ float noise_musgrave_hybrid_multi_fractal_1d( { float p = co; float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = safe_snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) { + for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) { if (weight > 1.0) { weight = 1.0; } @@ -133,8 +132,12 @@ float noise_musgrave_hybrid_multi_fractal_1d( } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((safe_snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (safe_snoise(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -279,13 +282,12 @@ float noise_musgrave_hybrid_multi_fractal_2d( { vector2 p = co; float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = safe_snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) { + for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) { if (weight > 1.0) { weight = 1.0; } @@ -298,8 +300,12 @@ float noise_musgrave_hybrid_multi_fractal_2d( } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((safe_snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (safe_snoise(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -444,13 +450,12 @@ float noise_musgrave_hybrid_multi_fractal_3d( { vector3 p = co; float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = safe_snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) { + for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) { if (weight > 1.0) { weight = 1.0; } @@ -463,8 +468,12 @@ float noise_musgrave_hybrid_multi_fractal_3d( } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((safe_snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (safe_snoise(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -609,13 +618,12 @@ float noise_musgrave_hybrid_multi_fractal_4d( { vector4 p = co; float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = safe_snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) { + for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) { if (weight > 1.0) { weight = 1.0; } @@ -628,8 +636,12 @@ float noise_musgrave_hybrid_multi_fractal_4d( } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((safe_snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (safe_snoise(p) + offset) * pwr; + value += rmd * weight * signal; } return value; diff --git a/intern/cycles/kernel/osl/types.h b/intern/cycles/kernel/osl/types.h new file mode 100644 index 00000000000..46e06114360 --- /dev/null +++ b/intern/cycles/kernel/osl/types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Closure */ + +enum ClosureTypeOSL { + OSL_CLOSURE_MUL_ID = -1, + OSL_CLOSURE_ADD_ID = -2, + + OSL_CLOSURE_NONE_ID = 0, + +#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) OSL_CLOSURE_##Upper##_ID, +#include "closures_template.h" +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h index b8da94248a4..e748f95fc7d 100644 --- a/intern/cycles/kernel/sample/jitter.h +++ b/intern/cycles/kernel/sample/jitter.h @@ -1,182 +1,97 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +#include "kernel/sample/util.h" +#include "util/hash.h" + #pragma once CCL_NAMESPACE_BEGIN -ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed) +ccl_device float pmj_sample_1D(KernelGlobals kg, + uint sample, + const uint rng_hash, + const uint dimension) { - x += seed; - x ^= (x * 0x6c50b47cu); - x ^= x * 0xb82f1e52u; - x ^= x * 0xc7afe638u; - x ^= x * 0x8d22f6e6u; + uint seed = rng_hash; - return x; -} + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + seed = kernel_data.integrator.seed; + } -ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) -{ - x = reverse_integer_bits(x); - x = laine_karras_permutation(x, seed); - x = reverse_integer_bits(x); + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + const uint sample_mask = NUM_PMJ_SAMPLES - 1; + const uint sample_shuffled = nested_uniform_scramble(sample, + hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. */ + const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % + (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + float x = kernel_data_fetch(sample_pattern_lut, index * 2); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. */ + if (kernel_data.integrator.scrambling_distance < 1.0f) { + const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + x += jitter_x; + x -= floorf(x); + } return x; } -ccl_device_inline uint cmj_hash(uint i, uint p) +ccl_device float2 pmj_sample_2D(KernelGlobals kg, + uint sample, + const uint rng_hash, + const uint dimension) { - i ^= p; - i ^= i >> 17; - i ^= i >> 10; - i *= 0xb36534e5; - i ^= i >> 12; - i ^= i >> 21; - i *= 0x93fc4795; - i ^= 0xdf6e307f; - i ^= i >> 17; - i *= 1 | p >> 18; - - return i; -} - -ccl_device_inline uint cmj_hash_simple(uint i, uint p) -{ - i = (i ^ 61) ^ p; - i += i << 3; - i ^= i >> 4; - i *= 0x27d4eb2d; - return i; -} - -ccl_device_inline float cmj_randfloat(uint i, uint p) -{ - return cmj_hash(i, p) * (1.0f / 4294967808.0f); -} - -ccl_device_inline float cmj_randfloat_simple(uint i, uint p) -{ - return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); -} + uint seed = rng_hash; -ccl_device_inline float cmj_randfloat_simple_dist(uint i, uint p, float d) -{ - return cmj_hash_simple(i, p) * (d / (float)0xFFFFFFFF); -} - -ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) -{ - uint hash = rng_hash; - float jitter_x = 0.0f; + /* Use the same sample sequence seed for all pixels when using + * scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; - - jitter_x = cmj_randfloat_simple_dist( - dimension, rng_hash, kernel_data.integrator.scrambling_distance); + seed = kernel_data.integrator.seed; } - /* Perform Owen shuffle of the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = (dimension + sample_set); - const uint dim = d % NUM_PMJ_PATTERNS; - - /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D - * the x part is used for even dims and the y for odd. */ - int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); -# else - float dx = cmj_randfloat(d, hash); -# endif - /* Jitter sample locations and map back into [0 1]. */ - fx = fx + dx + jitter_x; - fx = fx - floorf(fx); -#else -# warning "Not using Cranley-Patterson Rotation." -#endif - - return fx; -} - -ccl_device void pmj_sample_2D(KernelGlobals kg, - uint sample, - uint rng_hash, - uint dimension, - ccl_private float *x, - ccl_private float *y) -{ - uint hash = rng_hash; - float jitter_x = 0.0f; - float jitter_y = 0.0f; + /* Shuffle the pattern order and sample index to better decorrelate + * dimensions and make the most of the finite patterns we have. + * The funky sample mask stuff is to ensure that we only shuffle + * *within* the current sample pattern, which is necessary to avoid + * early repeat pattern use. */ + const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed); + /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */ + const uint sample_mask = NUM_PMJ_SAMPLES - 1; + const uint sample_shuffled = nested_uniform_scramble(sample, + hash_wang_seeded_uint(dimension, seed)); + sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask); + + /* Fetch the sample. */ + const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % + (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS); + float x = kernel_data_fetch(sample_pattern_lut, index * 2); + float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1); + + /* Do limited Cranley-Patterson rotation when using scrambling distance. */ if (kernel_data.integrator.scrambling_distance < 1.0f) { - hash = kernel_data.integrator.seed; - - jitter_x = cmj_randfloat_simple_dist( - dimension, rng_hash, kernel_data.integrator.scrambling_distance); - jitter_y = cmj_randfloat_simple_dist( - dimension + 1, rng_hash, kernel_data.integrator.scrambling_distance); + const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) * + kernel_data.integrator.scrambling_distance; + const float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) * + kernel_data.integrator.scrambling_distance; + x += jitter_x; + y += jitter_y; + x -= floorf(x); + y -= floorf(y); } - /* Perform a shuffle on the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = dimension + sample_set; - uint dim = d % NUM_PMJ_PATTERNS; - int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, hash); - float dy = cmj_randfloat_simple(d + 1, hash); -# else - float dx = cmj_randfloat(d, hash); - float dy = cmj_randfloat(d + 1, hash); -# endif - /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ - float sx = fx + dx + jitter_x; - float sy = fy + dy + jitter_y; - sx = sx - floorf(sx); - sy = sy - floorf(sy); -#else -# warning "Not using Cranley Patterson Rotation." -#endif - - (*x) = sx; - (*y) = sy; + return make_float2(x, y); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h index 1e66f39ede2..ebdecc1bff9 100644 --- a/intern/cycles/kernel/sample/pattern.h +++ b/intern/cycles/kernel/sample/pattern.h @@ -4,6 +4,7 @@ #pragma once #include "kernel/sample/jitter.h" +#include "kernel/sample/sobol_burley.h" #include "util/hash.h" CCL_NAMESPACE_BEGIN @@ -12,33 +13,6 @@ CCL_NAMESPACE_BEGIN * this single threaded on a CPU for repeatable results. */ //#define __DEBUG_CORRELATION__ -/* High Dimensional Sobol. - * - * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal - * to classic Van der Corput and Sobol sequences. */ - -#ifdef __SOBOL__ - -/* Skip initial numbers that for some dimensions have clear patterns that - * don't cover the entire sample space. Ideally we would have a better - * progressive pattern that doesn't suffer from this problem, because even - * with this offset some dimensions are quite poor. - */ -# define SOBOL_SKIP 64 - -ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension) -{ - uint result = 0; - uint i = index + SOBOL_SKIP; - for (int j = 0, x; (x = find_first_set(i)); i >>= x) { - j += x; - result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1)); - } - return result; -} - -#endif /* __SOBOL__ */ - ccl_device_forceinline float path_rng_1D(KernelGlobals kg, uint rng_hash, int sample, @@ -48,58 +22,29 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg, return (float)drand48(); #endif -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) -#endif - { + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + return sobol_burley_sample_1D(sample, dimension, rng_hash); + } + else { return pmj_sample_1D(kg, sample, rng_hash, dimension); } - -#ifdef __SOBOL__ - /* Sobol sequence value using direction vectors. */ - uint result = sobol_dimension(kg, sample, dimension); - float r = (float)result * (1.0f / (float)0xFFFFFFFF); - - /* Cranly-Patterson rotation using rng seed */ - float shift; - - /* Hash rng with dimension to solve correlation issues. - * See T38710, T50116. - */ - uint tmp_rng = cmj_hash_simple(dimension, rng_hash); - shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF); - - return r + shift - floorf(r + shift); -#endif } -ccl_device_forceinline void path_rng_2D(KernelGlobals kg, - uint rng_hash, - int sample, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg, + uint rng_hash, + int sample, + int dimension) { #ifdef __DEBUG_CORRELATION__ - *fx = (float)drand48(); - *fy = (float)drand48(); - return; -#endif - -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) + return make_float2((float)drand48(), (float)drand48()); #endif - { - pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy); - return; + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) { + return sobol_burley_sample_2D(sample, dimension, rng_hash); + } + else { + return pmj_sample_2D(kg, sample, rng_hash, dimension); } - -#ifdef __SOBOL__ - /* Sobol. */ - *fx = path_rng_1D(kg, rng_hash, sample, dimension); - *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1); -#endif } /** @@ -145,18 +90,33 @@ ccl_device_inline uint path_rng_hash_init(KernelGlobals kg, return rng_hash; } -ccl_device_inline bool sample_is_even(int pattern, int sample) +/** + * Splits samples into two different classes, A and B, which can be + * compared for variance estimation. + */ +ccl_device_inline bool sample_is_class_A(int pattern, int sample) { - if (pattern == SAMPLING_PATTERN_PMJ) { - /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al. - * We can use this to get divide sample sequence into two classes for easier variance - * estimation. */ - return popcount(uint(sample) & 0xaaaaaaaa) & 1; - } - else { - /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */ - return sample & 0x1; +#if 0 + if (!(pattern == SAMPLING_PATTERN_PMJ || pattern == SAMPLING_PATTERN_SOBOL_BURLEY)) { + /* Fallback: assign samples randomly. + * This is guaranteed to work "okay" for any sampler, but isn't good. + * (Note: the seed constant is just a random number to guard against + * possible interactions with other uses of the hash. There's nothing + * special about it.) + */ + return hash_hp_seeded_uint(sample, 0xa771f873) & 1; } -} +#else + (void)pattern; +#endif + /* This follows the approach from section 10.2.1 of "Progressive + * Multi-Jittered Sample Sequences" by Christensen et al., but + * implemented with efficient bit-fiddling. + * + * This approach also turns out to work equally well with Sobol-Burley + * (see https://developer.blender.org/D15746#429471). + */ + return popcount(uint(sample) & 0xaaaaaaaa) & 1; +} CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/sobol_burley.h b/intern/cycles/kernel/sample/sobol_burley.h new file mode 100644 index 00000000000..47796ae7998 --- /dev/null +++ b/intern/cycles/kernel/sample/sobol_burley.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* + * A shuffled, Owen-scrambled Sobol sampler, implemented with the + * techniques from the paper "Practical Hash-based Owen Scrambling" + * by Brent Burley, 2020, Journal of Computer Graphics Techniques. + * + * Note that unlike a standard high-dimensional Sobol sequence, this + * Sobol sampler uses padding to achieve higher dimensions, as described + * in Burley's paper. + */ + +#pragma once + +#include "kernel/sample/util.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Computes a single dimension of a sample from an Owen-scrambled + * Sobol sequence. This is used in the main sampling functions, + * sobol_burley_sample_#D(), below. + * + * - rev_bit_index: the sample index, with reversed order bits. + * - dimension: the sample dimension. + * - scramble_seed: the Owen scrambling seed. + * + * Note that the seed must be well randomized before being + * passed to this function. + */ +ccl_device_forceinline float sobol_burley(uint rev_bit_index, + const uint dimension, + const uint scramble_seed) +{ + uint result = 0; + + if (dimension == 0) { + /* Fast-path for dimension 0, which is just Van der corput. + * This makes a notable difference in performance since we reuse + * dimensions for padding, and dimension 0 is reused the most. */ + result = reverse_integer_bits(rev_bit_index); + } + else { + uint i = 0; + while (rev_bit_index != 0) { + uint j = count_leading_zeros(rev_bit_index); + result ^= sobol_burley_table[dimension][i + j]; + i += j + 1; + + /* We can't do "<<= j + 1" because that can overflow the shift + * operator, which doesn't do what we need on at least x86. */ + rev_bit_index <<= j; + rev_bit_index <<= 1; + } + } + + /* Apply Owen scrambling. */ + result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed)); + + return uint_to_float_excl(result); +} + +/* + * Computes a 1D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed) +{ + /* Include the dimension in the seed, so we get decorrelated + * sequences for different dimensions via shuffling. */ + seed ^= hash_hp_uint(dimension); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe); + + return sobol_burley(index, 0, seed ^ 0x635c77bd); +} + +/* + * Computes a 2D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a); + + return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76), + sobol_burley(index, 1, seed ^ 0x94964d4e)); +} + +/* + * Computes a 3D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac); + + return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391), + sobol_burley(index, 1, seed ^ 0x67c33241), + sobol_burley(index, 2, seed ^ 0x78c395c5)); +} + +/* + * Computes a 4D Owen-scrambled and shuffled Sobol sample. + */ +ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed) +{ + /* Include the dimension set in the seed, so we get decorrelated + * sequences for different dimension sets via shuffling. */ + seed ^= hash_hp_uint(dimension_set); + + /* Shuffle. */ + index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055); + + return make_float4(sobol_burley(index, 0, seed ^ 0x39468210), + sobol_burley(index, 1, seed ^ 0xe9d8a845), + sobol_burley(index, 2, seed ^ 0x5f32b482), + sobol_burley(index, 3, seed ^ 0x1524cc56)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/util.h b/intern/cycles/kernel/sample/util.h new file mode 100644 index 00000000000..29cda179aa2 --- /dev/null +++ b/intern/cycles/kernel/sample/util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* + * Performs base-2 Owen scrambling on a reversed-bit unsigned integer. + * + * This is equivalent to the Laine-Karras permutation, but much higher + * quality. See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash + */ +ccl_device_inline uint reversed_bit_owen(uint n, uint seed) +{ + n ^= n * 0x3d20adea; + n += seed; + n *= (seed >> 16) | 1; + n ^= n * 0x05526c56; + n ^= n * 0x53a22864; + + return n; +} + +/* + * Performs base-2 Owen scrambling on an unsigned integer. + */ +ccl_device_inline uint nested_uniform_scramble(uint i, uint seed) +{ + return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h index b477855dca3..70f52de789b 100644 --- a/intern/cycles/kernel/svm/ao.h +++ b/intern/cycles/kernel/svm/ao.h @@ -31,7 +31,7 @@ ccl_device float svm_ao( return 1.0f; } - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return 1.0f; } @@ -49,17 +49,18 @@ ccl_device float svm_ao( int unoccluded = 0; for (int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + const float2 rand_disk = path_branched_rng_2D( + kg, &rng_state, sample, num_samples, PRNG_SURFACE_AO); - float2 d = concentric_sample_disk(disk_u, disk_v); + float2 d = concentric_sample_disk(rand_disk.x, rand_disk.y); float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); /* Create ray. */ Ray ray; ray.P = sd->P; ray.D = D.x * T + D.y * B + D.z * N; - ray.t = max_dist; + ray.tmin = 0.0f; + ray.tmax = max_dist; ray.time = sd->time; ray.self.object = sd->object; ray.self.prim = sd->prim; diff --git a/intern/cycles/kernel/svm/aov.h b/intern/cycles/kernel/svm/aov.h index 9b818f0e6f8..c574b28c078 100644 --- a/intern/cycles/kernel/svm/aov.h +++ b/intern/cycles/kernel/svm/aov.h @@ -3,7 +3,7 @@ #pragma once -#include "kernel/film/write_passes.h" +#include "kernel/film/aov_passes.h" CCL_NAMESPACE_BEGIN @@ -27,12 +27,7 @@ ccl_device void svm_node_aov_color(KernelGlobals kg, IF_KERNEL_NODES_FEATURE(AOV) { const float3 val = stack_load_float3(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_color + node.z); - kernel_write_pass_float4(buffer, make_float4(val.x, val.y, val.z, 1.0f)); + film_write_aov_pass_color(kg, state, render_buffer, node.z, val); } } @@ -47,12 +42,7 @@ ccl_device void svm_node_aov_value(KernelGlobals kg, IF_KERNEL_NODES_FEATURE(AOV) { const float val = stack_load_float(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_value + node.z); - kernel_write_pass_float(buffer, val); + film_write_aov_pass_value(kg, state, render_buffer, node.z, val); } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h index a3609d8b4b0..5f0d1609f08 100644 --- a/intern/cycles/kernel/svm/attribute.h +++ b/intern/cycles/kernel/svm/attribute.h @@ -140,6 +140,16 @@ ccl_device_noinline void svm_node_attr(KernelGlobals kg, } } +ccl_device_forceinline float3 svm_node_bump_P_dx(const ccl_private ShaderData *sd) +{ + return sd->P + differential_from_compact(sd->Ng, sd->dP).dx; +} + +ccl_device_forceinline float3 svm_node_bump_P_dy(const ccl_private ShaderData *sd) +{ + return sd->P + differential_from_compact(sd->Ng, sd->dP).dy; +} + ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, @@ -167,7 +177,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ - float3 f = sd->P + sd->dP.dx; + float3 f = svm_node_bump_P_dx(sd); if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &f); } @@ -265,7 +275,7 @@ ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ - float3 f = sd->P + sd->dP.dy; + float3 f = svm_node_bump_P_dy(sd); if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &f); } diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h index 5abffe1c771..c1e227959f8 100644 --- a/intern/cycles/kernel/svm/bevel.h +++ b/intern/cycles/kernel/svm/bevel.h @@ -103,7 +103,7 @@ ccl_device float3 svm_bevel( return sd->N; } - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return sd->N; } @@ -128,8 +128,8 @@ ccl_device float3 svm_bevel( path_state_rng_load(state, &rng_state); for (int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + float2 rand_disk = path_branched_rng_2D( + kg, &rng_state, sample, num_samples, PRNG_SURFACE_BEVEL); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -138,13 +138,13 @@ ccl_device float3 svm_bevel( disk_N = sd->Ng; make_orthonormals(disk_N, &disk_T, &disk_B); - float axisu = disk_u; + float axisu = rand_disk.x; if (axisu < 0.5f) { pick_pdf_N = 0.5f; pick_pdf_T = 0.25f; pick_pdf_B = 0.25f; - disk_u *= 2.0f; + rand_disk.x *= 2.0f; } else if (axisu < 0.75f) { float3 tmp = disk_N; @@ -153,7 +153,7 @@ ccl_device float3 svm_bevel( pick_pdf_N = 0.25f; pick_pdf_T = 0.5f; pick_pdf_B = 0.25f; - disk_u = (disk_u - 0.5f) * 4.0f; + rand_disk.x = (rand_disk.x - 0.5f) * 4.0f; } else { float3 tmp = disk_N; @@ -162,12 +162,12 @@ ccl_device float3 svm_bevel( pick_pdf_N = 0.25f; pick_pdf_T = 0.25f; pick_pdf_B = 0.5f; - disk_u = (disk_u - 0.75f) * 4.0f; + rand_disk.x = (rand_disk.x - 0.75f) * 4.0f; } /* Sample point on disk. */ - float phi = M_2PI_F * disk_u; - float disk_r = disk_v; + float phi = M_2PI_F * rand_disk.x; + float disk_r = rand_disk.y; float disk_height; /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */ @@ -179,7 +179,8 @@ ccl_device float3 svm_bevel( Ray ray ccl_optional_struct_init; ray.P = sd->P + disk_N * disk_height + disk_P; ray.D = -disk_N; - ray.t = 2.0f * disk_height; + ray.tmin = 0.0f; + ray.tmax = 2.0f * disk_height; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); ray.time = sd->time; @@ -222,7 +223,7 @@ ccl_device float3 svm_bevel( /* Get geometric normal. */ float3 hit_Ng = isect.Ng[hit]; int object = isect.hits[hit].object; - int object_flag = kernel_tex_fetch(__object_flag, object); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { hit_Ng = -hit_Ng; } @@ -230,7 +231,7 @@ ccl_device float3 svm_bevel( /* Compute smooth normal. */ float3 N = hit_Ng; int prim = isect.hits[hit].prim; - int shader = kernel_tex_fetch(__tri_shader, prim); + int shader = kernel_data_fetch(tri_shader, prim); if (shader & SHADER_SMOOTH_NORMAL) { float u = isect.hits[hit].u; diff --git a/intern/cycles/kernel/svm/bump.h b/intern/cycles/kernel/svm/bump.h index 566c45f5f25..1009a6a4241 100644 --- a/intern/cycles/kernel/svm/bump.h +++ b/intern/cycles/kernel/svm/bump.h @@ -14,23 +14,21 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, { /* save state */ stack_store_float3(stack, offset + 0, sd->P); - stack_store_float3(stack, offset + 3, sd->dP.dx); - stack_store_float3(stack, offset + 6, sd->dP.dy); + stack_store_float(stack, offset + 3, sd->dP); /* set state as if undisplaced */ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); if (desc.offset != ATTR_STD_NOT_FOUND) { - float3 P, dPdx, dPdy; - P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); + differential3 dP; + float3 P = primitive_surface_attribute_float3(kg, sd, desc, &dP.dx, &dP.dy); object_position_transform(kg, sd, &P); - object_dir_transform(kg, sd, &dPdx); - object_dir_transform(kg, sd, &dPdy); + object_dir_transform(kg, sd, &dP.dx); + object_dir_transform(kg, sd, &dP.dy); sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; + sd->dP = differential_make_compact(dP); } } @@ -41,8 +39,7 @@ ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg, { /* restore state */ sd->P = stack_load_float3(stack, offset + 0); - sd->dP.dx = stack_load_float3(stack, offset + 3); - sd->dP.dy = stack_load_float3(stack, offset + 6); + sd->dP = stack_load_float(stack, offset + 3); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 305bd404d27..2d91b014f60 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -3,6 +3,13 @@ #pragma once +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#include "kernel/util/color.h" + CCL_NAMESPACE_BEGIN /* Closure Nodes */ @@ -104,7 +111,6 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, __uint_as_float(node.w); switch (type) { -#ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_ID: { uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, @@ -183,7 +189,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? stack_load_float3(stack, data_cn_ssr.y) : - make_float3(1.0f, 1.0f, 1.0f); + one_float3(); float subsurface_ior = stack_valid(data_cn_ssr.z) ? stack_load_float(stack, data_cn_ssr.z) : 1.4f; float subsurface_anisotropy = stack_valid(data_cn_ssr.w) ? @@ -198,12 +204,12 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, __uint_as_float(data_subsurface_color.z), __uint_as_float(data_subsurface_color.w)); - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; -# ifdef __SUBSURFACE__ +#ifdef __SUBSURFACE__ float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface); - float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; + Spectrum subsurf_weight = weight * rgb_to_spectrum(mixed_ss_base_color) * diffuse_weight; /* disable in case of diffuse ancestor, can't see it well then and * adds considerably noise due to probabilities of continuing path @@ -220,7 +226,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, /* diffuse */ if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; + Spectrum diff_weight = weight * rgb_to_spectrum(base_color) * diffuse_weight; ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *) bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight); @@ -237,8 +243,8 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight); if (bssrdf) { - bssrdf->radius = subsurface_radius * subsurface; - bssrdf->albedo = mixed_ss_base_color; + bssrdf->radius = rgb_to_spectrum(subsurface_radius * subsurface); + bssrdf->albedo = rgb_to_spectrum(mixed_ss_base_color); bssrdf->N = N; bssrdf->roughness = roughness; @@ -251,10 +257,10 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } } } -# else +#else /* diffuse */ if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; + Spectrum diff_weight = weight * rgb_to_spectrum(base_color) * diffuse_weight; ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( sd, sizeof(PrincipledDiffuseBsdf), diff_weight); @@ -267,20 +273,18 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_FULL); } } -# endif +#endif /* sheen */ if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) { float m_cdlum = linear_rgb_to_gray(kg, base_color); - float3 m_ctint = m_cdlum > 0.0f ? - base_color / m_cdlum : - make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat + float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : + one_float3(); // normalize lum. to isolate hue+sat /* color of the sheen component */ - float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + - m_ctint * sheen_tint; + float3 sheen_color = make_float3(1.0f - sheen_tint) + m_ctint * sheen_tint; - float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight; + Spectrum sheen_weight = weight * sheen * rgb_to_spectrum(sheen_color) * diffuse_weight; ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc( sd, sizeof(PrincipledSheenBsdf), sheen_weight); @@ -294,12 +298,12 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } /* specular reflection */ -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif +#endif if (specular_weight > CLOSURE_WEIGHT_CUTOFF && (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) { - float3 spec_weight = weight * specular_weight; + Spectrum spec_weight = weight * specular_weight; ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), spec_weight); @@ -322,16 +326,13 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx. - float3 m_ctint = m_cdlum > 0.0f ? - base_color / m_cdlum : - make_float3( - 1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat - float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + - m_ctint * specular_tint; - - bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + - base_color * metallic; - bsdf->extra->color = base_color; + float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : + one_float3(); // normalize lum. to isolate hue+sat + float3 tmp_col = make_float3(1.0f - specular_tint) + m_ctint * specular_tint; + + bsdf->extra->cspec0 = rgb_to_spectrum( + (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic); + bsdf->extra->color = rgb_to_spectrum(base_color); bsdf->extra->clearcoat = 0.0f; /* setup bsdf */ @@ -342,28 +343,27 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); } } -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ } -# endif +#endif /* BSDF */ -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ if (kernel_data.integrator.caustics_reflective || kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif +#endif if (final_transmission > CLOSURE_WEIGHT_CUTOFF) { - float3 glass_weight = weight * final_transmission; - float3 cspec0 = base_color * specular_tint + - make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint); + Spectrum glass_weight = weight * final_transmission; + float3 cspec0 = base_color * specular_tint + make_float3(1.0f - specular_tint); if (roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */ float refl_roughness = roughness; /* reflection */ -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) -# endif +#endif { ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), glass_weight * fresnel); @@ -374,15 +374,15 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (bsdf && extra) { bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->extra = extra; bsdf->alpha_x = refl_roughness * refl_roughness; bsdf->alpha_y = refl_roughness * refl_roughness; bsdf->ior = ior; - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; + bsdf->extra->color = rgb_to_spectrum(base_color); + bsdf->extra->cspec0 = rgb_to_spectrum(cspec0); bsdf->extra->clearcoat = 0.0f; /* setup bsdf */ @@ -391,17 +391,19 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } /* refraction */ -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) -# endif +#endif { - /* This is to prevent mnee from receiving a null bsdf. */ + /* This is to prevent MNEE from receiving a null BSDF. */ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), base_color * glass_weight * refraction_fresnel); + sd, + sizeof(MicrofacetBsdf), + rgb_to_spectrum(base_color) * glass_weight * refraction_fresnel); if (bsdf) { bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->extra = NULL; if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) @@ -430,14 +432,14 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (bsdf && extra) { bsdf->N = N; bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->alpha_x = roughness * roughness; bsdf->alpha_y = roughness * roughness; bsdf->ior = ior; - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; + bsdf->extra->color = rgb_to_spectrum(base_color); + bsdf->extra->cspec0 = rgb_to_spectrum(cspec0); bsdf->extra->clearcoat = 0.0f; /* setup bsdf */ @@ -445,14 +447,14 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } } } -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ } -# endif +#endif /* clearcoat */ -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif +#endif if (clearcoat > CLOSURE_WEIGHT_CUTOFF) { ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight); @@ -463,30 +465,29 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (bsdf && extra) { bsdf->N = clearcoat_normal; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->ior = 1.5f; bsdf->extra = extra; bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->color = zero_spectrum(); + bsdf->extra->cspec0 = make_spectrum(0.04f); bsdf->extra->clearcoat = clearcoat; /* setup bsdf */ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); } } -# ifdef __CAUSTICS_TRICKS__ +#ifdef __CAUSTICS_TRICKS__ } -# endif +#endif break; } -#endif /* __PRINCIPLED__ */ case CLOSURE_BSDF_DIFFUSE_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc( sd, sizeof(OrenNayarBsdf), weight); @@ -506,7 +507,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, break; } case CLOSURE_BSDF_TRANSLUCENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( sd, sizeof(DiffuseBsdf), weight); @@ -517,7 +518,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, break; } case CLOSURE_BSDF_TRANSPARENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; bsdf_transparent_setup(sd, weight, path_flag); break; } @@ -530,7 +531,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight); @@ -545,7 +546,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, bsdf->extra = NULL; if (data_node.y == SVM_STACK_INVALID) { - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->alpha_x = roughness; bsdf->alpha_y = roughness; } @@ -581,8 +582,8 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, bsdf->extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); if (bsdf->extra) { - bsdf->extra->color = stack_load_float3(stack, data_node.w); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->color = rgb_to_spectrum(stack_load_float3(stack, data_node.w)); + bsdf->extra->cspec0 = zero_spectrum(); bsdf->extra->clearcoat = 0.0f; sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); } @@ -600,13 +601,13 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight); if (bsdf) { bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->extra = NULL; float eta = fmaxf(param2, 1e-5f); @@ -644,7 +645,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, break; } #endif - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; /* index of refraction */ float eta = fmaxf(param2, 1e-5f); @@ -665,7 +666,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (bsdf) { bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->extra = NULL; svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); } @@ -676,14 +677,14 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif { - /* This is to prevent mnee from receiving a null bsdf. */ + /* This is to prevent MNEE from receiving a null BSDF. */ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight * refraction_fresnel); if (bsdf) { bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); bsdf->extra = NULL; svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); } @@ -697,7 +698,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight); if (!bsdf) { @@ -712,7 +713,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, bsdf->N = N; bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->T = zero_float3(); float roughness = sqr(param1); bsdf->alpha_x = roughness; @@ -721,8 +722,8 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; kernel_assert(stack_valid(data_node.z)); - bsdf->extra->color = stack_load_float3(stack, data_node.z); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->color = rgb_to_spectrum(stack_load_float3(stack, data_node.z)); + bsdf->extra->cspec0 = zero_spectrum(); bsdf->extra->clearcoat = 0.0f; /* setup bsdf */ @@ -730,7 +731,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, break; } case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc( sd, sizeof(VelvetBsdf), weight); @@ -749,7 +750,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, ATTR_FALLTHROUGH; #endif case CLOSURE_BSDF_DIFFUSE_TOON_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc( sd, sizeof(ToonBsdf), weight); @@ -771,7 +772,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, uint4 data_node3 = read_node(kg, &offset); uint4 data_node4 = read_node(kg, &offset); - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; uint offset_ofs, ior_ofs, color_ofs, parametrization; svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, ¶metrization); @@ -829,7 +830,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, switch (parametrization) { case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: { float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs); - bsdf->sigma = absorption_coefficient; + bsdf->sigma = rgb_to_spectrum(absorption_coefficient); break; } case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: { @@ -849,20 +850,21 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, /* Benedikt Bitterli's melanin ratio remapping. */ float eumelanin = melanin * (1.0f - melanin_redness); float pheomelanin = melanin * melanin_redness; - float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin, - pheomelanin); + Spectrum melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin, + pheomelanin); /* Optional tint. */ float3 tint = stack_load_float3(stack, tint_ofs); - float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint, - radial_roughness); + Spectrum tint_sigma = bsdf_principled_hair_sigma_from_reflectance( + rgb_to_spectrum(tint), radial_roughness); bsdf->sigma = melanin_sigma + tint_sigma; break; } case NODE_PRINCIPLED_HAIR_REFLECTANCE: { float3 color = stack_load_float3(stack, color_ofs); - bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness); + bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(rgb_to_spectrum(color), + radial_roughness); break; } default: { @@ -879,7 +881,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, } case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc( sd, sizeof(HairBsdf), weight); @@ -916,7 +918,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, case CLOSURE_BSSRDF_BURLEY_ID: case CLOSURE_BSSRDF_RANDOM_WALK_ID: case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; + Spectrum weight = sd->svm_closure_weight * mix_weight; ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight); if (bssrdf) { @@ -926,7 +928,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) param1 = 0.0f; - bssrdf->radius = stack_load_float3(stack, data_node.z) * param1; + bssrdf->radius = rgb_to_spectrum(stack_load_float3(stack, data_node.z) * param1); bssrdf->albedo = sd->svm_closure_weight; bssrdf->N = N; bssrdf->roughness = FLT_MAX; @@ -976,10 +978,10 @@ ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg, density = mix_weight * fmaxf(density, 0.0f); /* Compute scattering coefficient. */ - float3 weight = sd->svm_closure_weight; + Spectrum weight = sd->svm_closure_weight; if (type == CLOSURE_VOLUME_ABSORPTION_ID) { - weight = make_float3(1.0f, 1.0f, 1.0f) - weight; + weight = one_spectrum() - weight; } weight *= density; @@ -1047,11 +1049,11 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, if (density > CLOSURE_WEIGHT_CUTOFF) { /* Compute scattering color. */ - float3 color = sd->svm_closure_weight; + Spectrum color = sd->svm_closure_weight; const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); if (attr_color.offset != ATTR_STD_NOT_FOUND) { - color *= primitive_volume_attribute_float3(kg, sd, attr_color); + color *= rgb_to_spectrum(primitive_volume_attribute_float3(kg, sd, attr_color)); } /* Add closure for volume scattering. */ @@ -1066,10 +1068,13 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, } /* Add extinction weight. */ - float3 zero = make_float3(0.0f, 0.0f, 0.0f); - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero); - float3 absorption = max(one - color, zero) * max(one - absorption_color, zero); + float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), + zero_float3()); + + Spectrum zero = zero_spectrum(); + Spectrum one = one_spectrum(); + Spectrum absorption = max(one - color, zero) * + max(one - rgb_to_spectrum(absorption_color), zero); volume_extinction_setup(sd, (color + absorption) * density); } @@ -1089,7 +1094,7 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, if (emission > CLOSURE_WEIGHT_CUTOFF) { float3 emission_color = stack_load_float3(stack, emission_color_offset); - emission_setup(sd, emission * emission_color); + emission_setup(sd, rgb_to_spectrum(emission * emission_color)); } if (blackbody > CLOSURE_WEIGHT_CUTOFF) { @@ -1113,7 +1118,7 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, float3 blackbody_tint = stack_load_float3(stack, node.w); float3 bb = blackbody_tint * intensity * rec709_to_rgb(kg, svm_math_blackbody_color_rec709(T)); - emission_setup(sd, bb); + emission_setup(sd, rgb_to_spectrum(bb)); } } #endif @@ -1125,7 +1130,7 @@ ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd, uint4 node) { uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; + Spectrum weight = sd->svm_closure_weight; if (stack_valid(mix_weight_offset)) { float mix_weight = stack_load_float(stack, mix_weight_offset); @@ -1144,7 +1149,7 @@ ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd, uint4 node) { uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; + Spectrum weight = sd->svm_closure_weight; if (stack_valid(mix_weight_offset)) { float mix_weight = stack_load_float(stack, mix_weight_offset); @@ -1181,14 +1186,15 @@ ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd, /* Closure Nodes */ -ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight) +ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, Spectrum weight) { sd->svm_closure_weight = weight; } ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b) { - float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); + Spectrum weight = rgb_to_spectrum( + make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b))); svm_node_closure_store_weight(sd, weight); } @@ -1196,7 +1202,7 @@ ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd, ccl_private float *stack, uint weight_offset) { - float3 weight = stack_load_float3(stack, weight_offset); + Spectrum weight = rgb_to_spectrum(stack_load_float3(stack, weight_offset)); svm_node_closure_store_weight(sd, weight); } @@ -1209,7 +1215,7 @@ ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg, uint strength_offset = node.z; float strength = stack_load_float(stack, strength_offset); - float3 weight = stack_load_float3(stack, color_offset) * strength; + Spectrum weight = rgb_to_spectrum(stack_load_float3(stack, color_offset)) * strength; svm_node_closure_store_weight(sd, weight); } diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h index fa22d4bc8c2..96adb6fd64c 100644 --- a/intern/cycles/kernel/svm/color_util.h +++ b/intern/cycles/kernel/svm/color_util.h @@ -244,13 +244,11 @@ ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2) ccl_device float3 svm_mix_clamp(float3 col) { - return saturate3(col); + return saturate(col); } -ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) +ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float t, float3 c1, float3 c2) { - float t = saturatef(fac); - switch (type) { case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2); @@ -282,7 +280,7 @@ ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float return svm_mix_sat(t, c1, c2); case NODE_MIX_VAL: return svm_mix_val(t, c1, c2); - case NODE_MIX_COLOR: + case NODE_MIX_COL: return svm_mix_color(t, c1, c2); case NODE_MIX_SOFT: return svm_mix_soft(t, c1, c2); @@ -295,6 +293,12 @@ ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float return make_float3(0.0f, 0.0f, 0.0f); } +ccl_device_noinline_cpu float3 svm_mix_clamped_factor(NodeMix type, float t, float3 c1, float3 c2) +{ + float fac = saturatef(t); + return svm_mix(type, fac, c1, c2); +} + ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast) { float a = 1.0f + contrast; diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h index 128023263fd..230f8c73820 100644 --- a/intern/cycles/kernel/svm/displace.h +++ b/intern/cycles/kernel/svm/displace.h @@ -24,18 +24,17 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; + differential3 dP = differential_from_compact(sd->Ng, sd->dP); if (use_object_space) { object_inverse_normal_transform(kg, sd, &normal_in); - object_inverse_dir_transform(kg, sd, &dPdx); - object_inverse_dir_transform(kg, sd, &dPdy); + object_inverse_dir_transform(kg, sd, &dP.dx); + object_inverse_dir_transform(kg, sd, &dP.dy); } /* get surface tangents from normal */ - float3 Rx = cross(dPdy, normal_in); - float3 Ry = cross(normal_in, dPdx); + float3 Rx = cross(dP.dy, normal_in); + float3 Ry = cross(normal_in, dP.dx); /* get bump values */ uint c_offset, x_offset, y_offset, strength_offset; @@ -46,7 +45,7 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, float h_y = stack_load_float(stack, y_offset); /* compute surface gradient and determinant */ - float det = dot(dPdx, Rx); + float det = dot(dP.dx, Rx); float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry; float absdet = fabsf(det); diff --git a/intern/cycles/kernel/svm/geometry.h b/intern/cycles/kernel/svm/geometry.h index 4b5368dd765..cbd87d84409 100644 --- a/intern/cycles/kernel/svm/geometry.h +++ b/intern/cycles/kernel/svm/geometry.h @@ -34,7 +34,7 @@ ccl_device_noinline void svm_node_geometry(KernelGlobals kg, data = sd->Ng; break; case NODE_GEOM_uv: - data = make_float3(sd->u, sd->v, 0.0f); + data = make_float3(1.0f - sd->u - sd->v, sd->u, 0.0f); break; default: data = make_float3(0.0f, 0.0f, 0.0f); @@ -54,10 +54,10 @@ ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg, switch (type) { case NODE_GEOM_P: - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); break; case NODE_GEOM_uv: - data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); + data = make_float3(1.0f - sd->u - sd->du.dx - sd->v - sd->dv.dx, sd->u + sd->du.dx, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); @@ -81,10 +81,10 @@ ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg, switch (type) { case NODE_GEOM_P: - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); break; case NODE_GEOM_uv: - data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); + data = make_float3(1.0f - sd->u - sd->du.dy - sd->v - sd->dv.dy, sd->u + sd->du.dy, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); diff --git a/intern/cycles/kernel/svm/ies.h b/intern/cycles/kernel/svm/ies.h index 201d88101cd..3648cb580d5 100644 --- a/intern/cycles/kernel/svm/ies.h +++ b/intern/cycles/kernel/svm/ies.h @@ -17,7 +17,7 @@ ccl_device_inline float interpolate_ies_vertical( * Therefore, the assumption is made that the light is going to be symmetrical, which means that * we can just take the corresponding value at the current horizontal coordinate. */ -#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) +#define IES_LOOKUP(v) kernel_data_fetch(ies, ofs + h * v_num + (v)) /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ float a = IES_LOOKUP((v == 0) ? 1 : v - 1); float b = IES_LOOKUP(v); @@ -31,16 +31,16 @@ ccl_device_inline float interpolate_ies_vertical( ccl_device_inline float kernel_ies_interp(KernelGlobals kg, int slot, float h_angle, float v_angle) { /* Find offset of the IES data in the table. */ - int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); + int ofs = __float_as_int(kernel_data_fetch(ies, slot)); if (ofs == -1) { return 100.0f; } - int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); - int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); + int h_num = __float_as_int(kernel_data_fetch(ies, ofs++)); + int v_num = __float_as_int(kernel_data_fetch(ies, ofs++)); -#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h)) -#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v)) +#define IES_LOOKUP_ANGLE_H(h) kernel_data_fetch(ies, ofs + (h)) +#define IES_LOOKUP_ANGLE_V(v) kernel_data_fetch(ies, ofs + h_num + (v)) /* Check whether the angle is within the bounds of the IES texture. */ if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) { diff --git a/intern/cycles/kernel/svm/map_range.h b/intern/cycles/kernel/svm/map_range.h index ff0e462041c..ea85bc43b74 100644 --- a/intern/cycles/kernel/svm/map_range.h +++ b/intern/cycles/kernel/svm/map_range.h @@ -112,10 +112,10 @@ ccl_device_noinline int svm_node_vector_map_range(KernelGlobals kg, switch (range_type_stack_offset) { default: case NODE_MAP_RANGE_LINEAR: - factor = safe_divide_float3_float3((value - from_min), (from_max - from_min)); + factor = safe_divide((value - from_min), (from_max - from_min)); break; case NODE_MAP_RANGE_STEPPED: { - factor = safe_divide_float3_float3((value - from_min), (from_max - from_min)); + factor = safe_divide((value - from_min), (from_max - from_min)); factor = make_float3((steps.x > 0.0f) ? floorf(factor.x * (steps.x + 1.0f)) / steps.x : 0.0f, (steps.y > 0.0f) ? floorf(factor.y * (steps.y + 1.0f)) / steps.y : 0.0f, (steps.z > 0.0f) ? floorf(factor.z * (steps.z + 1.0f)) / steps.z : @@ -123,13 +123,13 @@ ccl_device_noinline int svm_node_vector_map_range(KernelGlobals kg, break; } case NODE_MAP_RANGE_SMOOTHSTEP: { - factor = safe_divide_float3_float3((value - from_min), (from_max - from_min)); + factor = safe_divide((value - from_min), (from_max - from_min)); factor = clamp(factor, zero_float3(), one_float3()); factor = (make_float3(3.0f, 3.0f, 3.0f) - 2.0f * factor) * (factor * factor); break; } case NODE_MAP_RANGE_SMOOTHERSTEP: { - factor = safe_divide_float3_float3((value - from_min), (from_max - from_min)); + factor = safe_divide((value - from_min), (from_max - from_min)); factor = clamp(factor, zero_float3(), one_float3()); factor = factor * factor * factor * (factor * (factor * 6.0f - 15.0f) + 10.0f); break; diff --git a/intern/cycles/kernel/svm/mapping_util.h b/intern/cycles/kernel/svm/mapping_util.h index c616d4018c4..13257c762e7 100644 --- a/intern/cycles/kernel/svm/mapping_util.h +++ b/intern/cycles/kernel/svm/mapping_util.h @@ -13,13 +13,12 @@ svm_mapping(NodeMappingType type, float3 vector, float3 location, float3 rotatio case NODE_MAPPING_TYPE_POINT: return transform_direction(&rotationTransform, (vector * scale)) + location; case NODE_MAPPING_TYPE_TEXTURE: - return safe_divide_float3_float3( - transform_direction_transposed(&rotationTransform, (vector - location)), scale); + return safe_divide(transform_direction_transposed(&rotationTransform, (vector - location)), + scale); case NODE_MAPPING_TYPE_VECTOR: return transform_direction(&rotationTransform, (vector * scale)); case NODE_MAPPING_TYPE_NORMAL: - return safe_normalize( - transform_direction(&rotationTransform, safe_divide_float3_float3(vector, scale))); + return safe_normalize(transform_direction(&rotationTransform, safe_divide(vector, scale))); default: return make_float3(0.0f, 0.0f, 0.0f); } diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h index 89bd4a501a7..d90d4f0f794 100644 --- a/intern/cycles/kernel/svm/math_util.h +++ b/intern/cycles/kernel/svm/math_util.h @@ -24,7 +24,7 @@ ccl_device void svm_vector_math(ccl_private float *value, *vector = a * b; break; case NODE_VECTOR_MATH_DIVIDE: - *vector = safe_divide_float3_float3(a, b); + *vector = safe_divide(a, b); break; case NODE_VECTOR_MATH_CROSS_PRODUCT: *vector = cross(a, b); @@ -60,7 +60,7 @@ ccl_device void svm_vector_math(ccl_private float *value, *vector = safe_normalize(a); break; case NODE_VECTOR_MATH_SNAP: - *vector = floor(safe_divide_float3_float3(a, b)) * b; + *vector = floor(safe_divide(a, b)) * b; break; case NODE_VECTOR_MATH_FLOOR: *vector = floor(a); diff --git a/intern/cycles/kernel/svm/mix.h b/intern/cycles/kernel/svm/mix.h index a9796096410..ead2fc44685 100644 --- a/intern/cycles/kernel/svm/mix.h +++ b/intern/cycles/kernel/svm/mix.h @@ -21,10 +21,94 @@ ccl_device_noinline int svm_node_mix(KernelGlobals kg, float fac = stack_load_float(stack, fac_offset); float3 c1 = stack_load_float3(stack, c1_offset); float3 c2 = stack_load_float3(stack, c2_offset); - float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2); + float3 result = svm_mix_clamped_factor((NodeMix)node1.y, fac, c1, c2); stack_store_float3(stack, node1.z, result); return offset; } +ccl_device_noinline void svm_node_mix_color(ccl_private ShaderData *sd, + ccl_private float *stack, + uint options, + uint input_offset, + uint result_offset) +{ + uint use_clamp, blend_type, use_clamp_result; + uint fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset; + svm_unpack_node_uchar3(options, &use_clamp, &blend_type, &use_clamp_result); + svm_unpack_node_uchar3( + input_offset, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset); + + float t = stack_load_float(stack, fac_in_stack_offset); + if (use_clamp > 0) { + t = saturatef(t); + } + float3 a = stack_load_float3(stack, a_in_stack_offset); + float3 b = stack_load_float3(stack, b_in_stack_offset); + float3 result = svm_mix((NodeMix)blend_type, t, a, b); + if (use_clamp_result) { + result = saturate(result); + } + stack_store_float3(stack, result_offset, result); +} + +ccl_device_noinline void svm_node_mix_float(ccl_private ShaderData *sd, + ccl_private float *stack, + uint use_clamp, + uint input_offset, + uint result_offset) +{ + uint fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset; + svm_unpack_node_uchar3( + input_offset, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset); + + float t = stack_load_float(stack, fac_in_stack_offset); + if (use_clamp > 0) { + t = saturatef(t); + } + float a = stack_load_float(stack, a_in_stack_offset); + float b = stack_load_float(stack, b_in_stack_offset); + float result = a * (1 - t) + b * t; + + stack_store_float(stack, result_offset, result); +} + +ccl_device_noinline void svm_node_mix_vector(ccl_private ShaderData *sd, + ccl_private float *stack, + uint input_offset, + uint result_offset) +{ + uint use_clamp, fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset; + svm_unpack_node_uchar4( + input_offset, &use_clamp, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset); + + float t = stack_load_float(stack, fac_in_stack_offset); + if (use_clamp > 0) { + t = saturatef(t); + } + float3 a = stack_load_float3(stack, a_in_stack_offset); + float3 b = stack_load_float3(stack, b_in_stack_offset); + float3 result = a * (one_float3() - t) + b * t; + stack_store_float3(stack, result_offset, result); +} + +ccl_device_noinline void svm_node_mix_vector_non_uniform(ccl_private ShaderData *sd, + ccl_private float *stack, + uint input_offset, + uint result_offset) +{ + uint use_clamp, fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset; + svm_unpack_node_uchar4( + input_offset, &use_clamp, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset); + + float3 t = stack_load_float3(stack, fac_in_stack_offset); + if (use_clamp > 0) { + t = saturate(t); + } + float3 a = stack_load_float3(stack, a_in_stack_offset); + float3 b = stack_load_float3(stack, b_in_stack_offset); + float3 result = a * (one_float3() - t) + b * t; + stack_store_float3(stack, result_offset, result); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/musgrave.h b/intern/cycles/kernel/svm/musgrave.h index 521c96d9f37..8bf172f0981 100644 --- a/intern/cycles/kernel/svm/musgrave.h +++ b/intern/cycles/kernel/svm/musgrave.h @@ -119,13 +119,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d( { float p = co; float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - float value = snoise_1d(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0f; + float value = 0.0f; + float weight = 1.0f; - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { if (weight > 1.0f) { weight = 1.0f; } @@ -138,8 +137,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d( } float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_1d(p) + offset) * pwr); + if ((rmd != 0.0f) && (weight > 0.001f)) { + if (weight > 1.0f) { + weight = 1.0f; + } + float signal = (snoise_1d(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -290,13 +293,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d( { float2 p = co; float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - float value = snoise_2d(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0f; + float value = 0.0f; + float weight = 1.0f; - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { if (weight > 1.0f) { weight = 1.0f; } @@ -309,8 +311,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d( } float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_2d(p) + offset) * pwr); + if ((rmd != 0.0f) && (weight > 0.001f)) { + if (weight > 1.0f) { + weight = 1.0f; + } + float signal = (snoise_2d(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -461,13 +467,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d( { float3 p = co; float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - float value = snoise_3d(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0f; + float value = 0.0f; + float weight = 1.0f; - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { if (weight > 1.0f) { weight = 1.0f; } @@ -480,8 +485,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d( } float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_3d(p) + offset) * pwr); + if ((rmd != 0.0f) && (weight > 0.001f)) { + if (weight > 1.0f) { + weight = 1.0f; + } + float signal = (snoise_3d(p) + offset) * pwr; + value += rmd * weight * signal; } return value; @@ -632,13 +641,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d( { float4 p = co; float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - float value = snoise_4d(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0f; + float value = 0.0f; + float weight = 1.0f; - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { if (weight > 1.0f) { weight = 1.0f; } @@ -651,8 +659,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d( } float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_4d(p) + offset) * pwr); + if ((rmd != 0.0f) && (weight > 0.001f)) { + if (weight > 1.0f) { + weight = 1.0f; + } + float signal = (snoise_4d(p) + offset) * pwr; + value += rmd * weight * signal; } return value; diff --git a/intern/cycles/kernel/svm/node_types_template.h b/intern/cycles/kernel/svm/node_types_template.h new file mode 100644 index 00000000000..aab9b9f1158 --- /dev/null +++ b/intern/cycles/kernel/svm/node_types_template.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef SHADER_NODE_TYPE +# define SHADER_NODE_TYPE(name) +#endif + +/* NOTE: for best OpenCL performance, item definition in the enum must + * match the switch case order in `svm.h`. */ + +SHADER_NODE_TYPE(NODE_END) +SHADER_NODE_TYPE(NODE_SHADER_JUMP) +SHADER_NODE_TYPE(NODE_CLOSURE_BSDF) +SHADER_NODE_TYPE(NODE_CLOSURE_EMISSION) +SHADER_NODE_TYPE(NODE_CLOSURE_BACKGROUND) +SHADER_NODE_TYPE(NODE_CLOSURE_SET_WEIGHT) +SHADER_NODE_TYPE(NODE_CLOSURE_WEIGHT) +SHADER_NODE_TYPE(NODE_EMISSION_WEIGHT) +SHADER_NODE_TYPE(NODE_MIX_CLOSURE) +SHADER_NODE_TYPE(NODE_JUMP_IF_ZERO) +SHADER_NODE_TYPE(NODE_JUMP_IF_ONE) +SHADER_NODE_TYPE(NODE_GEOMETRY) +SHADER_NODE_TYPE(NODE_CONVERT) +SHADER_NODE_TYPE(NODE_TEX_COORD) +SHADER_NODE_TYPE(NODE_VALUE_F) +SHADER_NODE_TYPE(NODE_VALUE_V) +SHADER_NODE_TYPE(NODE_ATTR) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR) +SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DX) +SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DY) +SHADER_NODE_TYPE(NODE_SET_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_VECTOR_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_TEX_IMAGE) +SHADER_NODE_TYPE(NODE_TEX_IMAGE_BOX) +SHADER_NODE_TYPE(NODE_TEX_NOISE) +SHADER_NODE_TYPE(NODE_SET_BUMP) +SHADER_NODE_TYPE(NODE_ATTR_BUMP_DX) +SHADER_NODE_TYPE(NODE_ATTR_BUMP_DY) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DX) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DY) +SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DX) +SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DY) +SHADER_NODE_TYPE(NODE_CLOSURE_SET_NORMAL) +SHADER_NODE_TYPE(NODE_ENTER_BUMP_EVAL) +SHADER_NODE_TYPE(NODE_LEAVE_BUMP_EVAL) +SHADER_NODE_TYPE(NODE_HSV) +SHADER_NODE_TYPE(NODE_CLOSURE_HOLDOUT) +SHADER_NODE_TYPE(NODE_FRESNEL) +SHADER_NODE_TYPE(NODE_LAYER_WEIGHT) +SHADER_NODE_TYPE(NODE_CLOSURE_VOLUME) +SHADER_NODE_TYPE(NODE_PRINCIPLED_VOLUME) +SHADER_NODE_TYPE(NODE_MATH) +SHADER_NODE_TYPE(NODE_VECTOR_MATH) +SHADER_NODE_TYPE(NODE_RGB_RAMP) +SHADER_NODE_TYPE(NODE_GAMMA) +SHADER_NODE_TYPE(NODE_BRIGHTCONTRAST) +SHADER_NODE_TYPE(NODE_LIGHT_PATH) +SHADER_NODE_TYPE(NODE_OBJECT_INFO) +SHADER_NODE_TYPE(NODE_PARTICLE_INFO) +SHADER_NODE_TYPE(NODE_HAIR_INFO) +SHADER_NODE_TYPE(NODE_POINT_INFO) +SHADER_NODE_TYPE(NODE_TEXTURE_MAPPING) +SHADER_NODE_TYPE(NODE_MAPPING) +SHADER_NODE_TYPE(NODE_MIN_MAX) +SHADER_NODE_TYPE(NODE_CAMERA) +SHADER_NODE_TYPE(NODE_TEX_ENVIRONMENT) +SHADER_NODE_TYPE(NODE_TEX_SKY) +SHADER_NODE_TYPE(NODE_TEX_GRADIENT) +SHADER_NODE_TYPE(NODE_TEX_VORONOI) +SHADER_NODE_TYPE(NODE_TEX_MUSGRAVE) +SHADER_NODE_TYPE(NODE_TEX_WAVE) +SHADER_NODE_TYPE(NODE_TEX_MAGIC) +SHADER_NODE_TYPE(NODE_TEX_CHECKER) +SHADER_NODE_TYPE(NODE_TEX_BRICK) +SHADER_NODE_TYPE(NODE_TEX_WHITE_NOISE) +SHADER_NODE_TYPE(NODE_NORMAL) +SHADER_NODE_TYPE(NODE_LIGHT_FALLOFF) +SHADER_NODE_TYPE(NODE_IES) +SHADER_NODE_TYPE(NODE_CURVES) +SHADER_NODE_TYPE(NODE_TANGENT) +SHADER_NODE_TYPE(NODE_NORMAL_MAP) +SHADER_NODE_TYPE(NODE_INVERT) +SHADER_NODE_TYPE(NODE_MIX) +SHADER_NODE_TYPE(NODE_SEPARATE_COLOR) +SHADER_NODE_TYPE(NODE_COMBINE_COLOR) +SHADER_NODE_TYPE(NODE_SEPARATE_VECTOR) +SHADER_NODE_TYPE(NODE_COMBINE_VECTOR) +SHADER_NODE_TYPE(NODE_SEPARATE_HSV) +SHADER_NODE_TYPE(NODE_COMBINE_HSV) +SHADER_NODE_TYPE(NODE_VECTOR_ROTATE) +SHADER_NODE_TYPE(NODE_VECTOR_TRANSFORM) +SHADER_NODE_TYPE(NODE_WIREFRAME) +SHADER_NODE_TYPE(NODE_WAVELENGTH) +SHADER_NODE_TYPE(NODE_BLACKBODY) +SHADER_NODE_TYPE(NODE_MAP_RANGE) +SHADER_NODE_TYPE(NODE_VECTOR_MAP_RANGE) +SHADER_NODE_TYPE(NODE_CLAMP) +SHADER_NODE_TYPE(NODE_BEVEL) +SHADER_NODE_TYPE(NODE_AMBIENT_OCCLUSION) +SHADER_NODE_TYPE(NODE_TEX_VOXEL) +SHADER_NODE_TYPE(NODE_AOV_START) +SHADER_NODE_TYPE(NODE_AOV_COLOR) +SHADER_NODE_TYPE(NODE_AOV_VALUE) +SHADER_NODE_TYPE(NODE_FLOAT_CURVE) +SHADER_NODE_TYPE(NODE_MIX_COLOR) +SHADER_NODE_TYPE(NODE_MIX_FLOAT) +SHADER_NODE_TYPE(NODE_MIX_VECTOR) +SHADER_NODE_TYPE(NODE_MIX_VECTOR_NON_UNIFORM) + +/* Padding for struct alignment. */ +SHADER_NODE_TYPE(NODE_PAD1) + +#undef SHADER_NODE_TYPE diff --git a/intern/cycles/kernel/svm/ramp.h b/intern/cycles/kernel/svm/ramp.h index 342b15da9ed..0df9268bd9c 100644 --- a/intern/cycles/kernel/svm/ramp.h +++ b/intern/cycles/kernel/svm/ramp.h @@ -9,7 +9,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float fetch_float(KernelGlobals kg, int offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, offset); + uint4 node = kernel_data_fetch(svm_nodes, offset); return __uint_as_float(node.x); } diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 5def943c87f..3ca632c5f0b 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -95,14 +95,14 @@ ccl_device_inline bool stack_valid(uint a) ccl_device_inline uint4 read_node(KernelGlobals kg, ccl_private int *offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, *offset); + uint4 node = kernel_data_fetch(svm_nodes, *offset); (*offset)++; return node; } ccl_device_inline float4 read_node_float(KernelGlobals kg, ccl_private int *offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, *offset); + uint4 node = kernel_data_fetch(svm_nodes, *offset); float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), @@ -113,7 +113,7 @@ ccl_device_inline float4 read_node_float(KernelGlobals kg, ccl_private int *offs ccl_device_inline float4 fetch_node_float(KernelGlobals kg, int offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, offset); + uint4 node = kernel_data_fetch(svm_nodes, offset); return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), @@ -204,6 +204,15 @@ CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN +#ifdef __KERNEL_USE_DATA_CONSTANTS__ +# define SVM_CASE(node) \ + case node: \ + if (!kernel_data_svm_usage_##node) \ + break; +#else +# define SVM_CASE(node) case node: +#endif + /* Main Interpreter Loop */ template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState> ccl_device void svm_eval_nodes(KernelGlobals kg, @@ -219,9 +228,10 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, uint4 node = read_node(kg, &offset); switch (node.x) { - case NODE_END: - return; - case NODE_SHADER_JUMP: { + SVM_CASE(NODE_END) + return; + SVM_CASE(NODE_SHADER_JUMP) + { if (type == SHADER_TYPE_SURFACE) offset = node.y; else if (type == SHADER_TYPE_VOLUME) @@ -232,351 +242,361 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, return; break; } - case NODE_CLOSURE_BSDF: - offset = svm_node_closure_bsdf<node_feature_mask, type>( - kg, sd, stack, node, path_flag, offset); - break; - case NODE_CLOSURE_EMISSION: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_closure_emission(sd, stack, node); - } - break; - case NODE_CLOSURE_BACKGROUND: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_closure_background(sd, stack, node); - } - break; - case NODE_CLOSURE_SET_WEIGHT: - svm_node_closure_set_weight(sd, node.y, node.z, node.w); - break; - case NODE_CLOSURE_WEIGHT: - svm_node_closure_weight(sd, stack, node.y); - break; - case NODE_EMISSION_WEIGHT: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_emission_weight(kg, sd, stack, node); - } - break; - case NODE_MIX_CLOSURE: - svm_node_mix_closure(sd, stack, node); - break; - case NODE_JUMP_IF_ZERO: - if (stack_load_float(stack, node.z) == 0.0f) - offset += node.y; - break; - case NODE_JUMP_IF_ONE: - if (stack_load_float(stack, node.z) == 1.0f) - offset += node.y; - break; - case NODE_GEOMETRY: - svm_node_geometry(kg, sd, stack, node.y, node.z); - break; - case NODE_CONVERT: - svm_node_convert(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_TEX_COORD: - offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); - break; - case NODE_VALUE_F: - svm_node_value_f(kg, sd, stack, node.y, node.z); - break; - case NODE_VALUE_V: - offset = svm_node_value_v(kg, sd, stack, node.y, offset); - break; - case NODE_ATTR: - svm_node_attr<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_VERTEX_COLOR: - svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_GEOMETRY_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); - } - break; - case NODE_GEOMETRY_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); - } - break; - case NODE_SET_DISPLACEMENT: - svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y); - break; - case NODE_DISPLACEMENT: - svm_node_displacement<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_VECTOR_DISPLACEMENT: - offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset); - break; - case NODE_TEX_IMAGE: - offset = svm_node_tex_image(kg, sd, stack, node, offset); - break; - case NODE_TEX_IMAGE_BOX: - svm_node_tex_image_box(kg, sd, stack, node); - break; - case NODE_TEX_NOISE: - offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_SET_BUMP: - svm_node_set_bump<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_ATTR_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_attr_bump_dx(kg, sd, stack, node); - } - break; - case NODE_ATTR_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_attr_bump_dy(kg, sd, stack, node); - } - break; - case NODE_VERTEX_COLOR_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); - } - break; - case NODE_VERTEX_COLOR_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); - } - break; - case NODE_TEX_COORD_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); - } - break; - case NODE_TEX_COORD_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); - } - break; - case NODE_CLOSURE_SET_NORMAL: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_set_normal(kg, sd, stack, node.y, node.z); - } - break; - case NODE_ENTER_BUMP_EVAL: - IF_KERNEL_NODES_FEATURE(BUMP_STATE) - { - svm_node_enter_bump_eval(kg, sd, stack, node.y); - } - break; - case NODE_LEAVE_BUMP_EVAL: - IF_KERNEL_NODES_FEATURE(BUMP_STATE) - { - svm_node_leave_bump_eval(kg, sd, stack, node.y); - } - break; - case NODE_HSV: - svm_node_hsv(kg, sd, stack, node); - break; - - case NODE_CLOSURE_HOLDOUT: - svm_node_closure_holdout(sd, stack, node); - break; - case NODE_FRESNEL: - svm_node_fresnel(sd, stack, node.y, node.z, node.w); - break; - case NODE_LAYER_WEIGHT: - svm_node_layer_weight(sd, stack, node); - break; - case NODE_CLOSURE_VOLUME: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - svm_node_closure_volume<type>(kg, sd, stack, node); - } - break; - case NODE_PRINCIPLED_VOLUME: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset); - } - break; - case NODE_MATH: - svm_node_math(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_VECTOR_MATH: - offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_RGB_RAMP: - offset = svm_node_rgb_ramp(kg, sd, stack, node, offset); - break; - case NODE_GAMMA: - svm_node_gamma(sd, stack, node.y, node.z, node.w); - break; - case NODE_BRIGHTCONTRAST: - svm_node_brightness(sd, stack, node.y, node.z, node.w); - break; - case NODE_LIGHT_PATH: - svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag); - break; - case NODE_OBJECT_INFO: - svm_node_object_info(kg, sd, stack, node.y, node.z); - break; - case NODE_PARTICLE_INFO: - svm_node_particle_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_CLOSURE_BSDF) + offset = svm_node_closure_bsdf<node_feature_mask, type>( + kg, sd, stack, node, path_flag, offset); + break; + SVM_CASE(NODE_CLOSURE_EMISSION) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_closure_emission(sd, stack, node); + } + break; + SVM_CASE(NODE_CLOSURE_BACKGROUND) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_closure_background(sd, stack, node); + } + break; + SVM_CASE(NODE_CLOSURE_SET_WEIGHT) + svm_node_closure_set_weight(sd, node.y, node.z, node.w); + break; + SVM_CASE(NODE_CLOSURE_WEIGHT) + svm_node_closure_weight(sd, stack, node.y); + break; + SVM_CASE(NODE_EMISSION_WEIGHT) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_emission_weight(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_MIX_CLOSURE) + svm_node_mix_closure(sd, stack, node); + break; + SVM_CASE(NODE_JUMP_IF_ZERO) + if (stack_load_float(stack, node.z) <= 0.0f) + offset += node.y; + break; + SVM_CASE(NODE_JUMP_IF_ONE) + if (stack_load_float(stack, node.z) >= 1.0f) + offset += node.y; + break; + SVM_CASE(NODE_GEOMETRY) + svm_node_geometry(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_CONVERT) + svm_node_convert(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_TEX_COORD) + offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + break; + SVM_CASE(NODE_VALUE_F) + svm_node_value_f(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_VALUE_V) + offset = svm_node_value_v(kg, sd, stack, node.y, offset); + break; + SVM_CASE(NODE_ATTR) + svm_node_attr<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_VERTEX_COLOR) + svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_GEOMETRY_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_GEOMETRY_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_SET_DISPLACEMENT) + svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y); + break; + SVM_CASE(NODE_DISPLACEMENT) + svm_node_displacement<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_VECTOR_DISPLACEMENT) + offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_IMAGE) + offset = svm_node_tex_image(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_IMAGE_BOX) + svm_node_tex_image_box(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_NOISE) + offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_SET_BUMP) + svm_node_set_bump<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_ATTR_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_attr_bump_dx(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_ATTR_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_attr_bump_dy(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_VERTEX_COLOR_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); + } + break; + SVM_CASE(NODE_VERTEX_COLOR_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); + } + break; + SVM_CASE(NODE_TEX_COORD_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); + } + break; + SVM_CASE(NODE_TEX_COORD_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); + } + break; + SVM_CASE(NODE_CLOSURE_SET_NORMAL) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_set_normal(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_ENTER_BUMP_EVAL) + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { + svm_node_enter_bump_eval(kg, sd, stack, node.y); + } + break; + SVM_CASE(NODE_LEAVE_BUMP_EVAL) + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { + svm_node_leave_bump_eval(kg, sd, stack, node.y); + } + break; + SVM_CASE(NODE_HSV) + svm_node_hsv(kg, sd, stack, node); + break; + SVM_CASE(NODE_CLOSURE_HOLDOUT) + svm_node_closure_holdout(sd, stack, node); + break; + SVM_CASE(NODE_FRESNEL) + svm_node_fresnel(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_LAYER_WEIGHT) + svm_node_layer_weight(sd, stack, node); + break; + SVM_CASE(NODE_CLOSURE_VOLUME) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + svm_node_closure_volume<type>(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_PRINCIPLED_VOLUME) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset); + } + break; + SVM_CASE(NODE_MATH) + svm_node_math(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_VECTOR_MATH) + offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_RGB_RAMP) + offset = svm_node_rgb_ramp(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_GAMMA) + svm_node_gamma(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_BRIGHTCONTRAST) + svm_node_brightness(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_LIGHT_PATH) + svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag); + break; + SVM_CASE(NODE_OBJECT_INFO) + svm_node_object_info(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_PARTICLE_INFO) + svm_node_particle_info(kg, sd, stack, node.y, node.z); + break; #if defined(__HAIR__) - case NODE_HAIR_INFO: - svm_node_hair_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_HAIR_INFO) + svm_node_hair_info(kg, sd, stack, node.y, node.z); + break; #endif #if defined(__POINTCLOUD__) - case NODE_POINT_INFO: - svm_node_point_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_POINT_INFO) + svm_node_point_info(kg, sd, stack, node.y, node.z); + break; #endif - case NODE_TEXTURE_MAPPING: - offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); - break; - case NODE_MAPPING: - svm_node_mapping(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_MIN_MAX: - offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset); - break; - case NODE_CAMERA: - svm_node_camera(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_TEX_ENVIRONMENT: - svm_node_tex_environment(kg, sd, stack, node); - break; - case NODE_TEX_SKY: - offset = svm_node_tex_sky(kg, sd, stack, node, offset); - break; - case NODE_TEX_GRADIENT: - svm_node_tex_gradient(sd, stack, node); - break; - case NODE_TEX_VORONOI: - offset = svm_node_tex_voronoi<node_feature_mask>( - kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_TEX_MUSGRAVE: - offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_TEX_WAVE: - offset = svm_node_tex_wave(kg, sd, stack, node, offset); - break; - case NODE_TEX_MAGIC: - offset = svm_node_tex_magic(kg, sd, stack, node, offset); - break; - case NODE_TEX_CHECKER: - svm_node_tex_checker(kg, sd, stack, node); - break; - case NODE_TEX_BRICK: - offset = svm_node_tex_brick(kg, sd, stack, node, offset); - break; - case NODE_TEX_WHITE_NOISE: - svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_NORMAL: - offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_LIGHT_FALLOFF: - svm_node_light_falloff(sd, stack, node); - break; - case NODE_IES: - svm_node_ies(kg, sd, stack, node); - break; - case NODE_RGB_CURVES: - case NODE_VECTOR_CURVES: - offset = svm_node_curves(kg, sd, stack, node, offset); - break; - case NODE_FLOAT_CURVE: - offset = svm_node_curve(kg, sd, stack, node, offset); - break; - case NODE_TANGENT: - svm_node_tangent(kg, sd, stack, node); - break; - case NODE_NORMAL_MAP: - svm_node_normal_map(kg, sd, stack, node); - break; - case NODE_INVERT: - svm_node_invert(sd, stack, node.y, node.z, node.w); - break; - case NODE_MIX: - offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_SEPARATE_COLOR: - svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_COMBINE_COLOR: - svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_SEPARATE_VECTOR: - svm_node_separate_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_COMBINE_VECTOR: - svm_node_combine_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_SEPARATE_HSV: - offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_COMBINE_HSV: - offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_VECTOR_ROTATE: - svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); - break; - case NODE_VECTOR_TRANSFORM: - svm_node_vector_transform(kg, sd, stack, node); - break; - case NODE_WIREFRAME: - svm_node_wireframe(kg, sd, stack, node); - break; - case NODE_WAVELENGTH: - svm_node_wavelength(kg, sd, stack, node.y, node.z); - break; - case NODE_BLACKBODY: - svm_node_blackbody(kg, sd, stack, node.y, node.z); - break; - case NODE_MAP_RANGE: - offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_VECTOR_MAP_RANGE: - offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_CLAMP: - offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset); - break; + SVM_CASE(NODE_TEXTURE_MAPPING) + offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); + break; + SVM_CASE(NODE_MAPPING) + svm_node_mapping(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIN_MAX) + offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset); + break; + SVM_CASE(NODE_CAMERA) + svm_node_camera(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_TEX_ENVIRONMENT) + svm_node_tex_environment(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_SKY) + offset = svm_node_tex_sky(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_GRADIENT) + svm_node_tex_gradient(sd, stack, node); + break; + SVM_CASE(NODE_TEX_VORONOI) + offset = svm_node_tex_voronoi<node_feature_mask>( + kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_TEX_MUSGRAVE) + offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_TEX_WAVE) + offset = svm_node_tex_wave(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_MAGIC) + offset = svm_node_tex_magic(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_CHECKER) + svm_node_tex_checker(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_BRICK) + offset = svm_node_tex_brick(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_WHITE_NOISE) + svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_NORMAL) + offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_LIGHT_FALLOFF) + svm_node_light_falloff(sd, stack, node); + break; + SVM_CASE(NODE_IES) + svm_node_ies(kg, sd, stack, node); + break; + SVM_CASE(NODE_CURVES) + offset = svm_node_curves(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_FLOAT_CURVE) + offset = svm_node_curve(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TANGENT) + svm_node_tangent(kg, sd, stack, node); + break; + SVM_CASE(NODE_NORMAL_MAP) + svm_node_normal_map(kg, sd, stack, node); + break; + SVM_CASE(NODE_INVERT) + svm_node_invert(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIX) + offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_SEPARATE_COLOR) + svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_COMBINE_COLOR) + svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_SEPARATE_VECTOR) + svm_node_separate_vector(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_COMBINE_VECTOR) + svm_node_combine_vector(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_SEPARATE_HSV) + offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_COMBINE_HSV) + offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_VECTOR_ROTATE) + svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_VECTOR_TRANSFORM) + svm_node_vector_transform(kg, sd, stack, node); + break; + SVM_CASE(NODE_WIREFRAME) + svm_node_wireframe(kg, sd, stack, node); + break; + SVM_CASE(NODE_WAVELENGTH) + svm_node_wavelength(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_BLACKBODY) + svm_node_blackbody(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_MAP_RANGE) + offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_VECTOR_MAP_RANGE) + offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_CLAMP) + offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset); + break; #ifdef __SHADER_RAYTRACE__ - case NODE_BEVEL: - svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node); - break; - case NODE_AMBIENT_OCCLUSION: - svm_node_ao<node_feature_mask>(kg, state, sd, stack, node); - break; + SVM_CASE(NODE_BEVEL) + svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node); + break; + SVM_CASE(NODE_AMBIENT_OCCLUSION) + svm_node_ao<node_feature_mask>(kg, state, sd, stack, node); + break; #endif - case NODE_TEX_VOXEL: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - offset = svm_node_tex_voxel(kg, sd, stack, node, offset); - } - break; - case NODE_AOV_START: - if (!svm_node_aov_check(path_flag, render_buffer)) { - return; - } - break; - case NODE_AOV_COLOR: - svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer); - break; - case NODE_AOV_VALUE: - svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer); - break; + SVM_CASE(NODE_TEX_VOXEL) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + offset = svm_node_tex_voxel(kg, sd, stack, node, offset); + } + break; + SVM_CASE(NODE_AOV_START) + if (!svm_node_aov_check(path_flag, render_buffer)) { + return; + } + break; + SVM_CASE(NODE_AOV_COLOR) + svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer); + break; + SVM_CASE(NODE_AOV_VALUE) + svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer); + break; + SVM_CASE(NODE_MIX_COLOR) + svm_node_mix_color(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIX_FLOAT) + svm_node_mix_float(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIX_VECTOR) + svm_node_mix_vector(sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_MIX_VECTOR_NON_UNIFORM) + svm_node_mix_vector_non_uniform(sd, stack, node.y, node.z); + break; default: kernel_assert(!"Unknown node type was passed to the SVM machine"); return; diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h index d9138796c45..8154c542e6f 100644 --- a/intern/cycles/kernel/svm/tex_coord.h +++ b/intern/cycles/kernel/svm/tex_coord.h @@ -106,7 +106,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, switch (type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); if (node.w == 0) { if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); @@ -130,17 +130,17 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, Transform tfm = kernel_data.cam.worldtocamera; if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dx); + data = transform_point(&tfm, svm_node_bump_P_dx(sd)); else - data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); + data = transform_point(&tfm, svm_node_bump_P_dx(sd) + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)); + data = camera_world_to_ndc(kg, sd, sd->ray_P); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); + data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dx(sd)); data.z = 0.0f; break; } @@ -160,7 +160,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); # ifdef __VOLUME__ if (sd->object != OBJECT_NONE) @@ -191,7 +191,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, switch (type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); if (node.w == 0) { if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); @@ -215,17 +215,17 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, Transform tfm = kernel_data.cam.worldtocamera; if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dy); + data = transform_point(&tfm, svm_node_bump_P_dy(sd)); else - data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); + data = transform_point(&tfm, svm_node_bump_P_dy(sd) + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)); + data = camera_world_to_ndc(kg, sd, sd->ray_P); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); + data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dy(sd)); data.z = 0.0f; break; } @@ -245,7 +245,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); # ifdef __VOLUME__ if (sd->object != OBJECT_NONE) diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index 82109ec4c4f..9dd8f196e0f 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -12,109 +12,14 @@ CCL_NAMESPACE_BEGIN /* SVM stack offsets with this value indicate that it's not on the stack */ #define SVM_STACK_INVALID 255 -#define SVM_BUMP_EVAL_STATE_SIZE 9 +#define SVM_BUMP_EVAL_STATE_SIZE 4 /* Nodes */ typedef enum ShaderNodeType { - NODE_END = 0, - NODE_SHADER_JUMP, - NODE_CLOSURE_BSDF, - NODE_CLOSURE_EMISSION, - NODE_CLOSURE_BACKGROUND, - NODE_CLOSURE_SET_WEIGHT, - NODE_CLOSURE_WEIGHT, - NODE_EMISSION_WEIGHT, - NODE_MIX_CLOSURE, - NODE_JUMP_IF_ZERO, - NODE_JUMP_IF_ONE, - NODE_GEOMETRY, - NODE_CONVERT, - NODE_TEX_COORD, - NODE_VALUE_F, - NODE_VALUE_V, - NODE_ATTR, - NODE_VERTEX_COLOR, - NODE_GEOMETRY_BUMP_DX, - NODE_GEOMETRY_BUMP_DY, - NODE_SET_DISPLACEMENT, - NODE_DISPLACEMENT, - NODE_VECTOR_DISPLACEMENT, - NODE_TEX_IMAGE, - NODE_TEX_IMAGE_BOX, - NODE_TEX_NOISE, - NODE_SET_BUMP, - NODE_ATTR_BUMP_DX, - NODE_ATTR_BUMP_DY, - NODE_VERTEX_COLOR_BUMP_DX, - NODE_VERTEX_COLOR_BUMP_DY, - NODE_TEX_COORD_BUMP_DX, - NODE_TEX_COORD_BUMP_DY, - NODE_CLOSURE_SET_NORMAL, - NODE_ENTER_BUMP_EVAL, - NODE_LEAVE_BUMP_EVAL, - NODE_HSV, - NODE_CLOSURE_HOLDOUT, - NODE_FRESNEL, - NODE_LAYER_WEIGHT, - NODE_CLOSURE_VOLUME, - NODE_PRINCIPLED_VOLUME, - NODE_MATH, - NODE_VECTOR_MATH, - NODE_RGB_RAMP, - NODE_GAMMA, - NODE_BRIGHTCONTRAST, - NODE_LIGHT_PATH, - NODE_OBJECT_INFO, - NODE_PARTICLE_INFO, - NODE_HAIR_INFO, - NODE_POINT_INFO, - NODE_TEXTURE_MAPPING, - NODE_MAPPING, - NODE_MIN_MAX, - NODE_CAMERA, - NODE_TEX_ENVIRONMENT, - NODE_TEX_SKY, - NODE_TEX_GRADIENT, - NODE_TEX_VORONOI, - NODE_TEX_MUSGRAVE, - NODE_TEX_WAVE, - NODE_TEX_MAGIC, - NODE_TEX_CHECKER, - NODE_TEX_BRICK, - NODE_TEX_WHITE_NOISE, - NODE_NORMAL, - NODE_LIGHT_FALLOFF, - NODE_IES, - NODE_RGB_CURVES, - NODE_VECTOR_CURVES, - NODE_TANGENT, - NODE_NORMAL_MAP, - NODE_INVERT, - NODE_MIX, - NODE_SEPARATE_COLOR, - NODE_COMBINE_COLOR, - NODE_SEPARATE_VECTOR, - NODE_COMBINE_VECTOR, - NODE_SEPARATE_HSV, - NODE_COMBINE_HSV, - NODE_VECTOR_ROTATE, - NODE_VECTOR_TRANSFORM, - NODE_WIREFRAME, - NODE_WAVELENGTH, - NODE_BLACKBODY, - NODE_MAP_RANGE, - NODE_VECTOR_MAP_RANGE, - NODE_CLAMP, - NODE_BEVEL, - NODE_AMBIENT_OCCLUSION, - NODE_TEX_VOXEL, - NODE_AOV_START, - NODE_AOV_COLOR, - NODE_AOV_VALUE, - NODE_FLOAT_CURVE, - /* NOTE: for best OpenCL performance, item definition in the enum must - * match the switch case order in `svm.h`. */ +#define SHADER_NODE_TYPE(name) name, +#include "node_types_template.h" + NODE_NUM } ShaderNodeType; typedef enum NodeAttributeOutputType { @@ -228,7 +133,7 @@ typedef enum NodeMix { NODE_MIX_HUE, NODE_MIX_SAT, NODE_MIX_VAL, - NODE_MIX_COLOR, + NODE_MIX_COL, NODE_MIX_SOFT, NODE_MIX_LINEAR, NODE_MIX_CLAMP /* used for the clamp UI option */ diff --git a/intern/cycles/kernel/svm/voronoi.h b/intern/cycles/kernel/svm/voronoi.h index 4ff1047aab7..53c1bda0904 100644 --- a/intern/cycles/kernel/svm/voronoi.h +++ b/intern/cycles/kernel/svm/voronoi.h @@ -1079,7 +1079,7 @@ ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg, default: kernel_assert(0); } - position_out = safe_divide_float3_float(position_out, scale); + position_out = safe_divide(position_out, scale); break; } @@ -1126,7 +1126,7 @@ ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg, default: kernel_assert(0); } - position_out_4d = safe_divide_float4_float(position_out_4d, scale); + position_out_4d = safe_divide(position_out_4d, scale); position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z); w_out = position_out_4d.w; } diff --git a/intern/cycles/kernel/svm/wireframe.h b/intern/cycles/kernel/svm/wireframe.h index e5fe08e5d04..91fadf4cfc4 100644 --- a/intern/cycles/kernel/svm/wireframe.h +++ b/intern/cycles/kernel/svm/wireframe.h @@ -14,6 +14,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float wireframe(KernelGlobals kg, ccl_private ShaderData *sd, + const differential3 dP, float size, int pixel_size, ccl_private float3 *P) @@ -46,8 +47,8 @@ ccl_device_inline float wireframe(KernelGlobals kg, if (pixel_size) { // Project the derivatives of P to the viewing plane defined // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); - float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); + float pixelwidth_x = len(dP.dx - dot(dP.dx, sd->I) * sd->I); + float pixelwidth_y = len(dP.dy - dot(dP.dy, sd->I) * sd->I); // Take the average of both axis' length pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; } @@ -86,16 +87,17 @@ ccl_device_noinline void svm_node_wireframe(KernelGlobals kg, int pixel_size = (int)use_pixel_size; /* Calculate wireframe */ - float f = wireframe(kg, sd, size, pixel_size, &sd->P); + const differential3 dP = differential_from_compact(sd->Ng, sd->dP); + float f = wireframe(kg, sd, dP, size, pixel_size, &sd->P); /* TODO(sergey): Think of faster way to calculate derivatives. */ if (bump_offset == NODE_BUMP_OFFSET_DX) { - float3 Px = sd->P - sd->dP.dx; - f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); + float3 Px = sd->P - dP.dx; + f += (f - wireframe(kg, sd, dP, size, pixel_size, &Px)) / len(dP.dx); } else if (bump_offset == NODE_BUMP_OFFSET_DY) { - float3 Py = sd->P - sd->dP.dy; - f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); + float3 Py = sd->P - dP.dy; + f += (f - wireframe(kg, sd, dP, size, pixel_size, &Py)) / len(dP.dy); } if (stack_valid(out_fac)) diff --git a/intern/cycles/kernel/tables.h b/intern/cycles/kernel/tables.h index c1fdbba3fa7..399eea1e2b1 100644 --- a/intern/cycles/kernel/tables.h +++ b/intern/cycles/kernel/tables.h @@ -63,4 +63,57 @@ ccl_inline_constant float cie_colour_match[][3] = { {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f} }; +/* + * The direction vectors for the first four dimensions of the Sobol + * sequence, stored with reversed-order bits. + * + * This is used in the Sobol-Burley sampler implementation. We don't + * need more than four dimensions because we achieve higher dimensions + * with padding. They're stored with reversed bits because we need + * them reversed for the fast hash-based Owen scrambling anyway, and + * this avoids doing that at run time. + */ +ccl_inline_constant unsigned int sobol_burley_table[4][32] = { + { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080, + 0x00000100, 0x00000200, 0x00000400, 0x00000800, + 0x00001000, 0x00002000, 0x00004000, 0x00008000, + 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + }, + { + 0x00000001, 0x00000003, 0x00000005, 0x0000000f, + 0x00000011, 0x00000033, 0x00000055, 0x000000ff, + 0x00000101, 0x00000303, 0x00000505, 0x00000f0f, + 0x00001111, 0x00003333, 0x00005555, 0x0000ffff, + 0x00010001, 0x00030003, 0x00050005, 0x000f000f, + 0x00110011, 0x00330033, 0x00550055, 0x00ff00ff, + 0x01010101, 0x03030303, 0x05050505, 0x0f0f0f0f, + 0x11111111, 0x33333333, 0x55555555, 0xffffffff, + }, + { + 0x00000001, 0x00000003, 0x00000006, 0x00000009, + 0x00000017, 0x0000003a, 0x00000071, 0x000000a3, + 0x00000116, 0x00000339, 0x00000677, 0x000009aa, + 0x00001601, 0x00003903, 0x00007706, 0x0000aa09, + 0x00010117, 0x0003033a, 0x00060671, 0x000909a3, + 0x00171616, 0x003a3939, 0x00717777, 0x00a3aaaa, + 0x01170001, 0x033a0003, 0x06710006, 0x09a30009, + 0x16160017, 0x3939003a, 0x77770071, 0xaaaa00a3, + }, + { + 0x00000001, 0x00000003, 0x00000004, 0x0000000a, + 0x0000001f, 0x0000002e, 0x00000045, 0x000000c9, + 0x0000011b, 0x000002a4, 0x0000079a, 0x00000b67, + 0x0000101e, 0x0000302d, 0x00004041, 0x0000a0c3, + 0x0001f104, 0x0002e28a, 0x000457df, 0x000c9bae, + 0x0011a105, 0x002a7289, 0x0079e7db, 0x00b6dba4, + 0x0100011a, 0x030002a7, 0x0400079e, 0x0a000b6d, + 0x1f001001, 0x2e003003, 0x45004004, 0xc900a00a, + }, +}; + /* clang-format on */ diff --git a/intern/cycles/kernel/textures.h b/intern/cycles/kernel/textures.h deleted file mode 100644 index d8ac9cbe51f..00000000000 --- a/intern/cycles/kernel/textures.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#ifndef KERNEL_TEX -# define KERNEL_TEX(type, name) -#endif - -/* BVH2, not used for OptiX or Embree. */ -KERNEL_TEX(float4, __bvh_nodes) -KERNEL_TEX(float4, __bvh_leaf_nodes) -KERNEL_TEX(uint, __prim_type) -KERNEL_TEX(uint, __prim_visibility) -KERNEL_TEX(uint, __prim_index) -KERNEL_TEX(uint, __prim_object) -KERNEL_TEX(uint, __object_node) -KERNEL_TEX(float2, __prim_time) - -/* objects */ -KERNEL_TEX(KernelObject, __objects) -KERNEL_TEX(Transform, __object_motion_pass) -KERNEL_TEX(DecomposedTransform, __object_motion) -KERNEL_TEX(uint, __object_flag) -KERNEL_TEX(float, __object_volume_step) -KERNEL_TEX(uint, __object_prim_offset) - -/* cameras */ -KERNEL_TEX(DecomposedTransform, __camera_motion) - -/* triangles */ -KERNEL_TEX(uint, __tri_shader) -KERNEL_TEX(packed_float3, __tri_vnormal) -KERNEL_TEX(uint4, __tri_vindex) -KERNEL_TEX(uint, __tri_patch) -KERNEL_TEX(float2, __tri_patch_uv) -KERNEL_TEX(packed_float3, __tri_verts) - -/* curves */ -KERNEL_TEX(KernelCurve, __curves) -KERNEL_TEX(float4, __curve_keys) -KERNEL_TEX(KernelCurveSegment, __curve_segments) - -/* patches */ -KERNEL_TEX(uint, __patches) - -/* pointclouds */ -KERNEL_TEX(float4, __points) -KERNEL_TEX(uint, __points_shader) - -/* attributes */ -KERNEL_TEX(AttributeMap, __attributes_map) -KERNEL_TEX(float, __attributes_float) -KERNEL_TEX(float2, __attributes_float2) -KERNEL_TEX(packed_float3, __attributes_float3) -KERNEL_TEX(float4, __attributes_float4) -KERNEL_TEX(uchar4, __attributes_uchar4) - -/* lights */ -KERNEL_TEX(KernelLightDistribution, __light_distribution) -KERNEL_TEX(KernelLight, __lights) -KERNEL_TEX(float2, __light_background_marginal_cdf) -KERNEL_TEX(float2, __light_background_conditional_cdf) - -/* particles */ -KERNEL_TEX(KernelParticle, __particles) - -/* shaders */ -KERNEL_TEX(uint4, __svm_nodes) -KERNEL_TEX(KernelShader, __shaders) - -/* lookup tables */ -KERNEL_TEX(float, __lookup_table) - -/* sobol */ -KERNEL_TEX(float, __sample_pattern_lut) - -/* image textures */ -KERNEL_TEX(TextureInfo, __texture_info) - -/* ies lights */ -KERNEL_TEX(float, __ies) - -#undef KERNEL_TEX diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index ad022716207..bd3791594e0 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -19,10 +19,6 @@ #include "kernel/svm/types.h" -#ifndef __KERNEL_GPU__ -# define __KERNEL_CPU__ -#endif - CCL_NAMESPACE_BEGIN /* Constants */ @@ -51,57 +47,40 @@ CCL_NAMESPACE_BEGIN #define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U #define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U -#ifdef __KERNEL_CPU__ -# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU -#else +#ifdef __KERNEL_GPU__ # define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU +#else +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU #endif /* Kernel features */ -#define __SOBOL__ -#define __DPDU__ -#define __BACKGROUND__ +#define __AO__ #define __CAUSTICS_TRICKS__ -#define __VISIBILITY_FLAG__ -#define __RAY_DIFFERENTIALS__ -#define __CAMERA_CLIPPING__ -#define __INTERSECTION_REFINE__ #define __CLAMP_SAMPLE__ -#define __PATCH_EVAL__ -#define __SHADOW_CATCHER__ #define __DENOISING_FEATURES__ -#define __SHADER_RAYTRACE__ -#define __AO__ -#define __PASSES__ +#define __DPDU__ #define __HAIR__ +#define __OBJECT_MOTION__ +#define __PASSES__ +#define __PATCH_EVAL__ #define __POINTCLOUD__ +#define __RAY_DIFFERENTIALS__ +#define __SHADER_RAYTRACE__ +#define __SHADOW_CATCHER__ +#define __SHADOW_RECORD_ALL__ +#define __SUBSURFACE__ #define __SVM__ -#define __EMISSION__ -#define __HOLDOUT__ #define __TRANSPARENT_SHADOWS__ -#define __BACKGROUND_MIS__ -#define __LAMP_MIS__ -#define __CAMERA_MOTION__ -#define __OBJECT_MOTION__ -#define __BAKING__ -#define __PRINCIPLED__ -#define __SUBSURFACE__ +#define __VISIBILITY_FLAG__ #define __VOLUME__ -#define __CMJ__ -#define __SHADOW_RECORD_ALL__ -#define __BRANCHED_PATH__ /* Device specific features */ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # ifdef WITH_OSL # define __OSL__ # endif # define __VOLUME_RECORD_ALL__ -#endif /* __KERNEL_CPU__ */ - -#ifdef __KERNEL_GPU_RAYTRACING__ -# undef __BAKING__ -#endif /* __KERNEL_GPU_RAYTRACING__ */ +#endif /* !__KERNEL_GPU__ */ /* MNEE currently causes "Compute function exceeds available temporary registers" * on Metal, disabled for now. */ @@ -111,9 +90,6 @@ CCL_NAMESPACE_BEGIN /* Scene-based selective features compilation. */ #ifdef __KERNEL_FEATURES__ -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION) -# undef __CAMERA_MOTION__ -# endif # if !(__KERNEL_FEATURES & KERNEL_FEATURE_OBJECT_MOTION) # undef __OBJECT_MOTION__ # endif @@ -129,9 +105,6 @@ CCL_NAMESPACE_BEGIN # if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE) # undef __SUBSURFACE__ # endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING) -# undef __BAKING__ -# endif # if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION) # undef __PATCH_EVAL__ # endif @@ -141,9 +114,6 @@ CCL_NAMESPACE_BEGIN # if !(__KERNEL_FEATURES & KERNEL_FEATURE_SHADOW_CATCHER) # undef __SHADOW_CATCHER__ # endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PRINCIPLED) -# undef __PRINCIPLED__ -# endif # if !(__KERNEL_FEATURES & KERNEL_FEATURE_DENOISING) # undef __DENOISING_FEATURES__ # endif @@ -159,36 +129,48 @@ CCL_NAMESPACE_BEGIN # define __BVH_LOCAL__ #endif -/* Path Tracing - * note we need to keep the u/v pairs at even values */ +/* Sampling Patterns */ +/* Unique numbers for sampling patterns in each bounce. */ enum PathTraceDimension { - PRNG_FILTER_U = 0, - PRNG_FILTER_V = 1, - PRNG_LENS_U = 2, - PRNG_LENS_V = 3, - PRNG_TIME = 4, - PRNG_UNUSED_0 = 5, - PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ - PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ - PRNG_BASE_NUM = 10, - - PRNG_BSDF_U = 0, - PRNG_BSDF_V = 1, - PRNG_LIGHT_U = 2, - PRNG_LIGHT_V = 3, - PRNG_LIGHT_TERMINATE = 4, - PRNG_TERMINATE = 5, - PRNG_PHASE_CHANNEL = 6, - PRNG_SCATTER_DISTANCE = 7, - PRNG_BOUNCE_NUM = 8, - - PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */ - PRNG_BEVEL_V = 7, + /* Init bounce */ + PRNG_FILTER = 0, + PRNG_LENS = 1, + PRNG_TIME = 2, + + /* Shade bounce */ + PRNG_TERMINATE = 0, + PRNG_LIGHT = 1, + PRNG_LIGHT_TERMINATE = 2, + /* Surface */ + PRNG_SURFACE_BSDF = 3, + PRNG_SURFACE_AO = 4, + PRNG_SURFACE_BEVEL = 5, + /* Volume */ + PRNG_VOLUME_PHASE = 3, + PRNG_VOLUME_PHASE_CHANNEL = 4, + PRNG_VOLUME_SCATTER_DISTANCE = 5, + PRNG_VOLUME_OFFSET = 6, + PRNG_VOLUME_SHADE_OFFSET = 7, + + /* Subsurface random walk bounces */ + PRNG_SUBSURFACE_BSDF = 0, + PRNG_SUBSURFACE_PHASE_CHANNEL = 1, + PRNG_SUBSURFACE_SCATTER_DISTANCE = 2, + PRNG_SUBSURFACE_GUIDE_STRATEGY = 3, + PRNG_SUBSURFACE_GUIDE_DIRECTION = 4, + + /* Subsurface disk bounce */ + PRNG_SUBSURFACE_DISK = 0, + PRNG_SUBSURFACE_DISK_RESAMPLE = 1, + + /* High enough number so we don't need to change it when adding new dimensions, + * low enough so there is no uint16_t overflow with many bounces. */ + PRNG_BOUNCE_NUM = 16, }; enum SamplingPattern { - SAMPLING_PATTERN_SOBOL = 0, + SAMPLING_PATTERN_SOBOL_BURLEY = 0, SAMPLING_PATTERN_PMJ = 1, SAMPLING_NUM_PATTERNS, @@ -425,9 +407,9 @@ typedef enum CryptomatteType { } CryptomatteType; typedef struct BsdfEval { - float3 diffuse; - float3 glossy; - float3 sum; + Spectrum diffuse; + Spectrum glossy; + Spectrum sum; } BsdfEval; /* Closure Filter */ @@ -535,7 +517,8 @@ typedef struct RaySelfPrimitives { typedef struct Ray { float3 P; /* origin */ float3 D; /* direction */ - float t; /* length of the ray */ + float tmin; /* start distance */ + float tmax; /* end distance */ float time; /* time (for motion blur) */ RaySelfPrimitives self; @@ -672,12 +655,11 @@ typedef struct AttributeDescriptor { /* For looking up attributes on objects and geometry. */ typedef struct AttributeMap { - uint id; /* Global unique identifier. */ - uint element; /* AttributeElement. */ - int offset; /* Offset into __attributes global arrays. */ - uint8_t type; /* NodeAttributeType. */ - uint8_t flags; /* AttributeFlag. */ - uint8_t pad[2]; + uint64_t id; /* Global unique identifier. */ + int offset; /* Offset into __attributes global arrays. */ + uint16_t element; /* AttributeElement. */ + uint8_t type; /* NodeAttributeType. */ + uint8_t flags; /* AttributeFlag. */ } AttributeMap; /* Closure data */ @@ -720,7 +702,7 @@ typedef struct AttributeMap { * padded to be 16 bytes, while it's only 12 bytes on the GPU. */ #define SHADER_CLOSURE_BASE \ - float3 weight; \ + Spectrum weight; \ ClosureType type; \ float sample_weight; \ float3 N @@ -729,10 +711,9 @@ typedef struct ccl_align(16) ShaderClosure { SHADER_CLOSURE_BASE; -#ifdef __KERNEL_CPU__ - float pad[2]; -#endif - float data[10]; + /* Extra space for closures to store data, somewhat arbitrary but closures + * assert that their size fits. */ + char pad[sizeof(Spectrum) * 2 + sizeof(float) * 4]; } ShaderClosure; @@ -885,10 +866,10 @@ typedef struct ccl_align(16) ShaderData float ray_length; #ifdef __RAY_DIFFERENTIALS__ - /* differential of P. these are orthogonal to Ng, not N */ - differential3 dP; - /* differential of I */ - differential3 dI; + /* Radius of differential of P. */ + float dP; + /* Radius of differential of I. */ + float dI; /* differential of u, v */ differential du; differential dv; @@ -923,12 +904,12 @@ typedef struct ccl_align(16) ShaderData /* Closure data, we store a fixed array of closures */ int num_closure; int num_closure_left; - float3 svm_closure_weight; + Spectrum svm_closure_weight; /* Closure weights summed directly, so we can evaluate * emission and shadow transparency with MAX_CLOSURE 0. */ - float3 closure_emission_background; - float3 closure_transparent_extinction; + Spectrum closure_emission_background; + Spectrum closure_transparent_extinction; /* At the end so we can adjust size in ShaderDataTinyStorage. */ struct ShaderClosure closure[MAX_CLOSURE]; @@ -959,7 +940,7 @@ ShaderDataCausticsStorage; * Used for decoupled direct/indirect light closure storage. */ typedef struct ShaderVolumeClosure { - float3 weight; + Spectrum weight; float sample_weight; float g; } ShaderVolumeClosure; @@ -1072,94 +1053,6 @@ typedef struct KernelCamera { } KernelCamera; static_assert_align(KernelCamera, 16); -typedef struct KernelFilm { - float exposure; - int pass_flag; - - int light_pass_flag; - int pass_stride; - - int pass_combined; - int pass_depth; - int pass_position; - int pass_normal; - int pass_roughness; - int pass_motion; - - int pass_motion_weight; - int pass_uv; - int pass_object_id; - int pass_material_id; - - int pass_diffuse_color; - int pass_glossy_color; - int pass_transmission_color; - - int pass_diffuse_indirect; - int pass_glossy_indirect; - int pass_transmission_indirect; - int pass_volume_indirect; - - int pass_diffuse_direct; - int pass_glossy_direct; - int pass_transmission_direct; - int pass_volume_direct; - - int pass_emission; - int pass_background; - int pass_ao; - float pass_alpha_threshold; - - int pass_shadow; - float pass_shadow_scale; - - int pass_shadow_catcher; - int pass_shadow_catcher_sample_count; - int pass_shadow_catcher_matte; - - int filter_table_offset; - - int cryptomatte_passes; - int cryptomatte_depth; - int pass_cryptomatte; - - int pass_adaptive_aux_buffer; - int pass_sample_count; - - int pass_mist; - float mist_start; - float mist_inv_depth; - float mist_falloff; - - int pass_denoising_normal; - int pass_denoising_albedo; - int pass_denoising_depth; - - int pass_aov_color; - int pass_aov_value; - int pass_lightgroup; - - /* XYZ to rendering color space transform. float4 instead of float3 to - * ensure consistent padding/alignment across devices. */ - float4 xyz_to_r; - float4 xyz_to_g; - float4 xyz_to_b; - float4 rgb_to_y; - /* Rec709 to rendering color space. */ - float4 rec709_to_r; - float4 rec709_to_g; - float4 rec709_to_b; - int is_rec709; - - int pass_bake_primitive; - int pass_bake_differential; - - int use_approximate_shadow_catcher; - - int pad1; -} KernelFilm; -static_assert_align(KernelFilm, 16); - typedef struct KernelFilmConvert { int pass_offset; int pass_stride; @@ -1201,108 +1094,6 @@ typedef struct KernelFilmConvert { } KernelFilmConvert; static_assert_align(KernelFilmConvert, 16); -typedef struct KernelBackground { - /* only shader index */ - int surface_shader; - int volume_shader; - float volume_step_size; - int transparent; - float transparent_roughness_squared_threshold; - - /* portal sampling */ - float portal_weight; - int num_portals; - int portal_offset; - - /* sun sampling */ - float sun_weight; - /* xyz store direction, w the angle. float4 instead of float3 is used - * to ensure consistent padding/alignment across devices. */ - float4 sun; - - /* map sampling */ - float map_weight; - int map_res_x; - int map_res_y; - - int use_mis; - - int lightgroup; - - /* Padding */ - int pad1, pad2; -} KernelBackground; -static_assert_align(KernelBackground, 16); - -typedef struct KernelIntegrator { - /* emission */ - int use_direct_light; - int num_distribution; - int num_all_lights; - float pdf_triangles; - float pdf_lights; - float light_inv_rr_threshold; - - /* bounces */ - int min_bounce; - int max_bounce; - - int max_diffuse_bounce; - int max_glossy_bounce; - int max_transmission_bounce; - int max_volume_bounce; - - /* AO bounces */ - int ao_bounces; - float ao_bounces_distance; - float ao_bounces_factor; - float ao_additive_factor; - - /* transparent */ - int transparent_min_bounce; - int transparent_max_bounce; - int transparent_shadows; - - /* caustics */ - int caustics_reflective; - int caustics_refractive; - float filter_glossy; - - /* seed */ - int seed; - - /* clamp */ - float sample_clamp_direct; - float sample_clamp_indirect; - - /* mis */ - int use_lamp_mis; - - /* caustics */ - int use_caustics; - - /* sampler */ - int sampling_pattern; - - /* volume render */ - int use_volumes; - int volume_max_steps; - float volume_step_rate; - - int has_shadow_catcher; - float scrambling_distance; - - /* Closure filter. */ - int filter_closures; - - /* MIS debugging. */ - int direct_light_sampling_type; - - /* padding */ - int pad1; -} KernelIntegrator; -static_assert_align(KernelIntegrator, 16); - typedef enum KernelBVHLayout { BVH_LAYOUT_NONE = 0, @@ -1320,36 +1111,25 @@ typedef enum KernelBVHLayout { BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL, } KernelBVHLayout; -typedef struct KernelBVH { - /* Own BVH */ - int root; - int have_motion; - int have_curves; - int bvh_layout; - int use_bvh_steps; - int curve_subdivisions; +/* Specialized struct that can become constants in dynamic compilation. */ +#define KERNEL_STRUCT_BEGIN(name, parent) struct name { +#define KERNEL_STRUCT_END(name) \ + } \ + ; \ + static_assert_align(name, 16); - /* Custom BVH */ -#ifdef __KERNEL_OPTIX__ - OptixTraversableHandle scene; -#elif defined __METALRT__ - metalrt_as_type scene; +#ifdef __KERNEL_USE_DATA_CONSTANTS__ +# define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name; #else -# ifdef __EMBREE__ - RTCScene scene; -# ifndef __KERNEL_64_BIT__ - int pad2; -# endif -# else - int scene, pad2; -# endif +# define KERNEL_STRUCT_MEMBER(parent, type, name) type name; #endif -} KernelBVH; -static_assert_align(KernelBVH, 16); + +#include "kernel/data_template.h" typedef struct KernelTables { int beckmann_offset; - int pad1, pad2, pad3; + int filter_table_offset; + int pad1, pad2; } KernelTables; static_assert_align(KernelTables, 16); @@ -1362,18 +1142,37 @@ typedef struct KernelBake { static_assert_align(KernelBake, 16); typedef struct KernelData { + /* Features and limits. */ uint kernel_features; uint max_closures; uint max_shaders; uint volume_stack_size; + /* Always dynamic data members. */ KernelCamera cam; - KernelFilm film; - KernelBackground background; - KernelIntegrator integrator; - KernelBVH bvh; - KernelTables tables; KernelBake bake; + KernelTables tables; + + /* Potentially specialized data members. */ +#define KERNEL_STRUCT_BEGIN(name, parent) name parent; +#include "kernel/data_template.h" + + /* Device specific BVH. */ +#ifdef __KERNEL_OPTIX__ + OptixTraversableHandle device_bvh; +#elif defined __METALRT__ + metalrt_as_type device_bvh; +#else +# ifdef __EMBREE__ + RTCScene device_bvh; +# ifndef __KERNEL_64_BIT__ + int pad1; +# endif +# else + int device_bvh, pad1; +# endif +#endif + int pad2, pad3; } KernelData; static_assert_align(KernelData, 16); @@ -1557,10 +1356,14 @@ typedef struct KernelShaderEvalInput { } KernelShaderEvalInput; static_assert_align(KernelShaderEvalInput, 16); -/* Pre-computed sample table sizes for PMJ02 sampler. */ +/* Pre-computed sample table sizes for PMJ02 sampler. + * + * NOTE: divisions *must* be a power of two, and patterns + * ideally should be as well. + */ #define NUM_PMJ_DIVISIONS 32 #define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS)) -#define NUM_PMJ_PATTERNS 1 +#define NUM_PMJ_PATTERNS 64 /* Device kernels. * @@ -1571,7 +1374,7 @@ static_assert_align(KernelShaderEvalInput, 16); * If the kernel uses shared CUDA memory, `CUDADeviceQueue::enqueue` is to be modified. * The path iteration kernels are handled in `PathTraceWorkGPU::enqueue_path_iteration`. */ -typedef enum DeviceKernel { +typedef enum DeviceKernel : int { DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA = 0, DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, @@ -1667,42 +1470,38 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_HAIR = (1U << 12U), KERNEL_FEATURE_HAIR_THICK = (1U << 13U), KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U), - KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U), /* Denotes whether baking functionality is needed. */ - KERNEL_FEATURE_BAKING = (1U << 16U), + KERNEL_FEATURE_BAKING = (1U << 15U), /* Use subsurface scattering materials. */ - KERNEL_FEATURE_SUBSURFACE = (1U << 17U), + KERNEL_FEATURE_SUBSURFACE = (1U << 16U), /* Use volume materials. */ - KERNEL_FEATURE_VOLUME = (1U << 18U), + KERNEL_FEATURE_VOLUME = (1U << 17U), /* Use OpenSubdiv patch evaluation */ - KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U), + KERNEL_FEATURE_PATCH_EVALUATION = (1U << 18U), /* Use Transparent shadows */ - KERNEL_FEATURE_TRANSPARENT = (1U << 20U), + KERNEL_FEATURE_TRANSPARENT = (1U << 19U), /* Use shadow catcher. */ - KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U), - - /* Per-uber shader usage flags. */ - KERNEL_FEATURE_PRINCIPLED = (1U << 22U), + KERNEL_FEATURE_SHADOW_CATCHER = (1U << 29U), /* Light render passes. */ - KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U), + KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U), /* Shadow render pass. */ - KERNEL_FEATURE_SHADOW_PASS = (1U << 24U), + KERNEL_FEATURE_SHADOW_PASS = (1U << 22U), /* AO. */ - KERNEL_FEATURE_AO_PASS = (1U << 25U), - KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), + KERNEL_FEATURE_AO_PASS = (1U << 23U), + KERNEL_FEATURE_AO_ADDITIVE = (1U << 24U), KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), /* MNEE. */ - KERNEL_FEATURE_MNEE = (1U << 27U), + KERNEL_FEATURE_MNEE = (1U << 25U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ @@ -1729,15 +1528,15 @@ enum KernelFeatureFlag : uint32_t { /* Must be constexpr on the CPU to avoid compile errors because the state types * are different depending on the main, shadow or null path. For GPU we don't have * C++17 everywhere so can't use it. */ -#ifdef __KERNEL_CPU__ +#ifdef __KERNEL_GPU__ +# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) +# define IF_KERNEL_NODES_FEATURE(feature) \ + if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#else # define IF_KERNEL_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#else -# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) -# define IF_KERNEL_NODES_FEATURE(feature) \ - if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/color.h index c85ef262d88..4983b9048d4 100644 --- a/intern/cycles/kernel/util/color.h +++ b/intern/cycles/kernel/util/color.h @@ -33,4 +33,19 @@ ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c) return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); } +ccl_device_inline Spectrum rgb_to_spectrum(float3 rgb) +{ + return rgb; +} + +ccl_device_inline float3 spectrum_to_rgb(Spectrum s) +{ + return s; +} + +ccl_device float spectrum_to_gray(KernelGlobals kg, Spectrum c) +{ + return linear_rgb_to_gray(kg, spectrum_to_rgb(c)); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/differential.h b/intern/cycles/kernel/util/differential.h index 3682e91ea66..aad9bb6bb22 100644 --- a/intern/cycles/kernel/util/differential.h +++ b/intern/cycles/kernel/util/differential.h @@ -101,53 +101,59 @@ ccl_device differential3 differential3_zero() return d; } -/* Compact ray differentials that are just a scale to reduce memory usage and - * access cost in GPU. +/* Compact ray differentials that are just a radius to reduce memory usage and access cost + * on GPUs, basically cone tracing. * - * See above for more accurate reference implementations. - * - * TODO: also store the more compact version in ShaderData and recompute where - * needed? */ + * See above for more accurate reference implementations of ray differentials. */ ccl_device_forceinline float differential_zero_compact() { return 0.0f; } -ccl_device_forceinline float differential_make_compact(const differential3 D) +ccl_device_forceinline float differential_make_compact(const float dD) { - return 0.5f * (len(D.dx) + len(D.dy)); + return dD; } -ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP, - const float ray_dP, - const float3 /* ray_D */, - const float ray_dD, - const float3 surface_Ng, - const float ray_t) +ccl_device_forceinline float differential_make_compact(const differential3 dD) { - /* ray differential transfer through homogeneous medium, to - * compute dPdx/dy at a shading point from the incoming ray */ - float scale = ray_dP + ray_t * ray_dD; + return 0.5f * (len(dD.dx) + len(dD.dy)); +} - float3 dx, dy; - make_orthonormals(surface_Ng, &dx, &dy); - surface_dP->dx = dx * scale; - surface_dP->dy = dy * scale; +ccl_device_forceinline float differential_incoming_compact(const float dD) +{ + return dD; } -ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI, - const float3 D, - const float dD) +ccl_device_forceinline float differential_transfer_compact(const float ray_dP, + const float3 /* ray_D */, + const float ray_dD, + const float ray_t) { - /* compute dIdx/dy at a shading point, we just need to negate the - * differential of the ray direction */ + return ray_dP + ray_t * ray_dD; +} +ccl_device_forceinline differential3 differential_from_compact(const float3 D, const float dD) +{ float3 dx, dy; make_orthonormals(D, &dx, &dy); - dI->dx = dD * dx; - dI->dy = dD * dy; + differential3 d; + d.dx = dD * dx; + d.dy = dD * dy; + return d; +} + +ccl_device void differential_dudv_compact(ccl_private differential *du, + ccl_private differential *dv, + float3 dPdu, + float3 dPdv, + float dP, + float3 Ng) +{ + /* TODO: can we speed this up? */ + differential_dudv(du, dv, dPdu, dPdv, differential_from_compact(Ng, dP), Ng); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/lookup_table.h b/intern/cycles/kernel/util/lookup_table.h index e19e2ce5bd1..4db4dadab0e 100644 --- a/intern/cycles/kernel/util/lookup_table.h +++ b/intern/cycles/kernel/util/lookup_table.h @@ -15,11 +15,11 @@ ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int si int nindex = min(index + 1, size - 1); float t = x - index; - float data0 = kernel_tex_fetch(__lookup_table, index + offset); + float data0 = kernel_data_fetch(lookup_table, index + offset); if (t == 0.0f) return data0; - float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); + float data1 = kernel_data_fetch(lookup_table, nindex + offset); return (1.0f - t) * data0 + t * data1; } diff --git a/intern/cycles/kernel/util/profiling.h b/intern/cycles/kernel/util/profiling.h index 39cabd35967..b8afaf1166d 100644 --- a/intern/cycles/kernel/util/profiling.h +++ b/intern/cycles/kernel/util/profiling.h @@ -3,13 +3,13 @@ #pragma once -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # include "util/profiling.h" #endif CCL_NAMESPACE_BEGIN -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # define PROFILING_INIT(kg, event) \ ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event) # define PROFILING_EVENT(event) profiling_helper.set_event(event) @@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN # define PROFILING_EVENT(event) # define PROFILING_INIT_FOR_SHADER(kg, event) # define PROFILING_SHADER(object, shader) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END |