Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt19
-rw-r--r--intern/cycles/kernel/bvh/bvh.h817
-rw-r--r--intern/cycles/kernel/bvh/embree.h176
-rw-r--r--intern/cycles/kernel/bvh/local.h11
-rw-r--r--intern/cycles/kernel/bvh/metal.h37
-rw-r--r--intern/cycles/kernel/bvh/nodes.h32
-rw-r--r--intern/cycles/kernel/bvh/shadow_all.h52
-rw-r--r--intern/cycles/kernel/bvh/traversal.h33
-rw-r--r--intern/cycles/kernel/bvh/util.h30
-rw-r--r--intern/cycles/kernel/bvh/volume.h19
-rw-r--r--intern/cycles/kernel/bvh/volume_all.h73
-rw-r--r--intern/cycles/kernel/camera/camera.h22
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h2
-rw-r--r--intern/cycles/kernel/data_template.h206
-rw-r--r--intern/cycles/kernel/device/cpu/bvh.h609
-rw-r--r--intern/cycles/kernel/device/cpu/compat.h2
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h2
-rw-r--r--intern/cycles/kernel/device/metal/bvh.h1123
-rw-r--r--intern/cycles/kernel/device/metal/compat.h2
-rw-r--r--intern/cycles/kernel/device/metal/function_constants.h15
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal745
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h2
-rw-r--r--intern/cycles/kernel/device/oneapi/device_id.h11
-rw-r--r--intern/cycles/kernel/device/oneapi/dll_interface_template.h5
-rw-r--r--intern/cycles/kernel/device/oneapi/image.h4
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.cpp72
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel_templates.h10
-rw-r--r--intern/cycles/kernel/device/optix/bvh.h646
-rw-r--r--intern/cycles/kernel/device/optix/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu454
-rw-r--r--intern/cycles/kernel/geom/curve_intersect.h105
-rw-r--r--intern/cycles/kernel/geom/motion_triangle_intersect.h6
-rw-r--r--intern/cycles/kernel/geom/object.h113
-rw-r--r--intern/cycles/kernel/geom/point_intersect.h29
-rw-r--r--intern/cycles/kernel/geom/shader_data.h5
-rw-r--r--intern/cycles/kernel/geom/triangle_intersect.h6
-rw-r--r--intern/cycles/kernel/integrator/init_from_bake.h16
-rw-r--r--intern/cycles/kernel/integrator/init_from_camera.h6
-rw-r--r--intern/cycles/kernel/integrator/intersect_closest.h77
-rw-r--r--intern/cycles/kernel/integrator/intersect_shadow.h8
-rw-r--r--intern/cycles/kernel/integrator/intersect_subsurface.h2
-rw-r--r--intern/cycles/kernel/integrator/intersect_volume_stack.h23
-rw-r--r--intern/cycles/kernel/integrator/mnee.h30
-rw-r--r--intern/cycles/kernel/integrator/path_state.h3
-rw-r--r--intern/cycles/kernel/integrator/shade_background.h5
-rw-r--r--intern/cycles/kernel/integrator/shade_light.h21
-rw-r--r--intern/cycles/kernel/integrator/shade_shadow.h23
-rw-r--r--intern/cycles/kernel/integrator/shade_surface.h63
-rw-r--r--intern/cycles/kernel/integrator/shade_volume.h83
-rw-r--r--intern/cycles/kernel/integrator/shadow_catcher.h2
-rw-r--r--intern/cycles/kernel/integrator/shadow_state_template.h3
-rw-r--r--intern/cycles/kernel/integrator/state.h9
-rw-r--r--intern/cycles/kernel/integrator/state_flow.h273
-rw-r--r--intern/cycles/kernel/integrator/state_template.h10
-rw-r--r--intern/cycles/kernel/integrator/state_util.h14
-rw-r--r--intern/cycles/kernel/integrator/subsurface.h17
-rw-r--r--intern/cycles/kernel/integrator/subsurface_disk.h17
-rw-r--r--intern/cycles/kernel/integrator/subsurface_random_walk.h29
-rw-r--r--intern/cycles/kernel/light/light.h34
-rw-r--r--intern/cycles/kernel/light/sample.h7
-rw-r--r--intern/cycles/kernel/osl/services.cpp13
-rw-r--r--intern/cycles/kernel/svm/ao.h5
-rw-r--r--intern/cycles/kernel/svm/bevel.h5
-rw-r--r--intern/cycles/kernel/svm/closure.h4
-rw-r--r--intern/cycles/kernel/svm/node_types_template.h110
-rw-r--r--intern/cycles/kernel/svm/svm.h690
-rw-r--r--intern/cycles/kernel/svm/tex_coord.h4
-rw-r--r--intern/cycles/kernel/svm/types.h101
-rw-r--r--intern/cycles/kernel/types.h293
-rw-r--r--intern/cycles/kernel/util/profiling.h6
70 files changed, 4009 insertions, 3493 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index ccd694dfdfd..8ecdac6ee27 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -42,6 +42,7 @@ set(SRC_KERNEL_DEVICE_ONEAPI
)
set(SRC_KERNEL_DEVICE_CPU_HEADERS
+ device/cpu/bvh.h
device/cpu/compat.h
device/cpu/image.h
device/cpu/globals.h
@@ -71,14 +72,17 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
)
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
+ device/optix/bvh.h
device/optix/compat.h
device/optix/globals.h
)
set(SRC_KERNEL_DEVICE_METAL_HEADERS
+ device/metal/bvh.h
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
+ device/metal/function_constants.h
device/metal/globals.h
)
@@ -86,7 +90,6 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
device/oneapi/compat.h
device/oneapi/context_begin.h
device/oneapi/context_end.h
- device/oneapi/device_id.h
device/oneapi/globals.h
device/oneapi/image.h
device/oneapi/kernel.h
@@ -155,6 +158,7 @@ set(SRC_KERNEL_SVM_HEADERS
svm/math_util.h
svm/mix.h
svm/musgrave.h
+ svm/node_types_template.h
svm/noise.h
svm/noisetex.h
svm/normal.h
@@ -213,8 +217,6 @@ set(SRC_KERNEL_BVH_HEADERS
bvh/util.h
bvh/volume.h
bvh/volume_all.h
- bvh/embree.h
- bvh/metal.h
)
set(SRC_KERNEL_CAMERA_HEADERS
@@ -283,6 +285,7 @@ set(SRC_KERNEL_UTIL_HEADERS
set(SRC_KERNEL_TYPES_HEADERS
data_arrays.h
+ data_template.h
tables.h
types.h
)
@@ -314,6 +317,7 @@ set(SRC_UTIL_HEADERS
../util/math_float2.h
../util/math_float3.h
../util/math_float4.h
+ ../util/math_float8.h
../util/math_int2.h
../util/math_int3.h
../util/math_int4.h
@@ -732,8 +736,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-O2
-o ${cycles_kernel_oneapi_lib}
-I${CMAKE_CURRENT_SOURCE_DIR}/..
- -I${LEVEL_ZERO_INCLUDE_DIR}
- ${LEVEL_ZERO_LIBRARY}
${SYCL_CPP_FLAGS}
)
@@ -847,10 +849,9 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
else()
list(APPEND sycl_compiler_flags -fPIC)
- # avoid getting __FAST_MATH__ to be defined for the graphics compiler on CentOS 7 until the compile-time issue it triggers gets fixed.
- if(WITH_CYCLES_ONEAPI_BINARIES)
- list(APPEND sycl_compiler_flags -fhonor-nans)
- endif()
+ # Prevent __FAST_MATH__ from being defined when building on CentOS 7, until the compilation
+ # crash it triggers at either the AoT or JIT stage gets fixed.
+ list(APPEND sycl_compiler_flags -fhonor-nans)
# add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
# libpi_level_zero.so can be placed next to it and get found.
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index a1d0e307170..bcefe5d970c 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -1,40 +1,46 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
-/* BVH
- *
- * Bounding volume hierarchy for ray tracing. We compile different variations
- * of the same BVH traversal function for faster rendering when some types of
- * primitives are not needed, using #includes to work around the lack of
- * C++ templates in OpenCL.
- *
- * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
- * the code has been extended and modified to support more primitives and work
- * with CPU/CUDA/OpenCL. */
-
#pragma once
-#ifdef __EMBREE__
-# include "kernel/bvh/embree.h"
-#endif
-
-#ifdef __METALRT__
-# include "kernel/bvh/metal.h"
-#endif
-
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
#include "kernel/integrator/state_util.h"
+/* Device specific acceleration structures for ray tracing. */
+
+#if defined(__EMBREE__)
+# include "kernel/device/cpu/bvh.h"
+#elif defined(__METALRT__)
+# include "kernel/device/metal/bvh.h"
+#elif defined(__KERNEL_OPTIX__)
+# include "kernel/device/optix/bvh.h"
+#else
+# define __BVH2__
+#endif
+
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU_RAYTRACING__)
+#ifdef __BVH2__
-/* Regular BVH traversal */
+/* BVH2
+ *
+ * Bounding volume hierarchy for ray tracing, when no native acceleration
+ * structure is available for the device.
+ *
+ * We compile different variations of the same BVH traversal function for
+ * faster rendering when some types of primitives are not needed, using #includes
+ * to work around the lack of C++ templates in OpenCL.
+ *
+ * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
+ * the code has been extended and modified to support more primitives and work
+ * with CPU and various GPU kernel languages. */
# include "kernel/bvh/nodes.h"
+/* Regular BVH traversal */
+
# define BVH_FUNCTION_NAME bvh_intersect
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
# include "kernel/bvh/traversal.h"
@@ -57,261 +63,15 @@ CCL_NAMESPACE_BEGIN
# include "kernel/bvh/traversal.h"
# endif
-/* Subsurface scattering BVH traversal */
-
-# if defined(__BVH_LOCAL__)
-# define BVH_FUNCTION_NAME bvh_intersect_local
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/local.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_local_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/local.h"
-# endif
-# endif /* __BVH_LOCAL__ */
-
-/* Volume BVH traversal */
-
-# if defined(__VOLUME__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/volume.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/volume.h"
-# endif
-# endif /* __VOLUME__ */
-
-/* Record all intersections - Shadow BVH traversal */
-
-# if defined(__SHADOW_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
-# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-
-# if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# endif /* __SHADOW_RECORD_ALL__ */
-
-/* Record all intersections - Volume BVH traversal. */
-
-# if defined(__VOLUME_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/volume_all.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/volume_all.h"
-# endif
-# endif /* __VOLUME_RECORD_ALL__ */
-
-# undef BVH_FEATURE
-# undef BVH_NAME_JOIN
-# undef BVH_NAME_EVAL
-# undef BVH_FUNCTION_FULL_NAME
-
-#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
-
-ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
-{
- /* NOTE: Due to some vectorization code non-finite origin point might
- * cause lots of false-positive intersections which will overflow traversal
- * stack.
- * This code is a quick way to perform early output, to avoid crashes in
- * such cases.
- * From production scenes so far it seems it's enough to test first element
- * only.
- * Scene intersection may also called with empty rays for conditional trace
- * calls that evaluate to false, so filter those out.
- */
- return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
-}
-
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ccl_private const Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
-#ifdef __KERNEL_OPTIX__
- uint p0 = 0;
- uint p1 = 0;
- uint p2 = 0;
- uint p3 = 0;
- uint p4 = visibility;
- uint p5 = PRIMITIVE_NONE;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
- else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
- }
-
- optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
- ray->P,
- ray->D,
- 0.0f,
- ray->t,
- ray->time,
- ray_mask,
- ray_flags,
- 0, /* SBT offset for PG_HITD */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- isect->t = __uint_as_float(p0);
- isect->u = __uint_as_float(p1);
- isect->v = __uint_as_float(p2);
- isect->prim = p3;
- isect->object = p4;
- isect->type = p5;
-
- return p5 != PRIMITIVE_NONE;
-#elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- isect->t = ray->t;
- isect->type = PRIMITIVE_NONE;
+ if (!intersection_ray_valid(ray)) {
return false;
}
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- isect->t = ray->t;
- isect->type = PRIMITIVE_NONE;
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
-
- if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
- isect->t = ray->t;
- isect->type = PRIMITIVE_NONE;
- kernel_assert(!"Invalid ift_default");
- return false;
- }
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
- metalrt_intersector_type metalrt_intersect;
-
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
-
- MetalRTIntersectionPayload payload;
- payload.self = ray->self;
- payload.u = 0.0f;
- payload.v = 0.0f;
- payload.visibility = visibility;
-
- typename metalrt_intersector_type::result_type intersection;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- /* No further intersector setup required: Default MetalRT behavior is any-hit. */
- }
- else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- /* No further intersector setup required: Shadow ray early termination is controlled by the
- * intersection handler */
- }
-
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_default,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
-# endif
-
- if (intersection.type == intersection_type::none) {
- isect->t = ray->t;
- isect->type = PRIMITIVE_NONE;
-
- return false;
- }
-
- isect->t = intersection.distance;
-
- isect->prim = payload.prim;
- isect->type = payload.type;
- isect->object = intersection.user_instance_id;
-
- isect->t = intersection.distance;
- if (intersection.type == intersection_type::triangle) {
- isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
- intersection.triangle_barycentric_coord.x;
- isect->v = intersection.triangle_barycentric_coord.x;
- }
- else {
- isect->u = payload.u;
- isect->v = payload.v;
- }
-
- return isect->type != PRIMITIVE_NONE;
-
-#else
-
- if (!scene_intersect_valid(ray)) {
- return false;
- }
-
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- isect->t = ray->t;
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
- IntersectContext rtc_ctx(&ctx);
- RTCRayHit ray_hit;
- ctx.ray = ray;
- kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
- rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
- if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
- ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
- kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
- return true;
- }
- return false;
- }
-# endif /* __EMBREE__ */
-
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
@@ -322,7 +82,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
return bvh_intersect_motion(kg, ray, isect, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
@@ -331,10 +91,22 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
# endif /* __HAIR__ */
return bvh_intersect(kg, ray, isect, visibility);
-#endif /* __KERNEL_OPTIX__ */
}
-#ifdef __BVH_LOCAL__
+/* Single object BVH traversal, for SSS/AO/bevel. */
+
+# ifdef __BVH_LOCAL__
+
+# define BVH_FUNCTION_NAME bvh_intersect_local
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/local.h"
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_local_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/local.h"
+# endif
+
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private LocalIntersection *local_isect,
@@ -342,180 +114,48 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private uint *lcg_state,
int max_hits)
{
-# ifdef __KERNEL_OPTIX__
- uint p0 = pointer_pack_to_uint_0(lcg_state);
- uint p1 = pointer_pack_to_uint_1(lcg_state);
- uint p2 = pointer_pack_to_uint_0(local_isect);
- uint p3 = pointer_pack_to_uint_1(local_isect);
- uint p4 = local_object;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
- uint p5 = max_hits;
-
- if (local_isect) {
- local_isect->num_hits = 0; /* Initialize hit count to zero. */
- }
- optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
- ray->P,
- ray->D,
- 0.0f,
- ray->t,
- ray->time,
- 0xFF,
- /* Need to always call into __anyhit__kernel_optix_local_hit. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 2, /* SBT offset for PG_HITL */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- return p5;
-# elif defined(__METALRT__)
- if (!scene_intersect_valid(ray)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- return false;
- }
-
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ if (!intersection_ray_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
- if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- kernel_assert(!"Invalid ift_local");
- return false;
+# ifdef __OBJECT_MOTION__
+ if (kernel_data.bvh.have_motion) {
+ return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
- metalrt_intersector_type metalrt_intersect;
+# endif /* __OBJECT_MOTION__ */
+ return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+}
+# endif
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
+/* Transparent shadow BVH traversal, recording multiple intersections. */
- MetalRTIntersectionLocalPayload payload;
- payload.self = ray->self;
- payload.local_object = local_object;
- payload.max_hits = max_hits;
- payload.local_isect.num_hits = 0;
- if (lcg_state) {
- payload.has_lcg_state = true;
- payload.lcg_state = *lcg_state;
- }
- payload.result = false;
+# ifdef __SHADOW_RECORD_ALL__
- typename metalrt_intersector_type::result_type intersection;
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
-# if defined(__METALRT_MOTION__)
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
# endif
- if (lcg_state) {
- *lcg_state = payload.lcg_state;
- }
- *local_isect = payload.local_isect;
-
- return payload.result;
-
-# else
-
- if (!scene_intersect_valid(ray)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- return false;
- }
-
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
- SD_OBJECT_TRANSFORM_APPLIED);
- CCLIntersectContext ctx(
- kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
- ctx.lcg_state = lcg_state;
- ctx.max_hits = max_hits;
- ctx.ray = ray;
- ctx.local_isect = local_isect;
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- ctx.local_object_id = local_object;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
- /* If this object has its own BVH, use it. */
- if (has_bvh) {
- RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
- if (geom) {
- float3 P = ray->P;
- float3 dir = ray->D;
- float3 idir = ray->D;
- Transform ob_itfm;
- rtc_ray.tfar = ray->t *
- bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
- /* bvh_instance_motion_push() returns the inverse transform but
- * it's not needed here. */
- (void)ob_itfm;
-
- rtc_ray.org_x = P.x;
- rtc_ray.org_y = P.y;
- rtc_ray.org_z = P.z;
- rtc_ray.dir_x = dir.x;
- rtc_ray.dir_y = dir.y;
- rtc_ray.dir_z = dir.z;
- RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
- kernel_assert(scene);
- if (scene) {
- rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
- }
- }
- }
- else {
- rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
- }
-
- /* rtcOccluded1 sets tfar to -inf if a hit was found. */
- return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
- ;
- }
-# endif /* __EMBREE__ */
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
+# endif
-# ifdef __OBJECT_MOTION__
- if (kernel_data.bvh.have_motion) {
- return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
- }
-# endif /* __OBJECT_MOTION__ */
- return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
-# endif /* __KERNEL_OPTIX__ */
-}
-#endif
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
+# endif
-#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
ccl_private const Ray *ray,
@@ -524,132 +164,12 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
-# ifdef __KERNEL_OPTIX__
- uint p0 = state;
- uint p1 = __float_as_uint(1.0f); /* Throughput. */
- uint p2 = 0; /* Number of hits. */
- uint p3 = max_hits;
- uint p4 = visibility;
- uint p5 = false;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
-
- optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
- ray->P,
- ray->D,
- 0.0f,
- ray->t,
- ray->time,
- ray_mask,
- /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 1, /* SBT offset for PG_HITS */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- *num_recorded_hits = uint16_unpack_from_uint_0(p2);
- *throughput = __uint_as_float(p1);
-
- return p5;
-# elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- return false;
- }
-
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
-
- if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
- kernel_assert(!"Invalid ift_shadow");
- return false;
- }
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
- metalrt_intersector_type metalrt_intersect;
-
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
-
- MetalRTIntersectionShadowPayload payload;
- payload.self = ray->self;
- payload.visibility = visibility;
- payload.max_hits = max_hits;
- payload.num_hits = 0;
- payload.num_recorded_hits = 0;
- payload.throughput = 1.0f;
- payload.result = false;
- payload.state = state;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
-
- typename metalrt_intersector_type::result_type intersection;
-
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_shadow,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
-# endif
-
- *num_recorded_hits = payload.num_recorded_hits;
- *throughput = payload.throughput;
-
- return payload.result;
-
-# else
- if (!scene_intersect_valid(ray)) {
+ if (!intersection_ray_valid(ray)) {
*num_recorded_hits = 0;
*throughput = 1.0f;
return false;
}
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
- Intersection *isect_array = (Intersection *)state->shadow_isect;
- ctx.isect_s = isect_array;
- ctx.max_hits = max_hits;
- ctx.ray = ray;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, visibility);
- rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-
- *num_recorded_hits = ctx.num_recorded_hits;
- *throughput = ctx.throughput;
- return ctx.opaque_hit;
- }
-# endif /* __EMBREE__ */
-
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
@@ -662,7 +182,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
@@ -673,180 +193,83 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-# endif /* __KERNEL_OPTIX__ */
}
-#endif /* __SHADOW_RECORD_ALL__ */
+# endif /* __SHADOW_RECORD_ALL__ */
+
+/* Volume BVH traversal, for initializing or updating the volume stack. */
+
+# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__)
+
+# define BVH_FUNCTION_NAME bvh_intersect_volume
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/volume.h"
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/volume.h"
+# endif
-#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
const uint visibility)
{
-# ifdef __KERNEL_OPTIX__
- uint p0 = 0;
- uint p1 = 0;
- uint p2 = 0;
- uint p3 = 0;
- uint p4 = visibility;
- uint p5 = PRIMITIVE_NONE;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
-
- optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
- ray->P,
- ray->D,
- 0.0f,
- ray->t,
- ray->time,
- ray_mask,
- /* Need to always call into __anyhit__kernel_optix_volume_test. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 3, /* SBT offset for PG_HITV */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- isect->t = __uint_as_float(p0);
- isect->u = __uint_as_float(p1);
- isect->v = __uint_as_float(p2);
- isect->prim = p3;
- isect->object = p4;
- isect->type = p5;
-
- return p5 != PRIMITIVE_NONE;
-# elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- return false;
- }
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ if (!intersection_ray_valid(ray)) {
return false;
}
- if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
- kernel_assert(!"Invalid ift_default");
- return false;
+# ifdef __OBJECT_MOTION__
+ if (kernel_data.bvh.have_motion) {
+ return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
- metalrt_intersector_type metalrt_intersect;
+# endif /* __OBJECT_MOTION__ */
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
+ return bvh_intersect_volume(kg, ray, isect, visibility);
+}
+# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
- MetalRTIntersectionPayload payload;
- payload.self = ray->self;
- payload.visibility = visibility;
+/* Volume BVH traversal, for initializing or updating the volume stack.
+ * Variation that records multiple intersections at once. */
- typename metalrt_intersector_type::result_type intersection;
+# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__)
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/volume_all.h"
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_default,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/volume_all.h"
# endif
- if (intersection.type == intersection_type::none) {
- return false;
- }
-
- isect->prim = payload.prim;
- isect->type = payload.type;
- isect->object = intersection.user_instance_id;
-
- isect->t = intersection.distance;
- if (intersection.type == intersection_type::triangle) {
- isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
- intersection.triangle_barycentric_coord.x;
- isect->v = intersection.triangle_barycentric_coord.x;
- }
- else {
- isect->u = payload.u;
- isect->v = payload.v;
- }
-
- return isect->type != PRIMITIVE_NONE;
-
-# else
- if (!scene_intersect_valid(ray)) {
+ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint max_hits,
+ const uint visibility)
+{
+ if (!intersection_ray_valid(ray)) {
return false;
}
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+ return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __KERNEL_OPTIX__ */
+ return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
-#endif /* __VOLUME__ */
-#ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
- ccl_private const Ray *ray,
- ccl_private Intersection *isect,
- const uint max_hits,
- const uint visibility)
-{
- if (!scene_intersect_valid(ray)) {
- return false;
- }
+# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
- ctx.isect_s = isect;
- ctx.max_hits = max_hits;
- ctx.num_hits = 0;
- ctx.ray = ray;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, visibility);
- rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
- return ctx.num_hits;
- }
-# endif /* __EMBREE__ */
-
-# ifdef __OBJECT_MOTION__
- if (kernel_data.bvh.have_motion) {
- return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
- }
-# endif /* __OBJECT_MOTION__ */
+# undef BVH_FEATURE
+# undef BVH_NAME_JOIN
+# undef BVH_NAME_EVAL
+# undef BVH_FUNCTION_FULL_NAME
- return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
-}
-#endif /* __VOLUME_RECORD_ALL__ */
+#endif /* __BVH2__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h
deleted file mode 100644
index 1c6b9bc1e62..00000000000
--- a/intern/cycles/kernel/bvh/embree.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2022 Blender Foundation. */
-
-#pragma once
-
-#include <embree3/rtcore_ray.h>
-#include <embree3/rtcore_scene.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/bvh/util.h"
-
-#include "util/vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-struct CCLIntersectContext {
- typedef enum {
- RAY_REGULAR = 0,
- RAY_SHADOW_ALL = 1,
- RAY_LOCAL = 2,
- RAY_SSS = 3,
- RAY_VOLUME_ALL = 4,
- } RayType;
-
- KernelGlobals kg;
- RayType type;
-
- /* For avoiding self intersections */
- const Ray *ray;
-
- /* for shadow rays */
- Intersection *isect_s;
- uint max_hits;
- uint num_hits;
- uint num_recorded_hits;
- float throughput;
- float max_t;
- bool opaque_hit;
-
- /* for SSS Rays: */
- LocalIntersection *local_isect;
- int local_object_id;
- uint *lcg_state;
-
- CCLIntersectContext(KernelGlobals kg_, RayType type_)
- {
- kg = kg_;
- type = type_;
- ray = NULL;
- max_hits = 1;
- num_hits = 0;
- num_recorded_hits = 0;
- throughput = 1.0f;
- max_t = FLT_MAX;
- opaque_hit = false;
- isect_s = NULL;
- local_isect = NULL;
- local_object_id = -1;
- lcg_state = NULL;
- }
-};
-
-class IntersectContext {
- public:
- IntersectContext(CCLIntersectContext *ctx)
- {
- rtcInitIntersectContext(&context);
- userRayExt = ctx;
- }
- RTCIntersectContext context;
- CCLIntersectContext *userRayExt;
-};
-
-ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
- RTCRay &rtc_ray,
- const uint visibility)
-{
- rtc_ray.org_x = ray.P.x;
- rtc_ray.org_y = ray.P.y;
- rtc_ray.org_z = ray.P.z;
- rtc_ray.dir_x = ray.D.x;
- rtc_ray.dir_y = ray.D.y;
- rtc_ray.dir_z = ray.D.z;
- rtc_ray.tnear = 0.0f;
- rtc_ray.tfar = ray.t;
- rtc_ray.time = ray.time;
- rtc_ray.mask = visibility;
-}
-
-ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
- RTCRayHit &rayhit,
- const uint visibility)
-{
- kernel_embree_setup_ray(ray, rayhit.ray, visibility);
- rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
- rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
-}
-
-ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
- const RTCHit *hit,
- const Ray *ray)
-{
- bool status = false;
- if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
- const int oID = hit->instID[0] / 2;
- if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
- const int pID = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- status = intersection_skip_self_shadow(ray->self, oID, pID);
- }
- }
- else {
- const int oID = hit->geomID / 2;
- if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
- const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
- status = intersection_skip_self_shadow(ray->self, oID, pID);
- }
- }
-
- return status;
-}
-
-ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
- const RTCRay *ray,
- const RTCHit *hit,
- Intersection *isect)
-{
- isect->t = ray->tfar;
- if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
- isect->prim = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- isect->object = hit->instID[0] / 2;
- }
- else {
- isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
- isect->object = hit->geomID / 2;
- }
-
- const bool is_hair = hit->geomID & 1;
- if (is_hair) {
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
- isect->type = segment.type;
- isect->prim = segment.prim;
- isect->u = hit->u;
- isect->v = hit->v;
- }
- else {
- isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
- isect->u = 1.0f - hit->v - hit->u;
- isect->v = hit->u;
- }
-}
-
-ccl_device_inline void kernel_embree_convert_sss_hit(
- KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
-{
- isect->u = 1.0f - hit->v - hit->u;
- isect->v = hit->u;
- isect->t = ray->tfar;
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.bvh.scene, object * 2));
- isect->prim = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- isect->object = object;
- isect->type = kernel_data_fetch(objects, object).primitive_type;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h
index 3b6b30ea93d..add61adc126 100644
--- a/intern/cycles/kernel/bvh/local.h
+++ b/intern/cycles/kernel/bvh/local.h
@@ -47,8 +47,9 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
+ float tmin = ray->tmin;
int object = OBJECT_NONE;
- float isect_t = ray->t;
+ float isect_t = ray->tmax;
if (local_isect != NULL) {
local_isect->num_hits = 0;
@@ -58,10 +59,9 @@ ccl_device_inline
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
#else
- isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
#endif
object = local_object;
}
@@ -81,6 +81,7 @@ ccl_device_inline
dir,
#endif
idir,
+ tmin,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
@@ -155,6 +156,7 @@ ccl_device_inline
local_object,
prim,
prim_addr,
+ tmin,
isect_t,
lcg_state,
max_hits)) {
@@ -191,6 +193,7 @@ ccl_device_inline
local_object,
prim,
prim_addr,
+ tmin,
isect_t,
lcg_state,
max_hits)) {
diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h
deleted file mode 100644
index 04289e259a7..00000000000
--- a/intern/cycles/kernel/bvh/metal.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2021-2022 Blender Foundation */
-
-struct MetalRTIntersectionPayload {
- RaySelfPrimitives self;
- uint visibility;
- float u, v;
- int prim;
- int type;
-#if defined(__METALRT_MOTION__)
- float time;
-#endif
-};
-
-struct MetalRTIntersectionLocalPayload {
- RaySelfPrimitives self;
- uint local_object;
- uint lcg_state;
- short max_hits;
- bool has_lcg_state;
- bool result;
- LocalIntersection local_isect;
-};
-
-struct MetalRTIntersectionShadowPayload {
- RaySelfPrimitives self;
- uint visibility;
-#if defined(__METALRT_MOTION__)
- float time;
-#endif
- int state;
- float throughput;
- short max_hits;
- short num_hits;
- short num_recorded_hits;
- bool result;
-};
diff --git a/intern/cycles/kernel/bvh/nodes.h b/intern/cycles/kernel/bvh/nodes.h
index c19dea9223b..e02841fad16 100644
--- a/intern/cycles/kernel/bvh/nodes.h
+++ b/intern/cycles/kernel/bvh/nodes.h
@@ -18,7 +18,8 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 idir,
- const float t,
+ const float tmin,
+ const float tmax,
const int node_addr,
const uint visibility,
float dist[2])
@@ -39,8 +40,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c0hiy = (node1.z - P.y) * idir.y;
float c0loz = (node2.x - P.z) * idir.z;
float c0hiz = (node2.z - P.z) * idir.z;
- float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
- float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
+ float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
+ float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
float c1lox = (node0.y - P.x) * idir.x;
float c1hix = (node0.w - P.x) * idir.x;
@@ -48,8 +49,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c1hiy = (node1.w - P.y) * idir.y;
float c1loz = (node2.y - P.z) * idir.z;
float c1hiz = (node2.w - P.z) * idir.z;
- float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
- float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
+ float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
+ float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
dist[0] = c0min;
dist[1] = c1min;
@@ -66,7 +67,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float3 P,
const float3 dir,
- const float t,
+ const float tmin,
+ const float tmax,
int node_addr,
int child,
float dist[2])
@@ -83,8 +85,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float far_x = max(lower_xyz.x, upper_xyz.x);
const float far_y = max(lower_xyz.y, upper_xyz.y);
const float far_z = max(lower_xyz.z, upper_xyz.z);
- const float tnear = max4(0.0f, near_x, near_y, near_z);
- const float tfar = min4(t, far_x, far_y, far_z);
+ const float tnear = max4(tmin, near_x, near_y, near_z);
+ const float tfar = min4(tmax, far_x, far_y, far_z);
*dist = tnear;
return tnear <= tfar;
}
@@ -93,7 +95,8 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
- const float t,
+ const float tmin,
+ const float tmax,
const int node_addr,
const uint visibility,
float dist[2])
@@ -102,7 +105,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0);
#endif
- if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
#endif
@@ -110,7 +113,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
mask |= 1;
}
}
- if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
+ if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
#endif
@@ -125,16 +128,17 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
- const float t,
+ const float tmin,
+ const float tmax,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_data_fetch(bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
+ return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist);
}
else {
- return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
+ return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist);
}
}
diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h
index e86fe867eac..f37af2a1e65 100644
--- a/intern/cycles/kernel/bvh/shadow_all.h
+++ b/intern/cycles/kernel/bvh/shadow_all.h
@@ -49,26 +49,15 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
+ float tmin = ray->tmin;
int object = OBJECT_NONE;
uint num_hits = 0;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
/* Max distance in world space. May be dynamically reduced when max number of
* recorded hits is exceeded and we no longer need to find hits beyond the max
* distance found. */
- float t_max_world = ray->t;
-
- /* Current maximum distance to the intersection.
- * Is calculated as a ray length, transformed to an object space when entering
- * instance node. */
- float t_max_current = ray->t;
-
- /* Conversion from world to local space for the current instance if any, 1.0
- * otherwise. */
- float t_world_to_instance = 1.0f;
+ const float tmax = ray->tmax;
+ float tmax_hits = tmax;
*r_num_recorded_hits = 0;
*r_throughput = 1.0f;
@@ -88,7 +77,8 @@ ccl_device_inline
dir,
#endif
idir,
- t_max_current,
+ tmin,
+ tmax,
node_addr,
visibility,
dist);
@@ -157,7 +147,7 @@ ccl_device_inline
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(
- kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
+ kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -166,7 +156,8 @@ ccl_device_inline
&isect,
P,
dir,
- t_max_current,
+ tmin,
+ tmax,
ray->time,
visibility,
prim_object,
@@ -190,7 +181,7 @@ ccl_device_inline
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
hit = curve_intersect(
- kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
+ kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type);
break;
}
@@ -208,7 +199,7 @@ ccl_device_inline
const int point_type = kernel_data_fetch(prim_type, prim_addr);
hit = point_intersect(
- kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
+ kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type);
break;
}
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
@@ -220,9 +211,6 @@ ccl_device_inline
/* shadow ray early termination */
if (hit) {
- /* Convert intersection distance to world space. */
- isect.t /= t_world_to_instance;
-
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
* the primitive has a transparent shadow shader? */
@@ -254,7 +242,7 @@ ccl_device_inline
if (record_intersection) {
/* Test if we need to record this transparent intersection. */
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) {
+ if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits);
@@ -276,7 +264,7 @@ ccl_device_inline
}
/* Limit the ray distance and stop counting hits beyond this. */
- t_max_world = max(isect.t, max_t);
+ tmax_hits = max(isect.t, max_t);
}
integrator_state_write_shadow_isect(state, &isect, isect_index);
@@ -294,15 +282,11 @@ ccl_device_inline
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
- t_world_to_instance = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
- /* Convert intersection to object space. */
- t_max_current *= t_world_to_instance;
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -317,16 +301,12 @@ ccl_device_inline
/* Instance pop. */
#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir);
#else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir);
#endif
- /* Restore world space ray length. */
- t_max_current = ray->t;
-
object = OBJECT_NONE;
- t_world_to_instance = 1.0f;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h
index 784fbf4fd11..9069d16912b 100644
--- a/intern/cycles/kernel/bvh/traversal.h
+++ b/intern/cycles/kernel/bvh/traversal.h
@@ -43,13 +43,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
+ isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -71,6 +68,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
dir,
#endif
idir,
+ tmin,
isect->t,
node_addr,
visibility,
@@ -133,8 +131,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- if (triangle_intersect(
- kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
+ if (triangle_intersect(kg,
+ isect,
+ P,
+ dir,
+ tmin,
+ isect->t,
+ visibility,
+ prim_object,
+ prim,
+ prim_addr)) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -147,6 +153,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
isect,
P,
dir,
+ tmin,
isect->t,
ray->time,
visibility,
@@ -174,7 +181,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = curve_intersect(
- kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
+ kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -195,7 +202,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int point_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = point_intersect(
- kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
+ kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -212,9 +219,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
- isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
++stack_ptr;
@@ -231,9 +238,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
/* instance pop */
#if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir);
#else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir);
#endif
object = OBJECT_NONE;
diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h
index 572e023db25..b67c9394bea 100644
--- a/intern/cycles/kernel/bvh/util.h
+++ b/intern/cycles/kernel/bvh/util.h
@@ -5,7 +5,35 @@
CCL_NAMESPACE_BEGIN
-#if defined(__KERNEL_CPU__)
+ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray)
+{
+ /* NOTE: Due to some vectorization code, a non-finite origin point might
+ * cause lots of false-positive intersections which will overflow the traversal
+ * stack.
+ * This code is a quick way to perform an early out, to avoid crashes in
+ * such cases.
+ * From production scenes so far it seems it's enough to test the first element
+ * only.
+ * Scene intersection may also be called with empty rays for conditional trace
+ * calls that evaluate to false, so filter those out.
+ */
+ return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
+}
+
+/* Offset intersection distance by the smallest possible amount, to skip
+ * intersections at this distance. This works in cases where the ray start
+ * position is unchanged and only tmin is updated, since for self
+ * intersection we'll be comparing against the exact same distances. */
+ccl_device_forceinline float intersection_t_offset(const float t)
+{
+ /* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with
+ * non-negative and finite t. */
+ kernel_assert(t >= 0.0f && isfinite_safe(t));
+ const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
+ return __uint_as_float(bits);
+}
+
+#ifndef __KERNEL_GPU__
ccl_device int intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection *)a;
diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h
index 9715712a8f2..cc3915b4bf7 100644
--- a/intern/cycles/kernel/bvh/volume.h
+++ b/intern/cycles/kernel/bvh/volume.h
@@ -46,13 +46,10 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
+ isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -73,6 +70,7 @@ ccl_device_inline
dir,
#endif
idir,
+ tmin,
isect->t,
node_addr,
visibility,
@@ -140,7 +138,7 @@ ccl_device_inline
continue;
}
triangle_intersect(
- kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
+ kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr);
}
break;
}
@@ -165,6 +163,7 @@ ccl_device_inline
isect,
P,
dir,
+ tmin,
isect->t,
ray->time,
visibility,
@@ -186,9 +185,9 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
- isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
++stack_ptr;
@@ -212,9 +211,9 @@ ccl_device_inline
/* instance pop */
#if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir);
#else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir);
#endif
object = OBJECT_NONE;
diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h
index d06ea8fe557..5cdea3e354c 100644
--- a/intern/cycles/kernel/bvh/volume_all.h
+++ b/intern/cycles/kernel/bvh/volume_all.h
@@ -44,21 +44,17 @@ ccl_device_inline
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
- const float tmax = ray->t;
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
+ float isect_t = ray->tmax;
int num_hits_in_instance = 0;
uint num_hits = 0;
- isect_array->t = tmax;
+ isect_array->t = ray->tmax;
/* traversal loop */
do {
@@ -75,6 +71,7 @@ ccl_device_inline
dir,
#endif
idir,
+ tmin,
isect_t,
node_addr,
visibility,
@@ -141,8 +138,16 @@ ccl_device_inline
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- hit = triangle_intersect(
- kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
+ hit = triangle_intersect(kg,
+ isect_array,
+ P,
+ dir,
+ tmin,
+ isect_t,
+ visibility,
+ prim_object,
+ prim,
+ prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -150,18 +155,6 @@ ccl_device_inline
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
- if (object != OBJECT_NONE) {
-#if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
return num_hits;
}
}
@@ -189,6 +182,7 @@ ccl_device_inline
isect_array,
P,
dir,
+ tmin,
isect_t,
ray->time,
visibility,
@@ -202,18 +196,6 @@ ccl_device_inline
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
return num_hits;
}
}
@@ -232,9 +214,9 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
- isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
num_hits_in_instance = 0;
@@ -260,28 +242,11 @@ ccl_device_inline
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir);
#else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir);
#endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
diff --git a/intern/cycles/kernel/camera/camera.h b/intern/cycles/kernel/camera/camera.h
index 25960a94ddb..926ccf7b86f 100644
--- a/intern/cycles/kernel/camera/camera.h
+++ b/intern/cycles/kernel/camera/camera.h
@@ -165,9 +165,11 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
float nearclip = kernel_data.cam.nearclip * z_inv;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
- ray->t = kernel_data.cam.cliplength * z_inv;
+ ray->tmin = 0.0f;
+ ray->tmax = kernel_data.cam.cliplength * z_inv;
#else
- ray->t = FLT_MAX;
+ ray->tmin = 0.0f;
+ ray->tmax = FLT_MAX;
#endif
}
@@ -231,9 +233,11 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
#ifdef __CAMERA_CLIPPING__
/* clipping */
- ray->t = kernel_data.cam.cliplength;
+ ray->tmin = 0.0f;
+ ray->tmax = kernel_data.cam.cliplength;
#else
- ray->t = FLT_MAX;
+ ray->tmin = 0.0f;
+ ray->tmax = FLT_MAX;
#endif
}
@@ -258,7 +262,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
/* indicates ray should not receive any light, outside of the lens */
if (is_zero(D)) {
- ray->t = 0.0f;
+ ray->tmax = 0.0f;
return;
}
@@ -349,9 +353,11 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
float nearclip = cam->nearclip;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
- ray->t = cam->cliplength;
+ ray->tmin = 0.0f;
+ ray->tmax = cam->cliplength;
#else
- ray->t = FLT_MAX;
+ ray->tmin = 0.0f;
+ ray->tmax = FLT_MAX;
#endif
}
@@ -368,7 +374,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
ccl_private Ray *ray)
{
/* pixel filter */
- int filter_table_offset = kernel_data.film.filter_table_offset;
+ int filter_table_offset = kernel_data.tables.filter_table_offset;
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 2cdf6c9f349..e7f24b89458 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -3,7 +3,7 @@
#pragma once
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# include <fenv.h>
#endif
diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h
new file mode 100644
index 00000000000..807d0650fc3
--- /dev/null
+++ b/intern/cycles/kernel/data_template.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#ifndef KERNEL_STRUCT_BEGIN /* Hooks left undefined by the includer expand to nothing. */
+# define KERNEL_STRUCT_BEGIN(name, parent)
+#endif
+#ifndef KERNEL_STRUCT_END
+# define KERNEL_STRUCT_END(name)
+#endif
+#ifndef KERNEL_STRUCT_MEMBER
+# define KERNEL_STRUCT_MEMBER(parent, type, name)
+#endif
+
+/* Background: shaders, transparency and portal/sun/importance-map sampling settings. */
+
+KERNEL_STRUCT_BEGIN(KernelBackground, background)
+/* xyz stores the direction, w the angle. float4 instead of float3 is used
+ * to ensure consistent padding/alignment across devices. */
+KERNEL_STRUCT_MEMBER(background, float4, sun)
+/* Shaders, stored as shader indices only. */
+KERNEL_STRUCT_MEMBER(background, int, surface_shader)
+KERNEL_STRUCT_MEMBER(background, int, volume_shader)
+KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
+KERNEL_STRUCT_MEMBER(background, int, transparent)
+KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
+/* Portal sampling. */
+KERNEL_STRUCT_MEMBER(background, float, portal_weight)
+KERNEL_STRUCT_MEMBER(background, int, num_portals)
+KERNEL_STRUCT_MEMBER(background, int, portal_offset)
+/* Sun sampling. */
+KERNEL_STRUCT_MEMBER(background, float, sun_weight)
+/* Importance map sampling. */
+KERNEL_STRUCT_MEMBER(background, float, map_weight)
+KERNEL_STRUCT_MEMBER(background, int, map_res_x)
+KERNEL_STRUCT_MEMBER(background, int, map_res_y)
+/* Multiple importance sampling. */
+KERNEL_STRUCT_MEMBER(background, int, use_mis)
+/* Lightgroup. */
+KERNEL_STRUCT_MEMBER(background, int, lightgroup)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(background, int, pad1)
+KERNEL_STRUCT_MEMBER(background, int, pad2)
+KERNEL_STRUCT_MEMBER(background, int, pad3)
+KERNEL_STRUCT_END(KernelBackground)
+
+/* BVH: Cycles' own BVH2, used if no native device acceleration structure is used. */
+
+KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
+KERNEL_STRUCT_MEMBER(bvh, int, root)
+KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
+KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
+KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
+KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
+KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
+KERNEL_STRUCT_MEMBER(bvh, int, pad1) /* pad1/pad2: presumably alignment padding, as in the other structs. */
+KERNEL_STRUCT_MEMBER(bvh, int, pad2)
+KERNEL_STRUCT_END(KernelBVH)
+
+/* Film: color space transforms, exposure and render pass layout. */
+
+KERNEL_STRUCT_BEGIN(KernelFilm, film)
+/* XYZ to rendering color space transform. float4 instead of float3 to
+ * ensure consistent padding/alignment across devices. */
+KERNEL_STRUCT_MEMBER(film, float4, xyz_to_r)
+KERNEL_STRUCT_MEMBER(film, float4, xyz_to_g)
+KERNEL_STRUCT_MEMBER(film, float4, xyz_to_b)
+KERNEL_STRUCT_MEMBER(film, float4, rgb_to_y)
+/* Rec709 to rendering color space. */
+KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r)
+KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g)
+KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b)
+KERNEL_STRUCT_MEMBER(film, int, is_rec709)
+/* Exposure. */
+KERNEL_STRUCT_MEMBER(film, float, exposure)
+/* Passes used. */
+KERNEL_STRUCT_MEMBER(film, int, pass_flag)
+KERNEL_STRUCT_MEMBER(film, int, light_pass_flag)
+/* Pass offsets. */
+KERNEL_STRUCT_MEMBER(film, int, pass_stride)
+KERNEL_STRUCT_MEMBER(film, int, pass_combined)
+KERNEL_STRUCT_MEMBER(film, int, pass_depth)
+KERNEL_STRUCT_MEMBER(film, int, pass_position)
+KERNEL_STRUCT_MEMBER(film, int, pass_normal)
+KERNEL_STRUCT_MEMBER(film, int, pass_roughness)
+KERNEL_STRUCT_MEMBER(film, int, pass_motion)
+KERNEL_STRUCT_MEMBER(film, int, pass_motion_weight)
+KERNEL_STRUCT_MEMBER(film, int, pass_uv)
+KERNEL_STRUCT_MEMBER(film, int, pass_object_id)
+KERNEL_STRUCT_MEMBER(film, int, pass_material_id)
+KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_color)
+KERNEL_STRUCT_MEMBER(film, int, pass_glossy_color)
+KERNEL_STRUCT_MEMBER(film, int, pass_transmission_color)
+KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_indirect)
+KERNEL_STRUCT_MEMBER(film, int, pass_glossy_indirect)
+KERNEL_STRUCT_MEMBER(film, int, pass_transmission_indirect)
+KERNEL_STRUCT_MEMBER(film, int, pass_volume_indirect)
+KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct)
+KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct)
+KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct)
+KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct)
+KERNEL_STRUCT_MEMBER(film, int, pass_emission)
+KERNEL_STRUCT_MEMBER(film, int, pass_background)
+KERNEL_STRUCT_MEMBER(film, int, pass_ao)
+KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
+KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
+KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
+KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
+KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
+KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
+/* Cryptomatte. */
+KERNEL_STRUCT_MEMBER(film, int, cryptomatte_passes)
+KERNEL_STRUCT_MEMBER(film, int, cryptomatte_depth)
+KERNEL_STRUCT_MEMBER(film, int, pass_cryptomatte)
+/* Adaptive sampling. */
+KERNEL_STRUCT_MEMBER(film, int, pass_adaptive_aux_buffer)
+KERNEL_STRUCT_MEMBER(film, int, pass_sample_count)
+/* Mist. */
+KERNEL_STRUCT_MEMBER(film, int, pass_mist)
+KERNEL_STRUCT_MEMBER(film, float, mist_start)
+KERNEL_STRUCT_MEMBER(film, float, mist_inv_depth)
+KERNEL_STRUCT_MEMBER(film, float, mist_falloff)
+/* Denoising. */
+KERNEL_STRUCT_MEMBER(film, int, pass_denoising_normal)
+KERNEL_STRUCT_MEMBER(film, int, pass_denoising_albedo)
+KERNEL_STRUCT_MEMBER(film, int, pass_denoising_depth)
+/* AOVs. */
+KERNEL_STRUCT_MEMBER(film, int, pass_aov_color)
+KERNEL_STRUCT_MEMBER(film, int, pass_aov_value)
+/* Light groups. */
+KERNEL_STRUCT_MEMBER(film, int, pass_lightgroup)
+/* Baking. */
+KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive)
+KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential)
+/* Shadow catcher. */
+KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(film, int, pad1)
+KERNEL_STRUCT_MEMBER(film, int, pad2)
+KERNEL_STRUCT_END(KernelFilm)
+
+/* Integrator: light sampling, bounce limits, clamping, sampling pattern and volume settings. */
+
+KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
+/* Emission. */
+KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
+KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
+KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
+KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
+KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
+KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
+/* Bounces. */
+KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, max_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, max_diffuse_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, max_glossy_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, max_transmission_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, max_volume_bounce)
+/* AO bounces. */
+KERNEL_STRUCT_MEMBER(integrator, int, ao_bounces)
+KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_distance)
+KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_factor)
+KERNEL_STRUCT_MEMBER(integrator, float, ao_additive_factor)
+/* Transparency. */
+KERNEL_STRUCT_MEMBER(integrator, int, transparent_min_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, transparent_max_bounce)
+KERNEL_STRUCT_MEMBER(integrator, int, transparent_shadows)
+/* Caustics. */
+KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective)
+KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive)
+KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy)
+/* Seed. */
+KERNEL_STRUCT_MEMBER(integrator, int, seed)
+/* Clamp. */
+KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
+KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
+/* MIS. */
+KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
+/* Caustics (overall switch, separate from the reflective/refractive flags above). */
+KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
+/* Sampling pattern. */
+KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
+KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
+/* Volume render. */
+KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
+KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)
+KERNEL_STRUCT_MEMBER(integrator, float, volume_step_rate)
+/* Shadow catcher. */
+KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
+/* Closure filter. */
+KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
+/* MIS debugging. */
+KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(integrator, int, pad1)
+KERNEL_STRUCT_END(KernelIntegrator)
+
+/* SVM. For shader specialization.
+ * The members are generated: SHADER_NODE_TYPE is defined to emit one int member, and
+ * the included template presumably invokes it once per node type (the template is not
+ * visible here). */
+
+KERNEL_STRUCT_BEGIN(KernelSVMUsage, svm_usage)
+#define SHADER_NODE_TYPE(type) KERNEL_STRUCT_MEMBER(svm_usage, int, type)
+#include "kernel/svm/node_types_template.h"
+KERNEL_STRUCT_END(KernelSVMUsage)
+
+#undef KERNEL_STRUCT_BEGIN
+#undef KERNEL_STRUCT_MEMBER
+#undef KERNEL_STRUCT_END
diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h
new file mode 100644
index 00000000000..b5ea3d831f4
--- /dev/null
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@@ -0,0 +1,609 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* CPU Embree implementation of ray-scene intersection. */
+
+#pragma once
+
+#include <embree3/rtcore_ray.h>
+#include <embree3/rtcore_scene.h>
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+#include "kernel/geom/object.h"
+#include "kernel/integrator/state.h"
+#include "kernel/sample/lcg.h"
+
+#include "util/vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Low bit of an Embree geometry ID marks curve (hair) geometry. The argument is
+ * parenthesized so that compound expressions such as `a | b` are tested as a whole. */
+#define EMBREE_IS_HAIR(x) ((x) & 1)
+
+/* Intersection context: per-query state read and updated by the Embree filter callbacks. */
+
+struct CCLIntersectContext {
+ typedef enum { /* Selects the branch taken in kernel_embree_filter_occluded_func(). */
+ RAY_REGULAR = 0,
+ RAY_SHADOW_ALL = 1,
+ RAY_LOCAL = 2,
+ RAY_SSS = 3,
+ RAY_VOLUME_ALL = 4,
+ } RayType;
+
+ KernelGlobals kg;
+ RayType type;
+
+ /* Ray being traced; its `self` primitives are used to reject self intersections. */
+ const Ray *ray;
+
+ /* For shadow rays. isect_s, max_hits and num_hits are also used by volume rays,
+ * and max_hits by local/SSS rays. */
+ Intersection *isect_s;
+ uint max_hits;
+ uint num_hits;
+ uint num_recorded_hits;
+ float throughput;
+ float max_t;
+ bool opaque_hit;
+
+ /* For local and SSS rays. */
+ LocalIntersection *local_isect;
+ int local_object_id;
+ uint *lcg_state;
+
+ CCLIntersectContext(KernelGlobals kg_, RayType type_) /* Defaults: one hit, no output buffers. */
+ {
+ kg = kg_;
+ type = type_;
+ ray = NULL;
+ max_hits = 1;
+ num_hits = 0;
+ num_recorded_hits = 0;
+ throughput = 1.0f;
+ max_t = FLT_MAX;
+ opaque_hit = false;
+ isect_s = NULL;
+ local_isect = NULL;
+ local_object_id = -1;
+ lcg_state = NULL;
+ }
+};
+
+class IntersectContext { /* Embree context bundled with a pointer to the Cycles query state. */
+ public:
+ IntersectContext(CCLIntersectContext *ctx)
+ {
+ rtcInitIntersectContext(&context);
+ userRayExt = ctx;
+ }
+ RTCIntersectContext context; /* Keep first: callbacks cast args->context to IntersectContext *. */
+ CCLIntersectContext *userRayExt;
+};
+
+/* Utilities. */
+
+/* Fill an Embree ray from a Cycles ray: the [tmin, tmax] interval maps to
+ * [tnear, tfar], and the visibility flags are used as the Embree ray mask. */
+ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
+                                               RTCRay &rtc_ray,
+                                               const uint visibility)
+{
+  /* Query parameters. */
+  rtc_ray.mask = visibility;
+  rtc_ray.time = ray.time;
+  rtc_ray.tnear = ray.tmin;
+  rtc_ray.tfar = ray.tmax;
+
+  /* Direction. */
+  rtc_ray.dir_x = ray.D.x;
+  rtc_ray.dir_y = ray.D.y;
+  rtc_ray.dir_z = ray.D.z;
+
+  /* Origin. */
+  rtc_ray.org_x = ray.P.x;
+  rtc_ray.org_y = ray.P.y;
+  rtc_ray.org_z = ray.P.z;
+}
+
+/* Prepare a ray/hit pair for an intersection query: the hit part is marked as
+ * "no geometry, no instance" and the ray part is filled from the Cycles ray. */
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
+                                                  RTCRayHit &rayhit,
+                                                  const uint visibility)
+{
+  RTCHit &hit = rayhit.hit;
+  hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
+  hit.geomID = RTC_INVALID_GEOMETRY_ID;
+
+  kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+}
+
+/* Test whether an Embree hit is one of the primitives the ray must ignore
+ * (ray->self). The object index is the instance ID (instanced hit) or geometry ID
+ * (non-instanced hit) divided by two; the primitive index is the Embree primitive
+ * ID plus the offset stored as user data on the geometry. The geometry lookup is
+ * only done when the object matches ray->self.object or ray->self.light_object. */
+ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
+                                                          const RTCHit *hit,
+                                                          const Ray *ray)
+{
+  const bool is_instanced = (hit->instID[0] != RTC_INVALID_GEOMETRY_ID);
+  const int object = (is_instanced ? hit->instID[0] : hit->geomID) / 2;
+
+  /* Cheap rejection: the hit is on an object the ray has no self primitives on. */
+  if ((ray->self.object != object) && (ray->self.light_object != object)) {
+    return false;
+  }
+
+  RTCGeometry geom;
+  if (is_instanced) {
+    /* The instance geometry stores its own scene as user data. */
+    RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+    geom = rtcGetGeometry(inst_scene, hit->geomID);
+  }
+  else {
+    geom = rtcGetGeometry(kernel_data.device_bvh, hit->geomID);
+  }
+
+  const int prim = hit->primID + (intptr_t)rtcGetGeometryUserData(geom);
+  return intersection_skip_self_shadow(ray->self, object, prim);
+}
+
+/* Convert an Embree hit into a Cycles Intersection.
+ * - t is taken from ray->tfar.
+ * - object is the instance ID (instanced) or geometry ID (non-instanced) divided by two.
+ * - prim is the Embree primitive ID plus the offset stored as geometry user data; for
+ *   curves it is then replaced by the primitive of the looked-up curve segment.
+ * - u/v are copied for curves and converted as (1 - v - u, u) otherwise. */
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
+                                                 const RTCRay *ray,
+                                                 const RTCHit *hit,
+                                                 Intersection *isect)
+{
+  isect->t = ray->tfar;
+
+  RTCGeometry geom;
+  if (hit->instID[0] == RTC_INVALID_GEOMETRY_ID) {
+    geom = rtcGetGeometry(kernel_data.device_bvh, hit->geomID);
+    isect->object = hit->geomID / 2;
+  }
+  else {
+    /* The instance geometry stores its own scene as user data. */
+    RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+        rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+    geom = rtcGetGeometry(inst_scene, hit->geomID);
+    isect->object = hit->instID[0] / 2;
+  }
+  isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(geom);
+
+  if (!EMBREE_IS_HAIR(hit->geomID)) {
+    isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+    isect->u = 1.0f - hit->v - hit->u;
+    isect->v = hit->u;
+    return;
+  }
+
+  /* Curves: the primitive index so far addresses a curve segment. */
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
+  isect->type = segment.type;
+  isect->prim = segment.prim;
+  isect->u = hit->u;
+  isect->v = hit->v;
+}
+
+/* Convert a hit found in the per-object scene of `object` into a Cycles
+ * Intersection. The object scene is looked up from `object` (geometry ID
+ * object * 2) instead of from the hit's instance ID. */
+ccl_device_inline void kernel_embree_convert_sss_hit(
+    KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
+{
+  isect->object = object;
+  isect->type = kernel_data_fetch(objects, object).primitive_type;
+  isect->t = ray->tfar;
+
+  RTCScene object_scene = (RTCScene)rtcGetGeometryUserData(
+      rtcGetGeometry(kernel_data.device_bvh, object * 2));
+  RTCGeometry geom = rtcGetGeometry(object_scene, hit->geomID);
+  isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(geom);
+
+  isect->v = hit->u;
+  isect->u = 1.0f - hit->v - hit->u;
+}
+
+/* Ray filter functions. */
+
+/* Embree filter callback that only rejects self intersections: if the hit is one
+ * of the primitives listed in the ray's `self` data, it is marked invalid so that
+ * Embree continues tracing. No hits are recorded here. */
+ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
+{
+  /* Current implementation in Cycles assumes only single-ray intersection queries. */
+  assert(args->N == 1);
+
+  const CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+  const RTCHit *hit = (const RTCHit *)args->hit;
+
+  if (kernel_embree_is_self_intersection(ctx->kg, hit, ctx->ray)) {
+    args->valid[0] = 0;
+  }
+}
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls.
+ *
+ * The behavior is selected by ctx->type:
+ * - RAY_SHADOW_ALL: counts transparent hits, multiplies curve shadow transparency
+ * into ctx->throughput and records the closest hits in ctx->isect_s. A hit that
+ * blocks all light sets ctx->opaque_hit and is left valid.
+ * - RAY_LOCAL / RAY_SSS: records hits on ctx->local_object_id in ctx->local_isect.
+ * - RAY_VOLUME_ALL: appends hits on objects with SD_OBJECT_HAS_VOLUME to ctx->isect_s.
+ * - RAY_REGULAR (and default): only rejects self intersections.
+ * Setting *args->valid to 0 rejects the hit, which makes Embree continue tracing.
+ */
+ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args)
+{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
+ assert(args->N == 1);
+
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ switch (ctx->type) {
+ case CCLIntersectContext::RAY_SHADOW_ALL: {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+ /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
+ const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
+ if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
+ ctx->opaque_hit = true;
+ return;
+ }
+
+ ++ctx->num_hits;
+
+ /* Always use baked shadow transparency for curves. */
+ if (current_isect.type & PRIMITIVE_CURVE) {
+ ctx->throughput *= intersection_curve_shadow_transparency(
+ kg, current_isect.object, current_isect.prim, current_isect.u);
+
+ if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ ctx->opaque_hit = true;
+ return;
+ }
+ else {
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ /* Test if we need to record this transparent intersection. */
+ const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
+ /* If maximum number of hits was reached, replace the intersection with the
+ * highest distance. We want to find the N closest intersections. */
+ const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
+ uint isect_index = num_recorded_hits;
+ if (num_recorded_hits + 1 >= max_record_hits) {
+ float max_t = ctx->isect_s[0].t; /* NOTE(review): with no hit recorded yet (e.g.
+ * max_record_hits == 1) slot 0 has not been written
+ * by this callback - confirm it is initialized. */
+ uint max_recorded_hit = 0;
+
+ for (uint i = 1; i < num_recorded_hits; ++i) {
+ if (ctx->isect_s[i].t > max_t) {
+ max_recorded_hit = i;
+ max_t = ctx->isect_s[i].t;
+ }
+ }
+
+ if (num_recorded_hits >= max_record_hits) {
+ isect_index = max_recorded_hit;
+ }
+
+ /* Limit the ray distance and stop counting hits beyond this.
+ * TODO: is there some way we can tell Embree to stop intersecting beyond
+ * this distance when max number of hits is reached? Or maybe it will
+ * become irrelevant if we make max_hits a very high number on the CPU. */
+ ctx->max_t = max(current_isect.t, max_t);
+ }
+
+ ctx->isect_s[isect_index] = current_isect;
+ }
+
+ /* Always increase the number of recorded hits, even beyond the maximum,
+ * so that we can detect this and trace another ray if needed. */
+ ++ctx->num_recorded_hits;
+
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_LOCAL:
+ case CCLIntersectContext::RAY_SSS: {
+ /* Check if it's hitting the correct object. */
+ Intersection current_isect;
+ if (ctx->type == CCLIntersectContext::RAY_SSS) {
+ kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
+ }
+ else {
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (ctx->local_object_id != current_isect.object) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ }
+ if (intersection_skip_self_local(cray->self, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ /* No intersection information requested, just return a hit. */
+ if (ctx->max_hits == 0) {
+ break;
+ }
+
+ /* Ignore curves. */
+ if (EMBREE_IS_HAIR(hit->geomID)) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+
+ LocalIntersection *local_isect = ctx->local_isect;
+ int hit_idx = 0;
+
+ if (ctx->lcg_state) {
+ /* See triangle_intersect_subsurface() for the native equivalent. */
+ for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (local_isect->hits[i].t == ray->tfar) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ local_isect->num_hits++;
+
+ if (local_isect->num_hits <= ctx->max_hits) {
+ hit_idx = local_isect->num_hits - 1;
+ }
+ else {
+ /* Reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace an element or skip this hit. */
+ hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
+
+ if (hit_idx >= ctx->max_hits) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+ }
+ else {
+ /* Record closest intersection only. */
+ if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
+ *args->valid = 0;
+ return;
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ /* Record the intersection. */
+ local_isect->hits[hit_idx] = current_isect;
+ local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_VOLUME_ALL: {
+ /* Append the intersection to the end of the array. */
+ if (ctx->num_hits < ctx->max_hits) {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ Intersection *isect = &ctx->isect_s[ctx->num_hits];
+ ++ctx->num_hits;
+ *isect = current_isect;
+ /* Only keep primitives from volume objects: the append is undone otherwise. */
+ uint tri_object = isect->object;
+ int object_flag = kernel_data_fetch(object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ --ctx->num_hits;
+ }
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ }
+ break;
+ }
+ case CCLIntersectContext::RAY_REGULAR:
+ default:
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ return;
+ }
+ break;
+ }
+}
+
+/* Embree filter callback that rejects back-facing hits (geometric normal pointing
+ * along the ray direction) and, for the remaining hits, self intersections. */
+ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
+{
+  const RTCRay *ray = (RTCRay *)args->ray;
+  RTCHit *hit = (RTCHit *)args->hit;
+
+  /* Always ignore back-facing intersections. */
+  const float3 ray_dir = make_float3(ray->dir_x, ray->dir_y, ray->dir_z);
+  const float3 hit_Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+  if (dot(ray_dir, hit_Ng) > 0.0f) {
+    args->valid[0] = 0;
+    return;
+  }
+
+  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+  if (kernel_embree_is_self_intersection(ctx->kg, hit, ctx->ray)) {
+    args->valid[0] = 0;
+  }
+}
+
+/* Variant of kernel_embree_filter_occluded_func() that first rejects back-facing
+ * hits; front-facing hits are forwarded to the regular occlusion filter. */
+ccl_device void kernel_embree_filter_occluded_func_backface_cull(
+    const RTCFilterFunctionNArguments *args)
+{
+  const RTCRay *ray = (RTCRay *)args->ray;
+  RTCHit *hit = (RTCHit *)args->hit;
+
+  const float3 ray_dir = make_float3(ray->dir_x, ray->dir_y, ray->dir_z);
+  const float3 hit_Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+  const bool is_backfacing = dot(ray_dir, hit_Ng) > 0.0f;
+
+  if (is_backfacing) {
+    /* Always ignore back-facing intersections. */
+    args->valid[0] = 0;
+  }
+  else {
+    kernel_embree_filter_occluded_func(args);
+  }
+}
+
+/* Scene intersection. */
+
+/* Find the closest intersection of `ray` with the scene using Embree.
+ * Returns false without touching `isect` when the ray is invalid or no Embree
+ * scene is available. Otherwise isect->t is first set to ray->tmax, and on a hit
+ * `isect` is filled from the Embree result and true is returned. */
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+                                          ccl_private const Ray *ray,
+                                          const uint visibility,
+                                          ccl_private Intersection *isect)
+{
+  if (!intersection_ray_valid(ray) || !kernel_data.device_bvh) {
+    return false;
+  }
+
+  isect->t = ray->tmax;
+
+  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+  ctx.ray = ray;
+  IntersectContext rtc_ctx(&ctx);
+
+  RTCRayHit ray_hit;
+  kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
+  rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
+
+  const bool has_hit = (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID) &&
+                       (ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID);
+  if (has_hit) {
+    kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+  }
+  return has_hit;
+}
+
+#ifdef __BVH_LOCAL__
+/* Intersect `ray` with the primitives of the single object `local_object`.
+ *
+ * Hits are recorded in `local_isect` (if non-NULL) by the Embree occlusion filter:
+ * with `lcg_state` up to `max_hits` hits are kept, otherwise only the closest one.
+ * With max_hits == 0 no hit information is recorded and the query only reports
+ * whether anything was hit.
+ *
+ * Returns true if at least one hit was recorded or Embree reported occlusion.
+ * On every return path, including invalid rays and a missing Embree scene,
+ * local_isect->num_hits is well defined. */
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
+                                                int local_object,
+                                                ccl_private uint *lcg_state,
+                                                int max_hits)
+{
+  /* Start from an empty result before any early return, so that callers never
+   * read an uninitialized hit count. */
+  if (local_isect) {
+    local_isect->num_hits = 0;
+  }
+
+  if (!intersection_ray_valid(ray)) {
+    return false;
+  }
+
+  if (!kernel_data.device_bvh) {
+    return false;
+  }
+
+  /* Objects without applied transform are traced in their own scene (RAY_SSS);
+   * others are traced in the top-level scene and filtered by object (RAY_LOCAL). */
+  const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
+                         SD_OBJECT_TRANSFORM_APPLIED);
+  CCLIntersectContext ctx(kg,
+                          has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
+  ctx.lcg_state = lcg_state;
+  ctx.max_hits = max_hits;
+  ctx.ray = ray;
+  ctx.local_isect = local_isect;
+  ctx.local_object_id = local_object;
+  IntersectContext rtc_ctx(&ctx);
+  RTCRay rtc_ray;
+  kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+  /* If this object has its own BVH, use it. */
+  if (has_bvh) {
+    RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
+    if (geom) {
+      /* Trace with the ray pushed into the object's space. */
+      float3 P = ray->P;
+      float3 dir = ray->D;
+      float3 idir = ray->D;
+      bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
+
+      rtc_ray.org_x = P.x;
+      rtc_ray.org_y = P.y;
+      rtc_ray.org_z = P.z;
+      rtc_ray.dir_x = dir.x;
+      rtc_ray.dir_y = dir.y;
+      rtc_ray.dir_z = dir.z;
+      rtc_ray.tnear = ray->tmin;
+      rtc_ray.tfar = ray->tmax;
+      RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+      kernel_assert(scene);
+      if (scene) {
+        rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+      }
+    }
+  }
+  else {
+    rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+  }
+
+  /* rtcOccluded1 sets tfar to -inf if a hit was found. */
+  return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+/* Trace a shadow ray and collect transparent hits along it.
+ *
+ * The Embree occlusion filter records hits in state->shadow_isect, counts them and
+ * accumulates curve shadow transparency. On return `*num_recorded_hits` and
+ * `*throughput` hold the filter's results; they are 0 and 1.0f respectively when
+ * nothing was traced (invalid ray or no Embree scene).
+ *
+ * Returns true if an opaque hit was found, i.e. all light is blocked. */
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+                                                     IntegratorShadowStateCPU *state,
+                                                     ccl_private const Ray *ray,
+                                                     uint visibility,
+                                                     uint max_hits,
+                                                     ccl_private uint *num_recorded_hits,
+                                                     ccl_private float *throughput)
+{
+  /* Give the outputs defined values before any early return, so that the
+   * missing-scene path behaves like the invalid-ray path. */
+  *num_recorded_hits = 0;
+  *throughput = 1.0f;
+
+  if (!intersection_ray_valid(ray)) {
+    return false;
+  }
+
+  if (!kernel_data.device_bvh) {
+    return false;
+  }
+
+  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+  Intersection *isect_array = (Intersection *)state->shadow_isect;
+  ctx.isect_s = isect_array;
+  ctx.max_hits = max_hits;
+  ctx.ray = ray;
+  IntersectContext rtc_ctx(&ctx);
+  RTCRay rtc_ray;
+  kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+  rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+
+  *num_recorded_hits = ctx.num_recorded_hits;
+  *throughput = ctx.throughput;
+  return ctx.opaque_hit;
+}
+#endif
+
+#ifdef __VOLUME__
+/* Collect up to `max_hits` intersections of `ray` with volume objects into the
+ * `isect` array, using the Embree occlusion filter.
+ *
+ * Returns the number of recorded hits; 0 when the ray is invalid or no Embree
+ * scene is available. */
+ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
+                                                 const uint max_hits,
+                                                 const uint visibility)
+{
+  /* The return value is a hit count, so report "nothing traced" as 0 hits. */
+  if (!intersection_ray_valid(ray)) {
+    return 0;
+  }
+
+  if (!kernel_data.device_bvh) {
+    return 0;
+  }
+
+  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+  ctx.isect_s = isect;
+  ctx.max_hits = max_hits;
+  ctx.num_hits = 0;
+  ctx.ray = ray;
+  IntersectContext rtc_ctx(&ctx);
+  RTCRay rtc_ray;
+  kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+  rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+  return ctx.num_hits;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index 3bfc37e98ee..631e55e0d42 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -3,8 +3,6 @@
#pragma once
-#define __KERNEL_CPU__
-
/* Release kernel has too much false-positive maybe-uninitialized warnings,
* which makes it possible to miss actual warnings.
*/
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index b9a44ccad02..e1ab802aa80 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -246,7 +246,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_postfix
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
-constant int __dummy_constant [[function_constant(0)]];
+constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h
new file mode 100644
index 00000000000..d3a0ab1b519
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/bvh.h
@@ -0,0 +1,1123 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* MetalRT implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Payload types. */
+
+struct MetalRTIntersectionPayload { /* Ray payload used by scene_intersect() and scene_intersect_volume(). */
+ RaySelfPrimitives self; /* Copied from ray->self; consulted by the skip-self tests. */
+ uint visibility; /* Ray visibility bits, tested against each object's visibility. */
+ float u, v; /* Written by the curve/point intersection helpers when a hit is accepted. */
+ int prim; /* Primitive index of the accepted hit. */
+ int type; /* Primitive type of the accepted hit. */
+#if defined(__METALRT_MOTION__)
+ float time; /* Copied from ray->time before tracing. */
+#endif
+};
+
+struct MetalRTIntersectionLocalPayload { /* Ray payload used by scene_intersect_local(). */
+ RaySelfPrimitives self; /* Copied from ray->self; only self.prim is compared for local hits. */
+ uint local_object; /* Hits on any other object are ignored by the hit handler. */
+ uint lcg_state; /* Random state stepped when more hits than max_hits are found. */
+ short max_hits; /* Zero means: only report whether anything was hit. */
+ bool has_lcg_state; /* Selects between recording multiple hits and recording the closest hit. */
+ bool result; /* Set to true by the hit handler on the max_hits == 0 path. */
+ LocalIntersection local_isect; /* Hits and geometric normals recorded by the hit handler. */
+};
+
+struct MetalRTIntersectionShadowPayload { /* Ray payload used by scene_intersect_shadow_all(). */
+ RaySelfPrimitives self; /* Copied from ray->self; consulted by intersection_skip_self_shadow(). */
+ uint visibility; /* Ray visibility bits, tested against each object's visibility. */
+#if defined(__METALRT_MOTION__)
+ float time; /* Copied from ray->time before tracing. */
+#endif
+ int state; /* Integrator shadow state whose shadow_isect array receives recorded hits. */
+ float throughput; /* Starts at 1.0 and is multiplied by curve shadow transparency. */
+ short max_hits; /* Hit budget; reaching it makes the handler terminate the ray. */
+ short num_hits; /* Number of hits counted so far (curves and recorded hits). */
+ short num_recorded_hits; /* Number of hits the handler attempted to record in shadow_isect. */
+ bool result; /* Set to true by the hit handler; returned by scene_intersect_shadow_all(). */
+};
+
+/* Intersection return types. */
+
+/* For a bounding box intersection function. */
+struct BoundingBoxIntersectionResult { /* Return value of the bounding_box intersection functions. */
+ bool accept [[accept_intersection]]; /* True when the candidate hit is accepted. */
+ bool continue_search [[continue_search]]; /* False terminates the search for further hits. */
+ float distance [[distance]]; /* Hit distance reported for an accepted hit. */
+};
+
+/* For a triangle intersection function. */
+struct TriangleIntersectionResult { /* Return value of the triangle intersection functions. */
+ bool accept [[accept_intersection]]; /* True when the candidate hit is accepted. */
+ bool continue_search [[continue_search]]; /* False terminates the search for further hits. */
+};
+
+enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; /* Values for the intersection_type template parameter of the hit helpers. */
+
+/* Utilities. */
+
+/* Returns true when (object, prim) is the primitive stored in `self`, so the candidate hit
+ * must be ignored as a self-intersection. */
+ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
+                                              const int object,
+                                              const int prim)
+{
+  const bool same_prim = (self.prim == prim);
+  const bool same_object = (self.object == object);
+  return same_prim && same_object;
+}
+
+/* Shadow-ray variant of the self-intersection test: returns true when (object, prim) matches
+ * either the primitive/object pair or the light primitive/object pair stored in `self`. */
+ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
+                                                     const int object,
+                                                     const int prim)
+{
+  const bool hits_self = (self.prim == prim) && (self.object == object);
+  const bool hits_light = (self.light_prim == prim) && (self.light_object == object);
+  return hits_self || hits_light;
+}
+
+/* Local-intersection variant of the self-intersection test: only the primitive index is
+ * compared, the object is not. */
+ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
+                                                    const int prim)
+{
+  return prim == self.prim;
+}
+
+/* Hit functions. */
+
+/* Hit handler body for local intersection queries (see scene_intersect_local()).
+ *
+ * Only hits on `payload.local_object` that are not the ray's own primitive are considered.
+ * - max_hits == 0: no hit data is recorded, `payload.result` is set and the search stops.
+ * - has_lcg_state: up to max_hits hits are recorded; once more are found, a slot chosen with
+ *   lcg_step_uint() is overwritten (or the hit is dropped when the slot is out of range).
+ * - otherwise: only the closest hit seen so far is kept in slot 0.
+ * Recorded hits are never accepted, so that the traversal keeps reporting further candidates.
+ *
+ * `primitive_id` is relative to the object; the object's prim offset is added here.
+ * The `intersection_type` template parameter is not used by this function. */
+template<typename TReturn, uint intersection_type>
+TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
+                          ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
+                          const uint object,
+                          const uint primitive_id,
+                          const float2 barycentrics,
+                          const float ray_tmax)
+{
+  TReturn result;
+
+#ifdef __BVH_LOCAL__
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+
+  if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
+    /* Only intersect with matching object and skip self-intersection. */
+    result.accept = false;
+    result.continue_search = true;
+    return result;
+  }
+
+  const short max_hits = payload.max_hits;
+  if (max_hits == 0) {
+    /* Special case for when no hit information is requested, just report that something was hit */
+    payload.result = true;
+    result.accept = true;
+    result.continue_search = false;
+    return result;
+  }
+
+  int hit = 0;
+  if (payload.has_lcg_state) {
+    /* Ignore a candidate whose distance equals that of an already recorded hit. */
+    for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
+      if (ray_tmax == payload.local_isect.hits[i].t) {
+        result.accept = false;
+        result.continue_search = true;
+        return result;
+      }
+    }
+
+    hit = payload.local_isect.num_hits++;
+
+    if (payload.local_isect.num_hits > max_hits) {
+      /* All slots are in use: pick a slot from the random state, drop the hit when the pick
+       * falls outside of the recorded range. */
+      hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
+      if (hit >= max_hits) {
+        result.accept = false;
+        result.continue_search = true;
+        return result;
+      }
+    }
+  }
+  else {
+    if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
+      /* Record closest intersection only. Do not terminate ray here, since there is no guarantee
+       * about distance ordering in any-hit */
+      result.accept = false;
+      result.continue_search = true;
+      return result;
+    }
+
+    payload.local_isect.num_hits = 1;
+  }
+
+  ray_data Intersection *isect = &payload.local_isect.hits[hit];
+  isect->t = ray_tmax;
+  isect->prim = prim;
+  isect->object = object;
+  isect->type = kernel_data_fetch(objects, object).primitive_type;
+
+  isect->u = 1.0f - barycentrics.y - barycentrics.x;
+  isect->v = barycentrics.x;
+
+  /* Record geometric normal */
+  const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
+  const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
+  const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
+  const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
+  payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+  /* Continue tracing (without this the trace call would return after the first hit) */
+  result.accept = false;
+  result.continue_search = true;
+  return result;
+#else
+  /* Local intersection support is not compiled in: ignore the hit rather than falling off the
+   * end of the function and returning an uninitialized result. */
+  result.accept = false;
+  result.continue_search = true;
+  return result;
+#endif
+}
+
+/* Triangle intersection function for local queries: forwards all arguments to
+ * metalrt_local_hit(), passing the user instance id as the object index. */
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_local_hit_tri(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
+    uint instance_id [[user_instance_id]],
+    uint primitive_id [[primitive_id]],
+    float2 barycentrics [[barycentric_coord]],
+    float ray_tmax [[distance]])
+{
+  const TriangleIntersectionResult hit_result =
+      metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+          launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
+  return hit_result;
+}
+
+/* Bounding-box intersection function for local queries. Marked as unused in the source: it
+ * rejects the candidate and stops the search. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
+{
+  /* unused function */
+  BoundingBoxIntersectionResult rejected;
+  rejected.accept = false;
+  rejected.continue_search = false;
+  rejected.distance = ray_tmax;
+  return rejected;
+}
+
+/* Hit handler body shared by the triangle and bounding-box shadow intersection functions.
+ *
+ * Returns true when the search should continue and false when the ray should be terminated.
+ * `payload.result` is set to true whenever the handler decides the ray is blocked. Hits that
+ * are neither filtered nor blocking are written to the `shadow_isect` array of the integrator
+ * state stored in the payload; once that array is full, the farthest recorded hit is replaced
+ * by a closer one. Curve hits are not recorded, they only scale `payload.throughput`. */
+template<uint intersection_type>
+bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
+                            ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+                            uint object,
+                            uint prim,
+                            const float2 barycentrics,
+                            const float ray_tmax)
+{
+#ifdef __SHADOW_RECORD_ALL__
+#  ifdef __VISIBILITY_FLAG__
+  if ((kernel_data_fetch(objects, object).visibility & payload.visibility) == 0) {
+    /* Object is not visible to this ray: continue search. */
+    return true;
+  }
+#  endif
+
+  if (intersection_skip_self_shadow(payload.self, object, prim)) {
+    /* Self-intersection: continue search. */
+    return true;
+  }
+
+  float u = 0.0f, v = 0.0f;
+  int type = 0;
+  if (intersection_type == METALRT_HIT_TRIANGLE) {
+    v = barycentrics.x;
+    u = 1.0f - barycentrics.y - barycentrics.x;
+    type = kernel_data_fetch(objects, object).primitive_type;
+  }
+#  ifdef __HAIR__
+  else {
+    u = barycentrics.x;
+    v = barycentrics.y;
+
+    /* Filter out curve endcaps: continue search. */
+    if (u == 0.0f || u == 1.0f) {
+      return true;
+    }
+
+    /* Remap from the curve segment to the primitive and type stored for it. */
+    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+    type = segment.type;
+    prim = segment.prim;
+  }
+#  endif
+
+#  ifndef __TRANSPARENT_SHADOWS__
+  /* No transparent shadows support compiled in, make opaque and terminate the ray. */
+  payload.result = true;
+  return false;
+#  else
+  const short max_hits = payload.max_hits;
+  const short hits_so_far = payload.num_hits;
+  const short recorded_so_far = payload.num_recorded_hits;
+
+  MetalKernelContext context(launch_params_metal);
+
+  /* If the hit budget is used up or the shader has no transparent shadow, all light is blocked
+   * and the ray can be terminated immediately. */
+  if (hits_so_far >= max_hits ||
+      !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+    payload.result = true;
+    return false;
+  }
+
+  /* Always use baked shadow transparency for curves. */
+  if (type & PRIMITIVE_CURVE) {
+    const float throughput = payload.throughput *
+                             context.intersection_curve_shadow_transparency(
+                                 nullptr, object, prim, u);
+    payload.throughput = throughput;
+    payload.num_hits += 1;
+
+    if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+      /* Accept result and terminate if throughput is sufficiently low. */
+      payload.result = true;
+      return false;
+    }
+    return true;
+  }
+
+  payload.num_hits += 1;
+  payload.num_recorded_hits += 1;
+
+  const IntegratorShadowState state = payload.state;
+  const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
+
+  uint record_index = recorded_so_far;
+  if (record_index >= max_record_hits) {
+    /* If maximum number of hits reached, find the farthest recorded hit to replace. */
+    uint max_recorded_hit = 0;
+    float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+
+    for (uint i = 1; i < max_record_hits; i++) {
+      const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+      if (isect_t > max_recorded_t) {
+        max_recorded_hit = i;
+        max_recorded_t = isect_t;
+      }
+    }
+
+    if (ray_tmax >= max_recorded_t) {
+      /* Accept hit, so that we don't consider any more hits beyond the distance of the
+       * current hit anymore. */
+      payload.result = true;
+      return true;
+    }
+
+    record_index = max_recorded_hit;
+  }
+
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+
+  /* Continue tracing. */
+#  endif /* __TRANSPARENT_SHADOWS__ */
+#endif   /* __SHADOW_RECORD_ALL__ */
+
+  return true;
+}
+
+/* Triangle intersection function for shadow rays: converts the per-object primitive id to a
+ * global primitive index and lets metalrt_shadow_all_hit() decide whether to continue. The hit
+ * is accepted exactly when the search is terminated. */
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_tri(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+    unsigned int object [[user_instance_id]],
+    unsigned int primitive_id [[primitive_id]],
+    float2 barycentrics [[barycentric_coord]],
+    float ray_tmax [[distance]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+
+  const bool keep_searching = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
+      launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
+
+  TriangleIntersectionResult result;
+  result.continue_search = keep_searching;
+  result.accept = !keep_searching;
+  return result;
+}
+
+/* Bounding-box intersection function for shadow rays. Marked as unused in the source: it
+ * rejects the candidate and stops the search. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
+{
+  /* unused function */
+  BoundingBoxIntersectionResult rejected;
+  rejected.accept = false;
+  rejected.continue_search = false;
+  rejected.distance = ray_tmax;
+  return rejected;
+}
+
+/* Decide whether a candidate hit is accepted for a regular (non-recording) ray.
+ *
+ * A hit is ignored (not accepted, search continues) when it is a curve endcap, when the object
+ * is not visible to the ray, or when it is a self-intersection. For rays carrying
+ * PATH_RAY_SHADOW_OPAQUE any remaining hit is accepted and the search stops; for all other
+ * rays the hit is accepted and the search continues.
+ * Only `accept` and `continue_search` of the returned value are written here. */
+template<typename TReturnType, uint intersection_type>
+inline TReturnType metalrt_visibility_test(
+    constant KernelParamsMetal &launch_params_metal,
+    ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+    const uint object,
+    const uint prim,
+    const float u)
+{
+  /* Outcome shared by every rejection below: ignore the hit and keep searching. */
+  TReturnType result;
+  result.accept = false;
+  result.continue_search = true;
+
+#ifdef __HAIR__
+  /* Filter out curve endcaps. */
+  if (intersection_type == METALRT_HIT_BOUNDING_BOX && (u == 0.0f || u == 1.0f)) {
+    return result;
+  }
+#endif
+
+  const uint visibility = payload.visibility;
+#ifdef __VISIBILITY_FLAG__
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+    return result;
+  }
+#endif
+
+  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+    if (intersection_skip_self_shadow(payload.self, object, prim)) {
+      return result;
+    }
+
+    /* Shadow ray early termination. */
+    result.accept = true;
+    result.continue_search = false;
+    return result;
+  }
+
+  if (intersection_skip_self(payload.self, object, prim)) {
+    return result;
+  }
+
+  /* Accept the hit; continue_search stays true. */
+  result.accept = true;
+  return result;
+}
+
+/* Triangle intersection function for regular rays: runs metalrt_visibility_test() and, when the
+ * hit is accepted, stores the global primitive index and the object's primitive type in the
+ * payload. */
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_tri(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+    unsigned int object [[user_instance_id]],
+    unsigned int primitive_id [[primitive_id]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+
+  /* Triangles have no curve parameter, so 0 is passed for `u`. */
+  TriangleIntersectionResult result =
+      metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+          launch_params_metal, payload, object, prim, 0.0f);
+  if (!result.accept) {
+    return result;
+  }
+
+  payload.prim = prim;
+  payload.type = kernel_data_fetch(objects, object).primitive_type;
+  return result;
+}
+
+/* Bounding-box intersection function for regular rays. Marked as unused in the source: it
+ * rejects the candidate but lets the search continue. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
+{
+  /* Unused function */
+  BoundingBoxIntersectionResult ignored;
+  ignored.distance = ray_tmax;
+  ignored.continue_search = true;
+  ignored.accept = false;
+  return ignored;
+}
+
+/* Primitive intersection functions. */
+
+#ifdef __HAIR__
+/* Intersect a curve primitive for a regular ray.
+ *
+ * `result` is left untouched when the object is invisible to the ray or the curve is missed.
+ * On a curve hit it is overwritten with the outcome of metalrt_visibility_test(); when that
+ * outcome accepts the hit, the hit distance is stored in `result` and u, v, prim and type are
+ * stored in the payload. */
+ccl_device_inline void metalrt_intersection_curve(
+    constant KernelParamsMetal &launch_params_metal,
+    ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+    const uint object,
+    const uint prim,
+    const uint type,
+    const float3 ray_P,
+    const float3 ray_D,
+    float time,
+    const float ray_tmin,
+    const float ray_tmax,
+    thread BoundingBoxIntersectionResult &result)
+{
+#  ifdef __VISIBILITY_FLAG__
+  if ((kernel_data_fetch(objects, object).visibility & payload.visibility) == 0) {
+    return;
+  }
+#  endif
+
+  Intersection isect;
+  isect.t = ray_tmax;
+
+  MetalKernelContext context(launch_params_metal);
+  const bool curve_hit = context.curve_intersect(
+      NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type);
+  if (!curve_hit) {
+    return;
+  }
+
+  result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
+      launch_params_metal, payload, object, prim, isect.u);
+  if (!result.accept) {
+    return;
+  }
+
+  result.distance = isect.t;
+  payload.u = isect.u;
+  payload.v = isect.v;
+  payload.prim = prim;
+  payload.type = type;
+}
+
+/* Intersect a curve primitive for a shadow ray.
+ *
+ * `ray_P` and `ray_D` are the ray origin and direction supplied by the calling intersection
+ * function; `time` is forwarded to the curve intersection test.
+ * `result` is left untouched when the curve is missed. On a curve hit,
+ * metalrt_shadow_all_hit() decides whether the search continues, and the hit is accepted
+ * exactly when the search is terminated. */
+ccl_device_inline void metalrt_intersection_curve_shadow(
+    constant KernelParamsMetal &launch_params_metal,
+    ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+    const uint object,
+    const uint prim,
+    const uint type,
+    const float3 ray_P,
+    const float3 ray_D,
+    float time,
+    const float ray_tmin,
+    const float ray_tmax,
+    thread BoundingBoxIntersectionResult &result)
+{
+  Intersection isect;
+  isect.t = ray_tmax;
+
+  MetalKernelContext context(launch_params_metal);
+  if (context.curve_intersect(
+          NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
+    result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
+        launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+    result.accept = !result.continue_search;
+  }
+}
+
+/* Bounding-box intersection function for regular rays that only tests curve segments whose
+ * type has PRIMITIVE_CURVE_RIBBON set; any other segment is reported as a miss. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                             ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+                             [[payload]],
+                             const uint object [[user_instance_id]],
+                             const uint primitive_id [[primitive_id]],
+                             const float3 ray_P [[origin]],
+                             const float3 ray_D [[direction]],
+                             const float ray_tmin [[min_distance]],
+                             const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  if (!(segment.type & PRIMITIVE_CURVE_RIBBON)) {
+    return result;
+  }
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  metalrt_intersection_curve(launch_params_metal,
+                             payload,
+                             object,
+                             segment.prim,
+                             segment.type,
+                             ray_P,
+                             ray_D,
+                             time,
+                             ray_tmin,
+                             ray_tmax,
+                             result);
+
+  return result;
+}
+
+/* Bounding-box intersection function for shadow rays that only tests curve segments whose type
+ * has PRIMITIVE_CURVE_RIBBON set; any other segment is reported as a miss. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_ribbon_shadow(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+    const uint object [[user_instance_id]],
+    const uint primitive_id [[primitive_id]],
+    const float3 ray_P [[origin]],
+    const float3 ray_D [[direction]],
+    const float ray_tmin [[min_distance]],
+    const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  if (!(segment.type & PRIMITIVE_CURVE_RIBBON)) {
+    return result;
+  }
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  metalrt_intersection_curve_shadow(launch_params_metal,
+                                    payload,
+                                    object,
+                                    segment.prim,
+                                    segment.type,
+                                    ray_P,
+                                    ray_D,
+                                    time,
+                                    ray_tmin,
+                                    ray_tmax,
+                                    result);
+
+  return result;
+}
+
+/* Bounding-box intersection function for regular rays that tests every curve segment,
+ * regardless of its curve type. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                          ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+                          [[payload]],
+                          const uint object [[user_instance_id]],
+                          const uint primitive_id [[primitive_id]],
+                          const float3 ray_P [[origin]],
+                          const float3 ray_D [[direction]],
+                          const float ray_tmin [[min_distance]],
+                          const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  metalrt_intersection_curve(launch_params_metal,
+                             payload,
+                             object,
+                             segment.prim,
+                             segment.type,
+                             ray_P,
+                             ray_D,
+                             time,
+                             ray_tmin,
+                             ray_tmax,
+                             result);
+
+  return result;
+}
+
+/* Bounding-box intersection function for shadow rays that tests every curve segment,
+ * regardless of its curve type. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_all_shadow(
+    constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+    ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+    const uint object [[user_instance_id]],
+    const uint primitive_id [[primitive_id]],
+    const float3 ray_P [[origin]],
+    const float3 ray_D [[direction]],
+    const float ray_tmin [[min_distance]],
+    const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  metalrt_intersection_curve_shadow(launch_params_metal,
+                                    payload,
+                                    object,
+                                    segment.prim,
+                                    segment.type,
+                                    ray_P,
+                                    ray_D,
+                                    time,
+                                    ray_tmin,
+                                    ray_tmax,
+                                    result);
+
+  return result;
+}
+#endif /* __HAIR__ */
+
+#ifdef __POINTCLOUD__
+/* Intersect a point primitive for a regular ray.
+ *
+ * `result` is left untouched when the object is invisible to the ray or the point is missed.
+ * On a point hit it is overwritten with the outcome of metalrt_visibility_test(); when that
+ * outcome accepts the hit, the hit distance is stored in `result` and u, v, prim and type are
+ * stored in the payload. */
+ccl_device_inline void metalrt_intersection_point(
+    constant KernelParamsMetal &launch_params_metal,
+    ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+    const uint object,
+    const uint prim,
+    const uint type,
+    const float3 ray_P,
+    const float3 ray_D,
+    float time,
+    const float ray_tmin,
+    const float ray_tmax,
+    thread BoundingBoxIntersectionResult &result)
+{
+#  ifdef __VISIBILITY_FLAG__
+  if ((kernel_data_fetch(objects, object).visibility & payload.visibility) == 0) {
+    return;
+  }
+#  endif
+
+  Intersection isect;
+  isect.t = ray_tmax;
+
+  MetalKernelContext context(launch_params_metal);
+  const bool point_hit = context.point_intersect(
+      NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type);
+  if (!point_hit) {
+    return;
+  }
+
+  result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
+      launch_params_metal, payload, object, prim, isect.u);
+  if (!result.accept) {
+    return;
+  }
+
+  result.distance = isect.t;
+  payload.u = isect.u;
+  payload.v = isect.v;
+  payload.prim = prim;
+  payload.type = type;
+}
+
+/* Intersect a point primitive for a shadow ray.
+ *
+ * `result` is left untouched when the point is missed. On a point hit,
+ * metalrt_shadow_all_hit() decides whether the search continues; the hit is accepted exactly
+ * when the search is terminated, and only then is the hit distance stored in `result`. */
+ccl_device_inline void metalrt_intersection_point_shadow(
+    constant KernelParamsMetal &launch_params_metal,
+    ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+    const uint object,
+    const uint prim,
+    const uint type,
+    const float3 ray_P,
+    const float3 ray_D,
+    float time,
+    const float ray_tmin,
+    const float ray_tmax,
+    thread BoundingBoxIntersectionResult &result)
+{
+  const uint visibility = payload.visibility;
+
+  Intersection isect;
+  isect.t = ray_tmax;
+
+  MetalKernelContext context(launch_params_metal);
+  const bool point_hit = context.point_intersect(
+      NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type);
+  if (!point_hit) {
+    return;
+  }
+
+  const bool keep_searching = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
+      launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+  result.continue_search = keep_searching;
+  result.accept = !keep_searching;
+
+  if (result.accept) {
+    result.distance = isect.t;
+  }
+}
+
+/* Bounding-box intersection function for point primitives hit by regular rays. The primitive
+ * type is taken from the object, the primitive index from the object's prim offset. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                      ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+                      const uint object [[user_instance_id]],
+                      const uint primitive_id [[primitive_id]],
+                      const float3 ray_origin [[origin]],
+                      const float3 ray_direction [[direction]],
+                      const float ray_tmin [[min_distance]],
+                      const float ray_tmax [[max_distance]])
+{
+  const int type = kernel_data_fetch(objects, object).primitive_type;
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  metalrt_intersection_point(launch_params_metal,
+                             payload,
+                             object,
+                             prim,
+                             type,
+                             ray_origin,
+                             ray_direction,
+                             time,
+                             ray_tmin,
+                             ray_tmax,
+                             result);
+
+  return result;
+}
+
+/* Bounding-box intersection function for point primitives hit by shadow rays. The primitive
+ * type is taken from the object, the primitive index from the object's prim offset. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                             ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
+                             [[payload]],
+                             const uint object [[user_instance_id]],
+                             const uint primitive_id [[primitive_id]],
+                             const float3 ray_origin [[origin]],
+                             const float3 ray_direction [[direction]],
+                             const float ray_tmin [[min_distance]],
+                             const float ray_tmax [[max_distance]])
+{
+  const int type = kernel_data_fetch(objects, object).primitive_type;
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+
+#  if defined(__METALRT_MOTION__)
+  const float time = payload.time;
+#  else
+  const float time = 0.0f;
+#  endif
+
+  /* Miss by default: not accepted, search continues. */
+  BoundingBoxIntersectionResult result;
+  result.distance = ray_tmax;
+  result.accept = false;
+  result.continue_search = true;
+
+  metalrt_intersection_point_shadow(launch_params_metal,
+                                    payload,
+                                    object,
+                                    prim,
+                                    type,
+                                    ray_origin,
+                                    ray_direction,
+                                    time,
+                                    ray_tmin,
+                                    ray_tmax,
+                                    result);
+
+  return result;
+}
+#endif /* __POINTCLOUD__ */
+
+/* Scene intersection. */
+
+/* Trace `ray` against the scene acceleration structure with the default intersection function
+ * table and write the hit to `isect`.
+ *
+ * On a miss (or an invalid ray) `isect->t` is set to `ray->tmax`, `isect->type` to
+ * PRIMITIVE_NONE and false is returned. On a hit, prim and type come from the payload filled by
+ * the intersection functions, object and distance from the MetalRT result. For triangle hits
+ * u and v are derived from the barycentric coordinates, otherwise they come from the payload.
+ * Returns whether the resulting `isect->type` differs from PRIMITIVE_NONE. */
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+                                          ccl_private const Ray *ray,
+                                          const uint visibility,
+                                          ccl_private Intersection *isect)
+{
+  /* Same ray validity test as used by the local, shadow and volume queries. */
+  if (!intersection_ray_valid(ray)) {
+    isect->t = ray->tmax;
+    isect->type = PRIMITIVE_NONE;
+    return false;
+  }
+
+#if defined(__KERNEL_DEBUG__)
+  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+    isect->t = ray->tmax;
+    isect->type = PRIMITIVE_NONE;
+    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+    return false;
+  }
+
+  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+    isect->t = ray->tmax;
+    isect->type = PRIMITIVE_NONE;
+    kernel_assert(!"Invalid ift_default");
+    return false;
+  }
+#endif
+
+  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+  metalrt_intersector_type metalrt_intersect;
+
+  if (!kernel_data.bvh.have_curves) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
+  MetalRTIntersectionPayload payload;
+  payload.self = ray->self;
+  payload.u = 0.0f;
+  payload.v = 0.0f;
+  payload.visibility = visibility;
+
+  typename metalrt_intersector_type::result_type intersection;
+
+  /* Only the low 8 visibility bits are used as ray mask. When none of them is set but higher
+   * bits are, fall back to a mask that matches everything. */
+  uint ray_mask = visibility & 0xFF;
+  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+    ray_mask = 0xFF;
+    /* No further intersector setup required: Default MetalRT behavior is any-hit. */
+  }
+  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+    /* No further intersector setup required: Shadow ray early termination is controlled by the
+     * intersection handler */
+  }
+
+#if defined(__METALRT_MOTION__)
+  payload.time = ray->time;
+  intersection = metalrt_intersect.intersect(r,
+                                             metal_ancillaries->accel_struct,
+                                             ray_mask,
+                                             ray->time,
+                                             metal_ancillaries->ift_default,
+                                             payload);
+#else
+  intersection = metalrt_intersect.intersect(
+      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+#endif
+
+  if (intersection.type == intersection_type::none) {
+    isect->t = ray->tmax;
+    isect->type = PRIMITIVE_NONE;
+
+    return false;
+  }
+
+  isect->prim = payload.prim;
+  isect->type = payload.type;
+  isect->object = intersection.user_instance_id;
+
+  isect->t = intersection.distance;
+  if (intersection.type == intersection_type::triangle) {
+    isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
+               intersection.triangle_barycentric_coord.x;
+    isect->v = intersection.triangle_barycentric_coord.x;
+  }
+  else {
+    isect->u = payload.u;
+    isect->v = payload.v;
+  }
+
+  return isect->type != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+/* Trace `ray` against the scene with the local intersection function table, considering only
+ * hits on `local_object`.
+ *
+ * `local_isect` may be null; when given it receives the hits recorded by the hit handler (and
+ * a hit count of zero on the early-out paths). `lcg_state` may be null; when given it enables
+ * recording of up to `max_hits` hits and is updated with the advanced random state.
+ * Returns `payload.result`, which the hit handler sets only on its max_hits == 0 path; for
+ * other queries the outcome is reported through `local_isect`. */
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
+                                                int local_object,
+                                                ccl_private uint *lcg_state,
+                                                int max_hits)
+{
+  if (!intersection_ray_valid(ray)) {
+    if (local_isect) {
+      local_isect->num_hits = 0;
+    }
+    return false;
+  }
+
+#  if defined(__KERNEL_DEBUG__)
+  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+    if (local_isect) {
+      local_isect->num_hits = 0;
+    }
+    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+    return false;
+  }
+
+  if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
+    if (local_isect) {
+      local_isect->num_hits = 0;
+    }
+    kernel_assert(!"Invalid ift_local");
+    return false;
+  }
+#  endif
+
+  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+  metalrt_intersector_type metalrt_intersect;
+
+  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+  if (!kernel_data.bvh.have_curves) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
+  MetalRTIntersectionLocalPayload payload;
+  payload.self = ray->self;
+  payload.local_object = local_object;
+  payload.max_hits = max_hits;
+  payload.local_isect.num_hits = 0;
+  /* Always initialize the flag: the hit handler reads it for every candidate hit, also when no
+   * random state was passed in. */
+  payload.has_lcg_state = (lcg_state != nullptr);
+  if (lcg_state) {
+    payload.lcg_state = *lcg_state;
+  }
+  payload.result = false;
+
+  typename metalrt_intersector_type::result_type intersection;
+
+#  if defined(__METALRT_MOTION__)
+  intersection = metalrt_intersect.intersect(
+      r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
+#  else
+  intersection = metalrt_intersect.intersect(
+      r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
+#  endif
+
+  if (lcg_state) {
+    *lcg_state = payload.lcg_state;
+  }
+  /* `local_isect` is treated as optional by the early-out paths above, so do not write through
+   * it unconditionally here either. */
+  if (local_isect) {
+    *local_isect = payload.local_isect;
+  }
+
+  return payload.result;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+/* Trace a shadow ray with the shadow intersection function table.
+ *
+ * The hit handler records hits into the `shadow_isect` array of `state` and accumulates curve
+ * transparency. After tracing, `*num_recorded_hits` and `*throughput` receive the values
+ * accumulated in the payload. Returns `payload.result` as set by the hit handler, or false for
+ * an invalid ray. */
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+                                                     IntegratorShadowState state,
+                                                     ccl_private const Ray *ray,
+                                                     uint visibility,
+                                                     uint max_hits,
+                                                     ccl_private uint *num_recorded_hits,
+                                                     ccl_private float *throughput)
+{
+  if (!intersection_ray_valid(ray)) {
+    return false;
+  }
+
+#  if defined(__KERNEL_DEBUG__)
+  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+    return false;
+  }
+
+  if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
+    kernel_assert(!"Invalid ift_shadow");
+    return false;
+  }
+#  endif
+
+  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+  metalrt_intersector_type metalrt_intersect;
+
+  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+  if (!kernel_data.bvh.have_curves) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
+  /* Payload starts out with no hits, full throughput and an unblocked result. */
+  MetalRTIntersectionShadowPayload payload;
+  payload.self = ray->self;
+  payload.visibility = visibility;
+  payload.state = state;
+  payload.max_hits = max_hits;
+  payload.num_hits = 0;
+  payload.num_recorded_hits = 0;
+  payload.throughput = 1.0f;
+  payload.result = false;
+
+  /* Only the low 8 visibility bits are used as ray mask. When none of them is set but higher
+   * bits are, fall back to a mask that matches everything. */
+  const uint low_visibility = visibility & 0xFF;
+  const uint ray_mask = (0 == low_visibility && (visibility & ~0xFF) != 0) ? 0xFF :
+                                                                            low_visibility;
+
+  typename metalrt_intersector_type::result_type intersection;
+
+#  if defined(__METALRT_MOTION__)
+  payload.time = ray->time;
+  intersection = metalrt_intersect.intersect(r,
+                                             metal_ancillaries->accel_struct,
+                                             ray_mask,
+                                             ray->time,
+                                             metal_ancillaries->ift_shadow,
+                                             payload);
+#  else
+  intersection = metalrt_intersect.intersect(
+      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
+#  endif
+
+  *throughput = payload.throughput;
+  *num_recorded_hits = payload.num_recorded_hits;
+
+  return payload.result;
+}
+#endif
+
+#ifdef __VOLUME__
+/* Trace `ray` for volume intersection using the default intersection function table with
+ * opacity forced to non-opaque.
+ *
+ * Returns false without touching `isect` for an invalid ray or a miss. On a hit, prim and type
+ * come from the payload filled by the intersection functions, object and distance from the
+ * MetalRT result. For triangle hits u and v are derived from the barycentric coordinates,
+ * otherwise they come from the payload.
+ * Returns whether the resulting `isect->type` differs from PRIMITIVE_NONE. */
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
+                                                 const uint visibility)
+{
+  if (!intersection_ray_valid(ray)) {
+    return false;
+  }
+
+#  if defined(__KERNEL_DEBUG__)
+  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+    return false;
+  }
+
+  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+    kernel_assert(!"Invalid ift_default");
+    return false;
+  }
+#  endif
+
+  metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+  metalrt_intersector_type metalrt_intersect;
+
+  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+  if (!kernel_data.bvh.have_curves) {
+    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+  }
+
+  MetalRTIntersectionPayload payload;
+  payload.self = ray->self;
+  payload.visibility = visibility;
+
+  typename metalrt_intersector_type::result_type intersection;
+
+  /* Only the low 8 visibility bits are used as ray mask. When none of them is set but higher
+   * bits are, fall back to a mask that matches everything. */
+  const uint low_visibility = visibility & 0xFF;
+  const uint ray_mask = (0 == low_visibility && (visibility & ~0xFF) != 0) ? 0xFF :
+                                                                            low_visibility;
+
+#  if defined(__METALRT_MOTION__)
+  payload.time = ray->time;
+  intersection = metalrt_intersect.intersect(r,
+                                             metal_ancillaries->accel_struct,
+                                             ray_mask,
+                                             ray->time,
+                                             metal_ancillaries->ift_default,
+                                             payload);
+#  else
+  intersection = metalrt_intersect.intersect(
+      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+#  endif
+
+  if (intersection.type == intersection_type::none) {
+    return false;
+  }
+
+  isect->t = intersection.distance;
+  isect->object = intersection.user_instance_id;
+  isect->prim = payload.prim;
+  isect->type = payload.type;
+
+  if (intersection.type == intersection_type::triangle) {
+    isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
+               intersection.triangle_barycentric_coord.x;
+    isect->v = intersection.triangle_barycentric_coord.x;
+  }
+  else {
+    isect->u = payload.u;
+    isect->v = payload.v;
+  }
+
+  return isect->type != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 0ed52074a90..80ee8ef5b57 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -260,8 +260,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#ifdef __METALRT__
-# define __KERNEL_GPU_RAYTRACING__
-
# if defined(__METALRT_MOTION__)
# define METALRT_TAGS instancing, instance_motion, primitive_motion
# else
diff --git a/intern/cycles/kernel/device/metal/function_constants.h b/intern/cycles/kernel/device/metal/function_constants.h
new file mode 100644
index 00000000000..3adf390c7f6
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/function_constants.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+enum {
+ Kernel_DummyConstant,
+#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name,
+#include "kernel/data_template.h"
+};
+
+#ifdef __KERNEL_METAL__
+# define KERNEL_STRUCT_MEMBER(parent, type, name) \
+ constant type kernel_data_##parent##_##name \
+ [[function_constant(KernelData_##parent##_##name)]];
+# include "kernel/data_template.h"
+#endif
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index 3c31dc3354c..3df81fcf369 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -5,748 +5,5 @@
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
+#include "kernel/device/metal/function_constants.h"
#include "kernel/device/gpu/kernel.h"
-
-/* MetalRT intersection handlers */
-#ifdef __METALRT__
-
-/* Return type for a bounding box intersection function. */
-struct BoundingBoxIntersectionResult
-{
- bool accept [[accept_intersection]];
- bool continue_search [[continue_search]];
- float distance [[distance]];
-};
-
-/* Return type for a triangle intersection function. */
-struct TriangleIntersectionResult
-{
- bool accept [[accept_intersection]];
- bool continue_search [[continue_search]];
-};
-
-enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
-
-ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self,
- const int object,
- const int prim)
-{
- return (self.prim == prim) && (self.object == object);
-}
-
-ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self,
- const int object,
- const int prim)
-{
- return ((self.prim == prim) && (self.object == object)) ||
- ((self.light_prim == prim) && (self.light_object == object));
-}
-
-ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self,
- const int prim)
-{
- return (self.prim == prim);
-}
-
-template<typename TReturn, uint intersection_type>
-TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
- const uint object,
- const uint primitive_id,
- const float2 barycentrics,
- const float ray_tmax)
-{
- TReturn result;
-
-#ifdef __BVH_LOCAL__
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
-
- if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
- /* Only intersect with matching object and skip self-intersecton. */
- result.accept = false;
- result.continue_search = true;
- return result;
- }
-
- const short max_hits = payload.max_hits;
- if (max_hits == 0) {
- /* Special case for when no hit information is requested, just report that something was hit */
- payload.result = true;
- result.accept = true;
- result.continue_search = false;
- return result;
- }
-
- int hit = 0;
- if (payload.has_lcg_state) {
- for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
- if (ray_tmax == payload.local_isect.hits[i].t) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
- }
-
- hit = payload.local_isect.num_hits++;
-
- if (payload.local_isect.num_hits > max_hits) {
- hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
- if (hit >= max_hits) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
- }
- }
- else {
- if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
- /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
- result.accept = false;
- result.continue_search = true;
- return result;
- }
-
- payload.local_isect.num_hits = 1;
- }
-
- ray_data Intersection *isect = &payload.local_isect.hits[hit];
- isect->t = ray_tmax;
- isect->prim = prim;
- isect->object = object;
- isect->type = kernel_data_fetch(objects, object).primitive_type;
-
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
-
- /* Record geometric normal */
- const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
- const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
- const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
- const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
- payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
-
- /* Continue tracing (without this the trace call would return after the first hit) */
- result.accept = false;
- result.continue_search = true;
- return result;
-#endif
-}
-
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
- uint instance_id [[user_instance_id]],
- uint primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
-{
- return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
-{
- /* unused function */
- BoundingBoxIntersectionResult result;
- result.distance = ray_tmax;
- result.accept = false;
- result.continue_search = false;
- return result;
-}
-
-template<uint intersection_type>
-bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- uint object,
- uint prim,
- const float2 barycentrics,
- const float ray_tmax)
-{
-#ifdef __SHADOW_RECORD_ALL__
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = payload.visibility;
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- /* continue search */
- return true;
- }
-# endif
-
- if (intersection_skip_self_shadow(payload.self, object, prim)) {
- /* continue search */
- return true;
- }
-
- float u = 0.0f, v = 0.0f;
- int type = 0;
- if (intersection_type == METALRT_HIT_TRIANGLE) {
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
- type = kernel_data_fetch(objects, object).primitive_type;
- }
-# ifdef __HAIR__
- else {
- u = barycentrics.x;
- v = barycentrics.y;
-
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- type = segment.type;
- prim = segment.prim;
-
- /* Filter out curve endcaps */
- if (u == 0.0f || u == 1.0f) {
- /* continue search */
- return true;
- }
- }
-# endif
-
-# ifndef __TRANSPARENT_SHADOWS__
- /* No transparent shadows support compiled in, make opaque. */
- payload.result = true;
- /* terminate ray */
- return false;
-# else
- short max_hits = payload.max_hits;
- short num_hits = payload.num_hits;
- short num_recorded_hits = payload.num_recorded_hits;
-
- MetalKernelContext context(launch_params_metal);
-
- /* If no transparent shadows, all light is blocked and we can stop immediately. */
- if (num_hits >= max_hits ||
- !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
- payload.result = true;
- /* terminate ray */
- return false;
- }
-
- /* Always use baked shadow transparency for curves. */
- if (type & PRIMITIVE_CURVE) {
- float throughput = payload.throughput;
- throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
- payload.throughput = throughput;
- payload.num_hits += 1;
-
- if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
- /* Accept result and terminate if throughput is sufficiently low */
- payload.result = true;
- return false;
- }
- else {
- return true;
- }
- }
-
- payload.num_hits += 1;
- payload.num_recorded_hits += 1;
-
- uint record_index = num_recorded_hits;
-
- const IntegratorShadowState state = payload.state;
-
- const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
- if (record_index >= max_record_hits) {
- /* If maximum number of hits reached, find a hit to replace. */
- float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- uint max_recorded_hit = 0;
-
- for (int i = 1; i < max_record_hits; i++) {
- const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
- if (isect_t > max_recorded_t) {
- max_recorded_t = isect_t;
- max_recorded_hit = i;
- }
- }
-
- if (ray_tmax >= max_recorded_t) {
- /* Accept hit, so that we don't consider any more hits beyond the distance of the
- * current hit anymore. */
- payload.result = true;
- return true;
- }
-
- record_index = max_recorded_hit;
- }
-
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
- /* Continue tracing. */
-# endif /* __TRANSPARENT_SHADOWS__ */
-#endif /* __SHADOW_RECORD_ALL__ */
-
- return true;
-}
-
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
-
- TriangleIntersectionResult result;
- result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
- result.accept = !result.continue_search;
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
-{
- /* unused function */
- BoundingBoxIntersectionResult result;
- result.distance = ray_tmax;
- result.accept = false;
- result.continue_search = false;
- return result;
-}
-
-template<typename TReturnType, uint intersection_type>
-inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const float u)
-{
- TReturnType result;
-
-# ifdef __HAIR__
- if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
- /* Filter out curve endcaps. */
- if (u == 0.0f || u == 1.0f) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
- }
-# endif
-
- uint visibility = payload.visibility;
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
-# endif
-
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- if (intersection_skip_self_shadow(payload.self, object, prim)) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
- else {
- result.accept = true;
- result.continue_search = false;
- return result;
- }
- }
- else {
- if (intersection_skip_self(payload.self, object, prim)) {
- result.accept = false;
- result.continue_search = true;
- return result;
- }
- }
-
- result.accept = true;
- result.continue_search = true;
- return result;
-}
-
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, 0.0f);
- if (result.accept) {
- payload.prim = prim;
- payload.type = kernel_data_fetch(objects, object).primitive_type;
- }
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
-{
- /* Unused function */
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
- return result;
-}
-
-#ifdef __HAIR__
-ccl_device_inline
-void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
-{
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = payload.visibility;
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
- Intersection isect;
- isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
- if (result.accept) {
- result.distance = isect.t / len;
- payload.u = isect.u;
- payload.v = isect.v;
- payload.prim = prim;
- payload.type = type;
- }
- }
-}
-
-ccl_device_inline
-void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
-{
- const uint visibility = payload.visibility;
-
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
- Intersection isect;
- isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
- result.accept = !result.continue_search;
-
- if (result.accept) {
- result.distance = isect.t / len;
- }
- }
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
-
- if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
- }
-
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
-
- if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
- }
-
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
-
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
-
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
-
- return result;
-}
-#endif /* __HAIR__ */
-
-#ifdef __POINTCLOUD__
-ccl_device_inline
-void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
-{
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = payload.visibility;
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
- Intersection isect;
- isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
- if (result.accept) {
- result.distance = isect.t / len;
- payload.u = isect.u;
- payload.v = isect.v;
- payload.prim = prim;
- payload.type = type;
- }
- }
-}
-
-ccl_device_inline
-void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
-{
- const uint visibility = payload.visibility;
-
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
- Intersection isect;
- isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
- result.accept = !result.continue_search;
-
- if (result.accept) {
- result.distance = isect.t / len;
- }
- }
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const int type = kernel_data_fetch(objects, object).primitive_type;
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
-
- metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
-
- return result;
-}
-
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmax [[max_distance]])
-{
- const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- const int type = kernel_data_fetch(objects, object).primitive_type;
-
- BoundingBoxIntersectionResult result;
- result.accept = false;
- result.continue_search = true;
- result.distance = ray_tmax;
-
- metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
-# if defined(__METALRT_MOTION__)
- payload.time,
-# else
- 0.0f,
-# endif
- ray_tmax, result);
-
- return result;
-}
-#endif /* __POINTCLOUD__ */
-#endif /* __METALRT__ */
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 30b0f088ede..1b25259bcf5 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -193,7 +193,7 @@ ccl_always_inline float3 make_float3(float x)
#include "util/types.h"
/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they
- * include oneAPI headers, which transitively include math.h headers which will cause redefintions
+ * include oneAPI headers, which transitively include math.h headers which will cause redefinitions
* of the math defines because math.h also uses them and having them defined before math.h include
* is actually UB. */
/* Use fast math functions - get them from sycl::native namespace for native math function
diff --git a/intern/cycles/kernel/device/oneapi/device_id.h b/intern/cycles/kernel/device/oneapi/device_id.h
deleted file mode 100644
index b4c94ac27a2..00000000000
--- a/intern/cycles/kernel/device/oneapi/device_id.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2021-2022 Intel Corporation */
-
-#pragma once
-
-/* from public source :
- * https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/include/pci_ids/iris_pci_ids.h */
-const static std::set<uint32_t> intel_arc_alchemist_device_ids = {
- 0x4f80, 0x4f81, 0x4f82, 0x4f83, 0x4f84, 0x4f87, 0x4f88, 0x5690, 0x5691,
- 0x5692, 0x5693, 0x5694, 0x5695, 0x5696, 0x5697, 0x56a0, 0x56a1, 0x56a2,
- 0x56a3, 0x56a4, 0x56a5, 0x56a6, 0x56b0, 0x56b1, 0x56b2, 0x56b3};
diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
index 2d740b4c64a..662068c0fed 100644
--- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h
+++ b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2022 Intel Corporation */
+
/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */
DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
DLL_INTERFACE_CALL(oneapi_free, void, void *)
@@ -27,7 +30,7 @@ DLL_INTERFACE_CALL(oneapi_usm_memset,
DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue)
/* Operation with Kernel globals structure - map of global/constant allocation - filled before
- * render/kernel execution As we don't know in cycles sizeof this - Cycles will manage just as
+ * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as
* pointer. */
DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size)
DLL_INTERFACE_CALL(oneapi_set_global_memory,
diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h
index 892558d40bf..6681977a675 100644
--- a/intern/cycles/kernel/device/oneapi/image.h
+++ b/intern/cycles/kernel/device/oneapi/image.h
@@ -216,7 +216,7 @@ template<typename T> struct NanoVDBInterpolator {
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
- /* Tricubic b-spline interpolation. */
+ /* Tri-cubic b-spline interpolation. */
const float tx = svm_image_texture_frac(x - 0.5f, &ix);
const float ty = svm_image_texture_frac(y - 0.5f, &iy);
const float tz = svm_image_texture_frac(z - 0.5f, &iz);
@@ -355,7 +355,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, in
return r;
}
else {
- /* Tricubic interpolation. */
+ /* Tri-cubic interpolation. */
int ix, iy, iz;
float tx = svm_image_texture_frac(x - 0.5f, &ix);
float ty = svm_image_texture_frac(y - 0.5f, &iy);
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 62affe6e58e..300e201600c 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -9,12 +9,9 @@
# include <map>
# include <set>
-# include <level_zero/ze_api.h>
# include <CL/sycl.hpp>
-# include <ext/oneapi/backend/level_zero.hpp>
# include "kernel/device/oneapi/compat.h"
-# include "kernel/device/oneapi/device_id.h"
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel_templates.h"
@@ -103,8 +100,12 @@ bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_byte
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
oneapi_check_usm(queue_, dest, true);
oneapi_check_usm(queue_, src, true);
+ sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
+# ifdef WITH_CYCLES_DEBUG
try {
- sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
+ /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
+ * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
+ */
mem_event.wait_and_throw();
return true;
}
@@ -114,6 +115,20 @@ bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_byte
}
return false;
}
+# else
+ sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
+ sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
+ bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
+ src_type == sycl::usm::alloc::device;
+ bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
+ src_type == sycl::usm::alloc::unknown;
+ /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
+ * may not wait until the end of the transfer before using the memory.
+ */
+ if (from_device_to_host || host_or_device_memop_with_offset)
+ mem_event.wait();
+ return true;
+# endif
}
bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes)
@@ -121,8 +136,12 @@ bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, si
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
oneapi_check_usm(queue_, usm_ptr, true);
+ sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
+# ifdef WITH_CYCLES_DEBUG
try {
- sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
+ /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
+ * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
+ */
mem_event.wait_and_throw();
return true;
}
@@ -132,6 +151,10 @@ bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, si
}
return false;
}
+# else
+ (void)mem_event;
+ return true;
+# endif
}
bool oneapi_queue_synchronize(SyclQueue *queue_)
@@ -328,8 +351,8 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int num_states = *((int *)(args[0]));
/* Round up to the next work-group. */
size_t groups_count = (num_states + local_size - 1) / local_size;
- /* NOTE(@nsirgien): As for now non-uniform workgroups don't work on most oneAPI devices, we
- * extend work size to fit uniformity requirements. */
+ /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
+ * we extend work size to fit uniformity requirements. */
global_size = groups_count * local_size;
# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
@@ -647,7 +670,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
}
static const int lowest_supported_driver_version_win = 1011660;
-static const int lowest_supported_driver_version_neo = 20066;
+static const int lowest_supported_driver_version_neo = 23570;
static int parse_driver_build_version(const sycl::device &device)
{
@@ -726,21 +749,25 @@ static std::vector<sycl::device> oneapi_available_devices()
else {
bool filter_out = false;
- /* For now we support all Intel(R) Arc(TM) devices
- * and any future GPU with more than 128 execution units
- * official support can be broaden to older and smaller GPUs once ready. */
+ /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
+ * assuming they have either more than 96 Execution Units or not 7 threads per EU.
+ * Official support can be broadened to older and smaller GPUs once ready. */
if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
- ze_device_handle_t ze_device = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
- device);
- ze_device_properties_t props = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
- zeDeviceGetProperties(ze_device, &props);
- bool is_dg2 = (intel_arc_alchemist_device_ids.find(props.deviceId) !=
- intel_arc_alchemist_device_ids.end());
- int number_of_eus = props.numEUsPerSubslice * props.numSubslicesPerSlice *
- props.numSlices;
- if (!is_dg2 || number_of_eus < 128)
+ /* Defaults that cause the device to be filtered out, used in case these values aren't
+ * available because the Level-Zero runtime is too old. */
+ int number_of_eus = 96;
+ int threads_per_eu = 7;
+ if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
+ number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
+ }
+ if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
+ threads_per_eu =
+ device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
+ }
+ /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
+ if (number_of_eus <= 96 && threads_per_eu == 7) {
filter_out = true;
-
+ }
/* if not already filtered out, check driver version. */
if (!filter_out) {
int driver_build_version = parse_driver_build_version(device);
@@ -862,6 +889,9 @@ void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
device.get_platform().get_info<sycl::info::platform::name>();
std::string name = device.get_info<sycl::info::device::name>();
std::string id = "ONEAPI_" + platform_name + "_" + name;
+ if (device.has(sycl::aspect::ext_intel_pci_address)) {
+ id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
+ }
(cb)(id.c_str(), name.c_str(), num, user_ptr);
num++;
}
diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h
index 2dfc96292ed..d8964d9b672 100644
--- a/intern/cycles/kernel/device/oneapi/kernel_templates.h
+++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h
@@ -1,10 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
#pragma once
/* Some macro magic to generate templates for kernel arguments.
- The resulting oneapi_call() template allows to call a SYCL/C++ kernel
- with typed arguments by only giving it a void **args as given by Cycles.
- The template will automatically cast from void* to the expectd type.
- */
+ * The resulting oneapi_call() template allows to call a SYCL/C++ kernel
+ * with typed arguments by only giving it a void `**args` as given by Cycles.
+ * The template will automatically cast from void* to the expected type. */
/* When expanded by the preprocessor, the generated templates will look like this example: */
#if 0
diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h
new file mode 100644
index 00000000000..a1621277ec7
--- /dev/null
+++ b/intern/cycles/kernel/device/optix/bvh.h
@@ -0,0 +1,646 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* OptiX implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+#define OPTIX_DEFINE_ABI_VERSION_ONLY
+#include <optix_function_table.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Utilities. */
+
+/* Rebuild a typed pointer from the two 32-bit halves stored in payload registers 0 and 1.
+ * The first register is passed as the first argument of pointer_unpack_from_uint(), which is
+ * presumably the low half (matching pointer_pack_to_uint_0/1 on the caller side). */
+template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
+{
+  return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
+}
+
+/* Rebuild a typed pointer from the two 32-bit halves stored in payload registers 2 and 3. */
+template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
+{
+  return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
+}
+
+/* Rebuild a typed pointer from payload registers 6 (low 32 bits) and 7 (high 32 bits).
+ * Uses the same unpack helper as the other payload pointer accessors instead of repeating the
+ * shift-and-or by hand. */
+template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
+{
+  return pointer_unpack_from_uint<T>(optixGetPayload_6(), optixGetPayload_7());
+}
+
+/* Return the instance ID of the object the current hit/intersection program is running for. */
+ccl_device_forceinline int get_object_id()
+{
+#ifndef __OBJECT_MOTION__
+  const int instance_id = optixGetInstanceId();
+#else
+  /* Always take the instance ID from the TLAS entry of the transform list: a motion transform
+   * node may sit between TLAS and BLAS, and such a node does not have an instance ID. */
+  const OptixTraversableHandle tlas_entry = optixGetTransformListHandle(0);
+  const int instance_id = optixGetInstanceIdFromHandle(tlas_entry);
+#endif
+  return instance_id;
+}
+
+/* Hit/miss functions. */
+
+/* Miss program: marks the result as "nothing hit" through payload 5 and reports the ray's
+ * maximum distance through payload 0. */
+extern "C" __global__ void __miss__kernel_optix_miss()
+{
+  /* Primitive type payload signals the miss to the caller. */
+  optixSetPayload_5(PRIMITIVE_NONE);
+
+  /* The intersection distance has to be valid even on a miss, because
+   * 'kernel_path_lamp_emission' checks it. */
+  const float ray_tmax = optixGetRayTmax();
+  optixSetPayload_0(__float_as_uint(ray_tmax));
+}
+
+/* Any-hit program for local intersection queries (hits restricted to a single object).
+ *
+ * Payload registers as read/written here:
+ * - 0/1: packed pointer to the `lcg_state` random state (may be null).
+ * - 2/3: packed pointer to the `LocalIntersection` result.
+ * - 4: ID of the only object hits are accepted from.
+ * - 5: `max_hits` on entry; overwritten with `true` when `max_hits` is zero and a hit is found.
+ * - 6/7: packed pointer to the `Ray` being traced.
+ *
+ * Recorded hits are always ignored from OptiX's point of view, so traversal keeps going. */
+extern "C" __global__ void __anyhit__kernel_optix_local_hit()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+ if (!optixIsTriangleHit()) {
+ /* Ignore curves and points. */
+ return optixIgnoreIntersection();
+ }
+#endif
+
+#ifdef __BVH_LOCAL__
+ const int object = get_object_id();
+ if (object != optixGetPayload_4() /* local_object */) {
+ /* Only intersect with matching object. */
+ return optixIgnoreIntersection();
+ }
+
+ const int prim = optixGetPrimitiveIndex();
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_local(ray->self, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+ const uint max_hits = optixGetPayload_5();
+ if (max_hits == 0) {
+ /* Special case for when no hit information is requested, just report that something was hit */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ /* Index of the slot in `local_isect->hits` (and `Ng`) this hit is written to. */
+ int hit = 0;
+ uint *const lcg_state = get_payload_ptr_0<uint>();
+ LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
+
+ if (lcg_state) {
+ /* A random state was given: record up to `max_hits` hits.
+ * First skip hits at exactly the same distance as one that is already recorded. */
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (optixGetRayTmax() == local_isect->hits[i].t) {
+ return optixIgnoreIntersection();
+ }
+ }
+
+ /* Count the hit; while there is room it goes into the next free slot. */
+ hit = local_isect->num_hits++;
+
+ if (local_isect->num_hits > max_hits) {
+ /* More hits seen than can be stored: pick a random slot among all hits seen so far and
+ * drop this hit when the pick falls outside of the stored range. */
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+ if (hit >= max_hits) {
+ return optixIgnoreIntersection();
+ }
+ }
+ }
+ else {
+ if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
+ /* Record closest intersection only.
+ * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
+ */
+ return optixIgnoreIntersection();
+ }
+
+ /* Slot 0 (hit == 0) is overwritten with this closer hit below. */
+ local_isect->num_hits = 1;
+ }
+
+ /* Fill in the chosen slot. */
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = get_object_id();
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+
+ /* Convert the built-in triangle barycentrics to the u/v stored in the intersection. */
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+
+ /* Record geometric normal. */
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
+ const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
+ const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
+ const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ /* Continue tracing (without this the trace call would return after the first hit). */
+ optixIgnoreIntersection();
+#endif
+}
+
+/* Any-hit program for shadow rays that record transparent hits.
+ *
+ * Payload registers as read/written here:
+ * - 0: `IntegratorShadowState` the hits are recorded into.
+ * - 1: accumulated curve shadow throughput, stored as float bits.
+ * - 2: two packed 16-bit counters: number of recorded hits and total number of hits.
+ * - 3: `max_hits`.
+ * - 4: ray visibility mask (only read with `__VISIBILITY_FLAG__`).
+ * - 5: set to `true` when the ray is found to be blocked.
+ * - 6/7: packed pointer to the `Ray` being traced. */
+extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
+{
+#ifdef __SHADOW_RECORD_ALL__
+ int prim = optixGetPrimitiveIndex();
+ const uint object = get_object_id();
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+# endif
+
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_shadow(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+ /* Gather u, v and primitive type depending on what kind of primitive was hit. */
+ float u = 0.0f, v = 0.0f;
+ int type = 0;
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ u = 1.0f - barycentrics.y - barycentrics.x;
+ v = barycentrics.x;
+ type = kernel_data_fetch(objects, object).primitive_type;
+ }
+# ifdef __HAIR__
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ /* Non-triangle hit whose hit kind is not a point: treated as a curve segment. */
+ u = __uint_as_float(optixGetAttribute_0());
+ v = __uint_as_float(optixGetAttribute_1());
+
+ /* The OptiX primitive index addresses a curve segment; replace it by the segment's own
+ * primitive index and type. */
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+ type = segment.type;
+ prim = segment.prim;
+
+# if OPTIX_ABI_VERSION < 55
+ /* Filter out curve endcaps. */
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+# endif
+ }
+# endif
+ else {
+ /* Remaining non-triangle hits: no u/v, type comes from the object. */
+ type = kernel_data_fetch(objects, object).primitive_type;
+ u = 0.0f;
+ v = 0.0f;
+ }
+
+# ifndef __TRANSPARENT_SHADOWS__
+ /* No transparent shadows support compiled in, make opaque. */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# else
+ const uint max_hits = optixGetPayload_3();
+ const uint num_hits_packed = optixGetPayload_2();
+ const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
+ const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
+
+ /* If no transparent shadows, all light is blocked and we can stop immediately. */
+ if (num_hits >= max_hits ||
+ !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ /* Always use baked shadow transparency for curves. */
+ if (type & PRIMITIVE_CURVE) {
+ /* Curve hits are folded into the throughput payload and only bump the total hit counter,
+ * they are not written to the shadow intersection array. */
+ float throughput = __uint_as_float(optixGetPayload_1());
+ throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
+ optixSetPayload_1(__float_as_uint(throughput));
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
+
+ if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+ else {
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+ return;
+ }
+ }
+
+ /* Record transparent intersection. */
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
+
+ /* By default the hit goes into the next unused slot. */
+ uint record_index = num_recorded_hits;
+
+ const IntegratorShadowState state = optixGetPayload_0();
+
+ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits) {
+ /* If maximum number of hits reached, find a hit to replace. */
+ float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+ uint max_recorded_hit = 0;
+
+ /* Linear search for the recorded hit with the largest distance. */
+ for (int i = 1; i < max_record_hits; i++) {
+ const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+ if (isect_t > max_recorded_t) {
+ max_recorded_t = isect_t;
+ max_recorded_hit = i;
+ }
+ }
+
+ if (optixGetRayTmax() >= max_recorded_t) {
+ /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
+ * current hit anymore. */
+ return;
+ }
+
+ /* The current hit is closer than the farthest recorded one: overwrite that slot. */
+ record_index = max_recorded_hit;
+ }
+
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+# endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __SHADOW_RECORD_ALL__ */
+}
+
+/* Any-hit program for volume intersection queries: a hit is only let through when it is a
+ * triangle of a visible object that has the volume flag set and is not a self-intersection.
+ * Payload 4 holds the ray visibility, payloads 6/7 the packed `Ray` pointer. */
+extern "C" __global__ void __anyhit__kernel_optix_volume_test()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+  /* Ignore curves and points. */
+  if (!optixIsTriangleHit()) {
+    optixIgnoreIntersection();
+    return;
+  }
+#endif
+
+  const uint hit_object = get_object_id();
+
+#ifdef __VISIBILITY_FLAG__
+  /* Reject objects that are not visible to this kind of ray. */
+  const uint ray_visibility = optixGetPayload_4();
+  if (!(kernel_data_fetch(objects, hit_object).visibility & ray_visibility)) {
+    optixIgnoreIntersection();
+    return;
+  }
+#endif
+
+  /* Reject objects without a volume. */
+  if (!(kernel_data_fetch(object_flag, hit_object) & SD_OBJECT_HAS_VOLUME)) {
+    optixIgnoreIntersection();
+    return;
+  }
+
+  /* Reject self-intersections; anything that gets past this point is accepted. */
+  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+  const int hit_prim = optixGetPrimitiveIndex();
+  if (intersection_skip_self(ray->self, hit_object, hit_prim)) {
+    optixIgnoreIntersection();
+  }
+}
+
+/* Any-hit program that filters candidate hits by visibility and self-intersection.
+ * Payload 4 holds the ray visibility, payloads 6/7 the packed `Ray` pointer.
+ * For opaque shadow rays the first hit that is not skipped terminates the ray. */
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
+{
+#if defined(__HAIR__) && (OPTIX_ABI_VERSION < 55)
+  if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
+    /* Filter out curve endcaps. */
+    const float curve_u = __uint_as_float(optixGetAttribute_0());
+    const bool is_endcap = (curve_u == 0.0f) || (curve_u == 1.0f);
+    if (is_endcap) {
+      optixIgnoreIntersection();
+      return;
+    }
+  }
+#endif
+
+  const uint hit_object = get_object_id();
+  const uint ray_visibility = optixGetPayload_4();
+
+#ifdef __VISIBILITY_FLAG__
+  /* Reject objects that are not visible to this kind of ray. */
+  if (!(kernel_data_fetch(objects, hit_object).visibility & ray_visibility)) {
+    optixIgnoreIntersection();
+    return;
+  }
+#endif
+
+  const int hit_prim = optixGetPrimitiveIndex();
+  ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+
+  const bool is_opaque_shadow_ray = (ray_visibility & PATH_RAY_SHADOW_OPAQUE) != 0;
+  if (!is_opaque_shadow_ray) {
+    /* Regular ray: only self-intersections are rejected. */
+    if (intersection_skip_self(ray->self, hit_object, hit_prim)) {
+      optixIgnoreIntersection();
+    }
+    return;
+  }
+
+  if (intersection_skip_self_shadow(ray->self, hit_object, hit_prim)) {
+    optixIgnoreIntersection();
+    return;
+  }
+
+  /* Shadow ray early termination. */
+  optixTerminateRay();
+}
+
+/* Closest-hit program: writes the hit into the payload registers.
+ * 0 = distance (float bits), 1 = u, 2 = v, 3 = primitive index, 4 = object ID,
+ * 5 = primitive type. */
+extern "C" __global__ void __closesthit__kernel_optix_hit()
+{
+  const int hit_object = get_object_id();
+  const int hit_prim = optixGetPrimitiveIndex();
+
+  /* Values that depend on the kind of primitive; defaults match the "everything else" case,
+   * which only has to look up the type. */
+  uint u_bits = 0;
+  uint v_bits = 0;
+  uint prim_out = hit_prim;
+  uint type_out;
+
+  if (optixIsTriangleHit()) {
+    const float2 bary = optixGetTriangleBarycentrics();
+    u_bits = __float_as_uint(1.0f - bary.y - bary.x);
+    v_bits = __float_as_uint(bary.x);
+    type_out = kernel_data_fetch(objects, hit_object).primitive_type;
+  }
+  else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+    /* Curve: primitive index and type come from the curve segment table. */
+    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, hit_prim);
+    u_bits = optixGetAttribute_0(); /* Same as 'optixGetCurveParameter()' */
+    v_bits = optixGetAttribute_1();
+    prim_out = segment.prim;
+    type_out = segment.type;
+  }
+  else {
+    type_out = kernel_data_fetch(objects, hit_object).primitive_type;
+  }
+
+  optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
+  optixSetPayload_1(u_bits);
+  optixSetPayload_2(v_bits);
+  optixSetPayload_3(prim_out);
+  optixSetPayload_4(hit_object);
+  optixSetPayload_5(type_out);
+}
+
+/* Custom primitive intersection functions. */
+
+#ifdef __HAIR__
+/* Intersect the current object-space ray with curve primitive `prim` of the given `type` and,
+ * on a hit, report it to OptiX with u/v passed as attributes 0 and 1. */
+ccl_device_inline void optix_intersection_curve(const int prim, const int type)
+{
+  /* The primitive type is handed to OptiX as hit kind, which has a limited value range. */
+  static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+
+  const int object = get_object_id();
+
+#  ifdef __VISIBILITY_FLAG__
+  /* Objects not visible to this ray produce no intersection. */
+  const uint ray_visibility = optixGetPayload_4();
+  if (!(kernel_data_fetch(objects, object).visibility & ray_visibility)) {
+    return;
+  }
+#  endif
+
+#  ifdef __OBJECT_MOTION__
+  const float ray_time = optixGetRayTime();
+#  else
+  const float ray_time = 0.0f;
+#  endif
+
+  const float3 origin = optixGetObjectRayOrigin();
+  const float3 direction = optixGetObjectRayDirection();
+  const float t_near = optixGetRayTmin();
+  const float t_far = optixGetRayTmax();
+
+  Intersection isect;
+  isect.t = t_far;
+
+  const bool found = curve_intersect(
+      NULL, &isect, origin, direction, t_near, t_far, object, prim, ray_time, type);
+  if (!found) {
+    return;
+  }
+
+  optixReportIntersection(isect.t,
+                          type & PRIMITIVE_ALL,
+                          __float_as_int(isect.u),  /* Attribute_0 */
+                          __float_as_int(isect.v)); /* Attribute_1 */
+}
+
+/* Intersection program for curve segments: only segments flagged as ribbons are intersected
+ * here, all other segment types are left alone. */
+extern "C" __global__ void __intersection__curve_ribbon()
+{
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
+  if (!(segment.type & PRIMITIVE_CURVE_RIBBON)) {
+    return;
+  }
+
+  optix_intersection_curve(segment.prim, segment.type);
+}
+
+#endif
+
+#ifdef __POINTCLOUD__
+/* Intersection program for points: intersects the current object-space ray with the point
+ * primitive and reports a hit to OptiX, using the primitive type as hit kind. */
+extern "C" __global__ void __intersection__point()
+{
+  /* The primitive type is handed to OptiX as hit kind, which has a limited value range. */
+  static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+
+  const int object = get_object_id();
+
+#  ifdef __VISIBILITY_FLAG__
+  /* Objects not visible to this ray produce no intersection. */
+  const uint ray_visibility = optixGetPayload_4();
+  if (!(kernel_data_fetch(objects, object).visibility & ray_visibility)) {
+    return;
+  }
+#  endif
+
+  const int prim = optixGetPrimitiveIndex();
+  const int type = kernel_data_fetch(objects, object).primitive_type;
+
+#  ifdef __OBJECT_MOTION__
+  const float ray_time = optixGetRayTime();
+#  else
+  const float ray_time = 0.0f;
+#  endif
+
+  const float3 origin = optixGetObjectRayOrigin();
+  const float3 direction = optixGetObjectRayDirection();
+  const float t_near = optixGetRayTmin();
+  const float t_far = optixGetRayTmax();
+
+  Intersection isect;
+  isect.t = t_far;
+
+  const bool found = point_intersect(
+      NULL, &isect, origin, direction, t_near, t_far, object, prim, ray_time, type);
+  if (found) {
+    optixReportIntersection(isect.t, type & PRIMITIVE_ALL);
+  }
+}
+#endif
+
+/* Scene intersection. */
+
+/* Trace `ray` against the scene and store the resulting hit in `isect`.
+ * Returns true when a primitive was hit, i.e. when the type payload is not PRIMITIVE_NONE
+ * after the trace. */
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+                                          ccl_private const Ray *ray,
+                                          const uint visibility,
+                                          ccl_private Intersection *isect)
+{
+  /* Payload registers 0-7. Register 4 carries the visibility into the hit programs and the
+   * object ID back out; registers 6/7 carry the ray pointer split in two halves. */
+  const uint64_t ray_address = (uint64_t)ray;
+  uint payload_t = 0;
+  uint payload_u = 0;
+  uint payload_v = 0;
+  uint payload_prim = 0;
+  uint payload_object = visibility;
+  uint payload_type = PRIMITIVE_NONE;
+  uint payload_ray_lo = ray_address & 0xFFFFFFFF;
+  uint payload_ray_hi = (ray_address >> 32) & 0xFFFFFFFF;
+
+  uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
+  uint ray_mask = visibility & 0xFF;
+  const bool only_upper_bits_set = (ray_mask == 0) && ((visibility & ~0xFF) != 0);
+  if (only_upper_bits_set) {
+    /* No bit in the low 8 bits, but some visibility was requested: use the full mask. */
+    ray_mask = 0xFF;
+  }
+  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+    ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
+  }
+
+  optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+             ray->P,
+             ray->D,
+             ray->tmin,
+             ray->tmax,
+             ray->time,
+             ray_mask,
+             ray_flags,
+             0, /* SBT offset for PG_HITD */
+             0,
+             0,
+             payload_t,
+             payload_u,
+             payload_v,
+             payload_prim,
+             payload_object,
+             payload_type,
+             payload_ray_lo,
+             payload_ray_hi);
+
+  /* Unpack the payload into the intersection. */
+  isect->type = payload_type;
+  isect->object = payload_object;
+  isect->prim = payload_prim;
+  isect->v = __uint_as_float(payload_v);
+  isect->u = __uint_as_float(payload_u);
+  isect->t = __uint_as_float(payload_t);
+
+  return payload_type != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+/* Trace `ray` and collect hits on `local_object` only, into `local_isect`.
+ * `lcg_state` and `local_isect` are handed to the any-hit program as packed pointers.
+ * Returns the final value of payload 5 converted to bool. */
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
+                                                int local_object,
+                                                ccl_private uint *lcg_state,
+                                                int max_hits)
+{
+  /* Start from an empty result. */
+  if (local_isect != nullptr) {
+    local_isect->num_hits = 0;
+  }
+
+  /* Payload registers 0-7. */
+  uint payload_lcg_lo = pointer_pack_to_uint_0(lcg_state);
+  uint payload_lcg_hi = pointer_pack_to_uint_1(lcg_state);
+  uint payload_isect_lo = pointer_pack_to_uint_0(local_isect);
+  uint payload_isect_hi = pointer_pack_to_uint_1(local_isect);
+  uint payload_object = local_object;
+  /* Carries max_hits into the any-hit program. It is set to zero on a miss or if the ray is
+   * aborted, so it doubles as the return value. */
+  uint payload_hit = max_hits;
+  const uint64_t ray_address = (uint64_t)ray;
+  uint payload_ray_lo = ray_address & 0xFFFFFFFF;
+  uint payload_ray_hi = (ray_address >> 32) & 0xFFFFFFFF;
+
+  optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+             ray->P,
+             ray->D,
+             ray->tmin,
+             ray->tmax,
+             ray->time,
+             0xFF,
+             /* Any-hit must be enforced so that __anyhit__kernel_optix_local_hit always runs. */
+             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+             2, /* SBT offset for PG_HITL */
+             0,
+             0,
+             payload_lcg_lo,
+             payload_lcg_hi,
+             payload_isect_lo,
+             payload_isect_hi,
+             payload_object,
+             payload_hit,
+             payload_ray_lo,
+             payload_ray_hi);
+
+  return payload_hit != 0;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+/* Trace a shadow ray, letting the any-hit program record transparent hits into `state`.
+ * On return `*num_recorded_hits` holds the recorded-hit counter and `*throughput` the
+ * throughput read back from the payload. Returns the final value of payload 5 converted to
+ * bool. */
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+                                                     IntegratorShadowState state,
+                                                     ccl_private const Ray *ray,
+                                                     uint visibility,
+                                                     uint max_hits,
+                                                     ccl_private uint *num_recorded_hits,
+                                                     ccl_private float *throughput)
+{
+  /* Payload registers 0-7. */
+  const uint64_t ray_address = (uint64_t)ray;
+  uint payload_state = state;
+  uint payload_throughput = __float_as_uint(1.0f); /* Throughput. */
+  uint payload_num_hits = 0;                       /* Number of hits. */
+  uint payload_max_hits = max_hits;
+  uint payload_visibility = visibility;
+  uint payload_opaque = false;
+  uint payload_ray_lo = ray_address & 0xFFFFFFFF;
+  uint payload_ray_hi = (ray_address >> 32) & 0xFFFFFFFF;
+
+  uint ray_mask = visibility & 0xFF;
+  const bool only_upper_bits_set = (ray_mask == 0) && ((visibility & ~0xFF) != 0);
+  if (only_upper_bits_set) {
+    /* No bit in the low 8 bits, but some visibility was requested: use the full mask. */
+    ray_mask = 0xFF;
+  }
+
+  optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+             ray->P,
+             ray->D,
+             ray->tmin,
+             ray->tmax,
+             ray->time,
+             ray_mask,
+             /* Any-hit must be enforced so that __anyhit__kernel_optix_shadow_all_hit always
+              * runs. */
+             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+             1, /* SBT offset for PG_HITS */
+             0,
+             0,
+             payload_state,
+             payload_throughput,
+             payload_num_hits,
+             payload_max_hits,
+             payload_visibility,
+             payload_opaque,
+             payload_ray_lo,
+             payload_ray_hi);
+
+  /* Read the results back from the payload. */
+  *throughput = __uint_as_float(payload_throughput);
+  *num_recorded_hits = uint16_unpack_from_uint_0(payload_num_hits);
+
+  return payload_opaque != 0;
+}
+#endif
+
+#ifdef __VOLUME__
+/* Trace `ray` through the volume hit group and store the resulting hit in `isect`.
+ * Returns true when a primitive was hit, i.e. when the type payload is not PRIMITIVE_NONE
+ * after the trace. */
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
+                                                 const uint visibility)
+{
+  /* Payload registers 0-7. Register 4 carries the visibility into the hit programs and the
+   * object ID back out; registers 6/7 carry the ray pointer split in two halves. */
+  const uint64_t ray_address = (uint64_t)ray;
+  uint payload_t = 0;
+  uint payload_u = 0;
+  uint payload_v = 0;
+  uint payload_prim = 0;
+  uint payload_object = visibility;
+  uint payload_type = PRIMITIVE_NONE;
+  uint payload_ray_lo = ray_address & 0xFFFFFFFF;
+  uint payload_ray_hi = (ray_address >> 32) & 0xFFFFFFFF;
+
+  uint ray_mask = visibility & 0xFF;
+  const bool only_upper_bits_set = (ray_mask == 0) && ((visibility & ~0xFF) != 0);
+  if (only_upper_bits_set) {
+    /* No bit in the low 8 bits, but some visibility was requested: use the full mask. */
+    ray_mask = 0xFF;
+  }
+
+  optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+             ray->P,
+             ray->D,
+             ray->tmin,
+             ray->tmax,
+             ray->time,
+             ray_mask,
+             /* Any-hit must be enforced so that __anyhit__kernel_optix_volume_test always runs. */
+             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+             3, /* SBT offset for PG_HITV */
+             0,
+             0,
+             payload_t,
+             payload_u,
+             payload_v,
+             payload_prim,
+             payload_object,
+             payload_type,
+             payload_ray_lo,
+             payload_ray_hi);
+
+  /* Unpack the payload into the intersection. */
+  isect->type = payload_type;
+  isect->object = payload_object;
+  isect->prim = payload_prim;
+  isect->v = __uint_as_float(payload_v);
+  isect->u = __uint_as_float(payload_u);
+  isect->t = __uint_as_float(payload_t);
+
+  return payload_type != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index aa4a6321a8b..1a11a533b7e 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -8,7 +8,6 @@
#include <optix.h>
#define __KERNEL_GPU__
-#define __KERNEL_GPU_RAYTRACING__
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
#define __KERNEL_OPTIX__
#define CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 949bf41d171..6abb5aeacb9 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -20,469 +20,39 @@
#include "kernel/integrator/intersect_volume_stack.h"
// clang-format on
-#define OPTIX_DEFINE_ABI_VERSION_ONLY
-#include <optix_function_table.h>
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
-}
-template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
-}
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
-{
- return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
-}
-
-ccl_device_forceinline int get_object_id()
-{
-#ifdef __OBJECT_MOTION__
- /* Always get the instance ID from the TLAS
- * There might be a motion transform node between TLAS and BLAS which does not have one. */
- return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
-#else
- return optixGetInstanceId();
-#endif
-}
-
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
- global_index;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
- global_index;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
integrator_intersect_shadow(nullptr, path_index);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
- global_index;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
integrator_intersect_subsurface(nullptr, path_index);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
- global_index;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
integrator_intersect_volume_stack(nullptr, path_index);
}
-extern "C" __global__ void __miss__kernel_optix_miss()
-{
- /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
- optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
- optixSetPayload_5(PRIMITIVE_NONE);
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_local_hit()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves and points. */
- return optixIgnoreIntersection();
- }
-#endif
-
-#ifdef __BVH_LOCAL__
- const int object = get_object_id();
- if (object != optixGetPayload_4() /* local_object */) {
- /* Only intersect with matching object. */
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_local(ray->self, prim)) {
- return optixIgnoreIntersection();
- }
-
- const uint max_hits = optixGetPayload_5();
- if (max_hits == 0) {
- /* Special case for when no hit information is requested, just report that something was hit */
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- int hit = 0;
- uint *const lcg_state = get_payload_ptr_0<uint>();
- LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
-
- if (lcg_state) {
- for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if (optixGetRayTmax() == local_isect->hits[i].t) {
- return optixIgnoreIntersection();
- }
- }
-
- hit = local_isect->num_hits++;
-
- if (local_isect->num_hits > max_hits) {
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
- if (hit >= max_hits) {
- return optixIgnoreIntersection();
- }
- }
- }
- else {
- if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
- /* Record closest intersection only.
- * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
- */
- return optixIgnoreIntersection();
- }
-
- local_isect->num_hits = 1;
- }
-
- Intersection *isect = &local_isect->hits[hit];
- isect->t = optixGetRayTmax();
- isect->prim = prim;
- isect->object = get_object_id();
- isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
-
- const float2 barycentrics = optixGetTriangleBarycentrics();
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
-
- /* Record geometric normal. */
- const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
- const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
- const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
- const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
- local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
-
- /* Continue tracing (without this the trace call would return after the first hit). */
- optixIgnoreIntersection();
-#endif
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
-{
-#ifdef __SHADOW_RECORD_ALL__
- int prim = optixGetPrimitiveIndex();
- const uint object = get_object_id();
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-# endif
-
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-
- float u = 0.0f, v = 0.0f;
- int type = 0;
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
- type = kernel_data_fetch(objects, object).primitive_type;
- }
-# ifdef __HAIR__
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- u = __uint_as_float(optixGetAttribute_0());
- v = __uint_as_float(optixGetAttribute_1());
-
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- type = segment.type;
- prim = segment.prim;
-
-# if OPTIX_ABI_VERSION < 55
- /* Filter out curve endcaps. */
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
-# endif
- }
-# endif
- else {
- type = kernel_data_fetch(objects, object).primitive_type;
- u = 0.0f;
- v = 0.0f;
- }
-
-# ifndef __TRANSPARENT_SHADOWS__
- /* No transparent shadows support compiled in, make opaque. */
- optixSetPayload_5(true);
- return optixTerminateRay();
-# else
- const uint max_hits = optixGetPayload_3();
- const uint num_hits_packed = optixGetPayload_2();
- const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
- const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
-
- /* If no transparent shadows, all light is blocked and we can stop immediately. */
- if (num_hits >= max_hits ||
- !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- /* Always use baked shadow transparency for curves. */
- if (type & PRIMITIVE_CURVE) {
- float throughput = __uint_as_float(optixGetPayload_1());
- throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
- optixSetPayload_1(__float_as_uint(throughput));
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
-
- if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
- else {
- /* Continue tracing. */
- optixIgnoreIntersection();
- return;
- }
- }
-
- /* Record transparent intersection. */
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
-
- uint record_index = num_recorded_hits;
-
- const IntegratorShadowState state = optixGetPayload_0();
-
- const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- if (record_index >= max_record_hits) {
- /* If maximum number of hits reached, find a hit to replace. */
- float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- uint max_recorded_hit = 0;
-
- for (int i = 1; i < max_record_hits; i++) {
- const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
- if (isect_t > max_recorded_t) {
- max_recorded_t = isect_t;
- max_recorded_hit = i;
- }
- }
-
- if (optixGetRayTmax() >= max_recorded_t) {
- /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
- * current hit anymore. */
- return;
- }
-
- record_index = max_recorded_hit;
- }
-
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
- /* Continue tracing. */
- optixIgnoreIntersection();
-# endif /* __TRANSPARENT_SHADOWS__ */
-#endif /* __SHADOW_RECORD_ALL__ */
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_volume_test()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves. */
- return optixIgnoreIntersection();
- }
-#endif
-
- const uint object = get_object_id();
-#ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
-{
-#ifdef __HAIR__
-# if OPTIX_ABI_VERSION < 55
- if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
- /* Filter out curve endcaps. */
- const float u = __uint_as_float(optixGetAttribute_0());
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
- }
-# endif
-#endif
-
- const uint object = get_object_id();
- const uint visibility = optixGetPayload_4();
-#ifdef __VISIBILITY_FLAG__
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
-
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- else {
- /* Shadow ray early termination. */
- return optixTerminateRay();
- }
- }
- else {
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- }
-}
-
-extern "C" __global__ void __closesthit__kernel_optix_hit()
-{
- const int object = get_object_id();
- const int prim = optixGetPrimitiveIndex();
-
- optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
- optixSetPayload_4(object);
-
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
- optixSetPayload_2(__float_as_uint(barycentrics.x));
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
- optixSetPayload_2(optixGetAttribute_1());
- optixSetPayload_3(segment.prim);
- optixSetPayload_5(segment.type);
- }
- else {
- optixSetPayload_1(0);
- optixSetPayload_2(0);
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
-}
-
-#ifdef __HAIR__
-ccl_device_inline void optix_intersection_curve(const int prim, const int type)
-{
- const int object = get_object_id();
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len,
- type & PRIMITIVE_ALL,
- __float_as_int(isect.u), /* Attribute_0 */
- __float_as_int(isect.v)); /* Attribute_1 */
- }
-}
-
-extern "C" __global__ void __intersection__curve_ribbon()
-{
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
- const int prim = segment.prim;
- const int type = segment.type;
- if (type & PRIMITIVE_CURVE_RIBBON) {
- optix_intersection_curve(prim, type);
- }
-}
-
-#endif
-
-#ifdef __POINTCLOUD__
-extern "C" __global__ void __intersection__point()
-{
- const int prim = optixGetPrimitiveIndex();
- const int object = get_object_id();
- const int type = kernel_data_fetch(objects, object).primitive_type;
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
-
- /* The direction is not normalized by default, the point intersection routine expects that. */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX) {
- isect.t *= len;
- }
-
- if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
- }
-}
-#endif
diff --git a/intern/cycles/kernel/geom/curve_intersect.h b/intern/cycles/kernel/geom/curve_intersect.h
index 001bec01749..97644aacaa8 100644
--- a/intern/cycles/kernel/geom/curve_intersect.h
+++ b/intern/cycles/kernel/geom/curve_intersect.h
@@ -72,7 +72,7 @@ ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const floa
ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
const float3 cylinder_end,
const float cylinder_radius,
- const float3 ray_dir,
+ const float3 ray_D,
ccl_private float2 *t_o,
ccl_private float *u0_o,
ccl_private float3 *Ng0_o,
@@ -82,7 +82,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculate quadratic equation to solve. */
const float rl = 1.0f / len(cylinder_end - cylinder_start);
const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl;
- const float3 O = -P0, dO = ray_dir;
+ const float3 O = -P0, dO = ray_D;
const float dOdO = dot(dO, dO);
const float OdO = dot(dO, O);
@@ -123,7 +123,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculates u and Ng for near hit. */
{
*u0_o = (t0 * dOz + Oz) * rl;
- const float3 Pr = t0 * ray_dir;
+ const float3 Pr = t0 * ray_D;
const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start;
*Ng0_o = Pr - Pl;
}
@@ -131,7 +131,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculates u and Ng for far hit. */
{
*u1_o = (t1 * dOz + Oz) * rl;
- const float3 Pr = t1 * ray_dir;
+ const float3 Pr = t1 * ray_D;
const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start;
*Ng1_o = Pr - Pl;
}
@@ -141,10 +141,10 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
return true;
}
-ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir)
+ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_D)
{
const float3 O = -P;
- const float3 D = ray_dir;
+ const float3 D = ray_D;
const float ON = dot(O, N);
const float DN = dot(D, N);
const float min_rcp_input = 1e-18f;
@@ -155,8 +155,9 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
return make_float2(lower, upper);
}
-ccl_device bool curve_intersect_iterative(const float3 ray_dir,
- ccl_private float *ray_tfar,
+ccl_device bool curve_intersect_iterative(const float3 ray_D,
+ const float ray_tmin,
+ ccl_private float *ray_tmax,
const float dt,
const float4 curve[4],
float u,
@@ -164,7 +165,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const bool use_backfacing,
ccl_private Intersection *isect)
{
- const float length_ray_dir = len(ray_dir);
+ const float length_ray_D = len(ray_D);
/* Error of curve evaluations is proportional to largest coordinate. */
const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3]));
@@ -175,9 +176,9 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const float radius_max = box_max.w;
for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) {
- const float3 Q = ray_dir * t;
- const float3 dQdt = ray_dir;
- const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t;
+ const float3 Q = ray_D * t;
+ const float3 dQdt = ray_D;
+ const float Q_err = 16.0f * FLT_EPSILON * length_ray_D * t;
const float4 P4 = catmull_rom_basis_eval(curve, u);
const float4 dPdu4 = catmull_rom_basis_derivative(curve, u);
@@ -220,7 +221,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
if (fabsf(f) < f_err && fabsf(g) < g_err) {
t += dt;
- if (!(0.0f <= t && t <= *ray_tfar)) {
+ if (!(t >= ray_tmin && t <= *ray_tmax)) {
return false; /* Rejects NaNs */
}
if (!(u >= 0.0f && u <= 1.0f)) {
@@ -232,12 +233,12 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const float3 U = dradiusdu * R + dPdu;
const float3 V = cross(dPdu, R);
const float3 Ng = cross(V, U);
- if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) {
+ if (!use_backfacing && dot(ray_D, Ng) > 0.0f) {
return false;
}
/* Record intersection. */
- *ray_tfar = t;
+ *ray_tmax = t;
isect->t = t;
isect->u = u;
isect->v = 0.0f;
@@ -248,16 +249,17 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
return false;
}
-ccl_device bool curve_intersect_recursive(const float3 ray_orig,
- const float3 ray_dir,
- float ray_tfar,
+ccl_device bool curve_intersect_recursive(const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ float ray_tmax,
float4 curve[4],
ccl_private Intersection *isect)
{
/* Move ray closer to make intersection stable. */
const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
- const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir);
- const float3 ref = ray_orig + ray_dir * dt;
+ const float dt = dot(center - ray_P, ray_D) / dot(ray_D, ray_D);
+ const float3 ref = ray_P + ray_D * dt;
const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f);
curve[0] -= ref4;
curve[1] -= ref4;
@@ -320,7 +322,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
valid = cylinder_intersect(float4_to_float3(P0),
float4_to_float3(P3),
r_outer,
- ray_dir,
+ ray_D,
&tc_outer,
&u_outer0,
&Ng_outer0,
@@ -331,13 +333,12 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
}
/* Intersect with cap-planes. */
- float2 tp = make_float2(-dt, ray_tfar - dt);
+ float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt);
tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
- const float2 h0 = half_plane_intersect(
- float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
+ const float2 h0 = half_plane_intersect(float4_to_float3(P0), float4_to_float3(dP0du), ray_D);
tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y));
const float2 h1 = half_plane_intersect(
- float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir);
+ float4_to_float3(P3), -float4_to_float3(dP3du), ray_D);
tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y));
valid = tp.x <= tp.y;
if (!valid) {
@@ -357,7 +358,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
const bool valid_inner = cylinder_intersect(float4_to_float3(P0),
float4_to_float3(P3),
r_inner,
- ray_dir,
+ ray_D,
&tc_inner,
&u_inner0,
&Ng_inner0,
@@ -367,9 +368,9 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
/* At the unstable area we subdivide deeper. */
# if 0
const bool unstable0 = (!valid_inner) |
- (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f);
+ (fabsf(dot(normalize(ray_D), normalize(Ng_inner0))) < 0.3f);
const bool unstable1 = (!valid_inner) |
- (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f);
+ (fabsf(dot(normalize(ray_D), normalize(Ng_inner1))) < 0.3f);
# else
/* On the GPU appears to be a little faster if always enabled. */
(void)valid_inner;
@@ -394,19 +395,20 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
- ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
+ ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
}
else {
recurse = true;
}
}
- if (valid1 && (tp1.x + dt <= ray_tfar)) {
+ const float t1 = tp1.x + dt;
+ if (valid1 && (t1 >= ray_tmin && t1 <= ray_tmax)) {
const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
- ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
+ ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
}
else {
recurse = true;
@@ -456,7 +458,8 @@ ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, c
* v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
* triangles gets intersected.
*/
-ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
+ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
+ const float ray_tmax,
const float3 quad_v0,
const float3 quad_v1,
const float3 quad_v2,
@@ -497,7 +500,7 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
/* Perform depth test? */
const float t = rcpDen * dot(v0, Ng);
- if (!(0.0f <= t && t <= ray_tfar)) {
+ if (!(t >= ray_tmin && t <= ray_tmax)) {
return false;
}
@@ -515,13 +518,16 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
return true;
}
-ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3])
+ccl_device_inline void ribbon_ray_space(const float3 ray_D,
+ const float ray_D_invlen,
+ float3 ray_space[3])
{
- const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y);
- const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x);
+ const float3 D = ray_D * ray_D_invlen;
+ const float3 dx0 = make_float3(0, D.z, -D.y);
+ const float3 dx1 = make_float3(-D.z, 0, D.x);
ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1);
- ray_space[1] = normalize(cross(ray_dir, ray_space[0]));
- ray_space[2] = ray_dir;
+ ray_space[1] = normalize(cross(D, ray_space[0]));
+ ray_space[2] = D * ray_D_invlen;
}
ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
@@ -533,15 +539,17 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
}
ccl_device_inline bool ribbon_intersect(const float3 ray_org,
- const float3 ray_dir,
- float ray_tfar,
+ const float3 ray_D,
+ const float ray_tmin,
+ float ray_tmax,
const int N,
float4 curve[4],
ccl_private Intersection *isect)
{
/* Transform control points into ray space. */
+ const float ray_D_invlen = 1.0f / len(ray_D);
float3 ray_space[3];
- ribbon_ray_space(ray_dir, ray_space);
+ ribbon_ray_space(ray_D, ray_D_invlen, ray_space);
curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]);
curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]);
@@ -582,21 +590,21 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
/* Intersect quad. */
float vu, vv, vt;
- bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt);
+ bool valid0 = ribbon_intersect_quad(ray_tmin, ray_tmax, lp0, lp1, up1, up0, &vu, &vv, &vt);
if (valid0) {
/* ignore self intersections */
const float avoidance_factor = 2.0f;
if (avoidance_factor != 0.0f) {
float r = mix(p0.w, p1.w, vu);
- valid0 = vt > avoidance_factor * r;
+ valid0 = vt > avoidance_factor * r * ray_D_invlen;
}
if (valid0) {
vv = 2.0f * vv - 1.0f;
/* Record intersection. */
- ray_tfar = vt;
+ ray_tmax = vt;
isect->t = vt;
isect->u = u + vu * step_size;
isect->v = vv;
@@ -614,8 +622,9 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
- const float3 P,
- const float3 dir,
+ const float3 ray_P,
+ const float3 ray_D,
+ const float tmin,
const float tmax,
int object,
int prim,
@@ -645,7 +654,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
if (type & PRIMITIVE_CURVE_RIBBON) {
/* todo: adaptive number of subdivisions could help performance here. */
const int subdivisions = kernel_data.bvh.curve_subdivisions;
- if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) {
+ if (ribbon_intersect(ray_P, ray_D, tmin, tmax, subdivisions, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;
@@ -655,7 +664,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
return false;
}
else {
- if (curve_intersect_recursive(P, dir, tmax, curve, isect)) {
+ if (curve_intersect_recursive(ray_P, ray_D, tmin, tmax, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;
diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h
index 6eea5096567..b59c5c43c20 100644
--- a/intern/cycles/kernel/geom/motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h
@@ -46,6 +46,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
float3 P,
float3 dir,
+ float tmin,
float tmax,
float time,
uint visibility,
@@ -58,7 +59,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
motion_triangle_vertices(kg, object, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
+ if (ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -92,6 +93,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
int object,
int prim,
int prim_addr,
+ float tmin,
float tmax,
ccl_private uint *lcg_state,
int max_hits)
@@ -101,7 +103,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
motion_triangle_vertices(kg, object, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
+ if (!ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
return false;
}
diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h
index b15f6b5dda5..badfd311985 100644
--- a/intern/cycles/kernel/geom/object.h
+++ b/intern/cycles/kernel/geom/object.h
@@ -86,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
Transform tfm = object_fetch_transform_motion(kg, object, time);
if (itfm)
- *itfm = transform_quick_inverse(tfm);
+ *itfm = transform_inverse(tfm);
return tfm;
}
@@ -488,59 +488,30 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
/* Transform ray into object space to enter static object in BVH */
-ccl_device_inline float bvh_instance_push(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir)
+ccl_device_inline void bvh_instance_push(KernelGlobals kg,
+ int object,
+ ccl_private const Ray *ray,
+ ccl_private float3 *P,
+ ccl_private float3 *dir,
+ ccl_private float3 *idir)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
*P = transform_point(&tfm, ray->P);
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
+ *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
*idir = bvh_inverse_direction(*dir);
-
- return len;
}
/* Transform ray to exit static object in BVH. */
-ccl_device_inline float bvh_instance_pop(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- float t)
+ccl_device_inline void bvh_instance_pop(KernelGlobals kg,
+ int object,
+ ccl_private const Ray *ray,
+ ccl_private float3 *P,
+ ccl_private float3 *dir,
+ ccl_private float3 *idir)
{
- if (t != FLT_MAX) {
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- t /= len(transform_direction(&tfm, ray->D));
- }
-
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
-
- return t;
-}
-
-/* Same as above, but returns scale factor to apply to multiple intersection distances */
-
-ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private float *t_fac)
-{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
-
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
@@ -549,59 +520,31 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
-ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private Transform *itfm)
-{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
-
- *P = transform_point(itfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- return len;
-}
-
-/* Transform ray to exit motion blurred object in BVH. */
-
-ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg,
+ccl_device_inline void bvh_instance_motion_push(KernelGlobals kg,
int object,
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
- ccl_private float3 *idir,
- float t,
- ccl_private Transform *itfm)
+ ccl_private float3 *idir)
{
- if (t != FLT_MAX) {
- t /= len(transform_direction(itfm, ray->D));
- }
+ Transform tfm;
+ object_fetch_transform_motion_test(kg, object, ray->time, &tfm);
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *P = transform_point(&tfm, ray->P);
- return t;
+ *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
+ *idir = bvh_inverse_direction(*dir);
}
-/* Same as above, but returns scale factor to apply to multiple intersection distances */
+/* Transform ray to exit motion blurred object in BVH. */
-ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private float *t_fac,
- ccl_private Transform *itfm)
+ccl_device_inline void bvh_instance_motion_pop(KernelGlobals kg,
+ int object,
+ ccl_private const Ray *ray,
+ ccl_private float3 *P,
+ ccl_private float3 *dir,
+ ccl_private float3 *idir)
{
- *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
diff --git a/intern/cycles/kernel/geom/point_intersect.h b/intern/cycles/kernel/geom/point_intersect.h
index dfd9d9a015b..15fb814c58d 100644
--- a/intern/cycles/kernel/geom/point_intersect.h
+++ b/intern/cycles/kernel/geom/point_intersect.h
@@ -9,17 +9,21 @@ CCL_NAMESPACE_BEGIN
#ifdef __POINTCLOUD__
-ccl_device_forceinline bool point_intersect_test(
- const float4 point, const float3 P, const float3 dir, const float tmax, ccl_private float *t)
+ccl_device_forceinline bool point_intersect_test(const float4 point,
+ const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ const float ray_tmax,
+ ccl_private float *t)
{
const float3 center = float4_to_float3(point);
const float radius = point.w;
- const float rd2 = 1.0f / dot(dir, dir);
+ const float rd2 = 1.0f / dot(ray_D, ray_D);
- const float3 c0 = center - P;
- const float projC0 = dot(c0, dir) * rd2;
- const float3 perp = c0 - projC0 * dir;
+ const float3 c0 = center - ray_P;
+ const float projC0 = dot(c0, ray_D) * rd2;
+ const float3 perp = c0 - projC0 * ray_D;
const float l2 = dot(perp, perp);
const float r2 = radius * radius;
if (!(l2 <= r2)) {
@@ -28,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test(
const float td = sqrt((r2 - l2) * rd2);
const float t_front = projC0 - td;
- const bool valid_front = (0.0f <= t_front) & (t_front <= tmax);
+ const bool valid_front = (ray_tmin <= t_front) & (t_front <= ray_tmax);
/* Always back-face culling for now. */
# if 0
const float t_back = projC0 + td;
- const bool valid_back = (0.0f <= t_back) & (t_back <= tmax);
+ const bool valid_back = (ray_tmin <= t_back) & (t_back <= ray_tmax);
/* check if there is a first hit */
const bool valid_first = valid_front | valid_back;
@@ -54,9 +58,10 @@ ccl_device_forceinline bool point_intersect_test(
ccl_device_forceinline bool point_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
- const float3 P,
- const float3 dir,
- const float tmax,
+ const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ const float ray_tmax,
const int object,
const int prim,
const float time,
@@ -65,7 +70,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) :
kernel_data_fetch(points, prim);
- if (!point_intersect_test(point, P, dir, tmax, &isect->t)) {
+ if (!point_intersect_test(point, ray_P, ray_D, ray_tmin, ray_tmax, &isect->t)) {
return false;
}
diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h
index e5dbeac5e66..5af89b45f20 100644
--- a/intern/cycles/kernel/geom/shader_data.h
+++ b/intern/cycles/kernel/geom/shader_data.h
@@ -18,7 +18,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg,
{
if (sd->object_flag & SD_OBJECT_MOTION) {
sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion);
+ sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion);
}
}
#endif
@@ -407,7 +407,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
{
/* vectors */
- sd->P = ray->P;
+ sd->P = ray->P + ray->D * ray->tmin;
sd->N = -ray->D;
sd->Ng = -ray->D;
sd->I = -ray->D;
@@ -441,7 +441,6 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
/* for NDC coordinates */
sd->ray_P = ray->P;
- sd->ray_dP = ray->dP;
}
#endif /* __VOLUME__ */
diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h
index 0c76de9ccc7..f968e537cfa 100644
--- a/intern/cycles/kernel/geom/triangle_intersect.h
+++ b/intern/cycles/kernel/geom/triangle_intersect.h
@@ -17,6 +17,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
float3 P,
float3 dir,
+ float tmin,
float tmax,
uint visibility,
int object,
@@ -28,7 +29,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
float t, u, v;
- if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
+ if (ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -62,6 +63,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
int object,
int prim,
int prim_addr,
+ float tmin,
float tmax,
ccl_private uint *lcg_state,
int max_hits)
@@ -71,7 +73,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
float t, u, v;
- if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
+ if (!ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
return false;
}
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index c63684d58e6..bf3f41b52b9 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -174,14 +174,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = zero_float3();
ray.D = normalize(P);
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = 0.5f;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
integrator_state_write_ray(kg, state, &ray);
/* Setup next kernel to execute. */
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
else {
/* Surface baking. */
@@ -210,7 +211,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = P + N;
ray.D = -N;
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = 0.5f;
/* Setup differentials. */
@@ -247,13 +249,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
}
}
diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h
index 9fe27cdda9a..e89ab3991c7 100644
--- a/intern/cycles/kernel/integrator/init_from_camera.h
+++ b/intern/cycles/kernel/integrator/init_from_camera.h
@@ -86,7 +86,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Generate camera ray. */
Ray ray;
integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
- if (ray.t == 0.0f) {
+ if (ray.tmax == 0.0f) {
return true;
}
@@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Continue with intersect_closest kernel, optionally initializing volume
* stack before that if the camera may be inside a volume. */
if (kernel_data.cam.is_inside_volume) {
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
}
else {
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
return true;
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 621aa05f46b..60299f2cb2f 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
/* If using background pass, schedule background shading kernel so that we have a background
* to alpha-over on. The background kernel will then continue the path afterwards. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
@@ -128,18 +128,19 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
KernelGlobals kg, IntegratorState state)
{
@@ -156,20 +157,21 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
KernelGlobals kg, IntegratorState state)
{
@@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_next(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
@@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
*
* Note that current_kernel is a template value since making this a variable
* leads to poor performance with CUDA atomics. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel(
KernelGlobals kg,
IntegratorState state,
@@ -206,10 +209,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0;
if (!integrator_intersect_terminate(kg, state, flags)) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
return;
}
@@ -218,7 +221,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
if (hit) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
}
else {
/* Hit a surface, continue with surface kernel unless terminated. */
@@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
#endif
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
}
}
else {
/* Nothing hit, continue with background kernel. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
}
@@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
*
* The logic here matches integrator_intersect_next_kernel, except that
* volume shading and termination testing have already been done. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
KernelGlobals kg,
IntegratorState state,
@@ -273,7 +276,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
if (isect->prim != PRIM_NONE) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
@@ -286,16 +289,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
}
else {
/* Nothing hit, continue with background kernel. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
}
@@ -321,7 +324,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Read ray from integrator state into local memory. */
Ray ray ccl_optional_struct_init;
integrator_state_read_ray(kg, state, &ray);
- kernel_assert(ray.t != 0.0f);
+ kernel_assert(ray.tmax != 0.0f);
const uint visibility = path_state_ray_visibility(state);
const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
@@ -329,12 +332,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Trick to use short AO rays to approximate indirect light at the end of the path. */
if (path_state_ao_bounce(kg, state)) {
- ray.t = kernel_data.integrator.ao_bounces_distance;
+ ray.tmax = kernel_data.integrator.ao_bounces_distance;
if (last_isect_object != OBJECT_NONE) {
const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance;
if (object_ao_distance != 0.0f) {
- ray.t = object_ao_distance;
+ ray.tmax = object_ao_distance;
}
}
}
diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h
index 3e746998225..25ff3d5b23f 100644
--- a/intern/cycles/kernel/integrator/intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/intersect_shadow.h
@@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k
}
#ifdef __TRANSPARENT_SHADOWS__
-# if defined(__KERNEL_CPU__)
+# ifndef __KERNEL_GPU__
ccl_device int shadow_intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection *)a;
@@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
if (opaque_hit) {
/* Hit an opaque surface, shadow path ends here. */
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
@@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
*
* TODO: could also write to render buffer directly if no transparent shadows?
* Could save a kernel execution for the common case. */
- INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
+ integrator_shadow_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h
index 0a2c4ad680d..f439d6905a0 100644
--- a/intern/cycles/kernel/integrator/intersect_subsurface.h
+++ b/intern/cycles/kernel/integrator/intersect_subsurface.h
@@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat
}
#endif
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h
index 49ef01dc870..b53bee11312 100644
--- a/intern/cycles/kernel/integrator/intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h
@@ -24,7 +24,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
Ray volume_ray ccl_optional_struct_init;
volume_ray.P = from_P;
- volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
+ volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax);
+ volume_ray.tmin = 0.0f;
volume_ray.self.object = INTEGRATOR_STATE(state, isect, object);
volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim);
volume_ray.self.light_object = OBJECT_NONE;
@@ -37,8 +38,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
Intersection *isect = hits;
@@ -58,12 +58,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
volume_stack_enter_exit(kg, state, stack_sd);
/* Move ray forward. */
- volume_ray.P = stack_sd->P;
+ volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
- if (volume_ray.t != FLT_MAX) {
- volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
- }
++step;
}
#endif
@@ -82,7 +79,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
/* Trace ray in random direction. Any direction works, Z up is a guess to get the
* fewest hits. */
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
- volume_ray.t = FLT_MAX;
+ volume_ray.tmin = 0.0f;
+ volume_ray.tmax = FLT_MAX;
volume_ray.self.object = OBJECT_NONE;
volume_ray.self.prim = PRIM_NONE;
volume_ray.self.light_object = OBJECT_NONE;
@@ -109,8 +107,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
Intersection *isect = hits;
@@ -199,7 +196,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
}
/* Move ray forward. */
- volume_ray.P = stack_sd->P;
+ volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
++step;
@@ -222,7 +219,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
}
else {
/* Volume stack init for camera rays, continue with intersection of camera ray. */
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h
index 67505b9b612..7a6f866b1a0 100644
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
}
}
else if (ls->type == LIGHT_AREA) {
+ float invarea = fabsf(klight->area.invarea);
ls->D = normalize_len(ls->P - P, &ls->t);
- ls->pdf = fabsf(klight->area.invarea);
+ ls->pdf = invarea;
+ if (klight->area.tan_spread > 0.f) {
+ ls->eval_fac = 0.25f * invarea;
+ ls->eval_fac *= light_spread_attenuation(
+ ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+ }
}
ls->pdf *= kernel_data.integrator.pdf_lights;
@@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.light_prim = PRIM_NONE;
projection_ray.dP = differential_make_compact(sd->dP);
projection_ray.dD = differential_zero_compact();
+ projection_ray.tmin = 0.0f;
projection_ray.time = sd->time;
Intersection projection_isect;
@@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.prim = pv.prim;
projection_ray.P = pv.p;
}
- projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t);
- projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
+ projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax);
+ projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
bool projection_success = false;
for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
@@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.object = projection_isect.object;
projection_ray.self.prim = projection_isect.prim;
- projection_ray.P += projection_isect.t * projection_ray.D;
- projection_ray.t -= projection_isect.t;
+ projection_ray.tmin = intersection_t_offset(projection_isect.t);
}
if (!projection_success) {
reduce_stepsize = true;
@@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
Ray probe_ray;
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
+ probe_ray.tmin = 0.0f;
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
probe_ray.time = sd->time;
@@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
ccl_private const ManifoldVertex &v = vertices[vi];
/* Check visibility. */
- probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t);
+ probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax);
if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) {
int hit_object = (probe_isect.object == OBJECT_NONE) ?
kernel_data_fetch(prim_object, probe_isect.prim) :
probe_isect.object;
/* Test whether the ray hit the appropriate object at its intended location. */
- if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE)
+ if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE)
return false;
}
probe_ray.self.object = v.object;
@@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
probe_ray.P = sd->P;
+ probe_ray.tmin = 0.0f;
if (ls->t == FLT_MAX) {
/* Distant / env light. */
probe_ray.D = ls->D;
- probe_ray.t = ls->t;
+ probe_ray.tmax = ls->t;
}
else {
/* Other lights, avoid self-intersection. */
probe_ray.D = ls->P - probe_ray.P;
- probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t);
+ probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax);
}
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
@@ -1042,9 +1050,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.object = probe_isect.object;
probe_ray.self.prim = probe_isect.prim;
- probe_ray.P += probe_isect.t * probe_ray.D;
- if (ls->t != FLT_MAX)
- probe_ray.t -= probe_isect.t;
+ probe_ray.tmin = intersection_t_offset(probe_isect.t);
};
/* Mark the manifold walk invalid to keep mollification on by default. */
diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h
index 1a085506a70..b09bc117d78 100644
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void path_state_init_queues(IntegratorState state)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0;
INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0;
#endif
@@ -52,7 +52,6 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
PATH_RAY_TRANSPARENT_BACKGROUND;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h
index 4791a963ae6..a7edfffd175 100644
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -62,11 +62,10 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
- const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D);
+ const float pdf = background_light_pdf(kg, ray_P, ray_D);
const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
L *= mis_weight;
}
@@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
}
#endif
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h
index be926c78439..910e3383f51 100644
--- a/intern/cycles/kernel/integrator/shade_light.h
+++ b/intern/cycles/kernel/integrator/shade_light.h
@@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float ray_time = INTEGRATOR_STATE(state, ray, time);
- /* Advance ray beyond light. */
- /* TODO: can we make this more numerically robust to avoid reintersecting the
- * same light in some cases? Ray should not intersect surface anymore as the
- * object and prim ids will prevent self intersection. */
- const float3 new_ray_P = ray_P + ray_D * isect.t;
- INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
- INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
-
- /* Set position to where the BSDF was sampled, for correct MIS PDF. */
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
- ray_P -= ray_D * mis_ray_t;
- isect.t += mis_ray_t;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t;
+ /* Advance ray to new start distance. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t);
LightSample ls ccl_optional_struct_init;
const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
@@ -99,11 +88,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}
diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h
index 2b929b7b62e..4b002a47bee 100644
--- a/intern/cycles/kernel/integrator/shade_shadow.h
+++ b/intern/cycles/kernel/integrator/shade_shadow.h
@@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
/* Modify ray position and length to match current segment. */
- const float start_t = (hit == 0) ? 0.0f :
- INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
- const float end_t = (hit < num_recorded_hits) ?
- INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
- ray.t;
- ray.P += start_t * ray.D;
- ray.t = end_t - start_t;
+ ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
+ ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
+ ray.tmax;
shader_setup_from_volume(kg, shadow_sd, &ray);
@@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
/* There are more hits that we could not recorded due to memory usage,
* adjust ray to intersect again from the last hit. */
const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
- const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
- const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
- INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D;
- INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t);
}
return false;
@@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
/* Evaluate transparent shadows. */
const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
if (opaque) {
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
#endif
if (shadow_intersections_has_remaining(num_hits)) {
/* More intersections to find, continue shadow ray. */
- INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
+ integrator_shadow_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
kernel_accum_light(kg, state, render_buffer);
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
}
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index 57b88b806a4..1514b3956ad 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -77,7 +77,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
# endif
{
const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
+ const float t = sd->ray_length;
/* Multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf. */
@@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(
- shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -323,16 +323,21 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
return LABEL_NONE;
}
- /* Setup ray. Note that clipping works through transparent bounces. */
- INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
- INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
- INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
- INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
- FLT_MAX;
+ if (label & LABEL_TRANSPARENT) {
+ /* Only need to modify start distance for transparent. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
+ }
+ else {
+ /* Setup ray with changed origin and direction. */
+ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
+ INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
#endif
+ }
/* Update throughput. */
float3 throughput = INTEGRATOR_STATE(state, path, throughput);
@@ -349,12 +354,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
/* Update path state */
- if (label & LABEL_TRANSPARENT) {
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
- }
- else {
+ if (!(label & LABEL_TRANSPARENT)) {
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
}
@@ -371,17 +372,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
return LABEL_NONE;
}
- /* Setup ray position, direction stays unchanged. */
- INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
-
- /* Clipping works through transparent. */
- INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
-
-# ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
-# endif
-
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
+ /* Only modify start distance. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
@@ -432,7 +424,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
ray.D = ao_D;
- ray.t = kernel_data.integrator.ao_bounces_distance;
+ ray.tmin = 0.0f;
+ ray.tmax = kernel_data.integrator.ao_bounces_distance;
ray.time = sd->time;
ray.self.object = (skip_self) ? sd->object : OBJECT_NONE;
ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE;
@@ -442,7 +435,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
ray.dD = differential_zero_compact();
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -604,22 +598,23 @@ ccl_device bool integrate_surface(KernelGlobals kg,
}
template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
- int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
+ DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+ integrator_path_next(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
else {
- kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
}
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 6cf80f4ddc5..4aab097a7d8 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -114,7 +114,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_private const RNGState *rng_state,
const float object_step_size,
- float t,
+ const float tmin,
+ const float tmax,
ccl_private float *step_size,
ccl_private float *step_shade_offset,
ccl_private float *steps_offset,
@@ -122,7 +123,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
{
if (object_step_size == FLT_MAX) {
/* Homogeneous volume. */
- *step_size = t;
+ *step_size = tmax - tmin;
*step_shade_offset = 0.0f;
*steps_offset = 1.0f;
*max_steps = 1;
@@ -130,6 +131,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
else {
/* Heterogeneous volume. */
*max_steps = kernel_data.integrator.volume_max_steps;
+ const float t = tmax - tmin;
float step = min(object_step_size, t);
/* compute exact steps in advance for malloc */
@@ -165,7 +167,7 @@ ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState stat
float3 sigma_t = zero_float3();
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
- *throughput *= volume_color_transmittance(sigma_t, ray->t);
+ *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin);
}
}
# endif
@@ -194,7 +196,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
volume_step_init(kg,
&rng_state,
object_step_size,
- ray->t,
+ ray->tmin,
+ ray->tmax,
&step_size,
&step_shade_offset,
&unused,
@@ -202,13 +205,13 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
const float steps_offset = 1.0f;
/* compute extinction at the start */
- float t = 0.0f;
+ float t = ray->tmin;
float3 sum = zero_float3();
for (int i = 0; i < max_steps; i++) {
/* advance to new position */
- float new_t = min(ray->t, (i + steps_offset) * step_size);
+ float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
float dt = new_t - t;
float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
@@ -233,7 +236,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
/* stop if at the end of the volume */
t = new_t;
- if (t == ray->t) {
+ if (t == ray->tmax) {
/* Update throughput in case we haven't done it above */
tp = *throughput * exp(sum);
break;
@@ -257,15 +260,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
const float xi,
ccl_private float *pdf)
{
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float delta = dot((light_P - ray->P), ray->D);
const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
if (UNLIKELY(D == 0.0f)) {
*pdf = 0.0f;
return 0.0f;
}
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
if (UNLIKELY(theta_b == theta_a)) {
*pdf = 0.0f;
@@ -273,7 +277,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
}
*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
- return min(t, delta + t_); /* min is only for float precision errors */
+ return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */
}
ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
@@ -286,11 +290,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float t_ = sample_t - delta;
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
@@ -310,11 +315,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float t_ = sample_t - delta;
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
@@ -390,8 +396,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients
typedef struct VolumeIntegrateState {
/* Volume segment extents. */
- float start_t;
- float end_t;
+ float tmin;
+ float tmax;
/* If volume is absorption-only up to this point, and no probabilistic
* scattering or termination has been used yet. */
@@ -426,9 +432,9 @@ ccl_device_forceinline void volume_integrate_step_scattering(
/* Equiangular sampling for direct lighting. */
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
- if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t &&
+ if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax &&
vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
- const float new_dt = result.direct_t - vstate.start_t;
+ const float new_dt = result.direct_t - vstate.tmin;
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
result.direct_scatter = true;
@@ -458,7 +464,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
/* compute sampling distance */
const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
- const float new_t = vstate.start_t + new_dt;
+ const float new_t = vstate.tmin + new_dt;
/* transmittance and pdf */
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
@@ -528,7 +534,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
volume_step_init(kg,
rng_state,
object_step_size,
- ray->t,
+ ray->tmin,
+ ray->tmax,
&step_size,
&step_shade_offset,
&steps_offset,
@@ -536,8 +543,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Initialize volume integration state. */
VolumeIntegrateState vstate ccl_optional_struct_init;
- vstate.start_t = 0.0f;
- vstate.end_t = 0.0f;
+ vstate.tmin = ray->tmin;
+ vstate.tmax = ray->tmin;
vstate.absorption_only = true;
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
@@ -578,8 +585,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
for (int i = 0; i < max_steps; i++) {
/* Advance to new position */
- vstate.end_t = min(ray->t, (i + steps_offset) * step_size);
- const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset;
+ vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
+ const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset;
sd->P = ray->P + ray->D * shade_t;
/* compute segment */
@@ -588,7 +595,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
const int closure_flag = sd->flag;
/* Evaluate transmittance over segment. */
- const float dt = (vstate.end_t - vstate.start_t);
+ const float dt = (vstate.tmax - vstate.tmin);
const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
volume_color_transmittance(coeff.sigma_t, dt) :
one_float3();
@@ -645,8 +652,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
}
/* Stop if at the end of the volume. */
- vstate.start_t = vstate.end_t;
- if (vstate.start_t == ray->t) {
+ vstate.tmin = vstate.tmax;
+ if (vstate.tmin == ray->tmax) {
break;
}
}
@@ -774,8 +781,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
const bool is_light = light_sample_is_light(ls);
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(
- shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
/* Write shadow ray and associated state to global memory. */
integrator_state_write_shadow_ray(kg, shadow_state, &ray);
@@ -880,7 +887,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Setup ray. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
- INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
@@ -901,7 +909,6 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Update path state */
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
@@ -1021,7 +1028,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
integrator_state_read_isect(kg, state, &isect);
/* Set ray length to current segment. */
- ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
+ ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
/* Clean volume stack for background rays. */
if (isect.prim == PRIM_NONE) {
@@ -1032,13 +1039,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
if (event == VOLUME_PATH_SCATTERED) {
/* Queue intersect_closest kernel. */
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}
else if (event == VOLUME_PATH_MISSED) {
/* End path. */
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
return;
}
else {
diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h
index 42d44580f80..ff63625aceb 100644
--- a/intern/cycles/kernel/integrator/shadow_catcher.h
+++ b/intern/cycles/kernel/integrator/shadow_catcher.h
@@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
ConstIntegratorState state)
{
- if (INTEGRATOR_PATH_IS_TERMINATED) {
+ if (integrator_path_is_terminated(state)) {
return false;
}
diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h
index eaee65ada40..c340467606d 100644
--- a/intern/cycles/kernel/integrator/shadow_state_template.h
+++ b/intern/cycles/kernel/integrator/shadow_state_template.h
@@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path)
KERNEL_STRUCT_BEGIN(shadow_ray)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING)
diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h
index d6fef27f344..d1907bd6e16 100644
--- a/intern/cycles/kernel/integrator/state.h
+++ b/intern/cycles/kernel/integrator/state.h
@@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU {
/* Index of main path which will be used by a next shadow catcher split. */
ccl_global int *next_main_path_index;
+
+ /* Divisor used to partition active indices by locality when sorting by material. */
+ uint sort_partition_divisor;
} IntegratorStateGPU;
/* Abstraction
@@ -137,7 +140,7 @@ typedef struct IntegratorStateGPU {
* happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
* from a kernel which operates on a shadow catcher state will cause bad memory access. */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
/* Scalar access on CPU. */
@@ -156,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
((state)->nested_struct[array_index].member)
-#else /* __KERNEL_CPU__ */
+#else /* !__KERNEL_GPU__ */
/* Array access on GPU with Structure-of-Arrays. */
@@ -177,6 +180,6 @@ typedef int ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h
index fed74d49434..4b03c665e17 100644
--- a/intern/cycles/kernel/integrator/state_flow.h
+++ b/intern/cycles/kernel/integrator/state_flow.h
@@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN
/* Control Flow
*
- * Utilities for control flow between kernels. The implementation may differ per device
- * or even be handled on the host side. To abstract such differences, experiment with
- * different implementations and for debugging, this is abstracted using macros.
+ * Utilities for control flow between kernels. The implementation is different between CPU and
+ * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
*
* There is a main path for regular path tracing camera for path tracing. Shadows for next
* event estimation branch off from this into their own path, that may be computed in
- * parallel while the main path continues.
+ * parallel while the main path continues. Additionally, shading kernels are sorted using
+ * a key for coherence.
*
* Each kernel on the main path must call one of these functions. These may not be called
* multiple times from the same kernel.
*
- * INTEGRATOR_PATH_INIT(next_kernel)
- * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
- * INTEGRATOR_PATH_TERMINATE(current_kernel)
+ * integrator_path_init(kg, state, next_kernel)
+ * integrator_path_next(kg, state, current_kernel, next_kernel)
+ * integrator_path_terminate(kg, state, current_kernel)
*
* For the shadow path similar functions are used, and again each shadow kernel must call
* one of them, and only once.
*/
-#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
-#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
- (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
+ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
+{
+ return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
+}
+
+ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
+{
+ return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
+}
#ifdef __KERNEL_GPU__
-# define INTEGRATOR_PATH_INIT(next_kernel) \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
- IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.next_shadow_path_index[0], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
-
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
- { \
- const int key_ = key; \
- atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
- 1); \
- }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
- { \
- const int key_ = key; \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
- 1); \
- }
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+}
+
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+ KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+ IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
+ &kernel_integrator_state.next_shadow_path_index[0], 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+ return shadow_state;
+}
+
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+}
+
+/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
+# define INTEGRATOR_SORT_KEY(key, state) \
+ (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
+
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ const int key_ = INTEGRATOR_SORT_KEY(key, state);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
+
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ const int key_ = INTEGRATOR_SORT_KEY(key, state);
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
#else
-# define INTEGRATOR_PATH_INIT(next_kernel) \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)key; \
- }
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)key; \
- (void)current_kernel; \
- }
-
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
- IntegratorShadowState shadow_state = &state->shadow_type; \
- INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
- (void)current_kernel; \
- }
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)key;
+}
+
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+ (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)key;
+ (void)current_kernel;
+}
+
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+ KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+ IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+ return shadow_state;
+}
+
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+ (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+ (void)current_kernel;
+}
#endif
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index e7e6db037b0..5c2af131945 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -37,11 +37,10 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayMNEE */
KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
- * The PDF of BSDF sampling at the last scatter point, and distance to the
- * last scatter point minus the last ray segment. This distance lets us
- * compute the complete distance through transparent surfaces and volumes. */
+ * The PDF of BSDF sampling at the last scatter point, which is at ray distance
+ * zero. Note that transparency and volume attenuation increase the ray tmin
+ * but keep P unmodified so that this works. */
KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
/* Filter glossy. */
KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
/* Continuation probability for path termination. */
@@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path)
KERNEL_STRUCT_BEGIN(ray)
KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h
index 280db2d1aac..168122d3a78 100644
--- a/intern/cycles/kernel/integrator/state_util.h
+++ b/intern/cycles/kernel/integrator/state_util.h
@@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
{
INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
@@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, ray, P);
ray->D = INTEGRATOR_STATE(state, ray, D);
- ray->t = INTEGRATOR_STATE(state, ray, t);
+ ray->tmin = INTEGRATOR_STATE(state, ray, tmin);
+ ray->tmax = INTEGRATOR_STATE(state, ray, tmax);
ray->time = INTEGRATOR_STATE(state, ray, time);
ray->dP = INTEGRATOR_STATE(state, ray, dP);
ray->dD = INTEGRATOR_STATE(state, ray, dD);
@@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(
{
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
}
@@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
- ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
+ ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin);
+ ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax);
ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
ray->dD = differential_zero_compact();
@@ -334,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
return to_state;
}
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int)
{
return INTEGRATOR_STATE(state, path, bounce);
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index 1e6fcf4aff0..2f96f215d8a 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
/* Setup ray into surface. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
- INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
@@ -160,7 +161,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
/* Pretend ray is coming from the outside towards the exit point. This ensures
* correct front/back facing normals.
* TODO: find a more elegant solution? */
- ray.P += ray.D * ray.t * 2.0f;
+ ray.P += ray.D * ray.tmax * 2.0f;
ray.D = -ray.D;
integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
@@ -177,17 +178,23 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
shader);
}
diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h
index ae857c50493..60b63c075a0 100644
--- a/intern/cycles/kernel/integrator/subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/subsurface_disk.h
@@ -82,7 +82,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
/* Create ray. */
ray.P = P + disk_N * disk_height + disk_P;
ray.D = -disk_N;
- ray.t = 2.0f * disk_height;
+ ray.tmin = 0.0f;
+ ray.tmax = 2.0f * disk_height;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
ray.time = time;
@@ -125,17 +126,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
/* Transform normal to world space. */
Transform itfm;
- Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
+ object_fetch_transform_motion_test(kg, object, time, &itfm);
hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng));
-
- /* Transform t to world space, except for OptiX and MetalRT where it already is. */
-#ifdef __KERNEL_GPU_RAYTRACING__
- (void)tfm;
-#else
- float3 D = transform_direction(&itfm, ray.D);
- D = normalize(D) * ss_isect.hits[hit].t;
- ss_isect.hits[hit].t = len(transform_direction(&tfm, D));
-#endif
}
/* Quickly retrieve P and Ng without setting up ShaderData. */
@@ -188,7 +180,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ray.P = ray.P + ray.D * ss_isect.hits[hit].t;
ray.D = ss_isect.Ng[hit];
- ray.t = 1.0f;
+ ray.tmin = 0.0f;
+ ray.tmax = 1.0f;
return true;
}
diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h
index 8094bf7159e..e43bbb3c50a 100644
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@@ -195,7 +195,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Setup ray. */
ray.P = P;
ray.D = D;
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = time;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
@@ -204,12 +205,6 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
-#ifndef __KERNEL_GPU_RAYTRACING__
- /* Compute or fetch object transforms. */
- Transform ob_itfm ccl_optional_struct_init;
- Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
-#endif
-
/* Convert subsurface to volume coefficients.
* The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
@@ -370,10 +365,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
* chance of connecting to it.
* TODO: Maybe use less than 10 times the mean free path? */
if (bounce == 0) {
- ray.t = max(t, 10.0f / (reduce_min(sigma_t)));
+ ray.tmax = max(t, 10.0f / (reduce_min(sigma_t)));
}
else {
- ray.t = t;
+ ray.tmax = t;
/* After the first bounce the object can intersect the same surface again */
ray.self.object = OBJECT_NONE;
ray.self.prim = PRIM_NONE;
@@ -382,31 +377,23 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
hit = (ss_isect.num_hits > 0);
if (hit) {
-#ifdef __KERNEL_GPU_RAYTRACING__
- /* t is always in world space with OptiX and MetalRT. */
- ray.t = ss_isect.hits[0].t;
-#else
- /* Compute world space distance to surface hit. */
- float3 D = transform_direction(&ob_itfm, ray.D);
- D = normalize(D) * ss_isect.hits[0].t;
- ray.t = len(transform_direction(&ob_tfm, D));
-#endif
+ ray.tmax = ss_isect.hits[0].t;
}
if (bounce == 0) {
/* Check if we hit the opposite side. */
if (hit) {
have_opposite_interface = true;
- opposite_distance = dot(ray.P + ray.t * ray.D - P, -N);
+ opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N);
}
/* Apart from the opposite side check, we were supposed to only trace up to distance t,
* so check if there would have been a hit in that case. */
- hit = ray.t < t;
+ hit = ray.tmax < t;
}
/* Use the distance to the exit point for the throughput update if we found one. */
if (hit) {
- t = ray.t;
+ t = ray.tmax;
}
/* Advance to new scatter location. */
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index 1e7a333d013..b939489bb18 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -270,31 +270,26 @@ ccl_device bool lights_intersect(KernelGlobals kg,
if (type == LIGHT_SPOT) {
/* Spot/Disk light. */
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
- const float3 ray_P = ray->P - ray->D * mis_ray_t;
-
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
const float radius = klight->spot.radius;
if (radius == 0.0f) {
continue;
}
/* disk oriented normal */
- const float3 lightN = normalize(ray_P - lightP);
+ const float3 lightN = normalize(ray->P - lightP);
/* One sided. */
if (dot(ray->D, lightN) >= 0.0f) {
continue;
}
float3 P;
- if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
+ if (!ray_disk_intersect(
+ ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
continue;
}
}
else if (type == LIGHT_POINT) {
/* Sphere light (aka, aligned disk light). */
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
- const float3 ray_P = ray->P - ray->D * mis_ray_t;
-
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
const float radius = klight->spot.radius;
if (radius == 0.0f) {
@@ -302,9 +297,10 @@ ccl_device bool lights_intersect(KernelGlobals kg,
}
/* disk oriented normal */
- const float3 lightN = normalize(ray_P - lightP);
+ const float3 lightN = normalize(ray->P - lightP);
float3 P;
- if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
+ if (!ray_disk_intersect(
+ ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
continue;
}
}
@@ -330,8 +326,19 @@ ccl_device bool lights_intersect(KernelGlobals kg,
const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
float3 P;
- if (!ray_quad_intersect(
- ray->P, ray->D, 0.0f, ray->t, light_P, axisu, axisv, Ng, &P, &t, &u, &v, is_round)) {
+ if (!ray_quad_intersect(ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ light_P,
+ axisu,
+ axisv,
+ Ng,
+ &P,
+ &t,
+ &u,
+ &v,
+ is_round)) {
continue;
}
}
@@ -775,7 +782,8 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
/* calculate intersection with the planar triangle */
- if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
+ if (!ray_triangle_intersect(
+ P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
ls->pdf = 0.0f;
return;
}
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index 5cf7dce683a..210bb1b35c2 100644
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -227,23 +227,24 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri
if (ls->shader & SHADER_CAST_SHADOW) {
/* setup ray */
ray->P = P;
+ ray->tmin = 0.0f;
if (ls->t == FLT_MAX) {
/* distant light */
ray->D = ls->D;
- ray->t = ls->t;
+ ray->tmax = ls->t;
}
else {
/* other lights, avoid self-intersection */
ray->D = ls->P - P;
- ray->D = normalize_len(ray->D, &ray->t);
+ ray->D = normalize_len(ray->D, &ray->tmax);
}
}
else {
/* signal to not cast shadow ray */
ray->P = zero_float3();
ray->D = zero_float3();
- ray->t = 0.0f;
+ ray->tmax = 0.0f;
}
ray->dP = differential_make_compact(sd->dP);
diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp
index 6e75ae54f33..6b7981b7f3a 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -1094,10 +1094,8 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
if (derivatives) {
- ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) -
- ndc[0];
- ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) -
- ndc[0];
+ ndc[1] = zero_float3();
+ ndc[2] = zero_float3();
}
}
else {
@@ -1671,7 +1669,8 @@ bool OSLRenderServices::trace(TraceOpt &options,
ray.P = TO_FLOAT3(P);
ray.D = TO_FLOAT3(R);
- ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
+ ray.tmin = 0.0f;
+ ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
ray.time = sd->time;
ray.self.object = OBJECT_NONE;
ray.self.prim = PRIM_NONE;
@@ -1710,12 +1709,12 @@ bool OSLRenderServices::trace(TraceOpt &options,
const KernelGlobalsCPU *kg = sd->osl_globals;
- /* Can't raytrace from shaders like displacement, before BVH exists. */
+ /* Can't ray-trace from shaders like displacement, before BVH exists. */
if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
return false;
}
- /* Raytrace, leaving out shadow opaque to avoid early exit. */
+ /* Ray-trace, leaving out shadow opaque to avoid early exit. */
uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
tracedata->hit = scene_intersect(kg, &ray, visibility, &tracedata->isect);
return tracedata->hit;
diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h
index b477855dca3..c57c68d6230 100644
--- a/intern/cycles/kernel/svm/ao.h
+++ b/intern/cycles/kernel/svm/ao.h
@@ -31,7 +31,7 @@ ccl_device float svm_ao(
return 1.0f;
}
- /* Can't raytrace from shaders like displacement, before BVH exists. */
+ /* Can't ray-trace from shaders like displacement, before BVH exists. */
if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
return 1.0f;
}
@@ -59,7 +59,8 @@ ccl_device float svm_ao(
Ray ray;
ray.P = sd->P;
ray.D = D.x * T + D.y * B + D.z * N;
- ray.t = max_dist;
+ ray.tmin = 0.0f;
+ ray.tmax = max_dist;
ray.time = sd->time;
ray.self.object = sd->object;
ray.self.prim = sd->prim;
diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h
index f79bcae5cd2..4617a056a52 100644
--- a/intern/cycles/kernel/svm/bevel.h
+++ b/intern/cycles/kernel/svm/bevel.h
@@ -103,7 +103,7 @@ ccl_device float3 svm_bevel(
return sd->N;
}
- /* Can't raytrace from shaders like displacement, before BVH exists. */
+ /* Can't ray-trace from shaders like displacement, before BVH exists. */
if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
return sd->N;
}
@@ -179,7 +179,8 @@ ccl_device float3 svm_bevel(
Ray ray ccl_optional_struct_init;
ray.P = sd->P + disk_N * disk_height + disk_P;
ray.D = -disk_N;
- ray.t = 2.0f * disk_height;
+ ray.tmin = 0.0f;
+ ray.tmax = 2.0f * disk_height;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
ray.time = sd->time;
diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h
index 305bd404d27..99a8fdd3be9 100644
--- a/intern/cycles/kernel/svm/closure.h
+++ b/intern/cycles/kernel/svm/closure.h
@@ -395,7 +395,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
# endif
{
- /* This is to prevent mnee from receiving a null bsdf. */
+ /* This is to prevent MNEE from receiving a null BSDF. */
float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel);
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), base_color * glass_weight * refraction_fresnel);
@@ -676,7 +676,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
#endif
{
- /* This is to prevent mnee from receiving a null bsdf. */
+ /* This is to prevent MNEE from receiving a null BSDF. */
float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel);
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), weight * refraction_fresnel);
diff --git a/intern/cycles/kernel/svm/node_types_template.h b/intern/cycles/kernel/svm/node_types_template.h
new file mode 100644
index 00000000000..39d279be4cb
--- /dev/null
+++ b/intern/cycles/kernel/svm/node_types_template.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#ifndef SHADER_NODE_TYPE
+# define SHADER_NODE_TYPE(name)
+#endif
+
+/* NOTE: for best OpenCL performance, item definition in the enum must
+ * match the switch case order in `svm.h`. */
+
+SHADER_NODE_TYPE(NODE_END)
+SHADER_NODE_TYPE(NODE_SHADER_JUMP)
+SHADER_NODE_TYPE(NODE_CLOSURE_BSDF)
+SHADER_NODE_TYPE(NODE_CLOSURE_EMISSION)
+SHADER_NODE_TYPE(NODE_CLOSURE_BACKGROUND)
+SHADER_NODE_TYPE(NODE_CLOSURE_SET_WEIGHT)
+SHADER_NODE_TYPE(NODE_CLOSURE_WEIGHT)
+SHADER_NODE_TYPE(NODE_EMISSION_WEIGHT)
+SHADER_NODE_TYPE(NODE_MIX_CLOSURE)
+SHADER_NODE_TYPE(NODE_JUMP_IF_ZERO)
+SHADER_NODE_TYPE(NODE_JUMP_IF_ONE)
+SHADER_NODE_TYPE(NODE_GEOMETRY)
+SHADER_NODE_TYPE(NODE_CONVERT)
+SHADER_NODE_TYPE(NODE_TEX_COORD)
+SHADER_NODE_TYPE(NODE_VALUE_F)
+SHADER_NODE_TYPE(NODE_VALUE_V)
+SHADER_NODE_TYPE(NODE_ATTR)
+SHADER_NODE_TYPE(NODE_VERTEX_COLOR)
+SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DX)
+SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DY)
+SHADER_NODE_TYPE(NODE_SET_DISPLACEMENT)
+SHADER_NODE_TYPE(NODE_DISPLACEMENT)
+SHADER_NODE_TYPE(NODE_VECTOR_DISPLACEMENT)
+SHADER_NODE_TYPE(NODE_TEX_IMAGE)
+SHADER_NODE_TYPE(NODE_TEX_IMAGE_BOX)
+SHADER_NODE_TYPE(NODE_TEX_NOISE)
+SHADER_NODE_TYPE(NODE_SET_BUMP)
+SHADER_NODE_TYPE(NODE_ATTR_BUMP_DX)
+SHADER_NODE_TYPE(NODE_ATTR_BUMP_DY)
+SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DX)
+SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DY)
+SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DX)
+SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DY)
+SHADER_NODE_TYPE(NODE_CLOSURE_SET_NORMAL)
+SHADER_NODE_TYPE(NODE_ENTER_BUMP_EVAL)
+SHADER_NODE_TYPE(NODE_LEAVE_BUMP_EVAL)
+SHADER_NODE_TYPE(NODE_HSV)
+SHADER_NODE_TYPE(NODE_CLOSURE_HOLDOUT)
+SHADER_NODE_TYPE(NODE_FRESNEL)
+SHADER_NODE_TYPE(NODE_LAYER_WEIGHT)
+SHADER_NODE_TYPE(NODE_CLOSURE_VOLUME)
+SHADER_NODE_TYPE(NODE_PRINCIPLED_VOLUME)
+SHADER_NODE_TYPE(NODE_MATH)
+SHADER_NODE_TYPE(NODE_VECTOR_MATH)
+SHADER_NODE_TYPE(NODE_RGB_RAMP)
+SHADER_NODE_TYPE(NODE_GAMMA)
+SHADER_NODE_TYPE(NODE_BRIGHTCONTRAST)
+SHADER_NODE_TYPE(NODE_LIGHT_PATH)
+SHADER_NODE_TYPE(NODE_OBJECT_INFO)
+SHADER_NODE_TYPE(NODE_PARTICLE_INFO)
+SHADER_NODE_TYPE(NODE_HAIR_INFO)
+SHADER_NODE_TYPE(NODE_POINT_INFO)
+SHADER_NODE_TYPE(NODE_TEXTURE_MAPPING)
+SHADER_NODE_TYPE(NODE_MAPPING)
+SHADER_NODE_TYPE(NODE_MIN_MAX)
+SHADER_NODE_TYPE(NODE_CAMERA)
+SHADER_NODE_TYPE(NODE_TEX_ENVIRONMENT)
+SHADER_NODE_TYPE(NODE_TEX_SKY)
+SHADER_NODE_TYPE(NODE_TEX_GRADIENT)
+SHADER_NODE_TYPE(NODE_TEX_VORONOI)
+SHADER_NODE_TYPE(NODE_TEX_MUSGRAVE)
+SHADER_NODE_TYPE(NODE_TEX_WAVE)
+SHADER_NODE_TYPE(NODE_TEX_MAGIC)
+SHADER_NODE_TYPE(NODE_TEX_CHECKER)
+SHADER_NODE_TYPE(NODE_TEX_BRICK)
+SHADER_NODE_TYPE(NODE_TEX_WHITE_NOISE)
+SHADER_NODE_TYPE(NODE_NORMAL)
+SHADER_NODE_TYPE(NODE_LIGHT_FALLOFF)
+SHADER_NODE_TYPE(NODE_IES)
+SHADER_NODE_TYPE(NODE_CURVES)
+SHADER_NODE_TYPE(NODE_TANGENT)
+SHADER_NODE_TYPE(NODE_NORMAL_MAP)
+SHADER_NODE_TYPE(NODE_INVERT)
+SHADER_NODE_TYPE(NODE_MIX)
+SHADER_NODE_TYPE(NODE_SEPARATE_COLOR)
+SHADER_NODE_TYPE(NODE_COMBINE_COLOR)
+SHADER_NODE_TYPE(NODE_SEPARATE_VECTOR)
+SHADER_NODE_TYPE(NODE_COMBINE_VECTOR)
+SHADER_NODE_TYPE(NODE_SEPARATE_HSV)
+SHADER_NODE_TYPE(NODE_COMBINE_HSV)
+SHADER_NODE_TYPE(NODE_VECTOR_ROTATE)
+SHADER_NODE_TYPE(NODE_VECTOR_TRANSFORM)
+SHADER_NODE_TYPE(NODE_WIREFRAME)
+SHADER_NODE_TYPE(NODE_WAVELENGTH)
+SHADER_NODE_TYPE(NODE_BLACKBODY)
+SHADER_NODE_TYPE(NODE_MAP_RANGE)
+SHADER_NODE_TYPE(NODE_VECTOR_MAP_RANGE)
+SHADER_NODE_TYPE(NODE_CLAMP)
+SHADER_NODE_TYPE(NODE_BEVEL)
+SHADER_NODE_TYPE(NODE_AMBIENT_OCCLUSION)
+SHADER_NODE_TYPE(NODE_TEX_VOXEL)
+SHADER_NODE_TYPE(NODE_AOV_START)
+SHADER_NODE_TYPE(NODE_AOV_COLOR)
+SHADER_NODE_TYPE(NODE_AOV_VALUE)
+SHADER_NODE_TYPE(NODE_FLOAT_CURVE)
+
+/* Padding for struct alignment. */
+SHADER_NODE_TYPE(NODE_PAD1)
+
+#undef SHADER_NODE_TYPE
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 8fd41ec8531..9d6d3e9222c 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -204,6 +204,15 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
+#ifdef __KERNEL_USE_DATA_CONSTANTS__
+# define SVM_CASE(node) \
+ case node: \
+ if (!kernel_data_svm_usage_##node) \
+ break;
+#else
+# define SVM_CASE(node) case node:
+#endif
+
/* Main Interpreter Loop */
template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState>
ccl_device void svm_eval_nodes(KernelGlobals kg,
@@ -219,9 +228,10 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
uint4 node = read_node(kg, &offset);
switch (node.x) {
- case NODE_END:
- return;
- case NODE_SHADER_JUMP: {
+ SVM_CASE(NODE_END)
+ return;
+ SVM_CASE(NODE_SHADER_JUMP)
+ {
if (type == SHADER_TYPE_SURFACE)
offset = node.y;
else if (type == SHADER_TYPE_VOLUME)
@@ -232,351 +242,349 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
return;
break;
}
- case NODE_CLOSURE_BSDF:
- offset = svm_node_closure_bsdf<node_feature_mask, type>(
- kg, sd, stack, node, path_flag, offset);
- break;
- case NODE_CLOSURE_EMISSION:
- IF_KERNEL_NODES_FEATURE(EMISSION)
- {
- svm_node_closure_emission(sd, stack, node);
- }
- break;
- case NODE_CLOSURE_BACKGROUND:
- IF_KERNEL_NODES_FEATURE(EMISSION)
- {
- svm_node_closure_background(sd, stack, node);
- }
- break;
- case NODE_CLOSURE_SET_WEIGHT:
- svm_node_closure_set_weight(sd, node.y, node.z, node.w);
- break;
- case NODE_CLOSURE_WEIGHT:
- svm_node_closure_weight(sd, stack, node.y);
- break;
- case NODE_EMISSION_WEIGHT:
- IF_KERNEL_NODES_FEATURE(EMISSION)
- {
- svm_node_emission_weight(kg, sd, stack, node);
- }
- break;
- case NODE_MIX_CLOSURE:
- svm_node_mix_closure(sd, stack, node);
- break;
- case NODE_JUMP_IF_ZERO:
- if (stack_load_float(stack, node.z) <= 0.0f)
- offset += node.y;
- break;
- case NODE_JUMP_IF_ONE:
- if (stack_load_float(stack, node.z) >= 1.0f)
- offset += node.y;
- break;
- case NODE_GEOMETRY:
- svm_node_geometry(kg, sd, stack, node.y, node.z);
- break;
- case NODE_CONVERT:
- svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_TEX_COORD:
- offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
- break;
- case NODE_VALUE_F:
- svm_node_value_f(kg, sd, stack, node.y, node.z);
- break;
- case NODE_VALUE_V:
- offset = svm_node_value_v(kg, sd, stack, node.y, offset);
- break;
- case NODE_ATTR:
- svm_node_attr<node_feature_mask>(kg, sd, stack, node);
- break;
- case NODE_VERTEX_COLOR:
- svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_GEOMETRY_BUMP_DX:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
- }
- break;
- case NODE_GEOMETRY_BUMP_DY:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
- }
- break;
- case NODE_SET_DISPLACEMENT:
- svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
- break;
- case NODE_DISPLACEMENT:
- svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
- break;
- case NODE_VECTOR_DISPLACEMENT:
- offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_IMAGE:
- offset = svm_node_tex_image(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_IMAGE_BOX:
- svm_node_tex_image_box(kg, sd, stack, node);
- break;
- case NODE_TEX_NOISE:
- offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_SET_BUMP:
- svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
- break;
- case NODE_ATTR_BUMP_DX:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_attr_bump_dx(kg, sd, stack, node);
- }
- break;
- case NODE_ATTR_BUMP_DY:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_attr_bump_dy(kg, sd, stack, node);
- }
- break;
- case NODE_VERTEX_COLOR_BUMP_DX:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
- }
- break;
- case NODE_VERTEX_COLOR_BUMP_DY:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
- }
- break;
- case NODE_TEX_COORD_BUMP_DX:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
- }
- break;
- case NODE_TEX_COORD_BUMP_DY:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
- }
- break;
- case NODE_CLOSURE_SET_NORMAL:
- IF_KERNEL_NODES_FEATURE(BUMP)
- {
- svm_node_set_normal(kg, sd, stack, node.y, node.z);
- }
- break;
- case NODE_ENTER_BUMP_EVAL:
- IF_KERNEL_NODES_FEATURE(BUMP_STATE)
- {
- svm_node_enter_bump_eval(kg, sd, stack, node.y);
- }
- break;
- case NODE_LEAVE_BUMP_EVAL:
- IF_KERNEL_NODES_FEATURE(BUMP_STATE)
- {
- svm_node_leave_bump_eval(kg, sd, stack, node.y);
- }
- break;
- case NODE_HSV:
- svm_node_hsv(kg, sd, stack, node);
- break;
-
- case NODE_CLOSURE_HOLDOUT:
- svm_node_closure_holdout(sd, stack, node);
- break;
- case NODE_FRESNEL:
- svm_node_fresnel(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_LAYER_WEIGHT:
- svm_node_layer_weight(sd, stack, node);
- break;
- case NODE_CLOSURE_VOLUME:
- IF_KERNEL_NODES_FEATURE(VOLUME)
- {
- svm_node_closure_volume<type>(kg, sd, stack, node);
- }
- break;
- case NODE_PRINCIPLED_VOLUME:
- IF_KERNEL_NODES_FEATURE(VOLUME)
- {
- offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
- }
- break;
- case NODE_MATH:
- svm_node_math(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_VECTOR_MATH:
- offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_RGB_RAMP:
- offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
- break;
- case NODE_GAMMA:
- svm_node_gamma(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_BRIGHTCONTRAST:
- svm_node_brightness(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_LIGHT_PATH:
- svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
- break;
- case NODE_OBJECT_INFO:
- svm_node_object_info(kg, sd, stack, node.y, node.z);
- break;
- case NODE_PARTICLE_INFO:
- svm_node_particle_info(kg, sd, stack, node.y, node.z);
- break;
+ SVM_CASE(NODE_CLOSURE_BSDF)
+ offset = svm_node_closure_bsdf<node_feature_mask, type>(
+ kg, sd, stack, node, path_flag, offset);
+ break;
+ SVM_CASE(NODE_CLOSURE_EMISSION)
+ IF_KERNEL_NODES_FEATURE(EMISSION)
+ {
+ svm_node_closure_emission(sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_CLOSURE_BACKGROUND)
+ IF_KERNEL_NODES_FEATURE(EMISSION)
+ {
+ svm_node_closure_background(sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_CLOSURE_SET_WEIGHT)
+ svm_node_closure_set_weight(sd, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_CLOSURE_WEIGHT)
+ svm_node_closure_weight(sd, stack, node.y);
+ break;
+ SVM_CASE(NODE_EMISSION_WEIGHT)
+ IF_KERNEL_NODES_FEATURE(EMISSION)
+ {
+ svm_node_emission_weight(kg, sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_MIX_CLOSURE)
+ svm_node_mix_closure(sd, stack, node);
+ break;
+ SVM_CASE(NODE_JUMP_IF_ZERO)
+ if (stack_load_float(stack, node.z) <= 0.0f)
+ offset += node.y;
+ break;
+ SVM_CASE(NODE_JUMP_IF_ONE)
+ if (stack_load_float(stack, node.z) >= 1.0f)
+ offset += node.y;
+ break;
+ SVM_CASE(NODE_GEOMETRY)
+ svm_node_geometry(kg, sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_CONVERT)
+ svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_TEX_COORD)
+ offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
+ break;
+ SVM_CASE(NODE_VALUE_F)
+ svm_node_value_f(kg, sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_VALUE_V)
+ offset = svm_node_value_v(kg, sd, stack, node.y, offset);
+ break;
+ SVM_CASE(NODE_ATTR)
+ svm_node_attr<node_feature_mask>(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_VERTEX_COLOR)
+ svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_GEOMETRY_BUMP_DX)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+ }
+ break;
+ SVM_CASE(NODE_GEOMETRY_BUMP_DY)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+ }
+ break;
+ SVM_CASE(NODE_SET_DISPLACEMENT)
+ svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
+ break;
+ SVM_CASE(NODE_DISPLACEMENT)
+ svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_VECTOR_DISPLACEMENT)
+ offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_IMAGE)
+ offset = svm_node_tex_image(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_IMAGE_BOX)
+ svm_node_tex_image_box(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_TEX_NOISE)
+ offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_SET_BUMP)
+ svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_ATTR_BUMP_DX)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_attr_bump_dx(kg, sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_ATTR_BUMP_DY)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_attr_bump_dy(kg, sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_VERTEX_COLOR_BUMP_DX)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
+ }
+ break;
+ SVM_CASE(NODE_VERTEX_COLOR_BUMP_DY)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
+ }
+ break;
+ SVM_CASE(NODE_TEX_COORD_BUMP_DX)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
+ }
+ break;
+ SVM_CASE(NODE_TEX_COORD_BUMP_DY)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
+ }
+ break;
+ SVM_CASE(NODE_CLOSURE_SET_NORMAL)
+ IF_KERNEL_NODES_FEATURE(BUMP)
+ {
+ svm_node_set_normal(kg, sd, stack, node.y, node.z);
+ }
+ break;
+ SVM_CASE(NODE_ENTER_BUMP_EVAL)
+ IF_KERNEL_NODES_FEATURE(BUMP_STATE)
+ {
+ svm_node_enter_bump_eval(kg, sd, stack, node.y);
+ }
+ break;
+ SVM_CASE(NODE_LEAVE_BUMP_EVAL)
+ IF_KERNEL_NODES_FEATURE(BUMP_STATE)
+ {
+ svm_node_leave_bump_eval(kg, sd, stack, node.y);
+ }
+ break;
+ SVM_CASE(NODE_HSV)
+ svm_node_hsv(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_CLOSURE_HOLDOUT)
+ svm_node_closure_holdout(sd, stack, node);
+ break;
+ SVM_CASE(NODE_FRESNEL)
+ svm_node_fresnel(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_LAYER_WEIGHT)
+ svm_node_layer_weight(sd, stack, node);
+ break;
+ SVM_CASE(NODE_CLOSURE_VOLUME)
+ IF_KERNEL_NODES_FEATURE(VOLUME)
+ {
+ svm_node_closure_volume<type>(kg, sd, stack, node);
+ }
+ break;
+ SVM_CASE(NODE_PRINCIPLED_VOLUME)
+ IF_KERNEL_NODES_FEATURE(VOLUME)
+ {
+ offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
+ }
+ break;
+ SVM_CASE(NODE_MATH)
+ svm_node_math(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_VECTOR_MATH)
+ offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_RGB_RAMP)
+ offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_GAMMA)
+ svm_node_gamma(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_BRIGHTCONTRAST)
+ svm_node_brightness(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_LIGHT_PATH)
+ svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
+ break;
+ SVM_CASE(NODE_OBJECT_INFO)
+ svm_node_object_info(kg, sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_PARTICLE_INFO)
+ svm_node_particle_info(kg, sd, stack, node.y, node.z);
+ break;
#if defined(__HAIR__)
- case NODE_HAIR_INFO:
- svm_node_hair_info(kg, sd, stack, node.y, node.z);
- break;
+ SVM_CASE(NODE_HAIR_INFO)
+ svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ break;
#endif
#if defined(__POINTCLOUD__)
- case NODE_POINT_INFO:
- svm_node_point_info(kg, sd, stack, node.y, node.z);
- break;
+ SVM_CASE(NODE_POINT_INFO)
+ svm_node_point_info(kg, sd, stack, node.y, node.z);
+ break;
#endif
- case NODE_TEXTURE_MAPPING:
- offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
- break;
- case NODE_MAPPING:
- svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_MIN_MAX:
- offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
- break;
- case NODE_CAMERA:
- svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_TEX_ENVIRONMENT:
- svm_node_tex_environment(kg, sd, stack, node);
- break;
- case NODE_TEX_SKY:
- offset = svm_node_tex_sky(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_GRADIENT:
- svm_node_tex_gradient(sd, stack, node);
- break;
- case NODE_TEX_VORONOI:
- offset = svm_node_tex_voronoi<node_feature_mask>(
- kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_TEX_MUSGRAVE:
- offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_TEX_WAVE:
- offset = svm_node_tex_wave(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_MAGIC:
- offset = svm_node_tex_magic(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_CHECKER:
- svm_node_tex_checker(kg, sd, stack, node);
- break;
- case NODE_TEX_BRICK:
- offset = svm_node_tex_brick(kg, sd, stack, node, offset);
- break;
- case NODE_TEX_WHITE_NOISE:
- svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_NORMAL:
- offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_LIGHT_FALLOFF:
- svm_node_light_falloff(sd, stack, node);
- break;
- case NODE_IES:
- svm_node_ies(kg, sd, stack, node);
- break;
- case NODE_RGB_CURVES:
- case NODE_VECTOR_CURVES:
- offset = svm_node_curves(kg, sd, stack, node, offset);
- break;
- case NODE_FLOAT_CURVE:
- offset = svm_node_curve(kg, sd, stack, node, offset);
- break;
- case NODE_TANGENT:
- svm_node_tangent(kg, sd, stack, node);
- break;
- case NODE_NORMAL_MAP:
- svm_node_normal_map(kg, sd, stack, node);
- break;
- case NODE_INVERT:
- svm_node_invert(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_MIX:
- offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_SEPARATE_COLOR:
- svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_COMBINE_COLOR:
- svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_SEPARATE_VECTOR:
- svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_COMBINE_VECTOR:
- svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_SEPARATE_HSV:
- offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_COMBINE_HSV:
- offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_VECTOR_ROTATE:
- svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_VECTOR_TRANSFORM:
- svm_node_vector_transform(kg, sd, stack, node);
- break;
- case NODE_WIREFRAME:
- svm_node_wireframe(kg, sd, stack, node);
- break;
- case NODE_WAVELENGTH:
- svm_node_wavelength(kg, sd, stack, node.y, node.z);
- break;
- case NODE_BLACKBODY:
- svm_node_blackbody(kg, sd, stack, node.y, node.z);
- break;
- case NODE_MAP_RANGE:
- offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_VECTOR_MAP_RANGE:
- offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
- case NODE_CLAMP:
- offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
- break;
+ SVM_CASE(NODE_TEXTURE_MAPPING)
+ offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
+ break;
+ SVM_CASE(NODE_MAPPING)
+ svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_MIN_MAX)
+ offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
+ break;
+ SVM_CASE(NODE_CAMERA)
+ svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_TEX_ENVIRONMENT)
+ svm_node_tex_environment(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_TEX_SKY)
+ offset = svm_node_tex_sky(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_GRADIENT)
+ svm_node_tex_gradient(sd, stack, node);
+ break;
+ SVM_CASE(NODE_TEX_VORONOI)
+ offset = svm_node_tex_voronoi<node_feature_mask>(
+ kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_TEX_MUSGRAVE)
+ offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_TEX_WAVE)
+ offset = svm_node_tex_wave(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_MAGIC)
+ offset = svm_node_tex_magic(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_CHECKER)
+ svm_node_tex_checker(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_TEX_BRICK)
+ offset = svm_node_tex_brick(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TEX_WHITE_NOISE)
+ svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_NORMAL)
+ offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_LIGHT_FALLOFF)
+ svm_node_light_falloff(sd, stack, node);
+ break;
+ SVM_CASE(NODE_IES)
+ svm_node_ies(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_CURVES)
+ offset = svm_node_curves(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_FLOAT_CURVE)
+ offset = svm_node_curve(kg, sd, stack, node, offset);
+ break;
+ SVM_CASE(NODE_TANGENT)
+ svm_node_tangent(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_NORMAL_MAP)
+ svm_node_normal_map(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_INVERT)
+ svm_node_invert(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_MIX)
+ offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_SEPARATE_COLOR)
+ svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_COMBINE_COLOR)
+ svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_SEPARATE_VECTOR)
+ svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_COMBINE_VECTOR)
+ svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_SEPARATE_HSV)
+ offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_COMBINE_HSV)
+ offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_VECTOR_ROTATE)
+ svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_VECTOR_TRANSFORM)
+ svm_node_vector_transform(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_WIREFRAME)
+ svm_node_wireframe(kg, sd, stack, node);
+ break;
+ SVM_CASE(NODE_WAVELENGTH)
+ svm_node_wavelength(kg, sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_BLACKBODY)
+ svm_node_blackbody(kg, sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_MAP_RANGE)
+ offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_VECTOR_MAP_RANGE)
+ offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
+ SVM_CASE(NODE_CLAMP)
+ offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
+ break;
#ifdef __SHADER_RAYTRACE__
- case NODE_BEVEL:
- svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
- break;
- case NODE_AMBIENT_OCCLUSION:
- svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
- break;
+ SVM_CASE(NODE_BEVEL)
+ svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
+ break;
+ SVM_CASE(NODE_AMBIENT_OCCLUSION)
+ svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
+ break;
#endif
- case NODE_TEX_VOXEL:
- IF_KERNEL_NODES_FEATURE(VOLUME)
- {
- offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
- }
- break;
- case NODE_AOV_START:
- if (!svm_node_aov_check(path_flag, render_buffer)) {
- return;
- }
- break;
- case NODE_AOV_COLOR:
- svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
- break;
- case NODE_AOV_VALUE:
- svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
- break;
+ SVM_CASE(NODE_TEX_VOXEL)
+ IF_KERNEL_NODES_FEATURE(VOLUME)
+ {
+ offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
+ }
+ break;
+ SVM_CASE(NODE_AOV_START)
+ if (!svm_node_aov_check(path_flag, render_buffer)) {
+ return;
+ }
+ break;
+ SVM_CASE(NODE_AOV_COLOR)
+ svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
+ break;
+ SVM_CASE(NODE_AOV_VALUE)
+ svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
+ break;
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h
index d9138796c45..2a0130e11d4 100644
--- a/intern/cycles/kernel/svm/tex_coord.h
+++ b/intern/cycles/kernel/svm/tex_coord.h
@@ -138,7 +138,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f));
+ data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
data.z = 0.0f;
@@ -223,7 +223,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f));
+ data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
data.z = 0.0f;
diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h
index 82109ec4c4f..12d0ec141e6 100644
--- a/intern/cycles/kernel/svm/types.h
+++ b/intern/cycles/kernel/svm/types.h
@@ -17,104 +17,9 @@ CCL_NAMESPACE_BEGIN
/* Nodes */
typedef enum ShaderNodeType {
- NODE_END = 0,
- NODE_SHADER_JUMP,
- NODE_CLOSURE_BSDF,
- NODE_CLOSURE_EMISSION,
- NODE_CLOSURE_BACKGROUND,
- NODE_CLOSURE_SET_WEIGHT,
- NODE_CLOSURE_WEIGHT,
- NODE_EMISSION_WEIGHT,
- NODE_MIX_CLOSURE,
- NODE_JUMP_IF_ZERO,
- NODE_JUMP_IF_ONE,
- NODE_GEOMETRY,
- NODE_CONVERT,
- NODE_TEX_COORD,
- NODE_VALUE_F,
- NODE_VALUE_V,
- NODE_ATTR,
- NODE_VERTEX_COLOR,
- NODE_GEOMETRY_BUMP_DX,
- NODE_GEOMETRY_BUMP_DY,
- NODE_SET_DISPLACEMENT,
- NODE_DISPLACEMENT,
- NODE_VECTOR_DISPLACEMENT,
- NODE_TEX_IMAGE,
- NODE_TEX_IMAGE_BOX,
- NODE_TEX_NOISE,
- NODE_SET_BUMP,
- NODE_ATTR_BUMP_DX,
- NODE_ATTR_BUMP_DY,
- NODE_VERTEX_COLOR_BUMP_DX,
- NODE_VERTEX_COLOR_BUMP_DY,
- NODE_TEX_COORD_BUMP_DX,
- NODE_TEX_COORD_BUMP_DY,
- NODE_CLOSURE_SET_NORMAL,
- NODE_ENTER_BUMP_EVAL,
- NODE_LEAVE_BUMP_EVAL,
- NODE_HSV,
- NODE_CLOSURE_HOLDOUT,
- NODE_FRESNEL,
- NODE_LAYER_WEIGHT,
- NODE_CLOSURE_VOLUME,
- NODE_PRINCIPLED_VOLUME,
- NODE_MATH,
- NODE_VECTOR_MATH,
- NODE_RGB_RAMP,
- NODE_GAMMA,
- NODE_BRIGHTCONTRAST,
- NODE_LIGHT_PATH,
- NODE_OBJECT_INFO,
- NODE_PARTICLE_INFO,
- NODE_HAIR_INFO,
- NODE_POINT_INFO,
- NODE_TEXTURE_MAPPING,
- NODE_MAPPING,
- NODE_MIN_MAX,
- NODE_CAMERA,
- NODE_TEX_ENVIRONMENT,
- NODE_TEX_SKY,
- NODE_TEX_GRADIENT,
- NODE_TEX_VORONOI,
- NODE_TEX_MUSGRAVE,
- NODE_TEX_WAVE,
- NODE_TEX_MAGIC,
- NODE_TEX_CHECKER,
- NODE_TEX_BRICK,
- NODE_TEX_WHITE_NOISE,
- NODE_NORMAL,
- NODE_LIGHT_FALLOFF,
- NODE_IES,
- NODE_RGB_CURVES,
- NODE_VECTOR_CURVES,
- NODE_TANGENT,
- NODE_NORMAL_MAP,
- NODE_INVERT,
- NODE_MIX,
- NODE_SEPARATE_COLOR,
- NODE_COMBINE_COLOR,
- NODE_SEPARATE_VECTOR,
- NODE_COMBINE_VECTOR,
- NODE_SEPARATE_HSV,
- NODE_COMBINE_HSV,
- NODE_VECTOR_ROTATE,
- NODE_VECTOR_TRANSFORM,
- NODE_WIREFRAME,
- NODE_WAVELENGTH,
- NODE_BLACKBODY,
- NODE_MAP_RANGE,
- NODE_VECTOR_MAP_RANGE,
- NODE_CLAMP,
- NODE_BEVEL,
- NODE_AMBIENT_OCCLUSION,
- NODE_TEX_VOXEL,
- NODE_AOV_START,
- NODE_AOV_COLOR,
- NODE_AOV_VALUE,
- NODE_FLOAT_CURVE,
- /* NOTE: for best OpenCL performance, item definition in the enum must
- * match the switch case order in `svm.h`. */
+#define SHADER_NODE_TYPE(name) name,
+#include "node_types_template.h"
+ NODE_NUM
} ShaderNodeType;
typedef enum NodeAttributeOutputType {
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index f2e61d25002..7762c95275e 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -19,10 +19,6 @@
#include "kernel/svm/types.h"
-#ifndef __KERNEL_GPU__
-# define __KERNEL_CPU__
-#endif
-
CCL_NAMESPACE_BEGIN
/* Constants */
@@ -51,10 +47,10 @@ CCL_NAMESPACE_BEGIN
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
-#ifdef __KERNEL_CPU__
-# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
+#ifdef __KERNEL_GPU__
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
+#else
+# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
#endif
/* Kernel features */
@@ -83,7 +79,6 @@ CCL_NAMESPACE_BEGIN
#define __LAMP_MIS__
#define __CAMERA_MOTION__
#define __OBJECT_MOTION__
-#define __BAKING__
#define __PRINCIPLED__
#define __SUBSURFACE__
#define __VOLUME__
@@ -92,16 +87,12 @@ CCL_NAMESPACE_BEGIN
#define __BRANCHED_PATH__
/* Device specific features */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# ifdef WITH_OSL
# define __OSL__
# endif
# define __VOLUME_RECORD_ALL__
-#endif /* __KERNEL_CPU__ */
-
-#ifdef __KERNEL_GPU_RAYTRACING__
-# undef __BAKING__
-#endif /* __KERNEL_GPU_RAYTRACING__ */
+#endif /* !__KERNEL_GPU__ */
/* MNEE currently causes "Compute function exceeds available temporary registers"
* on Metal, disabled for now. */
@@ -129,9 +120,6 @@ CCL_NAMESPACE_BEGIN
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE)
# undef __SUBSURFACE__
# endif
-# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING)
-# undef __BAKING__
-# endif
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION)
# undef __PATCH_EVAL__
# endif
@@ -535,7 +523,8 @@ typedef struct RaySelfPrimitives {
typedef struct Ray {
float3 P; /* origin */
float3 D; /* direction */
- float t; /* length of the ray */
+ float tmin; /* start distance */
+ float tmax; /* end distance */
float time; /* time (for motion blur) */
RaySelfPrimitives self;
@@ -729,7 +718,7 @@ typedef struct ccl_align(16) ShaderClosure
{
SHADER_CLOSURE_BASE;
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
float pad[2];
#endif
float data[10];
@@ -1072,94 +1061,6 @@ typedef struct KernelCamera {
} KernelCamera;
static_assert_align(KernelCamera, 16);
-typedef struct KernelFilm {
- float exposure;
- int pass_flag;
-
- int light_pass_flag;
- int pass_stride;
-
- int pass_combined;
- int pass_depth;
- int pass_position;
- int pass_normal;
- int pass_roughness;
- int pass_motion;
-
- int pass_motion_weight;
- int pass_uv;
- int pass_object_id;
- int pass_material_id;
-
- int pass_diffuse_color;
- int pass_glossy_color;
- int pass_transmission_color;
-
- int pass_diffuse_indirect;
- int pass_glossy_indirect;
- int pass_transmission_indirect;
- int pass_volume_indirect;
-
- int pass_diffuse_direct;
- int pass_glossy_direct;
- int pass_transmission_direct;
- int pass_volume_direct;
-
- int pass_emission;
- int pass_background;
- int pass_ao;
- float pass_alpha_threshold;
-
- int pass_shadow;
- float pass_shadow_scale;
-
- int pass_shadow_catcher;
- int pass_shadow_catcher_sample_count;
- int pass_shadow_catcher_matte;
-
- int filter_table_offset;
-
- int cryptomatte_passes;
- int cryptomatte_depth;
- int pass_cryptomatte;
-
- int pass_adaptive_aux_buffer;
- int pass_sample_count;
-
- int pass_mist;
- float mist_start;
- float mist_inv_depth;
- float mist_falloff;
-
- int pass_denoising_normal;
- int pass_denoising_albedo;
- int pass_denoising_depth;
-
- int pass_aov_color;
- int pass_aov_value;
- int pass_lightgroup;
-
- /* XYZ to rendering color space transform. float4 instead of float3 to
- * ensure consistent padding/alignment across devices. */
- float4 xyz_to_r;
- float4 xyz_to_g;
- float4 xyz_to_b;
- float4 rgb_to_y;
- /* Rec709 to rendering color space. */
- float4 rec709_to_r;
- float4 rec709_to_g;
- float4 rec709_to_b;
- int is_rec709;
-
- int pass_bake_primitive;
- int pass_bake_differential;
-
- int use_approximate_shadow_catcher;
-
- int pad1;
-} KernelFilm;
-static_assert_align(KernelFilm, 16);
-
typedef struct KernelFilmConvert {
int pass_offset;
int pass_stride;
@@ -1201,108 +1102,6 @@ typedef struct KernelFilmConvert {
} KernelFilmConvert;
static_assert_align(KernelFilmConvert, 16);
-typedef struct KernelBackground {
- /* only shader index */
- int surface_shader;
- int volume_shader;
- float volume_step_size;
- int transparent;
- float transparent_roughness_squared_threshold;
-
- /* portal sampling */
- float portal_weight;
- int num_portals;
- int portal_offset;
-
- /* sun sampling */
- float sun_weight;
- /* xyz store direction, w the angle. float4 instead of float3 is used
- * to ensure consistent padding/alignment across devices. */
- float4 sun;
-
- /* map sampling */
- float map_weight;
- int map_res_x;
- int map_res_y;
-
- int use_mis;
-
- int lightgroup;
-
- /* Padding */
- int pad1, pad2;
-} KernelBackground;
-static_assert_align(KernelBackground, 16);
-
-typedef struct KernelIntegrator {
- /* emission */
- int use_direct_light;
- int num_distribution;
- int num_all_lights;
- float pdf_triangles;
- float pdf_lights;
- float light_inv_rr_threshold;
-
- /* bounces */
- int min_bounce;
- int max_bounce;
-
- int max_diffuse_bounce;
- int max_glossy_bounce;
- int max_transmission_bounce;
- int max_volume_bounce;
-
- /* AO bounces */
- int ao_bounces;
- float ao_bounces_distance;
- float ao_bounces_factor;
- float ao_additive_factor;
-
- /* transparent */
- int transparent_min_bounce;
- int transparent_max_bounce;
- int transparent_shadows;
-
- /* caustics */
- int caustics_reflective;
- int caustics_refractive;
- float filter_glossy;
-
- /* seed */
- int seed;
-
- /* clamp */
- float sample_clamp_direct;
- float sample_clamp_indirect;
-
- /* mis */
- int use_lamp_mis;
-
- /* caustics */
- int use_caustics;
-
- /* sampler */
- int sampling_pattern;
-
- /* volume render */
- int use_volumes;
- int volume_max_steps;
- float volume_step_rate;
-
- int has_shadow_catcher;
- float scrambling_distance;
-
- /* Closure filter. */
- int filter_closures;
-
- /* MIS debugging. */
- int direct_light_sampling_type;
-
- /* padding */
- int pad1;
-} KernelIntegrator;
-static_assert_align(KernelIntegrator, 16);
-
typedef enum KernelBVHLayout {
BVH_LAYOUT_NONE = 0,
@@ -1320,36 +1119,25 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
} KernelBVHLayout;
-typedef struct KernelBVH {
- /* Own BVH */
- int root;
- int have_motion;
- int have_curves;
- int bvh_layout;
- int use_bvh_steps;
- int curve_subdivisions;
+/* Specialized struct that can become constants in dynamic compilation. */
+#define KERNEL_STRUCT_BEGIN(name, parent) struct name {
+#define KERNEL_STRUCT_END(name) \
+ } \
+ ; \
+ static_assert_align(name, 16);
- /* Custom BVH */
-#ifdef __KERNEL_OPTIX__
- OptixTraversableHandle scene;
-#elif defined __METALRT__
- metalrt_as_type scene;
+#ifdef __KERNEL_USE_DATA_CONSTANTS__
+# define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name;
#else
-# ifdef __EMBREE__
- RTCScene scene;
-# ifndef __KERNEL_64_BIT__
- int pad2;
-# endif
-# else
- int scene, pad2;
-# endif
+# define KERNEL_STRUCT_MEMBER(parent, type, name) type name;
#endif
-} KernelBVH;
-static_assert_align(KernelBVH, 16);
+
+#include "kernel/data_template.h"
typedef struct KernelTables {
int beckmann_offset;
- int pad1, pad2, pad3;
+ int filter_table_offset;
+ int pad1, pad2;
} KernelTables;
static_assert_align(KernelTables, 16);
@@ -1362,18 +1150,37 @@ typedef struct KernelBake {
static_assert_align(KernelBake, 16);
typedef struct KernelData {
+ /* Features and limits. */
uint kernel_features;
uint max_closures;
uint max_shaders;
uint volume_stack_size;
+ /* Always dynamic data members. */
KernelCamera cam;
- KernelFilm film;
- KernelBackground background;
- KernelIntegrator integrator;
- KernelBVH bvh;
- KernelTables tables;
KernelBake bake;
+ KernelTables tables;
+
+ /* Potentially specialized data members. */
+#define KERNEL_STRUCT_BEGIN(name, parent) name parent;
+#include "kernel/data_template.h"
+
+ /* Device specific BVH. */
+#ifdef __KERNEL_OPTIX__
+ OptixTraversableHandle device_bvh;
+#elif defined __METALRT__
+ metalrt_as_type device_bvh;
+#else
+# ifdef __EMBREE__
+ RTCScene device_bvh;
+# ifndef __KERNEL_64_BIT__
+ int pad1;
+# endif
+# else
+ int device_bvh, pad1;
+# endif
+#endif
+ int pad2, pad3;
} KernelData;
static_assert_align(KernelData, 16);
@@ -1729,15 +1536,15 @@ enum KernelFeatureFlag : uint32_t {
/* Must be constexpr on the CPU to avoid compile errors because the state types
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so can't use it. */
-#ifdef __KERNEL_CPU__
+#ifdef __KERNEL_GPU__
+# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
+# define IF_KERNEL_NODES_FEATURE(feature) \
+ if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#else
# define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#else
-# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-# define IF_KERNEL_NODES_FEATURE(feature) \
- if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/util/profiling.h b/intern/cycles/kernel/util/profiling.h
index 39cabd35967..b8afaf1166d 100644
--- a/intern/cycles/kernel/util/profiling.h
+++ b/intern/cycles/kernel/util/profiling.h
@@ -3,13 +3,13 @@
#pragma once
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# include "util/profiling.h"
#endif
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# define PROFILING_INIT(kg, event) \
ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event)
# define PROFILING_EVENT(event) profiling_helper.set_event(event)
@@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN
# define PROFILING_EVENT(event)
# define PROFILING_INIT_FOR_SHADER(kg, event)
# define PROFILING_SHADER(object, shader)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END