Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--intern/cycles/kernel/device/cpu/bvh.h582
-rw-r--r--intern/cycles/kernel/device/cpu/compat.h36
-rw-r--r--intern/cycles/kernel/device/cpu/globals.h19
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h10
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h8
-rw-r--r--intern/cycles/kernel/device/gpu/parallel_active_index.h33
-rw-r--r--intern/cycles/kernel/device/metal/bvh.h360
-rw-r--r--intern/cycles/kernel/device/metal/compat.h90
-rw-r--r--intern/cycles/kernel/device/metal/context_begin.h37
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal506
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h82
-rw-r--r--intern/cycles/kernel/device/oneapi/dll_interface_template.h53
-rw-r--r--intern/cycles/kernel/device/oneapi/globals.h9
-rw-r--r--intern/cycles/kernel/device/oneapi/image.h26
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.cpp566
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.h21
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel_templates.h2
-rw-r--r--intern/cycles/kernel/device/optix/bvh.h659
-rw-r--r--intern/cycles/kernel/device/optix/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu436
20 files changed, 2109 insertions, 1427 deletions
diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h
new file mode 100644
index 00000000000..2d7d8c2d704
--- /dev/null
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@@ -0,0 +1,582 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* CPU Embree implementation of ray-scene intersection. */
+
+#pragma once
+
+#include <embree3/rtcore_ray.h>
+#include <embree3/rtcore_scene.h>
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+#include "kernel/geom/object.h"
+#include "kernel/integrator/state.h"
+#include "kernel/sample/lcg.h"
+
+#include "util/vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define EMBREE_IS_HAIR(x) (x & 1)
+
+/* Intersection context. */
+
+struct CCLIntersectContext {
+ typedef enum {
+ RAY_REGULAR = 0,
+ RAY_SHADOW_ALL = 1,
+ RAY_LOCAL = 2,
+ RAY_SSS = 3,
+ RAY_VOLUME_ALL = 4,
+ } RayType;
+
+ KernelGlobals kg;
+ RayType type;
+
+ /* For avoiding self intersections */
+ const Ray *ray;
+
+ /* for shadow rays */
+ Intersection *isect_s;
+ uint max_hits;
+ uint num_hits;
+ uint num_recorded_hits;
+ float throughput;
+ float max_t;
+ bool opaque_hit;
+
+ /* for SSS Rays: */
+ LocalIntersection *local_isect;
+ int local_object_id;
+ uint *lcg_state;
+
+ CCLIntersectContext(KernelGlobals kg_, RayType type_)
+ {
+ kg = kg_;
+ type = type_;
+ ray = NULL;
+ max_hits = 1;
+ num_hits = 0;
+ num_recorded_hits = 0;
+ throughput = 1.0f;
+ max_t = FLT_MAX;
+ opaque_hit = false;
+ isect_s = NULL;
+ local_isect = NULL;
+ local_object_id = -1;
+ lcg_state = NULL;
+ }
+};
+
+class IntersectContext {
+ public:
+ IntersectContext(CCLIntersectContext *ctx)
+ {
+ rtcInitIntersectContext(&context);
+ userRayExt = ctx;
+ }
+ RTCIntersectContext context;
+ CCLIntersectContext *userRayExt;
+};
+
+/* Utilities. */
+
+ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
+ RTCRay &rtc_ray,
+ const uint visibility)
+{
+ rtc_ray.org_x = ray.P.x;
+ rtc_ray.org_y = ray.P.y;
+ rtc_ray.org_z = ray.P.z;
+ rtc_ray.dir_x = ray.D.x;
+ rtc_ray.dir_y = ray.D.y;
+ rtc_ray.dir_z = ray.D.z;
+ rtc_ray.tnear = ray.tmin;
+ rtc_ray.tfar = ray.tmax;
+ rtc_ray.time = ray.time;
+ rtc_ray.mask = visibility;
+}
+
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
+ RTCRayHit &rayhit,
+ const uint visibility)
+{
+ kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+ rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+ rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
+}
+
+ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
+ const RTCHit *hit,
+ const Ray *ray)
+{
+ int object, prim;
+
+ if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ object = hit->instID[0] / 2;
+ if ((ray->self.object == object) || (ray->self.light_object == object)) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+ prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ }
+ else {
+ return false;
+ }
+ }
+ else {
+ object = hit->geomID / 2;
+ if ((ray->self.object == object) || (ray->self.light_object == object)) {
+ prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
+ }
+ else {
+ return false;
+ }
+ }
+
+ const bool is_hair = hit->geomID & 1;
+ if (is_hair) {
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
+
+ return intersection_skip_self_shadow(ray->self, object, prim);
+}
+
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
+ const RTCRay *ray,
+ const RTCHit *hit,
+ Intersection *isect)
+{
+ isect->t = ray->tfar;
+ if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ isect->object = hit->instID[0] / 2;
+ }
+ else {
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
+ isect->object = hit->geomID / 2;
+ }
+
+ const bool is_hair = hit->geomID & 1;
+ if (is_hair) {
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
+ isect->type = segment.type;
+ isect->prim = segment.prim;
+ isect->u = hit->u;
+ isect->v = hit->v;
+ }
+ else {
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+ isect->u = hit->u;
+ isect->v = hit->v;
+ }
+}
+
+ccl_device_inline void kernel_embree_convert_sss_hit(
+ KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
+{
+ isect->u = hit->u;
+ isect->v = hit->v;
+ isect->t = ray->tfar;
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, object * 2));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ isect->object = object;
+ isect->type = kernel_data_fetch(objects, object).primitive_type;
+}
+
+/* Ray filter functions. */
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls. */
+ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
+{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
+ assert(args->N == 1);
+
+ RTCHit *hit = (RTCHit *)args->hit;
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ }
+}
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls.
+ */
+ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args)
+{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
+ assert(args->N == 1);
+
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ switch (ctx->type) {
+ case CCLIntersectContext::RAY_SHADOW_ALL: {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+ /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
+ const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
+ if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
+ ctx->opaque_hit = true;
+ return;
+ }
+
+ ++ctx->num_hits;
+
+ /* Always use baked shadow transparency for curves. */
+ if (current_isect.type & PRIMITIVE_CURVE) {
+ ctx->throughput *= intersection_curve_shadow_transparency(
+ kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);
+
+ if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ ctx->opaque_hit = true;
+ return;
+ }
+ else {
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ /* Test if we need to record this transparent intersection. */
+ const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
+ /* If maximum number of hits was reached, replace the intersection with the
+ * highest distance. We want to find the N closest intersections. */
+ const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
+ uint isect_index = num_recorded_hits;
+ if (num_recorded_hits + 1 >= max_record_hits) {
+ float max_t = ctx->isect_s[0].t;
+ uint max_recorded_hit = 0;
+
+ for (uint i = 1; i < num_recorded_hits; ++i) {
+ if (ctx->isect_s[i].t > max_t) {
+ max_recorded_hit = i;
+ max_t = ctx->isect_s[i].t;
+ }
+ }
+
+ if (num_recorded_hits >= max_record_hits) {
+ isect_index = max_recorded_hit;
+ }
+
+ /* Limit the ray distance and stop counting hits beyond this.
+ * TODO: is there some way we can tell Embree to stop intersecting beyond
+ * this distance when max number of hits is reached?. Or maybe it will
+ * become irrelevant if we make max_hits a very high number on the CPU. */
+ ctx->max_t = max(current_isect.t, max_t);
+ }
+
+ ctx->isect_s[isect_index] = current_isect;
+ }
+
+ /* Always increase the number of recorded hits, even beyond the maximum,
+ * so that we can detect this and trace another ray if needed. */
+ ++ctx->num_recorded_hits;
+
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_LOCAL:
+ case CCLIntersectContext::RAY_SSS: {
+ /* Check if it's hitting the correct object. */
+ Intersection current_isect;
+ if (ctx->type == CCLIntersectContext::RAY_SSS) {
+ kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
+ }
+ else {
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (ctx->local_object_id != current_isect.object) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ }
+ if (intersection_skip_self_local(cray->self, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ /* No intersection information requested, just return a hit. */
+ if (ctx->max_hits == 0) {
+ break;
+ }
+
+ /* Ignore curves. */
+ if (EMBREE_IS_HAIR(hit->geomID)) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+
+ LocalIntersection *local_isect = ctx->local_isect;
+ int hit_idx = 0;
+
+ if (ctx->lcg_state) {
+ /* See triangle_intersect_subsurface() for the native equivalent. */
+ for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (local_isect->hits[i].t == ray->tfar) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ local_isect->num_hits++;
+
+ if (local_isect->num_hits <= ctx->max_hits) {
+ hit_idx = local_isect->num_hits - 1;
+ }
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
+
+ if (hit_idx >= ctx->max_hits) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+ }
+ else {
+ /* Record closest intersection only. */
+ if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
+ *args->valid = 0;
+ return;
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ /* record intersection */
+ local_isect->hits[hit_idx] = current_isect;
+ local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_VOLUME_ALL: {
+ /* Append the intersection to the end of the array. */
+ if (ctx->num_hits < ctx->max_hits) {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ Intersection *isect = &ctx->isect_s[ctx->num_hits];
+ ++ctx->num_hits;
+ *isect = current_isect;
+ /* Only primitives from volume object. */
+ uint tri_object = isect->object;
+ int object_flag = kernel_data_fetch(object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ --ctx->num_hits;
+ }
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ }
+ break;
+ }
+ case CCLIntersectContext::RAY_REGULAR:
+ default:
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ return;
+ }
+ break;
+ }
+}
+
+ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore back-facing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ }
+}
+
+ccl_device void kernel_embree_filter_occluded_func_backface_cull(
+ const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore back-facing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ kernel_embree_filter_occluded_func(args);
+}
+
+/* Scene intersection. */
+
+ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ isect->t = ray->tmax;
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+ IntersectContext rtc_ctx(&ctx);
+ RTCRayHit ray_hit;
+ ctx.ray = ray;
+ kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
+ rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
+ if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID ||
+ ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID) {
+ return false;
+ }
+
+ kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+ return true;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
+ SD_OBJECT_TRANSFORM_APPLIED);
+ CCLIntersectContext ctx(kg,
+ has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
+ ctx.lcg_state = lcg_state;
+ ctx.max_hits = max_hits;
+ ctx.ray = ray;
+ ctx.local_isect = local_isect;
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ ctx.local_object_id = local_object;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+ /* If this object has its own BVH, use it. */
+ if (has_bvh) {
+ RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
+ if (geom) {
+ float3 P = ray->P;
+ float3 dir = ray->D;
+ float3 idir = ray->D;
+ bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
+
+ rtc_ray.org_x = P.x;
+ rtc_ray.org_y = P.y;
+ rtc_ray.org_z = P.z;
+ rtc_ray.dir_x = dir.x;
+ rtc_ray.dir_y = dir.y;
+ rtc_ray.dir_z = dir.z;
+ rtc_ray.tnear = ray->tmin;
+ rtc_ray.tfar = ray->tmax;
+ RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+ kernel_assert(scene);
+ if (scene) {
+ rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+ }
+ }
+ }
+ else {
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+ }
+
+ /* rtcOccluded1 sets tfar to -inf if a hit was found. */
+ return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowStateCPU *state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+ Intersection *isect_array = (Intersection *)state->shadow_isect;
+ ctx.isect_s = isect_array;
+ ctx.max_hits = max_hits;
+ ctx.ray = ray;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+
+ *num_recorded_hits = ctx.num_recorded_hits;
+ *throughput = ctx.throughput;
+ return ctx.opaque_hit;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint max_hits,
+ const uint visibility)
+{
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ ctx.ray = ray;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+ return ctx.num_hits;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index 3bfc37e98ee..1e3e790ca1f 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -3,8 +3,6 @@
#pragma once
-#define __KERNEL_CPU__
-
/* Release kernel has too much false-positive maybe-uninitialized warnings,
* which makes it possible to miss actual warnings.
*/
@@ -35,38 +33,4 @@ CCL_NAMESPACE_BEGIN
#define kernel_assert(cond) assert(cond)
-/* Macros to handle different memory storage on different devices */
-
-#ifdef __KERNEL_SSE2__
-typedef vector3<sseb> sse3b;
-typedef vector3<ssef> sse3f;
-typedef vector3<ssei> sse3i;
-
-ccl_device_inline void print_sse3b(const char *label, sse3b &a)
-{
- print_sseb(label, a.x);
- print_sseb(label, a.y);
- print_sseb(label, a.z);
-}
-
-ccl_device_inline void print_sse3f(const char *label, sse3f &a)
-{
- print_ssef(label, a.x);
- print_ssef(label, a.y);
- print_ssef(label, a.z);
-}
-
-ccl_device_inline void print_sse3i(const char *label, sse3i &a)
-{
- print_ssei(label, a.x);
- print_ssei(label, a.y);
- print_ssei(label, a.z);
-}
-
-# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
-typedef vector3<avxf> avx3f;
-# endif
-
-#endif
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h
index 309afae412e..f7f1a36b2a7 100644
--- a/intern/cycles/kernel/device/cpu/globals.h
+++ b/intern/cycles/kernel/device/cpu/globals.h
@@ -9,6 +9,8 @@
#include "kernel/types.h"
#include "kernel/util/profiling.h"
+#include "util/guiding.h"
+
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
@@ -43,9 +45,20 @@ typedef struct KernelGlobalsCPU {
#ifdef __OSL__
/* On the CPU, we also have the OSL globals here. Most data structures are shared
* with SVM, the difference is in the shaders and object/mesh attributes. */
- OSLGlobals *osl;
- OSLShadingSystem *osl_ss;
- OSLThreadData *osl_tdata;
+ OSLGlobals *osl = nullptr;
+ OSLShadingSystem *osl_ss = nullptr;
+ OSLThreadData *osl_tdata = nullptr;
+#endif
+
+#ifdef __PATH_GUIDING__
+ /* Pointers to global data structures. */
+ openpgl::cpp::SampleStorage *opgl_sample_data_storage = nullptr;
+ openpgl::cpp::Field *opgl_guiding_field = nullptr;
+
+ /* Local data structures owned by the thread. */
+ openpgl::cpp::PathSegmentStorage *opgl_path_segment_storage = nullptr;
+ openpgl::cpp::SurfaceSamplingDistribution *opgl_surface_sampling_distribution = nullptr;
+ openpgl::cpp::VolumeSamplingDistribution *opgl_volume_sampling_distribution = nullptr;
#endif
/* **** Run-time data **** */
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 0e5f7b4a2fd..0d7c06f4fc6 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -34,7 +34,7 @@
# include "kernel/integrator/megakernel.h"
# include "kernel/film/adaptive_sampling.h"
-# include "kernel/film/id_passes.h"
+# include "kernel/film/cryptomatte_passes.h"
# include "kernel/film/read.h"
# include "kernel/bake/bake.h"
@@ -169,7 +169,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check);
return false;
#else
- return kernel_adaptive_sampling_convergence_check(
+ return film_adaptive_sampling_convergence_check(
kg, render_buffer, x, y, threshold, reset, offset, stride);
#endif
}
@@ -185,7 +185,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x);
#else
- kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
+ film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
#endif
}
@@ -200,7 +200,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y);
#else
- kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
+ film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
#endif
}
@@ -215,7 +215,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess);
#else
- kernel_cryptomatte_post(kg, render_buffer, pixel_index);
+ film_cryptomatte_post(kg, render_buffer, pixel_index);
#endif
}
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index e1ab802aa80..d7d2000775f 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -526,7 +526,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
bool converged = true;
if (x < sw && y < sh) {
- converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check(
+ converged = ccl_gpu_kernel_call(film_adaptive_sampling_convergence_check(
nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride));
}
@@ -553,7 +553,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (y < sh) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
+ film_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -572,7 +572,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (x < sw) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
+ film_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -589,7 +589,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
const int pixel_index = ccl_gpu_global_id_x();
if (pixel_index < num_pixels) {
- ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index));
+ ccl_gpu_kernel_call(film_cryptomatte_post(nullptr, render_buffer, pixel_index));
}
}
ccl_gpu_kernel_postfix
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index c1df49c4f49..38cdcb572eb 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN
* and keep device specific code in compat.h */
#ifdef __KERNEL_ONEAPI__
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-template<typename IsActiveOp>
-void cpu_serial_active_index_array_impl(const uint num_states,
- ccl_global int *ccl_restrict indices,
- ccl_global int *ccl_restrict num_indices,
- IsActiveOp is_active_op)
-{
- int write_index = 0;
- for (int state_index = 0; state_index < num_states; state_index++) {
- if (is_active_op(state_index))
- indices[write_index++] = state_index;
- }
- *num_indices = write_index;
- return;
-}
-# endif /* WITH_ONEAPI_SYCL_HOST_ENABLED */
template<typename IsActiveOp>
void gpu_parallel_active_index_array_impl(const uint num_states,
@@ -182,18 +166,11 @@ __device__
num_simd_groups, \
simdgroup_offset)
#elif defined(__KERNEL_ONEAPI__)
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define gpu_parallel_active_index_array( \
- blocksize, num_states, indices, num_indices, is_active_op) \
- if (ccl_gpu_global_size_x() == 1) \
- cpu_serial_active_index_array_impl(num_states, indices, num_indices, is_active_op); \
- else \
- gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op);
-# else
-# define gpu_parallel_active_index_array( \
- blocksize, num_states, indices, num_indices, is_active_op) \
- gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
-# endif
+
+# define gpu_parallel_active_index_array( \
+ blocksize, num_states, indices, num_indices, is_active_op) \
+ gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
+
#else
# define gpu_parallel_active_index_array( \
diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h
new file mode 100644
index 00000000000..03faa3f020f
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/bvh.h
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* MetalRT implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Payload types. */
+
+struct MetalRTIntersectionPayload {
+ RaySelfPrimitives self;
+ uint visibility;
+ float u, v;
+ int prim;
+ int type;
+#if defined(__METALRT_MOTION__)
+ float time;
+#endif
+};
+
+struct MetalRTIntersectionLocalPayload {
+ RaySelfPrimitives self;
+ uint local_object;
+ uint lcg_state;
+ short max_hits;
+ bool has_lcg_state;
+ bool result;
+ LocalIntersection local_isect;
+};
+
+struct MetalRTIntersectionShadowPayload {
+ RaySelfPrimitives self;
+ uint visibility;
+#if defined(__METALRT_MOTION__)
+ float time;
+#endif
+ int state;
+ float throughput;
+ short max_hits;
+ short num_hits;
+ short num_recorded_hits;
+ bool result;
+};
+
+/* Scene intersection. */
+
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ if (!intersection_ray_valid(ray)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ return false;
+ }
+
+#if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ kernel_assert(!"Invalid ift_default");
+ return false;
+ }
+#endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionPayload payload;
+ payload.self = ray->self;
+ payload.u = 0.0f;
+ payload.v = 0.0f;
+ payload.visibility = visibility;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ /* No further intersector setup required: Default MetalRT behavior is any-hit. */
+ }
+ else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ /* No further intersector setup required: Shadow ray early termination is controlled by the
+ * intersection handler */
+ }
+
+#if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_default,
+ payload);
+#else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+#endif
+
+ if (intersection.type == intersection_type::none) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+
+ return false;
+ }
+
+ isect->t = intersection.distance;
+
+ isect->prim = payload.prim;
+ isect->type = payload.type;
+ isect->object = intersection.user_instance_id;
+
+ isect->t = intersection.distance;
+ if (intersection.type == intersection_type::triangle) {
+ isect->u = intersection.triangle_barycentric_coord.x;
+ isect->v = intersection.triangle_barycentric_coord.y;
+ }
+ else {
+ isect->u = payload.u;
+ isect->v = payload.v;
+ }
+
+ return isect->type != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ if (!intersection_ray_valid(ray)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert(!"Invalid ift_local");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionLocalPayload payload;
+ payload.self = ray->self;
+ payload.local_object = local_object;
+ payload.max_hits = max_hits;
+ payload.local_isect.num_hits = 0;
+ if (lcg_state) {
+ payload.has_lcg_state = true;
+ payload.lcg_state = *lcg_state;
+ }
+ payload.result = false;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+# if defined(__METALRT_MOTION__)
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
+# endif
+
+ if (lcg_state) {
+ *lcg_state = payload.lcg_state;
+ }
+ *local_isect = payload.local_isect;
+
+ return payload.result;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowState state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ if (!intersection_ray_valid(ray)) {
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
+ kernel_assert(!"Invalid ift_shadow");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionShadowPayload payload;
+ payload.self = ray->self;
+ payload.visibility = visibility;
+ payload.max_hits = max_hits;
+ payload.num_hits = 0;
+ payload.num_recorded_hits = 0;
+ payload.throughput = 1.0f;
+ payload.result = false;
+ payload.state = state;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ typename metalrt_intersector_type::result_type intersection;
+
+# if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_shadow,
+ payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
+# endif
+
+ *num_recorded_hits = payload.num_recorded_hits;
+ *throughput = payload.throughput;
+
+ return payload.result;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint visibility)
+{
+ if (!intersection_ray_valid(ray)) {
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+ kernel_assert(!"Invalid ift_default");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionPayload payload;
+ payload.self = ray->self;
+ payload.visibility = visibility;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+# if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_default,
+ payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+# endif
+
+ if (intersection.type == intersection_type::none) {
+ return false;
+ }
+
+ isect->prim = payload.prim;
+ isect->type = payload.type;
+ isect->object = intersection.user_instance_id;
+
+ isect->t = intersection.distance;
+ if (intersection.type == intersection_type::triangle) {
+ isect->u = intersection.triangle_barycentric_coord.x;
+ isect->v = intersection.triangle_barycentric_coord.y;
+ }
+ else {
+ isect->u = payload.u;
+ isect->v = payload.v;
+ }
+
+ return isect->type != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 0ed52074a90..f689e93e5a2 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,24 +29,13 @@ using namespace metal::raytracing;
/* Qualifiers */
+#define ccl_device
+#define ccl_device_inline ccl_device __attribute__((always_inline))
+#define ccl_device_forceinline ccl_device __attribute__((always_inline))
#if defined(__KERNEL_METAL_APPLE__)
-
-/* Inline everything for Apple GPUs.
- * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
- * at the cost of longer compile times (~4.5 minutes on M1 Max). */
-
-# define ccl_device __attribute__((always_inline))
-# define ccl_device_inline __attribute__((always_inline))
-# define ccl_device_forceinline __attribute__((always_inline))
-# define ccl_device_noinline __attribute__((always_inline))
-
+# define ccl_device_noinline ccl_device
#else
-
-# define ccl_device
-# define ccl_device_inline ccl_device
-# define ccl_device_forceinline ccl_device
# define ccl_device_noinline ccl_device __attribute__((noinline))
-
#endif
#define ccl_device_noinline_cpu ccl_device
@@ -189,35 +178,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
-#ifdef make_float2
-# undef make_float2
-#endif
-#ifdef make_float3
-# undef make_float3
-#endif
-#ifdef make_float4
-# undef make_float4
-#endif
-#ifdef make_int2
-# undef make_int2
-#endif
-#ifdef make_int3
-# undef make_int3
-#endif
-#ifdef make_int4
-# undef make_int4
-#endif
-#ifdef make_uchar4
-# undef make_uchar4
-#endif
-
-#define make_float2(x, y) float2(x, y)
-#define make_float3(x, y, z) float3(x, y, z)
-#define make_float4(x, y, z, w) float4(x, y, z, w)
-#define make_int2(x, y) int2(x, y)
-#define make_int3(x, y, z) int3(x, y, z)
-#define make_int4(x, y, z, w) int4(x, y, z, w)
-#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+ccl_device_forceinline float2 make_float2(const float x, const float y)
+{
+ return float2(x, y);
+}
+
+ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
+{
+ return float3(x, y, z);
+}
+
+ccl_device_forceinline float4 make_float4(const float x,
+ const float y,
+ const float z,
+ const float w)
+{
+ return float4(x, y, z, w);
+}
+
+ccl_device_forceinline int2 make_int2(const int x, const int y)
+{
+ return int2(x, y);
+}
+
+ccl_device_forceinline int3 make_int3(const int x, const int y, const int z)
+{
+ return int3(x, y, z);
+}
+
+ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
+{
+ return int4(x, y, z, w);
+}
+
+ccl_device_forceinline uchar4 make_uchar4(const uchar x,
+ const uchar y,
+ const uchar z,
+ const uchar w)
+{
+ return uchar4(x, y, z, w);
+}
/* Math functions */
@@ -260,8 +260,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#ifdef __METALRT__
-# define __KERNEL_GPU_RAYTRACING__
-
# if defined(__METALRT_MOTION__)
# define METALRT_TAGS instancing, instance_motion, primitive_motion
# else
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
index 99cb1e3826e..e75ec9cadec 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -34,21 +34,48 @@ class MetalKernelContext {
kernel_assert(0);
return 0;
}
-
+
+#ifdef __KERNEL_METAL_INTEL__
+ template<typename TextureType, typename CoordsType>
+ inline __attribute__((__always_inline__))
+ auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
+ const uint tid, const uint sid,
+ CoordsType coords) const
+ {
+ switch(sid) {
+ default:
+ case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
+ case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
+ case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
+ case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
+ case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
+ case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
+ }
+ }
+#endif
+
// texture2d
template<>
inline __attribute__((__always_inline__))
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x;
+#endif
}
// texture3d
@@ -57,14 +84,22 @@ class MetalKernelContext {
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x;
+#endif
}
# include "kernel/device/gpu/image.h"
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index 764c26dbe8f..8b69ee025cd 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -1,41 +1,44 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Blender Foundation */
-/* Metal kernel entry points */
+/* Metal kernel entry points. */
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/metal/function_constants.h"
#include "kernel/device/gpu/kernel.h"
-/* MetalRT intersection handlers */
+/* MetalRT intersection handlers. */
+
#ifdef __METALRT__
-/* Return type for a bounding box intersection function. */
-struct BoundingBoxIntersectionResult
-{
+/* Intersection return types. */
+
+/* For a bounding box intersection function. */
+struct BoundingBoxIntersectionResult {
bool accept [[accept_intersection]];
bool continue_search [[continue_search]];
float distance [[distance]];
};
-/* Return type for a triangle intersection function. */
-struct TriangleIntersectionResult
-{
+/* For a triangle intersection function. */
+struct TriangleIntersectionResult {
bool accept [[accept_intersection]];
- bool continue_search [[continue_search]];
+ bool continue_search [[continue_search]];
};
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
-ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self,
+/* Utilities. */
+
+ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
return (self.prim == prim) && (self.object == object);
}
-ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self,
+ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
@@ -43,12 +46,14 @@ ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimi
((self.light_prim == prim) && (self.light_object == object));
}
-ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self,
+ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
const int prim)
{
return (self.prim == prim);
}
+/* Hit functions. */
+
template<typename TReturn, uint intersection_type>
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
@@ -58,7 +63,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
const float ray_tmax)
{
TReturn result;
-
+
#ifdef __BVH_LOCAL__
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -101,7 +106,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
}
else {
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
- /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
+ /* Record closest intersection only. Do not terminate ray here, since there is no guarantee
+ * about distance ordering in any-hit */
result.accept = false;
result.continue_search = true;
return result;
@@ -116,8 +122,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
isect->object = object;
isect->type = kernel_data_fetch(objects, object).primitive_type;
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
+ isect->u = barycentrics.x;
+ isect->v = barycentrics.y;
/* Record geometric normal */
const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
@@ -133,21 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
#endif
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
- uint instance_id [[user_instance_id]],
- uint primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_local_hit_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
+ uint instance_id [[user_instance_id]],
+ uint primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
{
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
+ launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
@@ -175,23 +180,14 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
}
# endif
- if (intersection_skip_self_shadow(payload.self, object, prim)) {
- /* continue search */
- return true;
- }
-
- float u = 0.0f, v = 0.0f;
+ const float u = barycentrics.x;
+ const float v = barycentrics.y;
int type = 0;
if (intersection_type == METALRT_HIT_TRIANGLE) {
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
else {
- u = barycentrics.x;
- v = barycentrics.y;
-
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
@@ -204,6 +200,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
}
# endif
+ if (intersection_skip_self_shadow(payload.self, object, prim)) {
+ /* continue search */
+ return true;
+ }
+
# ifndef __TRANSPARENT_SHADOWS__
/* No transparent shadows support compiled in, make opaque. */
payload.result = true;
@@ -215,7 +216,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
short num_recorded_hits = payload.num_recorded_hits;
MetalKernelContext context(launch_params_metal);
-
+
/* If no transparent shadows, all light is blocked and we can stop immediately. */
if (num_hits >= max_hits ||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
@@ -223,11 +224,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
/* terminate ray */
return false;
}
-
+
/* Always use baked shadow transparency for curves. */
if (type & PRIMITIVE_CURVE) {
float throughput = payload.throughput;
- throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
+ throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
payload.throughput = throughput;
payload.num_hits += 1;
@@ -240,10 +241,10 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
return true;
}
}
-
+
payload.num_hits += 1;
payload.num_recorded_hits += 1;
-
+
uint record_index = num_recorded_hits;
const IntegratorShadowState state = payload.state;
@@ -278,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
+
/* Continue tracing. */
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
@@ -286,26 +287,25 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
return true;
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
TriangleIntersectionResult result;
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
+ launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
result.accept = !result.continue_search;
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
@@ -317,15 +317,16 @@ __anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]
}
template<typename TReturnType, uint intersection_type>
-inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const float u)
+inline TReturnType metalrt_visibility_test(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ uint prim,
+ const float u)
{
TReturnType result;
-
-# ifdef __HAIR__
+
+#ifdef __HAIR__
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
@@ -334,15 +335,23 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
return result;
}
}
-# endif
+#endif
uint visibility = payload.visibility;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
result.accept = false;
result.continue_search = true;
return result;
}
+#endif
+
+ if (intersection_type == METALRT_HIT_TRIANGLE) {
+ }
+# ifdef __HAIR__
+ else {
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
# endif
/* Shadow ray early termination. */
@@ -371,16 +380,17 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
return result;
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, 0.0f);
+ TriangleIntersectionResult result =
+ metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+ launch_params_metal, payload, object, prim, 0.0f);
if (result.accept) {
payload.prim = prim;
payload.type = kernel_data_fetch(objects, object).primitive_type;
@@ -388,8 +398,7 @@ __anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
{
/* Unused function */
@@ -400,19 +409,21 @@ __anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance
return result;
}
+/* Primitive intersection functions. */
+
#ifdef __HAIR__
-ccl_device_inline
-void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_curve(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
@@ -421,25 +432,16 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
}
# endif
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.curve_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
+ launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
payload.u = isect.u;
payload.v = isect.v;
payload.prim = prim;
@@ -448,54 +450,41 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
}
}
-ccl_device_inline
-void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_curve_shadow(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
const uint visibility = payload.visibility;
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.curve_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+ launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
-
- if (result.accept) {
- result.distance = isect.t / len;
- }
}
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+ [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
@@ -508,28 +497,36 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
result.distance = ray_tmax;
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
}
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_ribbon_shadow(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
@@ -540,57 +537,73 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
result.distance = ray_tmax;
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve_shadow(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
}
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+ [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
+
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_all_shadow(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
@@ -600,31 +613,39 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve_shadow(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
#endif /* __HAIR__ */
#ifdef __POINTCLOUD__
-ccl_device_inline
-void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_point(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
@@ -633,25 +654,16 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
}
# endif
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.point_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
+ launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
payload.u = isect.u;
payload.v = isect.v;
payload.prim = prim;
@@ -660,56 +672,46 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
}
}
-ccl_device_inline
-void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_point_shadow(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
const uint visibility = payload.visibility;
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.point_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+ launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
}
}
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const int type = kernel_data_fetch(objects, object).primitive_type;
@@ -719,27 +721,35 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+ metalrt_intersection_point(launch_params_metal,
+ payload,
+ object,
+ prim,
+ type,
+ ray_origin,
+ ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
+ [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const int type = kernel_data_fetch(objects, object).primitive_type;
@@ -749,13 +759,21 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+ metalrt_intersection_point_shadow(launch_params_metal,
+ payload,
+ object,
+ prim,
+ type,
+ ray_origin,
+ ray_direction,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 1b25259bcf5..dfaec65130c 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -10,6 +10,7 @@
#define CCL_NAMESPACE_END
#include <cstdint>
+#include <math.h>
#ifndef __NODES_MAX_GROUP__
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
@@ -30,7 +31,7 @@
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline
-#define ccl_noinline
+#define ccl_noinline __attribute__((noinline))
#define ccl_inline_constant const constexpr
#define ccl_static_constant const
#define ccl_device_forceinline __attribute__((always_inline))
@@ -54,18 +55,6 @@
#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
#define ccl_gpu_kernel_threads(block_num_threads)
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define KG_ND_ITEMS \
- kg->nd_item_local_id_0 = item.get_local_id(0); \
- kg->nd_item_local_range_0 = item.get_local_range(0); \
- kg->nd_item_group_0 = item.get_group(0); \
- kg->nd_item_group_range_0 = item.get_group_range(0); \
- kg->nd_item_global_id_0 = item.get_global_id(0); \
- kg->nd_item_global_range_0 = item.get_global_range(0);
-#else
-# define KG_ND_ITEMS
-#endif
-
#define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
@@ -75,8 +64,7 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
(kg); \
cgh.parallel_for<class kernel_##name>( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
- [=](sycl::nd_item<1> item) { \
- KG_ND_ITEMS
+ [=](sycl::nd_item<1> item) {
#define ccl_gpu_kernel_postfix \
}); \
@@ -94,31 +82,17 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
} ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
/* GPU thread, block, grid size and index */
-#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
-# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
-# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
-# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
-# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
-# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
-#else
-# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
-# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
-# define ccl_gpu_block_idx_x (kg->nd_item_group_0)
-# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
-# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
-# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
-#endif
+#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
+#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
+#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
+#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
+#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
+#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
+#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
/* GPU warp synchronization */
-
#define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
#define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
#ifdef __SYCL_DEVICE_ONLY__
@@ -149,25 +123,13 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
/* clang-format on */
/* Types */
+
/* It's not possible to use sycl types like sycl::float3, sycl::int3, etc
- * because these types have different interfaces from blender version */
+ * because these types have different interfaces from blender version. */
using uchar = unsigned char;
using sycl::half;
-struct float3 {
- float x, y, z;
-};
-
-ccl_always_inline float3 make_float3(float x, float y, float z)
-{
- return {x, y, z};
-}
-ccl_always_inline float3 make_float3(float x)
-{
- return {x, x, x};
-}
-
/* math functions */
#define fabsf(x) sycl::fabs((x))
#define copysignf(x, y) sycl::copysign((x), (y))
@@ -186,21 +148,15 @@ ccl_always_inline float3 make_float3(float x)
#define fmodf(x, y) sycl::fmod((x), (y))
#define lgammaf(x) sycl::lgamma((x))
-#define __forceinline __attribute__((always_inline))
-
-/* Types */
-#include "util/half.h"
-#include "util/types.h"
-
-/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they
- * include oneAPI headers, which transitively include math.h headers which will cause redefinitions
- * of the math defines because math.h also uses them and having them defined before math.h include
- * is actually UB. */
-/* Use fast math functions - get them from sycl::native namespace for native math function
- * implementations */
#define cosf(x) sycl::native::cos(((float)(x)))
#define sinf(x) sycl::native::sin(((float)(x)))
#define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y)))
#define tanf(x) sycl::native::tan(((float)(x)))
#define logf(x) sycl::native::log(((float)(x)))
#define expf(x) sycl::native::exp(((float)(x)))
+
+#define __forceinline __attribute__((always_inline))
+
+/* Types */
+#include "util/half.h"
+#include "util/types.h"
diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
deleted file mode 100644
index 662068c0fed..00000000000
--- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2022 Intel Corporation */
-
-/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */
-DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
-DLL_INTERFACE_CALL(oneapi_free, void, void *)
-DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue)
-
-DLL_INTERFACE_CALL(oneapi_get_compute_units_amount, size_t, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr)
-DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr)
-
-DLL_INTERFACE_CALL(oneapi_create_queue, bool, SyclQueue *&external_queue, int device_index)
-DLL_INTERFACE_CALL(oneapi_free_queue, void, SyclQueue *queue)
-DLL_INTERFACE_CALL(
- oneapi_usm_aligned_alloc_host, void *, SyclQueue *queue, size_t memory_size, size_t alignment)
-DLL_INTERFACE_CALL(oneapi_usm_alloc_device, void *, SyclQueue *queue, size_t memory_size)
-DLL_INTERFACE_CALL(oneapi_usm_free, void, SyclQueue *queue, void *usm_ptr)
-
-DLL_INTERFACE_CALL(
- oneapi_usm_memcpy, bool, SyclQueue *queue, void *dest, void *src, size_t num_bytes)
-DLL_INTERFACE_CALL(oneapi_queue_synchronize, bool, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_usm_memset,
- bool,
- SyclQueue *queue,
- void *usm_ptr,
- unsigned char value,
- size_t num_bytes)
-
-DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue)
-
-/* Operation with Kernel globals structure - map of global/constant allocation - filled before
- * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as
- * pointer. */
-DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_set_global_memory,
- void,
- SyclQueue *queue,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-
-DLL_INTERFACE_CALL(oneapi_kernel_preferred_local_size,
- size_t,
- SyclQueue *queue,
- const DeviceKernel kernel,
- const size_t kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_enqueue_kernel,
- bool,
- KernelContext *context,
- int kernel,
- size_t global_size,
- void **args)
diff --git a/intern/cycles/kernel/device/oneapi/globals.h b/intern/cycles/kernel/device/oneapi/globals.h
index d60f4f135ba..116620eb725 100644
--- a/intern/cycles/kernel/device/oneapi/globals.h
+++ b/intern/cycles/kernel/device/oneapi/globals.h
@@ -23,15 +23,6 @@ typedef struct KernelGlobalsGPU {
#undef KERNEL_DATA_ARRAY
IntegratorStateGPU *integrator_state;
const KernelData *__data;
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- size_t nd_item_local_id_0;
- size_t nd_item_local_range_0;
- size_t nd_item_group_0;
- size_t nd_item_group_range_0;
-
- size_t nd_item_global_id_0;
- size_t nd_item_global_range_0;
-#endif
} KernelGlobalsGPU;
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h
index 6681977a675..2417b8eac3b 100644
--- a/intern/cycles/kernel/device/oneapi/image.h
+++ b/intern/cycles/kernel/device/oneapi/image.h
@@ -81,10 +81,15 @@ ccl_device_inline float4 svm_image_texture_read_2d(int id, int x, int y)
x = svm_image_texture_wrap_periodic(x, info.width);
y = svm_image_texture_wrap_periodic(y, info.height);
}
- else {
+ else if (info.extension == EXTENSION_EXTEND) {
x = svm_image_texture_wrap_clamp(x, info.width);
y = svm_image_texture_wrap_clamp(y, info.height);
}
+ else {
+ if (x < 0 || x >= info.width || y < 0 || y >= info.height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
return svm_image_texture_read(info, x, y, 0);
}
@@ -99,11 +104,16 @@ ccl_device_inline float4 svm_image_texture_read_3d(int id, int x, int y, int z)
y = svm_image_texture_wrap_periodic(y, info.height);
z = svm_image_texture_wrap_periodic(z, info.depth);
}
- else {
+ else if (info.extension == EXTENSION_EXTEND) {
x = svm_image_texture_wrap_clamp(x, info.width);
y = svm_image_texture_wrap_clamp(y, info.height);
z = svm_image_texture_wrap_clamp(z, info.depth);
}
+ else {
+ if (x < 0 || x >= info.width || y < 0 || y >= info.height || z < 0 || z >= info.depth) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
return svm_image_texture_read(info, x, y, z);
}
@@ -128,12 +138,6 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals, int id, float x, float
{
const TextureInfo &info = kernel_data_fetch(texture_info, id);
- if (info.extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
if (info.interpolation == INTERPOLATION_CLOSEST) {
/* Closest interpolation. */
int ix, iy;
@@ -315,12 +319,6 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, in
}
#endif
else {
- if (info.extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
x *= info.width;
y *= info.height;
z *= info.depth;
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 300e201600c..525ae288f0c 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -3,208 +3,79 @@
#ifdef WITH_ONEAPI
-/* clang-format off */
# include "kernel.h"
# include <iostream>
# include <map>
# include <set>
-# include <CL/sycl.hpp>
+# include <sycl/sycl.hpp>
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel_templates.h"
# include "kernel/device/gpu/kernel.h"
-/* clang-format on */
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
-static std::vector<sycl::device> oneapi_available_devices();
-
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
s_error_user_ptr = user_ptr;
}
-void oneapi_check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
-{
-# ifdef _DEBUG
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- sycl::info::device_type device_type =
- queue->get_device().get_info<sycl::info::device::device_type>();
- sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
- (void)usm_type;
- assert(usm_type == sycl::usm::alloc::device ||
- ((device_type == sycl::info::device_type::host ||
- device_type == sycl::info::device_type::is_cpu || allow_host) &&
- usm_type == sycl::usm::alloc::host));
-# endif
-}
-
-bool oneapi_create_queue(SyclQueue *&external_queue, int device_index)
-{
- bool finished_correct = true;
- try {
- std::vector<sycl::device> devices = oneapi_available_devices();
- if (device_index < 0 || device_index >= devices.size()) {
- return false;
- }
- sycl::queue *created_queue = new sycl::queue(devices[device_index],
- sycl::property::queue::in_order());
- external_queue = reinterpret_cast<SyclQueue *>(created_queue);
- }
- catch (sycl::exception const &e) {
- finished_correct = false;
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- }
- return finished_correct;
-}
-
-void oneapi_free_queue(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- delete queue;
-}
-
-void *oneapi_usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::aligned_alloc_host(alignment, memory_size, *queue);
-}
-
-void *oneapi_usm_alloc_device(SyclQueue *queue_, size_t memory_size)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::malloc_device(memory_size, *queue);
-}
-
-void oneapi_usm_free(SyclQueue *queue_, void *usm_ptr)
+/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality like
+ * memory allocations, memory transfers and execution of kernel with USM memory. */
+bool oneapi_run_test_kernel(SyclQueue *queue_)
{
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::free(usm_ptr, *queue);
-}
+ const size_t N = 8;
+ const size_t memory_byte_size = sizeof(int) * N;
-bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, dest, true);
- oneapi_check_usm(queue_, src, true);
- sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
+ bool is_computation_correct = true;
try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
- sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
- bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
- src_type == sycl::usm::alloc::device;
- bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
- src_type == sycl::usm::alloc::unknown;
- /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
- * may not wait until the end of the transfer before using the memory.
- */
- if (from_device_to_host || host_or_device_memop_with_offset)
- mem_event.wait();
- return true;
-# endif
-}
+ int *A_host = (int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
-bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
+ for (size_t i = (size_t)0; i < N; i++) {
+ A_host[i] = rand() % 32;
}
- return false;
- }
-# else
- (void)mem_event;
- return true;
-# endif
-}
-bool oneapi_queue_synchronize(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- try {
- queue->wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-}
+ int *A_device = (int *)sycl::malloc_device(memory_byte_size, *queue);
+ int *B_device = (int *)sycl::malloc_device(memory_byte_size, *queue);
-/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality and
- * also trigger runtime compilation of all existing oneAPI kernels */
-bool oneapi_run_test_kernel(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- size_t N = 8;
- sycl::buffer<float, 1> A(N);
- sycl::buffer<float, 1> B(N);
-
- {
- sycl::host_accessor A_host_acc(A, sycl::write_only);
- for (size_t i = (size_t)0; i < N; i++)
- A_host_acc[i] = rand() % 32;
- }
+ queue->memcpy(A_device, A_host, memory_byte_size);
+ queue->wait_and_throw();
- try {
queue->submit([&](sycl::handler &cgh) {
- sycl::accessor A_acc(A, cgh, sycl::read_only);
- sycl::accessor B_acc(B, cgh, sycl::write_only, sycl::no_init);
-
- cgh.parallel_for(N, [=](sycl::id<1> idx) { B_acc[idx] = A_acc[idx] + idx.get(0); });
+ cgh.parallel_for(N, [=](sycl::id<1> idx) { B_device[idx] = A_device[idx] + idx.get(0); });
});
queue->wait_and_throw();
- sycl::host_accessor A_host_acc(A, sycl::read_only);
- sycl::host_accessor B_host_acc(B, sycl::read_only);
+ int *B_host = (int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
+
+ queue->memcpy(B_host, B_device, memory_byte_size);
+ queue->wait_and_throw();
for (size_t i = (size_t)0; i < N; i++) {
- float result = A_host_acc[i] + B_host_acc[i];
- (void)result;
+ const int expected_result = i + A_host[i];
+ if (B_host[i] != expected_result) {
+ is_computation_correct = false;
+ if (s_error_cb) {
+ s_error_cb(("Incorrect result in test kernel execution - expected " +
+ std::to_string(expected_result) + ", got " + std::to_string(B_host[i]))
+ .c_str(),
+ s_error_user_ptr);
+ }
+ }
}
+
+ sycl::free(A_host, *queue);
+ sycl::free(B_host, *queue);
+ sycl::free(A_device, *queue);
+ sycl::free(B_device, *queue);
+ queue->wait_and_throw();
}
catch (sycl::exception const &e) {
if (s_error_cb) {
@@ -213,63 +84,16 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
return false;
}
- return true;
-}
-
-bool oneapi_kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size)
-{
- kernel_global_size = sizeof(KernelGlobalsGPU);
-
- return true;
-}
-
-void oneapi_set_global_memory(SyclQueue *queue_,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-{
- assert(queue_);
- assert(kernel_globals);
- assert(memory_name);
- assert(memory_device_pointer);
- KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
- oneapi_check_usm(queue_, memory_device_pointer);
- oneapi_check_usm(queue_, kernel_globals, true);
-
- std::string matched_name(memory_name);
-
-/* This macro will change global ptr of KernelGlobals via name matching. */
-# define KERNEL_DATA_ARRAY(type, name) \
- else if (#name == matched_name) \
- { \
- globals->__##name = (type *)memory_device_pointer; \
- return; \
- }
- if (false) {
- }
- else if ("integrator_state" == matched_name) {
- globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
- return;
- }
- KERNEL_DATA_ARRAY(KernelData, data)
-# include "kernel/data_arrays.h"
- else
- {
- std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
- << std::endl;
- assert(false);
- }
-# undef KERNEL_DATA_ARRAY
+ return is_computation_correct;
}
/* TODO: Move device information to OneapiDevice initialized on creation and use it. */
/* TODO: Move below function to oneapi/queue.cpp. */
-size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
+size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
const DeviceKernel kernel,
const size_t kernel_global_size)
{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ assert(queue);
(void)kernel_global_size;
const static size_t preferred_work_group_size_intersect_shading = 32;
const static size_t preferred_work_group_size_technical = 1024;
@@ -311,11 +135,63 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
preferred_work_group_size = 512;
}
- const size_t limit_work_group_size =
- queue->get_device().get_info<sycl::info::device::max_work_group_size>();
+ const size_t limit_work_group_size = reinterpret_cast<sycl::queue *>(queue)
+ ->get_device()
+ .get_info<sycl::info::device::max_work_group_size>();
+
return std::min(limit_work_group_size, preferred_work_group_size);
}
+bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
+{
+# ifdef SYCL_SKIP_KERNELS_PRELOAD
+ (void)queue_;
+ (void)requested_features;
+# else
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+
+ try {
+ sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
+ sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
+ {queue->get_device()});
+
+ for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
+ const std::string &kernel_name = kernel_id.get_name();
+
+ /* NOTE(@nsirgien): Names in this conditions below should match names from
+ * oneapi_call macro in oneapi_enqueue_kernel below */
+ if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
+ continue;
+ }
+
+ if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
+ continue;
+ }
+
+ if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
+ std::string::npos) {
+ continue;
+ }
+
+ sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
+ sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
+ sycl::build(one_kernel_bundle);
+ }
+ }
+ catch (sycl::exception const &e) {
+ if (s_error_cb) {
+ s_error_cb(e.what(), s_error_user_ptr);
+ }
+ return false;
+ }
+# endif
+ return true;
+}
+
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int kernel,
size_t global_size,
@@ -354,13 +230,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
* we extend work size to fit uniformity requirements. */
global_size = groups_count * local_size;
-
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- if (queue->get_device().is_host()) {
- global_size = 1;
- local_size = 1;
- }
-# endif
}
/* Let the compiler throw an error if there are any kernels missing in this implementation. */
@@ -645,13 +514,9 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* Unsupported kernels */
case DEVICE_KERNEL_NUM:
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
- assert(0);
- return false;
+ kernel_assert(0);
+ break;
}
-
- /* Unknown kernel. */
- assert(0);
- return false;
});
}
catch (sycl::exception const &e) {
@@ -668,247 +533,4 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
-
-static const int lowest_supported_driver_version_win = 1011660;
-static const int lowest_supported_driver_version_neo = 23570;
-
-static int parse_driver_build_version(const sycl::device &device)
-{
- const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
- int driver_build_version = 0;
-
- size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
- if (second_dot_position == std::string::npos) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- else {
- try {
- size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
- if (third_dot_position != std::string::npos) {
- const std::string &third_number_substr = driver_version.substr(
- second_dot_position + 1, third_dot_position - second_dot_position - 1);
- const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
- if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
- driver_build_version = std::stoi(third_number_substr) * 10000 +
- std::stoi(forth_number_substr);
- }
- else {
- const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
- driver_build_version = std::stoi(third_number_substr);
- }
- }
- catch (std::invalid_argument &e) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- }
-
- return driver_build_version;
-}
-
-static std::vector<sycl::device> oneapi_available_devices()
-{
- bool allow_all_devices = false;
- if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
- allow_all_devices = true;
-
- /* Host device is useful only for debugging at the moment
- * so we hide this device with default build settings. */
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- bool allow_host = true;
-# else
- bool allow_host = false;
-# endif
-
- const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
-
- std::vector<sycl::device> available_devices;
- for (const sycl::platform &platform : oneapi_platforms) {
- /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
- */
- if (platform.get_backend() == sycl::backend::opencl) {
- continue;
- }
-
- const std::vector<sycl::device> &oneapi_devices =
- (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) :
- platform.get_devices(sycl::info::device_type::gpu);
-
- for (const sycl::device &device : oneapi_devices) {
- if (allow_all_devices) {
- /* still filter out host device if build doesn't support it. */
- if (allow_host || !device.is_host()) {
- available_devices.push_back(device);
- }
- }
- else {
- bool filter_out = false;
-
- /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
- * assuming they have either more than 96 Execution Units or not 7 threads per EU.
- * Official support can be broaden to older and smaller GPUs once ready. */
- if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
- /* Filtered-out defaults in-case these values aren't available through too old L0
- * runtime. */
- int number_of_eus = 96;
- int threads_per_eu = 7;
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- threads_per_eu =
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
- if (number_of_eus <= 96 && threads_per_eu == 7) {
- filter_out = true;
- }
- /* if not already filtered out, check driver version. */
- if (!filter_out) {
- int driver_build_version = parse_driver_build_version(device);
- if ((driver_build_version > 100000 &&
- driver_build_version < lowest_supported_driver_version_win) ||
- (driver_build_version > 0 &&
- driver_build_version < lowest_supported_driver_version_neo)) {
- filter_out = true;
- }
- }
- }
- else if (!allow_host && device.is_host()) {
- filter_out = true;
- }
- else if (!allow_all_devices) {
- filter_out = true;
- }
-
- if (!filter_out) {
- available_devices.push_back(device);
- }
- }
- }
- }
-
- return available_devices;
-}
-
-char *oneapi_device_capabilities()
-{
- std::stringstream capabilities;
-
- const std::vector<sycl::device> &oneapi_devices = oneapi_available_devices();
- for (const sycl::device &device : oneapi_devices) {
- const std::string &name = device.get_info<sycl::info::device::name>();
-
- capabilities << std::string("\t") << name << "\n";
-# define WRITE_ATTR(attribute_name, attribute_variable) \
- capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
- << "\n";
-# define GET_NUM_ATTR(attribute) \
- { \
- size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \
- capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
- }
-
- GET_NUM_ATTR(vendor_id)
- GET_NUM_ATTR(max_compute_units)
- GET_NUM_ATTR(max_work_item_dimensions)
-
- sycl::id<3> max_work_item_sizes = device.get_info<sycl::info::device::max_work_item_sizes>();
- WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
- WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
- WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
-
- GET_NUM_ATTR(max_work_group_size)
- GET_NUM_ATTR(max_num_sub_groups)
- GET_NUM_ATTR(sub_group_independent_forward_progress)
-
- GET_NUM_ATTR(preferred_vector_width_char)
- GET_NUM_ATTR(preferred_vector_width_short)
- GET_NUM_ATTR(preferred_vector_width_int)
- GET_NUM_ATTR(preferred_vector_width_long)
- GET_NUM_ATTR(preferred_vector_width_float)
- GET_NUM_ATTR(preferred_vector_width_double)
- GET_NUM_ATTR(preferred_vector_width_half)
-
- GET_NUM_ATTR(native_vector_width_char)
- GET_NUM_ATTR(native_vector_width_short)
- GET_NUM_ATTR(native_vector_width_int)
- GET_NUM_ATTR(native_vector_width_long)
- GET_NUM_ATTR(native_vector_width_float)
- GET_NUM_ATTR(native_vector_width_double)
- GET_NUM_ATTR(native_vector_width_half)
-
- size_t max_clock_frequency =
- (size_t)(device.is_host() ? (size_t)0 :
- device.get_info<sycl::info::device::max_clock_frequency>());
- WRITE_ATTR("max_clock_frequency", max_clock_frequency)
-
- GET_NUM_ATTR(address_bits)
- GET_NUM_ATTR(max_mem_alloc_size)
-
- /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
- * supported so we always return false, even if device supports HW texture usage acceleration.
- */
- bool image_support = false;
- WRITE_ATTR("image_support", (size_t)image_support)
-
- GET_NUM_ATTR(max_parameter_size)
- GET_NUM_ATTR(mem_base_addr_align)
- GET_NUM_ATTR(global_mem_size)
- GET_NUM_ATTR(local_mem_size)
- GET_NUM_ATTR(error_correction_support)
- GET_NUM_ATTR(profiling_timer_resolution)
- GET_NUM_ATTR(is_available)
-
-# undef GET_NUM_ATTR
-# undef WRITE_ATTR
- capabilities << "\n";
- }
-
- return ::strdup(capabilities.str().c_str());
-}
-
-void oneapi_free(void *p)
-{
- if (p) {
- ::free(p);
- }
-}
-
-void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
-{
- int num = 0;
- std::vector<sycl::device> devices = oneapi_available_devices();
- for (sycl::device &device : devices) {
- const std::string &platform_name =
- device.get_platform().get_info<sycl::info::platform::name>();
- std::string name = device.get_info<sycl::info::device::name>();
- std::string id = "ONEAPI_" + platform_name + "_" + name;
- if (device.has(sycl::aspect::ext_intel_pci_address)) {
- id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
- }
- (cb)(id.c_str(), name.c_str(), num, user_ptr);
- num++;
- }
-}
-
-size_t oneapi_get_memcapacity(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::global_mem_size>();
-}
-
-size_t oneapi_get_compute_units_amount(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::max_compute_units>();
-}
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel.h b/intern/cycles/kernel/device/oneapi/kernel.h
index c5f853742ed..2bfc0b89c87 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@@ -25,11 +25,6 @@ enum DeviceKernel : int;
class SyclQueue;
-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
- const char *name,
- int num,
- void *user_ptr);
-
typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr);
struct KernelContext {
@@ -45,13 +40,17 @@ struct KernelContext {
extern "C" {
# endif
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- CYCLES_KERNEL_ONEAPI_EXPORT return_type function(__VA_ARGS__);
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_run_test_kernel(SyclQueue *queue_);
+CYCLES_KERNEL_ONEAPI_EXPORT void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr);
+CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
+ SyclQueue *queue, const DeviceKernel kernel, const size_t kernel_global_size);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
+ int kernel,
+ size_t global_size,
+ void **args);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
+ const unsigned int requested_features);
# ifdef __cplusplus
}
# endif
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h
index d8964d9b672..0ae925cf748 100644
--- a/intern/cycles/kernel/device/oneapi/kernel_templates.h
+++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h
@@ -80,7 +80,7 @@ void oneapi_call(
(x, ##__VA_ARGS__)
/* This template automatically casts entries in the void **args array to the types requested by the kernel func.
- Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
+ * Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
#define oneapi_template(...) \
template<ONEAPI_CALL_FOR(ONEAPI_TYP, __VA_ARGS__)> \
void oneapi_call( \
diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h
new file mode 100644
index 00000000000..6d81b44660c
--- /dev/null
+++ b/intern/cycles/kernel/device/optix/bvh.h
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* OptiX implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+#define OPTIX_DEFINE_ABI_VERSION_ONLY
+#include <optix_function_table.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Utilities. */
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
+{
+ return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
+}
+template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
+{
+ return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
+}
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
+{
+ return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
+}
+
+ccl_device_forceinline int get_object_id()
+{
+#ifdef __OBJECT_MOTION__
+ /* Always get the instance ID from the TLAS
+ * There might be a motion transform node between TLAS and BLAS which does not have one. */
+ return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
+#else
+ return optixGetInstanceId();
+#endif
+}
+
+/* Hit/miss functions. */
+
+extern "C" __global__ void __miss__kernel_optix_miss()
+{
+ /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
+ optixSetPayload_5(PRIMITIVE_NONE);
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_local_hit()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+ if (!optixIsTriangleHit()) {
+ /* Ignore curves and points. */
+ return optixIgnoreIntersection();
+ }
+#endif
+
+#ifdef __BVH_LOCAL__
+ const int object = get_object_id();
+ if (object != optixGetPayload_4() /* local_object */) {
+ /* Only intersect with matching object. */
+ return optixIgnoreIntersection();
+ }
+
+ const int prim = optixGetPrimitiveIndex();
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_local(ray->self, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+ const uint max_hits = optixGetPayload_5();
+ if (max_hits == 0) {
+ /* Special case for when no hit information is requested, just report that something was hit */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ int hit = 0;
+ uint *const lcg_state = get_payload_ptr_0<uint>();
+ LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
+
+ if (lcg_state) {
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (optixGetRayTmax() == local_isect->hits[i].t) {
+ return optixIgnoreIntersection();
+ }
+ }
+
+ hit = local_isect->num_hits++;
+
+ if (local_isect->num_hits > max_hits) {
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+ if (hit >= max_hits) {
+ return optixIgnoreIntersection();
+ }
+ }
+ }
+ else {
+ if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
+ /* Record closest intersection only.
+ * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
+ */
+ return optixIgnoreIntersection();
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = get_object_id();
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = barycentrics.x;
+ isect->v = barycentrics.y;
+
+ /* Record geometric normal. */
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
+ const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
+ const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
+ const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ /* Continue tracing (without this the trace call would return after the first hit). */
+ optixIgnoreIntersection();
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
+{
+#ifdef __SHADOW_RECORD_ALL__
+ int prim = optixGetPrimitiveIndex();
+ const uint object = get_object_id();
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+# endif
+
+ float u = 0.0f, v = 0.0f;
+ int type = 0;
+ if (optixIsTriangleHit()) {
+ /* Triangle. */
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ u = barycentrics.x;
+ v = barycentrics.y;
+ type = kernel_data_fetch(objects, object).primitive_type;
+ }
+# ifdef __HAIR__
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ /* Curve. */
+ u = __uint_as_float(optixGetAttribute_0());
+ v = __uint_as_float(optixGetAttribute_1());
+
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+ type = segment.type;
+ prim = segment.prim;
+
+# if OPTIX_ABI_VERSION < 55
+ /* Filter out curve end-caps. */
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+# endif
+ }
+# endif
+ else {
+ /* Point. */
+ type = kernel_data_fetch(objects, object).primitive_type;
+ u = 0.0f;
+ v = 0.0f;
+ }
+
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_shadow(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+# ifndef __TRANSPARENT_SHADOWS__
+ /* No transparent shadows support compiled in, make opaque. */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# else
+ const uint max_hits = optixGetPayload_3();
+ const uint num_hits_packed = optixGetPayload_2();
+ const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
+ const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
+
+ /* If no transparent shadows, all light is blocked and we can stop immediately. */
+ if (num_hits >= max_hits ||
+ !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ /* Always use baked shadow transparency for curves. */
+ if (type & PRIMITIVE_CURVE) {
+ float throughput = __uint_as_float(optixGetPayload_1());
+ throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
+ optixSetPayload_1(__float_as_uint(throughput));
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
+
+ if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+ else {
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+ return;
+ }
+ }
+
+ /* Record transparent intersection. */
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
+
+ uint record_index = num_recorded_hits;
+
+ const IntegratorShadowState state = optixGetPayload_0();
+
+ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits) {
+ /* If maximum number of hits reached, find a hit to replace. */
+ float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+ uint max_recorded_hit = 0;
+
+ for (int i = 1; i < max_record_hits; i++) {
+ const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+ if (isect_t > max_recorded_t) {
+ max_recorded_t = isect_t;
+ max_recorded_hit = i;
+ }
+ }
+
+ if (optixGetRayTmax() >= max_recorded_t) {
+ /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
+ * current hit anymore. */
+ return;
+ }
+
+ record_index = max_recorded_hit;
+ }
+
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+# endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __SHADOW_RECORD_ALL__ */
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_volume_test()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+ if (!optixIsTriangleHit()) {
+ /* Ignore curves. */
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ const uint object = get_object_id();
+#ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
+ return optixIgnoreIntersection();
+ }
+
+ const int prim = optixGetPrimitiveIndex();
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
+{
+#ifdef __HAIR__
+# if OPTIX_ABI_VERSION < 55
+ if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
+ /* Filter out curve end-caps. */
+ const float u = __uint_as_float(optixGetAttribute_0());
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+ }
+# endif
+#endif
+
+ const uint object = get_object_id();
+ const uint visibility = optixGetPayload_4();
+#ifdef __VISIBILITY_FLAG__
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ int prim = optixGetPrimitiveIndex();
+ if (optixIsTriangleHit()) {
+ /* Triangle. */
+ }
+#ifdef __HAIR__
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ /* Curve. */
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
+#endif
+
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ if (intersection_skip_self_shadow(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+ else {
+ /* Shadow ray early termination. */
+ return optixTerminateRay();
+ }
+ }
+ else {
+ if (intersection_skip_self(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+ }
+}
+
+extern "C" __global__ void __closesthit__kernel_optix_hit()
+{
+ const int object = get_object_id();
+ const int prim = optixGetPrimitiveIndex();
+
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
+ optixSetPayload_4(object);
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ optixSetPayload_1(__float_as_uint(barycentrics.x));
+ optixSetPayload_2(__float_as_uint(barycentrics.y));
+ optixSetPayload_3(prim);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
+ }
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+ optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
+ optixSetPayload_2(optixGetAttribute_1());
+ optixSetPayload_3(segment.prim);
+ optixSetPayload_5(segment.type);
+ }
+ else {
+ optixSetPayload_1(0);
+ optixSetPayload_2(0);
+ optixSetPayload_3(prim);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
+ }
+}
+
+/* Custom primitive intersection functions. */
+
+#ifdef __HAIR__
+ccl_device_inline void optix_intersection_curve(const int prim, const int type)
+{
+ const int object = get_object_id();
+
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ const float3 ray_P = optixGetObjectRayOrigin();
+ const float3 ray_D = optixGetObjectRayDirection();
+ const float ray_tmin = optixGetRayTmin();
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+
+ if (curve_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
+ static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+ optixReportIntersection(isect.t,
+ type & PRIMITIVE_ALL,
+ __float_as_int(isect.u), /* Attribute_0 */
+ __float_as_int(isect.v)); /* Attribute_1 */
+ }
+}
+
+extern "C" __global__ void __intersection__curve_ribbon()
+{
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
+ const int prim = segment.prim;
+ const int type = segment.type;
+ if (type & PRIMITIVE_CURVE_RIBBON) {
+ optix_intersection_curve(prim, type);
+ }
+}
+
+#endif
+
+#ifdef __POINTCLOUD__
+extern "C" __global__ void __intersection__point()
+{
+ const int prim = optixGetPrimitiveIndex();
+ const int object = get_object_id();
+ const int type = kernel_data_fetch(objects, object).primitive_type;
+
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ const float3 ray_P = optixGetObjectRayOrigin();
+ const float3 ray_D = optixGetObjectRayDirection();
+ const float ray_tmin = optixGetRayTmin();
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+
+ if (point_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
+ static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+ optixReportIntersection(isect.t, type & PRIMITIVE_ALL);
+ }
+}
+#endif
+
+/* Scene intersection. */
+
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+ else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ ray_flags,
+ 0, /* SBT offset for PG_HITD */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ uint p0 = pointer_pack_to_uint_0(lcg_state);
+ uint p1 = pointer_pack_to_uint_1(lcg_state);
+ uint p2 = pointer_pack_to_uint_0(local_isect);
+ uint p3 = pointer_pack_to_uint_1(local_isect);
+ uint p4 = local_object;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
+ uint p5 = max_hits;
+
+ if (local_isect) {
+ local_isect->num_hits = 0; /* Initialize hit count to zero. */
+ }
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ 0xFF,
+ /* Need to always call into __anyhit__kernel_optix_local_hit. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 2, /* SBT offset for PG_HITL */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ return p5;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowState state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ uint p0 = state;
+ uint p1 = __float_as_uint(1.0f); /* Throughput. */
+ uint p2 = 0; /* Number of hits. */
+ uint p3 = max_hits;
+ uint p4 = visibility;
+ uint p5 = false;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 1, /* SBT offset for PG_HITS */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ *num_recorded_hits = uint16_unpack_from_uint_0(p2);
+ *throughput = __uint_as_float(p1);
+
+ return p5;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint visibility)
+{
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ /* Need to always call into __anyhit__kernel_optix_volume_test. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 3, /* SBT offset for PG_HITV */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index aa4a6321a8b..1a11a533b7e 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -8,7 +8,6 @@
#include <optix.h>
#define __KERNEL_GPU__
-#define __KERNEL_GPU_RAYTRACING__
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
#define __KERNEL_OPTIX__
#define CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 510f7cca5d6..6abb5aeacb9 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -20,34 +20,6 @@
#include "kernel/integrator/intersect_volume_stack.h"
// clang-format on
-#define OPTIX_DEFINE_ABI_VERSION_ONLY
-#include <optix_function_table.h>
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
-}
-template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
-}
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
-{
- return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
-}
-
-ccl_device_forceinline int get_object_id()
-{
-#ifdef __OBJECT_MOTION__
- /* Always get the instance ID from the TLAS
- * There might be a motion transform node between TLAS and BLAS which does not have one. */
- return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
-#else
- return optixGetInstanceId();
-#endif
-}
-
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
{
const int global_index = optixGetLaunchIndex().x;
@@ -84,411 +56,3 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
integrator_intersect_volume_stack(nullptr, path_index);
}
-extern "C" __global__ void __miss__kernel_optix_miss()
-{
- /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
- optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
- optixSetPayload_5(PRIMITIVE_NONE);
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_local_hit()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves and points. */
- return optixIgnoreIntersection();
- }
-#endif
-
-#ifdef __BVH_LOCAL__
- const int object = get_object_id();
- if (object != optixGetPayload_4() /* local_object */) {
- /* Only intersect with matching object. */
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_local(ray->self, prim)) {
- return optixIgnoreIntersection();
- }
-
- const uint max_hits = optixGetPayload_5();
- if (max_hits == 0) {
- /* Special case for when no hit information is requested, just report that something was hit */
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- int hit = 0;
- uint *const lcg_state = get_payload_ptr_0<uint>();
- LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
-
- if (lcg_state) {
- for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if (optixGetRayTmax() == local_isect->hits[i].t) {
- return optixIgnoreIntersection();
- }
- }
-
- hit = local_isect->num_hits++;
-
- if (local_isect->num_hits > max_hits) {
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
- if (hit >= max_hits) {
- return optixIgnoreIntersection();
- }
- }
- }
- else {
- if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
- /* Record closest intersection only.
- * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
- */
- return optixIgnoreIntersection();
- }
-
- local_isect->num_hits = 1;
- }
-
- Intersection *isect = &local_isect->hits[hit];
- isect->t = optixGetRayTmax();
- isect->prim = prim;
- isect->object = get_object_id();
- isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
-
- const float2 barycentrics = optixGetTriangleBarycentrics();
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
-
- /* Record geometric normal. */
- const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
- const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
- const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
- const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
- local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
-
- /* Continue tracing (without this the trace call would return after the first hit). */
- optixIgnoreIntersection();
-#endif
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
-{
-#ifdef __SHADOW_RECORD_ALL__
- int prim = optixGetPrimitiveIndex();
- const uint object = get_object_id();
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-# endif
-
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-
- float u = 0.0f, v = 0.0f;
- int type = 0;
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
- type = kernel_data_fetch(objects, object).primitive_type;
- }
-# ifdef __HAIR__
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- u = __uint_as_float(optixGetAttribute_0());
- v = __uint_as_float(optixGetAttribute_1());
-
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- type = segment.type;
- prim = segment.prim;
-
-# if OPTIX_ABI_VERSION < 55
- /* Filter out curve endcaps. */
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
-# endif
- }
-# endif
- else {
- type = kernel_data_fetch(objects, object).primitive_type;
- u = 0.0f;
- v = 0.0f;
- }
-
-# ifndef __TRANSPARENT_SHADOWS__
- /* No transparent shadows support compiled in, make opaque. */
- optixSetPayload_5(true);
- return optixTerminateRay();
-# else
- const uint max_hits = optixGetPayload_3();
- const uint num_hits_packed = optixGetPayload_2();
- const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
- const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
-
- /* If no transparent shadows, all light is blocked and we can stop immediately. */
- if (num_hits >= max_hits ||
- !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- /* Always use baked shadow transparency for curves. */
- if (type & PRIMITIVE_CURVE) {
- float throughput = __uint_as_float(optixGetPayload_1());
- throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
- optixSetPayload_1(__float_as_uint(throughput));
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
-
- if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
- else {
- /* Continue tracing. */
- optixIgnoreIntersection();
- return;
- }
- }
-
- /* Record transparent intersection. */
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
-
- uint record_index = num_recorded_hits;
-
- const IntegratorShadowState state = optixGetPayload_0();
-
- const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- if (record_index >= max_record_hits) {
- /* If maximum number of hits reached, find a hit to replace. */
- float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- uint max_recorded_hit = 0;
-
- for (int i = 1; i < max_record_hits; i++) {
- const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
- if (isect_t > max_recorded_t) {
- max_recorded_t = isect_t;
- max_recorded_hit = i;
- }
- }
-
- if (optixGetRayTmax() >= max_recorded_t) {
- /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
- * current hit anymore. */
- return;
- }
-
- record_index = max_recorded_hit;
- }
-
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
- /* Continue tracing. */
- optixIgnoreIntersection();
-# endif /* __TRANSPARENT_SHADOWS__ */
-#endif /* __SHADOW_RECORD_ALL__ */
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_volume_test()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves. */
- return optixIgnoreIntersection();
- }
-#endif
-
- const uint object = get_object_id();
-#ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
-{
-#ifdef __HAIR__
-# if OPTIX_ABI_VERSION < 55
- if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
- /* Filter out curve endcaps. */
- const float u = __uint_as_float(optixGetAttribute_0());
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
- }
-# endif
-#endif
-
- const uint object = get_object_id();
- const uint visibility = optixGetPayload_4();
-#ifdef __VISIBILITY_FLAG__
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
-
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- else {
- /* Shadow ray early termination. */
- return optixTerminateRay();
- }
- }
- else {
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- }
-}
-
-extern "C" __global__ void __closesthit__kernel_optix_hit()
-{
- const int object = get_object_id();
- const int prim = optixGetPrimitiveIndex();
-
- optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
- optixSetPayload_4(object);
-
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
- optixSetPayload_2(__float_as_uint(barycentrics.x));
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
- optixSetPayload_2(optixGetAttribute_1());
- optixSetPayload_3(segment.prim);
- optixSetPayload_5(segment.type);
- }
- else {
- optixSetPayload_1(0);
- optixSetPayload_2(0);
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
-}
-
-#ifdef __HAIR__
-ccl_device_inline void optix_intersection_curve(const int prim, const int type)
-{
- const int object = get_object_id();
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
- float tmin = optixGetRayTmin();
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- if (curve_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len,
- type & PRIMITIVE_ALL,
- __float_as_int(isect.u), /* Attribute_0 */
- __float_as_int(isect.v)); /* Attribute_1 */
- }
-}
-
-extern "C" __global__ void __intersection__curve_ribbon()
-{
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
- const int prim = segment.prim;
- const int type = segment.type;
- if (type & PRIMITIVE_CURVE_RIBBON) {
- optix_intersection_curve(prim, type);
- }
-}
-
-#endif
-
-#ifdef __POINTCLOUD__
-extern "C" __global__ void __intersection__point()
-{
- const int prim = optixGetPrimitiveIndex();
- const int object = get_object_id();
- const int type = kernel_data_fetch(objects, object).primitive_type;
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
- float tmin = optixGetRayTmin();
-
- /* The direction is not normalized by default, the point intersection routine expects that. */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX) {
- isect.t *= len;
- }
-
- if (point_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
- }
-}
-#endif