Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h38
-rw-r--r--intern/cycles/kernel/device/gpu/work_stealing.h25
-rw-r--r--intern/cycles/kernel/device/metal/compat.h31
-rw-r--r--intern/cycles/kernel/device/metal/context_begin.h4
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal562
-rw-r--r--intern/cycles/kernel/device/optix/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu74
7 files changed, 707 insertions, 28 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 22e2a61a06d..b50f492e8c7 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -19,7 +19,8 @@
#include "kernel/device/gpu/parallel_active_index.h"
#include "kernel/device/gpu/parallel_prefix_sum.h"
#include "kernel/device/gpu/parallel_sorted_index.h"
-#include "kernel/device/gpu/work_stealing.h"
+
+#include "kernel/sample/lcg.h"
/* Include constant tables before entering Metal's context class scope (context_begin.h) */
#include "kernel/tables.h"
@@ -28,6 +29,8 @@
# include "kernel/device/metal/context_begin.h"
#endif
+#include "kernel/device/gpu/work_stealing.h"
+
#include "kernel/integrator/state.h"
#include "kernel/integrator/state_flow.h"
#include "kernel/integrator/state_util.h"
@@ -94,7 +97,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
const int state = tile->path_index_offset + tile_work_index;
uint x, y, sample;
- get_work_pixel(tile, tile_work_index, &x, &y, &sample);
+ ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample));
ccl_gpu_kernel_call(
integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample));
@@ -125,7 +128,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
const int state = tile->path_index_offset + tile_work_index;
uint x, y, sample;
- get_work_pixel(tile, tile_work_index, &x, &y, &sample);
+ ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample));
ccl_gpu_kernel_call(
integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample));
@@ -547,6 +550,33 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
#endif
}
+#ifdef __KERNEL_METAL__
+
+/* Fetch into a local variable on Metal - there is minimal overhead. Templating the
+ * film_get_pass_pixel_... functions works on MSL, but not on other compilers. */
+# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
+ float local_pixel[4]; \
+ film_get_pass_pixel_##variant(&kfilm_convert, buffer, local_pixel); \
+ if (input_channel_count >= 1) { \
+ pixel[0] = local_pixel[0]; \
+ } \
+ if (input_channel_count >= 2) { \
+ pixel[1] = local_pixel[1]; \
+ } \
+ if (input_channel_count >= 3) { \
+ pixel[2] = local_pixel[2]; \
+ } \
+ if (input_channel_count >= 4) { \
+ pixel[3] = local_pixel[3]; \
+ }
+
+#else
+
+# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
+ film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel);
+
+#endif
+
#define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
ccl_gpu_kernel_signature(film_convert_##variant, \
@@ -574,7 +604,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
ccl_global float *pixel = pixels + \
(render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
\
- film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
+ FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
} \
\
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
diff --git a/intern/cycles/kernel/device/gpu/work_stealing.h b/intern/cycles/kernel/device/gpu/work_stealing.h
index fab0915c38e..c3083948057 100644
--- a/intern/cycles/kernel/device/gpu/work_stealing.h
+++ b/intern/cycles/kernel/device/gpu/work_stealing.h
@@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
ccl_private uint *y,
ccl_private uint *sample)
{
-#if 0
- /* Keep threads for the same sample together. */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#else
- /* Keeping threads for the same pixel together.
- * Appears to improve performance by a few % on CUDA and OptiX. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#endif
+ uint sample_offset, pixel_offset;
+
+ if (kernel_data.integrator.scrambling_distance < 0.9f) {
+ /* Keep threads for the same sample together. */
+ uint tile_pixels = tile->w * tile->h;
+ sample_offset = global_work_index / tile_pixels;
+ pixel_offset = global_work_index - sample_offset * tile_pixels;
+ }
+ else {
+ /* Keeping threads for the same pixel together.
+ * Appears to improve performance by a few % on CUDA and OptiX. */
+ sample_offset = global_work_index % tile->num_samples;
+ pixel_offset = global_work_index / tile->num_samples;
+ }
uint y_offset = pixel_offset / tile->w;
uint x_offset = pixel_offset - y_offset * tile->w;
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 61597a4acfc..a51afc37fc0 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -32,6 +32,10 @@
using namespace metal;
+#ifdef __METALRT__
+using namespace metal::raytracing;
+#endif
+
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Wuninitialized"
@@ -47,7 +51,7 @@ using namespace metal;
#define ccl_global device
#define ccl_inline_constant static constant constexpr
#define ccl_device_constant constant
-#define ccl_constant const device
+#define ccl_constant constant
#define ccl_gpu_shared threadgroup
#define ccl_private thread
#define ccl_may_alias
@@ -113,7 +117,7 @@ struct kernel_gpu_##name \
uint simd_group_index, \
uint num_simd_groups) ccl_global const; \
}; \
-kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
+kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \
constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
constant MetalAncillaries *_metal_ancillaries, \
threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
@@ -246,6 +250,22 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#define __device__
+#ifdef __METALRT__
+
+# define __KERNEL_GPU_RAYTRACING__
+
+# if defined(__METALRT_MOTION__)
+# define METALRT_TAGS instancing, instance_motion, primitive_motion
+# else
+# define METALRT_TAGS instancing
+# endif /* __METALRT_MOTION__ */
+
+typedef acceleration_structure<METALRT_TAGS> metalrt_as_type;
+typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type;
+typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type;
+
+#endif /* __METALRT__ */
+
/* texture bindings and sampler setup */
struct Texture2DParamsMetal {
@@ -258,6 +278,13 @@ struct Texture3DParamsMetal {
struct MetalAncillaries {
device Texture2DParamsMetal *textures_2d;
device Texture3DParamsMetal *textures_3d;
+
+#ifdef __METALRT__
+ metalrt_as_type accel_struct;
+ metalrt_ift_type ift_default;
+ metalrt_ift_type ift_shadow;
+ metalrt_ift_type ift_local;
+#endif
};
#include "util/half.h"
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
index 8c9e1c54077..2eefd795aa1 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -27,6 +27,10 @@ class MetalKernelContext {
: launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
{}
+ MetalKernelContext(constant KernelParamsMetal &_launch_params_metal)
+ : launch_params_metal(_launch_params_metal)
+ {}
+
/* texture fetch adapter functions */
typedef uint64_t ccl_gpu_tex_object;
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index feca20ff475..27dc1f44c6f 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -16,10 +16,566 @@
/* Metal kernel entry points */
-// clang-format off
-
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/gpu/kernel.h"
-// clang-format on \ No newline at end of file
+/* MetalRT intersection handlers */
+#ifdef __METALRT__
+
+/* Return type for a bounding box intersection function. */
+struct BoundingBoxIntersectionResult
+{
+ bool accept [[accept_intersection]];
+ bool continue_search [[continue_search]];
+ float distance [[distance]];
+};
+
+/* Return type for a triangle intersection function. */
+struct TriangleIntersectionResult
+{
+ bool accept [[accept_intersection]];
+ bool continue_search [[continue_search]];
+};
+
+enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
+
+template<typename TReturn, uint intersection_type>
+TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
+ const uint object,
+ const uint primitive_id,
+ const float2 barycentrics,
+ const float ray_tmax)
+{
+ TReturn result;
+
+#ifdef __BVH_LOCAL__
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+
+ if (object != payload.local_object) {
+ /* Only intersect with matching object */
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+
+ const short max_hits = payload.max_hits;
+ if (max_hits == 0) {
+ /* Special case for when no hit information is requested, just report that something was hit */
+ payload.result = true;
+ result.accept = true;
+ result.continue_search = false;
+ return result;
+ }
+
+ int hit = 0;
+ if (payload.has_lcg_state) {
+ for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
+ if (ray_tmax == payload.local_isect.hits[i].t) {
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+ }
+
+ hit = payload.local_isect.num_hits++;
+
+ if (payload.local_isect.num_hits > max_hits) {
+ hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
+ if (hit >= max_hits) {
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+ }
+ }
+ else {
+ if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
+ /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+
+ payload.local_isect.num_hits = 1;
+ }
+
+ ray_data Intersection *isect = &payload.local_isect.hits[hit];
+ isect->t = ray_tmax;
+ isect->prim = prim;
+ isect->object = object;
+ isect->type = kernel_tex_fetch(__objects, object).primitive_type;
+
+ isect->u = 1.0f - barycentrics.y - barycentrics.x;
+ isect->v = barycentrics.x;
+
+ /* Record geometric normal */
+ const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w;
+ const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
+ const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
+ const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
+ payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ /* Continue tracing (without this the trace call would return after the first hit) */
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+#endif
+}
+
+[[intersection(triangle, triangle_data, METALRT_TAGS)]]
+TriangleIntersectionResult
+__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
+ uint instance_id [[user_instance_id]],
+ uint primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
+{
+ return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+ launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
+{
+ /* unused function */
+ BoundingBoxIntersectionResult result;
+ result.distance = ray_tmax;
+ result.accept = false;
+ result.continue_search = false;
+ return result;
+}
+
+template<uint intersection_type>
+bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ uint object,
+ uint prim,
+ const float2 barycentrics,
+ const float ray_tmax)
+{
+#ifdef __SHADOW_RECORD_ALL__
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = payload.visibility;
+ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ /* continue search */
+ return true;
+ }
+# endif
+
+ float u = 0.0f, v = 0.0f;
+ int type = 0;
+ if (intersection_type == METALRT_HIT_TRIANGLE) {
+ u = 1.0f - barycentrics.y - barycentrics.x;
+ v = barycentrics.x;
+ type = kernel_tex_fetch(__objects, object).primitive_type;
+ }
+# ifdef __HAIR__
+ else {
+ u = barycentrics.x;
+ v = barycentrics.y;
+
+ const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ type = segment.type;
+ prim = segment.prim;
+
+ /* Filter out curve endcaps */
+ if (u == 0.0f || u == 1.0f) {
+ /* continue search */
+ return true;
+ }
+ }
+# endif
+
+# ifndef __TRANSPARENT_SHADOWS__
+ /* No transparent shadows support compiled in, make opaque. */
+ payload.result = true;
+ /* terminate ray */
+ return false;
+# else
+ short max_hits = payload.max_hits;
+ short num_hits = payload.num_hits;
+ short num_recorded_hits = payload.num_recorded_hits;
+
+ MetalKernelContext context(launch_params_metal);
+
+ /* If no transparent shadows, all light is blocked and we can stop immediately. */
+ if (num_hits >= max_hits ||
+ !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+ payload.result = true;
+ /* terminate ray */
+ return false;
+ }
+
+ /* Always use baked shadow transparency for curves. */
+ if (type & PRIMITIVE_ALL_CURVE) {
+ float throughput = payload.throughput;
+ throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
+ payload.throughput = throughput;
+ payload.num_hits += 1;
+
+ if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ /* Accept result and terminate if throughput is sufficiently low */
+ payload.result = true;
+ return false;
+ }
+ else {
+ return true;
+ }
+ }
+
+ payload.num_hits += 1;
+ payload.num_recorded_hits += 1;
+
+ uint record_index = num_recorded_hits;
+
+ const IntegratorShadowState state = payload.state;
+
+ const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits) {
+ /* If maximum number of hits reached, find a hit to replace. */
+ float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+ uint max_recorded_hit = 0;
+
+ for (int i = 1; i < max_record_hits; i++) {
+ const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+ if (isect_t > max_recorded_t) {
+ max_recorded_t = isect_t;
+ max_recorded_hit = i;
+ }
+ }
+
+ if (ray_tmax >= max_recorded_t) {
+ /* Accept hit, so that we don't consider any more hits beyond the distance of the
+ * current hit anymore. */
+ payload.result = true;
+ return true;
+ }
+
+ record_index = max_recorded_hit;
+ }
+
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+
+ /* Continue tracing. */
+# endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __SHADOW_RECORD_ALL__ */
+
+ return true;
+}
+
+[[intersection(triangle, triangle_data, METALRT_TAGS)]]
+TriangleIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+
+ TriangleIntersectionResult result;
+ result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
+ launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
+ result.accept = !result.continue_search;
+ return result;
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
+{
+ /* unused function */
+ BoundingBoxIntersectionResult result;
+ result.distance = ray_tmax;
+ result.accept = false;
+ result.continue_search = false;
+ return result;
+}
+
+template<typename TReturnType, uint intersection_type>
+inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const float u)
+{
+ TReturnType result;
+
+# ifdef __HAIR__
+ if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
+ /* Filter out curve endcaps. */
+ if (u == 0.0f || u == 1.0f) {
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+ }
+# endif
+
+# ifdef __VISIBILITY_FLAG__
+ uint visibility = payload.visibility;
+ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ result.accept = false;
+ result.continue_search = true;
+ return result;
+ }
+
+ /* Shadow ray early termination. */
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ result.accept = true;
+ result.continue_search = false;
+ return result;
+ }
+# endif
+
+ result.accept = true;
+ result.continue_search = true;
+ return result;
+}
+
+[[intersection(triangle, triangle_data, METALRT_TAGS)]]
+TriangleIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+ launch_params_metal, payload, object, prim, 0.0f);
+ if (result.accept) {
+ payload.prim = prim;
+ payload.type = kernel_tex_fetch(__objects, object).primitive_type;
+ }
+ return result;
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
+{
+ /* Unused function */
+ BoundingBoxIntersectionResult result;
+ result.accept = false;
+ result.continue_search = true;
+ result.distance = ray_tmax;
+ return result;
+}
+
+#ifdef __HAIR__
+ccl_device_inline
+void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_origin,
+ const float3 ray_direction,
+ float time,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
+{
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = payload.visibility;
+ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ float3 P = ray_origin;
+ float3 dir = ray_direction;
+
+ /* The direction is not normalized by default, but the curve intersection routine expects that */
+ float len;
+ dir = normalize_len(dir, &len);
+
+ Intersection isect;
+ isect.t = ray_tmax;
+ /* Transform maximum distance into object space. */
+ if (isect.t != FLT_MAX)
+ isect.t *= len;
+
+ MetalKernelContext context(launch_params_metal);
+ if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
+ result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
+ launch_params_metal, payload, object, prim, isect.u);
+ if (result.accept) {
+ result.distance = isect.t / len;
+ payload.u = isect.u;
+ payload.v = isect.v;
+ payload.prim = prim;
+ payload.type = type;
+ }
+ }
+}
+
+ccl_device_inline
+void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_origin,
+ const float3 ray_direction,
+ float time,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
+{
+ const uint visibility = payload.visibility;
+
+ float3 P = ray_origin;
+ float3 dir = ray_direction;
+
+ /* The direction is not normalized by default, but the curve intersection routine expects that */
+ float len;
+ dir = normalize_len(dir, &len);
+
+ Intersection isect;
+ isect.t = ray_tmax;
+ /* Transform maximum distance into object space */
+ if (isect.t != FLT_MAX)
+ isect.t *= len;
+
+ MetalKernelContext context(launch_params_metal);
+ if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
+ result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
+ launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+ result.accept = !result.continue_search;
+
+ if (result.accept) {
+ result.distance = isect.t / len;
+ }
+ }
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmax [[max_distance]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+
+ BoundingBoxIntersectionResult result;
+ result.accept = false;
+ result.continue_search = true;
+ result.distance = ray_tmax;
+
+ if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+# if defined(__METALRT_MOTION__)
+ payload.time,
+# else
+ 0.0f,
+# endif
+ ray_tmax, result);
+ }
+
+ return result;
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmax [[max_distance]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+
+ BoundingBoxIntersectionResult result;
+ result.accept = false;
+ result.continue_search = true;
+ result.distance = ray_tmax;
+
+ if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+# if defined(__METALRT_MOTION__)
+ payload.time,
+# else
+ 0.0f,
+# endif
+ ray_tmax, result);
+ }
+
+ return result;
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmax [[max_distance]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+
+ BoundingBoxIntersectionResult result;
+ result.accept = false;
+ result.continue_search = true;
+ result.distance = ray_tmax;
+ metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+# if defined(__METALRT_MOTION__)
+ payload.time,
+# else
+ 0.0f,
+# endif
+ ray_tmax, result);
+
+ return result;
+}
+
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmax [[max_distance]])
+{
+ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+
+ BoundingBoxIntersectionResult result;
+ result.accept = false;
+ result.continue_search = true;
+ result.distance = ray_tmax;
+
+ metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+# if defined(__METALRT_MOTION__)
+ payload.time,
+# else
+ 0.0f,
+# endif
+ ray_tmax, result);
+
+ return result;
+}
+
+#endif /* __HAIR__ */
+#endif /* __METALRT__ */
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index 0619c135c39..db4233624b9 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -21,6 +21,7 @@
#include <optix.h>
#define __KERNEL_GPU__
+#define __KERNEL_GPU_RAYTRACING__
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
#define __KERNEL_OPTIX__
#define CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 4feed59d018..c639dc87f35 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -31,9 +31,11 @@
#include "kernel/integrator/intersect_shadow.h"
#include "kernel/integrator/intersect_subsurface.h"
#include "kernel/integrator/intersect_volume_stack.h"
-
// clang-format on
+#define OPTIX_DEFINE_ABI_VERSION_ONLY
+#include <optix_function_table.h>
+
template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
{
return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
@@ -95,9 +97,9 @@ extern "C" __global__ void __miss__kernel_optix_miss()
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
{
-#ifdef __HAIR__
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
if (!optixIsTriangleHit()) {
- /* Ignore curves. */
+ /* Ignore curves and points. */
return optixIgnoreIntersection();
}
#endif
@@ -192,7 +194,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
type = kernel_tex_fetch(__objects, object).primitive_type;
}
# ifdef __HAIR__
- else {
+ else if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) {
u = __uint_as_float(optixGetAttribute_0());
v = __uint_as_float(optixGetAttribute_1());
@@ -200,12 +202,19 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
type = segment.type;
prim = segment.prim;
+# if OPTIX_ABI_VERSION < 55
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
}
+# endif
}
# endif
+ else {
+ type = kernel_tex_fetch(__objects, object).primitive_type;
+ u = 0.0f;
+ v = 0.0f;
+ }
# ifndef __TRANSPARENT_SHADOWS__
/* No transparent shadows support compiled in, make opaque. */
@@ -232,7 +241,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
- optixSetPayload_4(true);
+ optixSetPayload_5(true);
return optixTerminateRay();
}
else {
@@ -287,7 +296,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
{
-#ifdef __HAIR__
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
@@ -310,13 +319,15 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test()
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
- if (!optixIsTriangleHit()) {
+# if OPTIX_ABI_VERSION < 55
+ if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) {
/* Filter out curve endcaps. */
const float u = __uint_as_float(optixGetAttribute_0());
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
}
}
+# endif
#endif
#ifdef __VISIBILITY_FLAG__
@@ -348,13 +359,19 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_3(prim);
optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
}
- else {
+ else if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) {
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
optixSetPayload_2(optixGetAttribute_1());
optixSetPayload_3(segment.prim);
optixSetPayload_5(segment.type);
}
+ else {
+ optixSetPayload_1(0);
+ optixSetPayload_2(0);
+ optixSetPayload_3(prim);
+ optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
+ }
}
#ifdef __HAIR__
@@ -405,4 +422,45 @@ extern "C" __global__ void __intersection__curve_ribbon()
optix_intersection_curve(prim, type);
}
}
+
+#endif
+
+#ifdef __POINTCLOUD__
+extern "C" __global__ void __intersection__point()
+{
+ const int prim = optixGetPrimitiveIndex();
+ const int object = get_object_id();
+ const int type = kernel_tex_fetch(__objects, object).primitive_type;
+
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ float3 P = optixGetObjectRayOrigin();
+ float3 dir = optixGetObjectRayDirection();
+
+ /* The direction is not normalized by default, the point intersection routine expects that. */
+ float len;
+ dir = normalize_len(dir, &len);
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+ /* Transform maximum distance into object space. */
+ if (isect.t != FLT_MAX) {
+ isect.t *= len;
+ }
+
+ if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
+ optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
+ }
+}
#endif