diff options
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 38 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/work_stealing.h | 25 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 31 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/context_begin.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/kernel.metal | 562 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/compat.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/kernel.cu | 74 |
7 files changed, 707 insertions, 28 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 22e2a61a06d..b50f492e8c7 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -19,7 +19,8 @@ #include "kernel/device/gpu/parallel_active_index.h" #include "kernel/device/gpu/parallel_prefix_sum.h" #include "kernel/device/gpu/parallel_sorted_index.h" -#include "kernel/device/gpu/work_stealing.h" + +#include "kernel/sample/lcg.h" /* Include constant tables before entering Metal's context class scope (context_begin.h) */ #include "kernel/tables.h" @@ -28,6 +29,8 @@ # include "kernel/device/metal/context_begin.h" #endif +#include "kernel/device/gpu/work_stealing.h" + #include "kernel/integrator/state.h" #include "kernel/integrator/state_flow.h" #include "kernel/integrator/state_util.h" @@ -94,7 +97,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) const int state = tile->path_index_offset + tile_work_index; uint x, y, sample; - get_work_pixel(tile, tile_work_index, &x, &y, &sample); + ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample)); ccl_gpu_kernel_call( integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample)); @@ -125,7 +128,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) const int state = tile->path_index_offset + tile_work_index; uint x, y, sample; - get_work_pixel(tile, tile_work_index, &x, &y, &sample); + ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample)); ccl_gpu_kernel_call( integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample)); @@ -547,6 +550,33 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb #endif } +#ifdef __KERNEL_METAL__ + +/* Fetch into a local variable on Metal - there is minimal overhead. Templating the + * film_get_pass_pixel_... functions works on MSL, but not on other compilers. */ +# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \ + float local_pixel[4]; \ + film_get_pass_pixel_##variant(&kfilm_convert, buffer, local_pixel); \ + if (input_channel_count >= 1) { \ + pixel[0] = local_pixel[0]; \ + } \ + if (input_channel_count >= 2) { \ + pixel[1] = local_pixel[1]; \ + } \ + if (input_channel_count >= 3) { \ + pixel[2] = local_pixel[2]; \ + } \ + if (input_channel_count >= 4) { \ + pixel[3] = local_pixel[3]; \ + } + +#else + +# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \ + film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); + +#endif + #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ ccl_gpu_kernel_signature(film_convert_##variant, \ @@ -574,7 +604,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb ccl_global float *pixel = pixels + \ (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \ \ - film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ + FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \ } \ \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ diff --git a/intern/cycles/kernel/device/gpu/work_stealing.h b/intern/cycles/kernel/device/gpu/work_stealing.h index fab0915c38e..c3083948057 100644 --- a/intern/cycles/kernel/device/gpu/work_stealing.h +++ b/intern/cycles/kernel/device/gpu/work_stealing.h @@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile, ccl_private uint *y, ccl_private uint *sample) { -#if 0 - /* Keep threads for the same sample together. */ - uint tile_pixels = tile->w * tile->h; - uint sample_offset = global_work_index / tile_pixels; - uint pixel_offset = global_work_index - sample_offset * tile_pixels; -#else - /* Keeping threads for the same pixel together. - * Appears to improve performance by a few % on CUDA and OptiX. */ - uint sample_offset = global_work_index % tile->num_samples; - uint pixel_offset = global_work_index / tile->num_samples; -#endif + uint sample_offset, pixel_offset; + + if (kernel_data.integrator.scrambling_distance < 0.9f) { + /* Keep threads for the same sample together. */ + uint tile_pixels = tile->w * tile->h; + sample_offset = global_work_index / tile_pixels; + pixel_offset = global_work_index - sample_offset * tile_pixels; + } + else { + /* Keeping threads for the same pixel together. + * Appears to improve performance by a few % on CUDA and OptiX. */ + sample_offset = global_work_index % tile->num_samples; + pixel_offset = global_work_index / tile->num_samples; + } uint y_offset = pixel_offset / tile->w; uint x_offset = pixel_offset - y_offset * tile->w; diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 61597a4acfc..a51afc37fc0 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -32,6 +32,10 @@ using namespace metal; +#ifdef __METALRT__ +using namespace metal::raytracing; +#endif + #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wsign-compare" #pragma clang diagnostic ignored "-Wuninitialized" @@ -47,7 +51,7 @@ using namespace metal; #define ccl_global device #define ccl_inline_constant static constant constexpr #define ccl_device_constant constant -#define ccl_constant const device +#define ccl_constant constant #define ccl_gpu_shared threadgroup #define ccl_private thread #define ccl_may_alias @@ -113,7 +117,7 @@ struct kernel_gpu_##name \ uint simd_group_index, \ uint num_simd_groups) ccl_global const; \ }; \ -kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \ +kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \ constant KernelParamsMetal &ccl_restrict _launch_params_metal, \ constant MetalAncillaries *_metal_ancillaries, \ threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \ @@ -246,6 +250,22 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ #define __device__ +#ifdef __METALRT__ + +# define __KERNEL_GPU_RAYTRACING__ + +# if defined(__METALRT_MOTION__) +# define METALRT_TAGS instancing, instance_motion, primitive_motion +# else +# define METALRT_TAGS instancing +# endif /* __METALRT_MOTION__ */ + +typedef acceleration_structure<METALRT_TAGS> metalrt_as_type; +typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type; +typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type; + +#endif /* __METALRT__ */ + /* texture bindings and sampler setup */ struct Texture2DParamsMetal { @@ -258,6 +278,13 @@ struct Texture3DParamsMetal { struct MetalAncillaries { device Texture2DParamsMetal *textures_2d; device Texture3DParamsMetal *textures_3d; + +#ifdef __METALRT__ + metalrt_as_type accel_struct; + metalrt_ift_type ift_default; + metalrt_ift_type ift_shadow; + metalrt_ift_type ift_local; +#endif }; #include "util/half.h" diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h index 8c9e1c54077..2eefd795aa1 100644 --- a/intern/cycles/kernel/device/metal/context_begin.h +++ b/intern/cycles/kernel/device/metal/context_begin.h @@ -27,6 +27,10 @@ class MetalKernelContext { : launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries) {} + MetalKernelContext(constant KernelParamsMetal &_launch_params_metal) + : launch_params_metal(_launch_params_metal) + {} + /* texture fetch adapter functions */ typedef uint64_t ccl_gpu_tex_object; diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index feca20ff475..27dc1f44c6f 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -16,10 +16,566 @@ /* Metal kernel entry points */ -// clang-format off - #include "kernel/device/metal/compat.h" #include "kernel/device/metal/globals.h" #include "kernel/device/gpu/kernel.h" -// clang-format on
\ No newline at end of file +/* MetalRT intersection handlers */ +#ifdef __METALRT__ + +/* Return type for a bounding box intersection function. */ +struct BoundingBoxIntersectionResult +{ + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; + float distance [[distance]]; +}; + +/* Return type for a triangle intersection function. */ +struct TriangleIntersectionResult +{ + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; +}; + +enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; + +template<typename TReturn, uint intersection_type> +TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, + const uint object, + const uint primitive_id, + const float2 barycentrics, + const float ray_tmax) +{ + TReturn result; + +#ifdef __BVH_LOCAL__ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + + if (object != payload.local_object) { + /* Only intersect with matching object */ + result.accept = false; + result.continue_search = true; + return result; + } + + const short max_hits = payload.max_hits; + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + payload.result = true; + result.accept = true; + result.continue_search = false; + return result; + } + + int hit = 0; + if (payload.has_lcg_state) { + for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { + if (ray_tmax == payload.local_isect.hits[i].t) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + hit = payload.local_isect.num_hits++; + + if (payload.local_isect.num_hits > max_hits) { + hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; + if (hit >= max_hits) { + result.accept = false; + result.continue_search = true; + return result; + } + } + } + else { + if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { + /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */ + result.accept = false; + result.continue_search = true; + return result; + } + + payload.local_isect.num_hits = 1; + } + + ray_data Intersection *isect = &payload.local_isect.hits[hit]; + isect->t = ray_tmax; + isect->prim = prim; + isect->object = object; + isect->type = kernel_tex_fetch(__objects, object).primitive_type; + + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + + /* Record geometric normal */ + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w; + const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); + const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); + const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit) */ + result.accept = false; + result.continue_search = true; + return result; +#endif +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint instance_id [[user_instance_id]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<uint intersection_type> +bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + uint object, + uint prim, + const float2 barycentrics, + const float ray_tmax) +{ +#ifdef __SHADOW_RECORD_ALL__ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + /* continue search */ + return true; + } +# endif + + float u = 0.0f, v = 0.0f; + int type = 0; + if (intersection_type == METALRT_HIT_TRIANGLE) { + u = 1.0f - barycentrics.y - barycentrics.x; + v = barycentrics.x; + type = kernel_tex_fetch(__objects, object).primitive_type; + } +# ifdef __HAIR__ + else { + u = barycentrics.x; + v = barycentrics.y; + + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + type = segment.type; + prim = segment.prim; + + /* Filter out curve endcaps */ + if (u == 0.0f || u == 1.0f) { + /* continue search */ + return true; + } + } +# endif + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + payload.result = true; + /* terminate ray */ + return false; +# else + short max_hits = payload.max_hits; + short num_hits = payload.num_hits; + short num_recorded_hits = payload.num_recorded_hits; + + MetalKernelContext context(launch_params_metal); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + payload.result = true; + /* terminate ray */ + return false; + } + + /* Always use baked shadow transparency for curves. */ + if (type & PRIMITIVE_ALL_CURVE) { + float throughput = payload.throughput; + throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); + payload.throughput = throughput; + payload.num_hits += 1; + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + /* Accept result and terminate if throughput is sufficiently low */ + payload.result = true; + return false; + } + else { + return true; + } + } + + payload.num_hits += 1; + payload.num_recorded_hits += 1; + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = payload.state; + + const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. */ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (ray_tmax >= max_recorded_t) { + /* Accept hit, so that we don't consider any more hits beyond the distance of the + * current hit anymore. */ + payload.result = true; + return true; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. */ +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ + + return true; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + + TriangleIntersectionResult result; + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, barycentrics, ray_tmax); + result.accept = !result.continue_search; + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<typename TReturnType, uint intersection_type> +inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const float u) +{ + TReturnType result; + +# ifdef __HAIR__ + if (intersection_type == METALRT_HIT_BOUNDING_BOX) { + /* Filter out curve endcaps. */ + if (u == 0.0f || u == 1.0f) { + result.accept = false; + result.continue_search = true; + return result; + } + } +# endif + +# ifdef __VISIBILITY_FLAG__ + uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + result.accept = false; + result.continue_search = true; + return result; + } + + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + result.accept = true; + result.continue_search = false; + return result; + } +# endif + + result.accept = true; + result.continue_search = true; + return result; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, 0.0f); + if (result.accept) { + payload.prim = prim; + payload.type = kernel_tex_fetch(__objects, object).primitive_type; + } + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) +{ + /* Unused function */ + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + return result; +} + +#ifdef __HAIR__ +ccl_device_inline +void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the curve intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space. */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t / len; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline +void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the curve intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + + if (result.accept) { + result.distance = isect.t / len; + } + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} + +#endif /* __HAIR__ */ +#endif /* __METALRT__ */ diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index 0619c135c39..db4233624b9 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -21,6 +21,7 @@ #include <optix.h> #define __KERNEL_GPU__ +#define __KERNEL_GPU_RAYTRACING__ #define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */ #define __KERNEL_OPTIX__ #define CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 4feed59d018..c639dc87f35 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -31,9 +31,11 @@ #include "kernel/integrator/intersect_shadow.h" #include "kernel/integrator/intersect_subsurface.h" #include "kernel/integrator/intersect_volume_stack.h" - // clang-format on +#define OPTIX_DEFINE_ABI_VERSION_ONLY +#include <optix_function_table.h> + template<typename T> ccl_device_forceinline T *get_payload_ptr_0() { return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1()); @@ -95,9 +97,9 @@ extern "C" __global__ void __miss__kernel_optix_miss() extern "C" __global__ void __anyhit__kernel_optix_local_hit() { -#ifdef __HAIR__ +#if defined(__HAIR__) || defined(__POINTCLOUD__) if (!optixIsTriangleHit()) { - /* Ignore curves. */ + /* Ignore curves and points. */ return optixIgnoreIntersection(); } #endif @@ -192,7 +194,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() type = kernel_tex_fetch(__objects, object).primitive_type; } # ifdef __HAIR__ - else { + else if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) { u = __uint_as_float(optixGetAttribute_0()); v = __uint_as_float(optixGetAttribute_1()); @@ -200,12 +202,19 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() type = segment.type; prim = segment.prim; +# if OPTIX_ABI_VERSION < 55 /* Filter out curve endcaps. */ if (u == 0.0f || u == 1.0f) { return optixIgnoreIntersection(); } +# endif } # endif + else { + type = kernel_tex_fetch(__objects, object).primitive_type; + u = 0.0f; + v = 0.0f; + } # ifndef __TRANSPARENT_SHADOWS__ /* No transparent shadows support compiled in, make opaque. */ @@ -232,7 +241,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - optixSetPayload_4(true); + optixSetPayload_5(true); return optixTerminateRay(); } else { @@ -287,7 +296,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() extern "C" __global__ void __anyhit__kernel_optix_volume_test() { -#ifdef __HAIR__ +#if defined(__HAIR__) || defined(__POINTCLOUD__) if (!optixIsTriangleHit()) { /* Ignore curves. */ return optixIgnoreIntersection(); @@ -310,13 +319,15 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test() extern "C" __global__ void __anyhit__kernel_optix_visibility_test() { #ifdef __HAIR__ - if (!optixIsTriangleHit()) { +# if OPTIX_ABI_VERSION < 55 + if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) { /* Filter out curve endcaps. */ const float u = __uint_as_float(optixGetAttribute_0()); if (u == 0.0f || u == 1.0f) { return optixIgnoreIntersection(); } } +# endif #endif #ifdef __VISIBILITY_FLAG__ @@ -348,13 +359,19 @@ extern "C" __global__ void __closesthit__kernel_optix_hit() optixSetPayload_3(prim); optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); } - else { + else if (optixGetHitKind() & PRIMITIVE_ALL_CURVE) { const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ optixSetPayload_2(optixGetAttribute_1()); optixSetPayload_3(segment.prim); optixSetPayload_5(segment.type); } + else { + optixSetPayload_1(0); + optixSetPayload_2(0); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); + } } #ifdef __HAIR__ @@ -405,4 +422,45 @@ extern "C" __global__ void __intersection__curve_ribbon() optix_intersection_curve(prim, type); } } + +#endif + +#ifdef __POINTCLOUD__ +extern "C" __global__ void __intersection__point() +{ + const int prim = optixGetPrimitiveIndex(); + const int object = get_object_id(); + const int type = kernel_tex_fetch(__objects, object).primitive_type; + +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif + + float3 P = optixGetObjectRayOrigin(); + float3 dir = optixGetObjectRayDirection(); + + /* The direction is not normalized by default, the point intersection routine expects that. */ + float len; + dir = normalize_len(dir, &len); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection isect; + isect.t = optixGetRayTmax(); + /* Transform maximum distance into object space. */ + if (isect.t != FLT_MAX) { + isect.t *= len; + } + + if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL); + } +} #endif |