diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-07-25 22:16:34 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-07-26 01:17:37 +0300 |
commit | 4cf6524731c0856bfc6d6c0b7bcb2df0ad28b97a (patch) | |
tree | 8eec2e3f32155310e1605a4205102709341c0267 /intern | |
parent | f76a2c0d1895a43b932eb5e458732e54b6aa17c7 (diff) |
Fix Cycles Metal build errors after recent changes
float8 is a reserved type in Metal, but is not implemented. So rename to
float8_t for now.
Also move back intersection handlers to kernel.metal, they can't be in the
class that encapsulates the other Metal kernel functions.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/device/metal/bvh.h | 763 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/kernel.metal | 770 | ||||
-rw-r--r-- | intern/cycles/util/math_float8.h | 384 | ||||
-rw-r--r-- | intern/cycles/util/types_float8.h | 21 | ||||
-rw-r--r-- | intern/cycles/util/types_float8_impl.h | 30 |
5 files changed, 989 insertions, 979 deletions
diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h index d3a0ab1b519..f30b21abaf9 100644 --- a/intern/cycles/kernel/device/metal/bvh.h +++ b/intern/cycles/kernel/device/metal/bvh.h @@ -47,767 +47,6 @@ struct MetalRTIntersectionShadowPayload { bool result; }; -/* Intersection return types. */ - -/* For a bounding box intersection function. */ -struct BoundingBoxIntersectionResult { - bool accept [[accept_intersection]]; - bool continue_search [[continue_search]]; - float distance [[distance]]; -}; - -/* For a triangle intersection function. */ -struct TriangleIntersectionResult { - bool accept [[accept_intersection]]; - bool continue_search [[continue_search]]; -}; - -enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; - -/* Utilities. */ - -ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self, - const int object, - const int prim) -{ - return (self.prim == prim) && (self.object == object); -} - -ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self, - const int object, - const int prim) -{ - return ((self.prim == prim) && (self.object == object)) || - ((self.light_prim == prim) && (self.light_object == object)); -} - -ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self, - const int prim) -{ - return (self.prim == prim); -} - -/* Hit functions. */ - -template<typename TReturn, uint intersection_type> -TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, - const uint object, - const uint primitive_id, - const float2 barycentrics, - const float ray_tmax) -{ - TReturn result; - -#ifdef __BVH_LOCAL__ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - - if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { - /* Only intersect with matching object and skip self-intersecton. */ - result.accept = false; - result.continue_search = true; - return result; - } - - const short max_hits = payload.max_hits; - if (max_hits == 0) { - /* Special case for when no hit information is requested, just report that something was hit */ - payload.result = true; - result.accept = true; - result.continue_search = false; - return result; - } - - int hit = 0; - if (payload.has_lcg_state) { - for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { - if (ray_tmax == payload.local_isect.hits[i].t) { - result.accept = false; - result.continue_search = true; - return result; - } - } - - hit = payload.local_isect.num_hits++; - - if (payload.local_isect.num_hits > max_hits) { - hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; - if (hit >= max_hits) { - result.accept = false; - result.continue_search = true; - return result; - } - } - } - else { - if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { - /* Record closest intersection only. Do not terminate ray here, since there is no guarantee - * about distance ordering in any-hit */ - result.accept = false; - result.continue_search = true; - return result; - } - - payload.local_isect.num_hits = 1; - } - - ray_data Intersection *isect = &payload.local_isect.hits[hit]; - isect->t = ray_tmax; - isect->prim = prim; - isect->object = object; - isect->type = kernel_data_fetch(objects, object).primitive_type; - - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - - /* Record geometric normal */ - const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w; - const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0)); - const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1)); - const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2)); - payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - /* Continue tracing (without this the trace call would return after the first hit) */ - result.accept = false; - result.continue_search = true; - return result; -#endif -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult -__anyhit__cycles_metalrt_local_hit_tri( - constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], - uint instance_id [[user_instance_id]], - uint primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) -{ - return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) -{ - /* unused function */ - BoundingBoxIntersectionResult result; - result.distance = ray_tmax; - result.accept = false; - result.continue_search = false; - return result; -} - -template<uint intersection_type> -bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - uint object, - uint prim, - const float2 barycentrics, - const float ray_tmax) -{ -#ifdef __SHADOW_RECORD_ALL__ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - /* continue search */ - return true; - } -# endif - - if (intersection_skip_self_shadow(payload.self, object, prim)) { - /* continue search */ - return true; - } - - float u = 0.0f, v = 0.0f; - int type = 0; - if (intersection_type == METALRT_HIT_TRIANGLE) { - u = 1.0f - barycentrics.y - barycentrics.x; - v = barycentrics.x; - type = kernel_data_fetch(objects, object).primitive_type; - } -# ifdef __HAIR__ - else { - u = barycentrics.x; - v = barycentrics.y; - - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - type = segment.type; - prim = segment.prim; - - /* Filter out curve endcaps */ - if (u == 0.0f || u == 1.0f) { - /* continue search */ - return true; - } - } -# endif - -# ifndef __TRANSPARENT_SHADOWS__ - /* No transparent shadows support compiled in, make opaque. */ - payload.result = true; - /* terminate ray */ - return false; -# else - short max_hits = payload.max_hits; - short num_hits = payload.num_hits; - short num_recorded_hits = payload.num_recorded_hits; - - MetalKernelContext context(launch_params_metal); - - /* If no transparent shadows, all light is blocked and we can stop immediately. */ - if (num_hits >= max_hits || - !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { - payload.result = true; - /* terminate ray */ - return false; - } - - /* Always use baked shadow transparency for curves. */ - if (type & PRIMITIVE_CURVE) { - float throughput = payload.throughput; - throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); - payload.throughput = throughput; - payload.num_hits += 1; - - if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - /* Accept result and terminate if throughput is sufficiently low */ - payload.result = true; - return false; - } - else { - return true; - } - } - - payload.num_hits += 1; - payload.num_recorded_hits += 1; - - uint record_index = num_recorded_hits; - - const IntegratorShadowState state = payload.state; - - const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); - if (record_index >= max_record_hits) { - /* If maximum number of hits reached, find a hit to replace. */ - float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); - uint max_recorded_hit = 0; - - for (int i = 1; i < max_record_hits; i++) { - const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); - if (isect_t > max_recorded_t) { - max_recorded_t = isect_t; - max_recorded_hit = i; - } - } - - if (ray_tmax >= max_recorded_t) { - /* Accept hit, so that we don't consider any more hits beyond the distance of the - * current hit anymore. */ - payload.result = true; - return true; - } - - record_index = max_recorded_hit; - } - - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; - - /* Continue tracing. */ -# endif /* __TRANSPARENT_SHADOWS__ */ -#endif /* __SHADOW_RECORD_ALL__ */ - - return true; -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult -__anyhit__cycles_metalrt_shadow_all_hit_tri( - constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - - TriangleIntersectionResult result; - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, barycentrics, ray_tmax); - result.accept = !result.continue_search; - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) -{ - /* unused function */ - BoundingBoxIntersectionResult result; - result.distance = ray_tmax; - result.accept = false; - result.continue_search = false; - return result; -} - -template<typename TReturnType, uint intersection_type> -inline TReturnType metalrt_visibility_test( - constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const float u) -{ - TReturnType result; - -#ifdef __HAIR__ - if (intersection_type == METALRT_HIT_BOUNDING_BOX) { - /* Filter out curve endcaps. */ - if (u == 0.0f || u == 1.0f) { - result.accept = false; - result.continue_search = true; - return result; - } - } -#endif - - uint visibility = payload.visibility; -#ifdef __VISIBILITY_FLAG__ - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - result.accept = false; - result.continue_search = true; - return result; - } -#endif - - /* Shadow ray early termination. */ - if (visibility & PATH_RAY_SHADOW_OPAQUE) { - if (intersection_skip_self_shadow(payload.self, object, prim)) { - result.accept = false; - result.continue_search = true; - return result; - } - else { - result.accept = true; - result.continue_search = false; - return result; - } - } - else { - if (intersection_skip_self(payload.self, object, prim)) { - result.accept = false; - result.continue_search = true; - return result; - } - } - - result.accept = true; - result.continue_search = true; - return result; -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult -__anyhit__cycles_metalrt_visibility_test_tri( - constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - TriangleIntersectionResult result = - metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, 0.0f); - if (result.accept) { - payload.prim = prim; - payload.type = kernel_data_fetch(objects, object).primitive_type; - } - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) -{ - /* Unused function */ - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - return result; -} - -/* Primitive intersection functions. */ - -#ifdef __HAIR__ -ccl_device_inline void metalrt_intersection_curve( - constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_P, - const float3 ray_D, - float time, - const float ray_tmin, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - Intersection isect; - isect.t = ray_tmax; - - MetalKernelContext context(launch_params_metal); - if (context.curve_intersect( - NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { - result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); - if (result.accept) { - result.distance = isect.t; - payload.u = isect.u; - payload.v = isect.v; - payload.prim = prim; - payload.type = type; - } - } -} - -ccl_device_inline void metalrt_intersection_curve_shadow( - constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - float time, - const float ray_tmin, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ - const uint visibility = payload.visibility; - - Intersection isect; - isect.t = ray_tmax; - - MetalKernelContext context(launch_params_metal); - if (context.curve_intersect( - NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); - result.accept = !result.continue_search; - } -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload - [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_P [[origin]], - const float3 ray_D [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve(launch_params_metal, - payload, - object, - segment.prim, - segment.type, - ray_P, - ray_D, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - } - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__curve_ribbon_shadow( - constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_P [[origin]], - const float3 ray_D [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve_shadow(launch_params_metal, - payload, - object, - segment.prim, - segment.type, - ray_P, - ray_D, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - } - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload - [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_P [[origin]], - const float3 ray_D [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - metalrt_intersection_curve(launch_params_metal, - payload, - object, - segment.prim, - segment.type, - ray_P, - ray_D, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__curve_all_shadow( - constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_P [[origin]], - const float3 ray_D [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_curve_shadow(launch_params_metal, - payload, - object, - segment.prim, - segment.type, - ray_P, - ray_D, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - - return result; -} -#endif /* __HAIR__ */ - -#ifdef __POINTCLOUD__ -ccl_device_inline void metalrt_intersection_point( - constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_P, - const float3 ray_D, - float time, - const float ray_tmin, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - Intersection isect; - isect.t = ray_tmax; - - MetalKernelContext context(launch_params_metal); - if (context.point_intersect( - NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { - result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); - if (result.accept) { - result.distance = isect.t; - payload.u = isect.u; - payload.v = isect.v; - payload.prim = prim; - payload.type = type; - } - } -} - -ccl_device_inline void metalrt_intersection_point_shadow( - constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_P, - const float3 ray_D, - float time, - const float ray_tmin, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ - const uint visibility = payload.visibility; - - Intersection isect; - isect.t = ray_tmax; - - MetalKernelContext context(launch_params_metal); - if (context.point_intersect( - NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); - result.accept = !result.continue_search; - - if (result.accept) { - result.distance = isect.t; - } - } -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const int type = kernel_data_fetch(objects, object).primitive_type; - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_point(launch_params_metal, - payload, - object, - prim, - type, - ray_origin, - ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult -__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload - [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmin [[min_distance]], - const float ray_tmax [[max_distance]]) -{ - const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const int type = kernel_data_fetch(objects, object).primitive_type; - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_point_shadow(launch_params_metal, - payload, - object, - prim, - type, - ray_origin, - ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmin, - ray_tmax, - result); - - return result; -} -#endif /* __POINTCLOUD__ */ - /* Scene intersection. */ ccl_device_intersect bool scene_intersect(KernelGlobals kg, @@ -815,7 +54,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, const uint visibility, ccl_private Intersection *isect) { - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { isect->t = ray->tmax; isect->type = PRIMITIVE_NONE; return false; diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index 3df81fcf369..b295e081f3f 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -1,9 +1,777 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2021-2022 Blender Foundation */ -/* Metal kernel entry points */ +/* Metal kernel entry points. */ #include "kernel/device/metal/compat.h" #include "kernel/device/metal/globals.h" #include "kernel/device/metal/function_constants.h" #include "kernel/device/gpu/kernel.h" + +/* MetalRT intersection handlers. */ + +#ifdef __METALRT__ + +/* Intersection return types. */ + +/* For a bounding box intersection function. */ +struct BoundingBoxIntersectionResult { + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; + float distance [[distance]]; +}; + +/* For a triangle intersection function. */ +struct TriangleIntersectionResult { + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; +}; + +enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; + +/* Utilities. */ + +ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return (self.prim == prim) && (self.object == object); +} + +ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return ((self.prim == prim) && (self.object == object)) || + ((self.light_prim == prim) && (self.light_object == object)); +} + +ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self, + const int prim) +{ + return (self.prim == prim); +} + +/* Hit functions. */ + +template<typename TReturn, uint intersection_type> +TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, + const uint object, + const uint primitive_id, + const float2 barycentrics, + const float ray_tmax) +{ + TReturn result; + +#ifdef __BVH_LOCAL__ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + + if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { + /* Only intersect with matching object and skip self-intersecton. */ + result.accept = false; + result.continue_search = true; + return result; + } + + const short max_hits = payload.max_hits; + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + payload.result = true; + result.accept = true; + result.continue_search = false; + return result; + } + + int hit = 0; + if (payload.has_lcg_state) { + for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { + if (ray_tmax == payload.local_isect.hits[i].t) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + hit = payload.local_isect.num_hits++; + + if (payload.local_isect.num_hits > max_hits) { + hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; + if (hit >= max_hits) { + result.accept = false; + result.continue_search = true; + return result; + } + } + } + else { + if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { + /* Record closest intersection only. Do not terminate ray here, since there is no guarantee + * about distance ordering in any-hit */ + result.accept = false; + result.continue_search = true; + return result; + } + + payload.local_isect.num_hits = 1; + } + + ray_data Intersection *isect = &payload.local_isect.hits[hit]; + isect->t = ray_tmax; + isect->prim = prim; + isect->object = object; + isect->type = kernel_data_fetch(objects, object).primitive_type; + + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + + /* Record geometric normal */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w; + const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0)); + const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1)); + const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2)); + payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit) */ + result.accept = false; + result.continue_search = true; + return result; +#endif +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_local_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint instance_id [[user_instance_id]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<uint intersection_type> +bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + uint object, + uint prim, + const float2 barycentrics, + const float ray_tmax) +{ +#ifdef __SHADOW_RECORD_ALL__ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + /* continue search */ + return true; + } +# endif + + if (intersection_skip_self_shadow(payload.self, object, prim)) { + /* continue search */ + return true; + } + + float u = 0.0f, v = 0.0f; + int type = 0; + if (intersection_type == METALRT_HIT_TRIANGLE) { + u = 1.0f - barycentrics.y - barycentrics.x; + v = barycentrics.x; + type = kernel_data_fetch(objects, object).primitive_type; + } +# ifdef __HAIR__ + else { + u = barycentrics.x; + v = barycentrics.y; + + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + type = segment.type; + prim = segment.prim; + + /* Filter out curve endcaps */ + if (u == 0.0f || u == 1.0f) { + /* continue search */ + return true; + } + } +# endif + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + payload.result = true; + /* terminate ray */ + return false; +# else + short max_hits = payload.max_hits; + short num_hits = payload.num_hits; + short num_recorded_hits = payload.num_recorded_hits; + + MetalKernelContext context(launch_params_metal); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + payload.result = true; + /* terminate ray */ + return false; + } + + /* Always use baked shadow transparency for curves. */ + if (type & PRIMITIVE_CURVE) { + float throughput = payload.throughput; + throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); + payload.throughput = throughput; + payload.num_hits += 1; + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + /* Accept result and terminate if throughput is sufficiently low */ + payload.result = true; + return false; + } + else { + return true; + } + } + + payload.num_hits += 1; + payload.num_recorded_hits += 1; + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = payload.state; + + const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. */ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (ray_tmax >= max_recorded_t) { + /* Accept hit, so that we don't consider any more hits beyond the distance of the + * current hit anymore. */ + payload.result = true; + return true; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. */ +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ + + return true; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + + TriangleIntersectionResult result; + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, barycentrics, ray_tmax); + result.accept = !result.continue_search; + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<typename TReturnType, uint intersection_type> +inline TReturnType metalrt_visibility_test( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const float u) +{ + TReturnType result; + +#ifdef __HAIR__ + if (intersection_type == METALRT_HIT_BOUNDING_BOX) { + /* Filter out curve endcaps. */ + if (u == 0.0f || u == 1.0f) { + result.accept = false; + result.continue_search = true; + return result; + } + } +#endif + + uint visibility = payload.visibility; +#ifdef __VISIBILITY_FLAG__ + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + result.accept = false; + result.continue_search = true; + return result; + } +#endif + + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + if (intersection_skip_self_shadow(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } + else { + result.accept = true; + result.continue_search = false; + return result; + } + } + else { + if (intersection_skip_self(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + result.accept = true; + result.continue_search = true; + return result; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_visibility_test_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + TriangleIntersectionResult result = + metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, 0.0f); + if (result.accept) { + payload.prim = prim; + payload.type = kernel_data_fetch(objects, object).primitive_type; + } + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) +{ + /* Unused function */ + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + return result; +} + +/* Primitive intersection functions. */ + +#ifdef __HAIR__ +ccl_device_inline void metalrt_intersection_curve( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline void metalrt_intersection_curve_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & PRIMITIVE_CURVE_RIBBON) { + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_ribbon_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & PRIMITIVE_CURVE_RIBBON) { + metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_all_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} +#endif /* __HAIR__ */ + +#ifdef __POINTCLOUD__ +ccl_device_inline void metalrt_intersection_point( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline void metalrt_intersection_point_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + + if (result.accept) { + result.distance = isect.t; + } + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point_shadow(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} +#endif /* __POINTCLOUD__ */ +#endif /* __METALRT__ */ diff --git a/intern/cycles/util/math_float8.h b/intern/cycles/util/math_float8.h index 8ed8d56a034..b538cfbe70b 100644 --- a/intern/cycles/util/math_float8.h +++ b/intern/cycles/util/math_float8.h @@ -14,187 +14,187 @@ CCL_NAMESPACE_BEGIN * Declaration. */ -ccl_device_inline float8 operator+(const float8 &a, const float8 &b); -ccl_device_inline float8 operator+(const float8 &a, const float f); -ccl_device_inline float8 operator+(const float f, const float8 &a); +ccl_device_inline float8_t operator+(const float8_t a, const float8_t b); +ccl_device_inline float8_t operator+(const float8_t a, const float f); +ccl_device_inline float8_t operator+(const float f, const float8_t a); -ccl_device_inline float8 operator-(const float8 &a); -ccl_device_inline float8 operator-(const float8 &a, const float8 &b); -ccl_device_inline float8 operator-(const float8 &a, const float f); -ccl_device_inline float8 operator-(const float f, const float8 &a); +ccl_device_inline float8_t operator-(const float8_t a); +ccl_device_inline float8_t operator-(const float8_t a, const float8_t b); +ccl_device_inline float8_t operator-(const float8_t a, const float f); +ccl_device_inline float8_t operator-(const float f, const float8_t a); -ccl_device_inline float8 operator*(const float8 &a, const float8 &b); -ccl_device_inline float8 operator*(const float8 &a, const float f); -ccl_device_inline float8 operator*(const float f, const float8 &a); +ccl_device_inline float8_t operator*(const float8_t a, const float8_t b); +ccl_device_inline float8_t operator*(const float8_t a, const float f); +ccl_device_inline float8_t operator*(const float f, const float8_t a); -ccl_device_inline float8 operator/(const float8 &a, const float8 &b); -ccl_device_inline float8 operator/(const float8 &a, float f); -ccl_device_inline float8 operator/(const float f, const float8 &a); +ccl_device_inline float8_t operator/(const float8_t a, const float8_t b); +ccl_device_inline float8_t operator/(const float8_t a, float f); +ccl_device_inline float8_t operator/(const float f, const float8_t a); -ccl_device_inline float8 operator+=(float8 &a, const float8 &b); +ccl_device_inline float8_t operator+=(float8_t a, const float8_t b); -ccl_device_inline float8 operator*=(float8 &a, const float8 &b); -ccl_device_inline float8 operator*=(float8 &a, float f); +ccl_device_inline float8_t operator*=(float8_t a, const float8_t b); +ccl_device_inline float8_t operator*=(float8_t a, float f); -ccl_device_inline float8 operator/=(float8 &a, float f); +ccl_device_inline float8_t operator/=(float8_t a, float f); -ccl_device_inline bool operator==(const float8 &a, const float8 &b); +ccl_device_inline bool operator==(const float8_t a, const float8_t b); -ccl_device_inline float8 rcp(const float8 &a); -ccl_device_inline float8 sqrt(const float8 &a); -ccl_device_inline float8 sqr(const float8 &a); -ccl_device_inline bool is_zero(const float8 &a); -ccl_device_inline float average(const float8 &a); -ccl_device_inline float8 min(const float8 &a, const float8 &b); -ccl_device_inline float8 max(const float8 &a, const float8 &b); -ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx); -ccl_device_inline float8 fabs(const float8 &a); -ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t); +ccl_device_inline float8_t rcp(const float8_t a); +ccl_device_inline float8_t sqrt(const float8_t a); +ccl_device_inline float8_t sqr(const float8_t a); +ccl_device_inline bool is_zero(const float8_t a); +ccl_device_inline float average(const float8_t a); +ccl_device_inline float8_t min(const float8_t a, const float8_t b); +ccl_device_inline float8_t max(const float8_t a, const float8_t b); +ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx); +ccl_device_inline float8_t fabs(const float8_t a); +ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t); +ccl_device_inline float8_t saturate(const float8_t a); -ccl_device_inline float8 safe_divide(const float8 a, const float b); -ccl_device_inline float8 safe_divide(const float8 a, const float8 b); +ccl_device_inline float8_t safe_divide(const float8_t a, const float b); +ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b); -ccl_device_inline float reduce_min(const float8 &a); -ccl_device_inline float reduce_max(const float8 &a); -ccl_device_inline float reduce_add(const float8 &a); +ccl_device_inline float reduce_min(const float8_t a); +ccl_device_inline float reduce_max(const float8_t a); +ccl_device_inline float reduce_add(const float8_t a); -ccl_device_inline float8 saturate(const float8 &a); -ccl_device_inline bool isequal(const float8 a, const float8 b); +ccl_device_inline bool isequal(const float8_t a, const float8_t b); /******************************************************************************* * Definition. */ -ccl_device_inline float8 zero_float8() +ccl_device_inline float8_t zero_float8_t() { #ifdef __KERNEL_AVX2__ - return float8(_mm256_setzero_ps()); + return float8_t(_mm256_setzero_ps()); #else - return make_float8(0.0f); + return make_float8_t(0.0f); #endif } -ccl_device_inline float8 one_float8() +ccl_device_inline float8_t one_float8_t() { - return make_float8(1.0f); + return make_float8_t(1.0f); } -ccl_device_inline float8 operator+(const float8 &a, const float8 &b) +ccl_device_inline float8_t operator+(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_add_ps(a.m256, b.m256)); + return float8_t(_mm256_add_ps(a.m256, b.m256)); #else - return make_float8( + return make_float8_t( a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h); #endif } -ccl_device_inline float8 operator+(const float8 &a, const float f) +ccl_device_inline float8_t operator+(const float8_t a, const float f) { - return a + make_float8(f); + return a + make_float8_t(f); } -ccl_device_inline float8 operator+(const float f, const float8 &a) +ccl_device_inline float8_t operator+(const float f, const float8_t a) { - return make_float8(f) + a; + return make_float8_t(f) + a; } -ccl_device_inline float8 operator-(const float8 &a) +ccl_device_inline float8_t operator-(const float8_t a) { #ifdef __KERNEL_AVX2__ __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); - return float8(_mm256_xor_ps(a.m256, mask)); + return float8_t(_mm256_xor_ps(a.m256, mask)); #else - return make_float8(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h); + return make_float8_t(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h); #endif } -ccl_device_inline float8 operator-(const float8 &a, const float8 &b) +ccl_device_inline float8_t operator-(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_sub_ps(a.m256, b.m256)); + return float8_t(_mm256_sub_ps(a.m256, b.m256)); #else - return make_float8( + return make_float8_t( a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h); #endif } -ccl_device_inline float8 operator-(const float8 &a, const float f) +ccl_device_inline float8_t operator-(const float8_t a, const float f) { - return a - make_float8(f); + return a - make_float8_t(f); } -ccl_device_inline float8 operator-(const float f, const float8 &a) +ccl_device_inline float8_t operator-(const float f, const float8_t a) { - return make_float8(f) - a; + return make_float8_t(f) - a; } -ccl_device_inline float8 operator*(const float8 &a, const float8 &b) +ccl_device_inline float8_t operator*(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_mul_ps(a.m256, b.m256)); + return float8_t(_mm256_mul_ps(a.m256, b.m256)); #else - return make_float8( + return make_float8_t( a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h); #endif } -ccl_device_inline float8 operator*(const float8 &a, const float f) +ccl_device_inline float8_t operator*(const float8_t a, const float f) { - return a * make_float8(f); + return a * make_float8_t(f); } -ccl_device_inline float8 operator*(const float f, const float8 &a) +ccl_device_inline float8_t operator*(const float f, const float8_t a) { - return make_float8(f) * a; + return make_float8_t(f) * a; } -ccl_device_inline float8 operator/(const float8 &a, const float8 &b) +ccl_device_inline float8_t operator/(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_div_ps(a.m256, b.m256)); + return float8_t(_mm256_div_ps(a.m256, b.m256)); #else - return make_float8( + return make_float8_t( a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h); #endif } -ccl_device_inline float8 operator/(const float8 &a, const float f) +ccl_device_inline float8_t operator/(const float8_t a, const float f) { - return a / make_float8(f); + return a / make_float8_t(f); } -ccl_device_inline float8 operator/(const float f, const float8 &a) +ccl_device_inline float8_t operator/(const float f, const float8_t a) { - return make_float8(f) / a; + return make_float8_t(f) / a; } -ccl_device_inline float8 operator+=(float8 &a, const float8 &b) +ccl_device_inline float8_t operator+=(float8_t a, const float8_t b) { return a = a + b; } -ccl_device_inline float8 operator-=(float8 &a, const float8 &b) +ccl_device_inline float8_t operator-=(float8_t a, const float8_t b) { return a = a - b; } -ccl_device_inline float8 operator*=(float8 &a, const float8 &b) +ccl_device_inline float8_t operator*=(float8_t a, const float8_t b) { return a = a * b; } -ccl_device_inline float8 operator*=(float8 &a, float f) +ccl_device_inline float8_t operator*=(float8_t a, float f) { return a = a * f; } -ccl_device_inline float8 operator/=(float8 &a, float f) +ccl_device_inline float8_t operator/=(float8_t a, float f) { return a = a / f; } -ccl_device_inline bool operator==(const float8 &a, const float8 &b) +ccl_device_inline bool operator==(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ return (_mm256_movemask_ps(_mm256_castsi256_ps( @@ -206,160 +206,195 @@ ccl_device_inline bool operator==(const float8 &a, const float8 &b) #endif } -ccl_device_inline float8 rcp(const float8 &a) +ccl_device_inline float8_t rcp(const float8_t a) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_rcp_ps(a.m256)); + return float8_t(_mm256_rcp_ps(a.m256)); #else - return make_float8(1.0f / a.a, - 1.0f / a.b, - 1.0f / a.c, - 1.0f / a.d, - 1.0f / a.e, - 1.0f / a.f, - 1.0f / a.g, - 1.0f / a.h); + return make_float8_t(1.0f / a.a, + 1.0f / a.b, + 1.0f / a.c, + 1.0f / a.d, + 1.0f / a.e, + 1.0f / a.f, + 1.0f / a.g, + 1.0f / a.h); #endif } -ccl_device_inline float8 sqrt(const float8 &a) +ccl_device_inline float8_t sqrt(const float8_t a) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_sqrt_ps(a.m256)); + return float8_t(_mm256_sqrt_ps(a.m256)); #else - return make_float8(sqrtf(a.a), - sqrtf(a.b), - sqrtf(a.c), - sqrtf(a.d), - sqrtf(a.e), - sqrtf(a.f), - sqrtf(a.g), - sqrtf(a.h)); + return make_float8_t(sqrtf(a.a), + sqrtf(a.b), + sqrtf(a.c), + sqrtf(a.d), + sqrtf(a.e), + sqrtf(a.f), + sqrtf(a.g), + sqrtf(a.h)); #endif } -ccl_device_inline float8 sqr(const float8 &a) +ccl_device_inline float8_t sqr(const float8_t a) { return a * a; } -ccl_device_inline bool is_zero(const float8 &a) +ccl_device_inline bool is_zero(const float8_t a) { - return a == make_float8(0.0f); + return a == make_float8_t(0.0f); } -ccl_device_inline float average(const float8 &a) +ccl_device_inline float average(const float8_t a) { return reduce_add(a) / 8.0f; } -ccl_device_inline float8 min(const float8 &a, const float8 &b) +ccl_device_inline float8_t min(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_min_ps(a.m256, b.m256)); + return float8_t(_mm256_min_ps(a.m256, b.m256)); #else - return make_float8(min(a.a, b.a), - min(a.b, b.b), - min(a.c, b.c), - min(a.d, b.d), - min(a.e, b.e), - min(a.f, b.f), - min(a.g, b.g), - min(a.h, b.h)); + return make_float8_t(min(a.a, b.a), + min(a.b, b.b), + min(a.c, b.c), + min(a.d, b.d), + min(a.e, b.e), + min(a.f, b.f), + min(a.g, b.g), + min(a.h, b.h)); #endif } -ccl_device_inline float8 max(const float8 &a, const float8 &b) +ccl_device_inline float8_t max(const float8_t a, const float8_t b) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_max_ps(a.m256, b.m256)); + return float8_t(_mm256_max_ps(a.m256, b.m256)); #else - return make_float8(max(a.a, b.a), - max(a.b, b.b), - max(a.c, b.c), - max(a.d, b.d), - max(a.e, b.e), - max(a.f, b.f), - max(a.g, b.g), - max(a.h, b.h)); + return make_float8_t(max(a.a, b.a), + max(a.b, b.b), + max(a.c, b.c), + max(a.d, b.d), + max(a.e, b.e), + max(a.f, b.f), + max(a.g, b.g), + max(a.h, b.h)); #endif } -ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx) +ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx) { return min(max(a, mn), mx); } -ccl_device_inline float8 fabs(const float8 &a) +ccl_device_inline float8_t fabs(const float8_t a) { #ifdef __KERNEL_AVX2__ - return float8(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)))); + return float8_t(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)))); #else - return make_float8(fabsf(a.a), - fabsf(a.b), - fabsf(a.c), - fabsf(a.d), - fabsf(a.e), - fabsf(a.f), - fabsf(a.g), - fabsf(a.h)); + return make_float8_t(fabsf(a.a), + fabsf(a.b), + fabsf(a.c), + fabsf(a.d), + fabsf(a.e), + fabsf(a.f), + fabsf(a.g), + fabsf(a.h)); #endif } -ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t) +ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t) { return a + t * (b - a); } -ccl_device_inline float reduce_min(const float8 &a) +ccl_device_inline float8_t saturate(const float8_t a) +{ + return clamp(a, make_float8_t(0.0f), make_float8_t(1.0f)); +} + +ccl_device_inline float8_t exp(float8_t v) +{ + return make_float8_t( + expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h)); +} + +ccl_device_inline float8_t log(float8_t v) +{ + return make_float8_t( + logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h)); +} + +ccl_device_inline float dot(const float8_t a, const float8_t b) +{ +#ifdef __KERNEL_AVX2__ + float8_t t(_mm256_dp_ps(a.m256, b.m256, 0xFF)); + return t[0] + t[4]; +#else + return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) + + (a.g * b.g) + (a.h * b.h); +#endif +} + +ccl_device_inline float8_t pow(float8_t v, float e) +{ + return make_float8_t(powf(v.a, e), + powf(v.b, e), + powf(v.c, e), + powf(v.d, e), + powf(v.e, e), + powf(v.f, e), + powf(v.g, e), + powf(v.h, e)); +} + +ccl_device_inline float reduce_min(const float8_t a) { return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h))); } -ccl_device_inline float reduce_max(const float8 &a) +ccl_device_inline float reduce_max(const float8_t a) { return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h))); } -ccl_device_inline float reduce_add(const float8 &a) +ccl_device_inline float reduce_add(const float8_t a) { #ifdef __KERNEL_AVX2__ - float8 b(_mm256_hadd_ps(a.m256, a.m256)); - float8 h(_mm256_hadd_ps(b.m256, b.m256)); + float8_t b(_mm256_hadd_ps(a.m256, a.m256)); + float8_t h(_mm256_hadd_ps(b.m256, b.m256)); return h[0] + h[4]; #else return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h; #endif } -ccl_device_inline float8 saturate(const float8 &a) -{ - return clamp(a, make_float8(0.0f), make_float8(1.0f)); -} - -ccl_device_inline bool isequal(const float8 a, const float8 b) +ccl_device_inline bool isequal(const float8_t a, const float8_t b) { return a == b; } -ccl_device_inline float8 safe_divide(const float8 a, const float b) +ccl_device_inline float8_t safe_divide(const float8_t a, const float b) { - return (b != 0.0f) ? a / b : make_float8(0.0f); + return (b != 0.0f) ? a / b : make_float8_t(0.0f); } -ccl_device_inline float8 safe_divide(const float8 a, const float8 b) +ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b) { - return make_float8((b.a != 0.0f) ? a.a / b.a : 0.0f, - (b.b != 0.0f) ? a.b / b.b : 0.0f, - (b.c != 0.0f) ? a.c / b.c : 0.0f, - (b.d != 0.0f) ? a.d / b.d : 0.0f, - (b.e != 0.0f) ? a.e / b.e : 0.0f, - (b.f != 0.0f) ? a.f / b.f : 0.0f, - (b.g != 0.0f) ? a.g / b.g : 0.0f, - (b.h != 0.0f) ? a.h / b.h : 0.0f); + return make_float8_t((b.a != 0.0f) ? a.a / b.a : 0.0f, + (b.b != 0.0f) ? a.b / b.b : 0.0f, + (b.c != 0.0f) ? a.c / b.c : 0.0f, + (b.d != 0.0f) ? a.d / b.d : 0.0f, + (b.e != 0.0f) ? a.e / b.e : 0.0f, + (b.f != 0.0f) ? a.f / b.f : 0.0f, + (b.g != 0.0f) ? a.g / b.g : 0.0f, + (b.h != 0.0f) ? a.h / b.h : 0.0f); } -ccl_device_inline float8 ensure_finite(float8 v) +ccl_device_inline float8_t ensure_finite(float8_t v) { v.a = ensure_finite(v.a); v.b = ensure_finite(v.b); @@ -373,47 +408,12 @@ ccl_device_inline float8 ensure_finite(float8 v) return v; } -ccl_device_inline bool isfinite_safe(float8 v) +ccl_device_inline bool isfinite_safe(float8_t v) { return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) && isfinite_safe(v.e) && isfinite_safe(v.f) && isfinite_safe(v.g) && isfinite_safe(v.h); } -ccl_device_inline float8 pow(float8 v, float e) -{ - return make_float8(powf(v.a, e), - powf(v.b, e), - powf(v.c, e), - powf(v.d, e), - powf(v.e, e), - powf(v.f, e), - powf(v.g, e), - powf(v.h, e)); -} - -ccl_device_inline float8 exp(float8 v) -{ - return make_float8( - expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h)); -} - -ccl_device_inline float8 log(float8 v) -{ - return make_float8( - logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h)); -} - -ccl_device_inline float dot(const float8 &a, const float8 &b) -{ -#ifdef __KERNEL_AVX2__ - float8 t(_mm256_dp_ps(a.m256, b.m256, 0xFF)); - return t[0] + t[4]; -#else - return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) + - (a.g * b.g) + (a.h * b.h); -#endif -} - CCL_NAMESPACE_END #endif /* __UTIL_MATH_FLOAT8_H__ */ diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h index f04dc675c84..bb9798932ac 100644 --- a/intern/cycles/util/types_float8.h +++ b/intern/cycles/util/types_float8.h @@ -11,10 +11,13 @@ CCL_NAMESPACE_BEGIN +/* float8 is a reserved type in Metal that has not been implemented. For + * that reason this is named float8_t. */ + #ifdef __KERNEL_GPU__ -struct float8 +struct float8_t #else -struct ccl_try_align(32) float8 +struct ccl_try_align(32) float8_t #endif { #ifdef __KERNEL_AVX2__ @@ -25,14 +28,14 @@ struct ccl_try_align(32) float8 }; }; - __forceinline float8(); - __forceinline float8(const float8 &a); - __forceinline explicit float8(const __m256 &a); + __forceinline float8_t(); + __forceinline float8_t(const float8_t &a); + __forceinline explicit float8_t(const __m256 &a); __forceinline operator const __m256 &() const; __forceinline operator __m256 &(); - __forceinline float8 &operator=(const float8 &a); + __forceinline float8_t &operator=(const float8_t &a); #else /* __KERNEL_AVX2__ */ float a, b, c, d, e, f, g, h; @@ -44,9 +47,9 @@ struct ccl_try_align(32) float8 #endif }; -ccl_device_inline float8 make_float8(float f); -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h); +ccl_device_inline float8_t make_float8_t(float f); +ccl_device_inline float8_t +make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h); CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h index 21931c55071..2ab464a791b 100644 --- a/intern/cycles/util/types_float8_impl.h +++ b/intern/cycles/util/types_float8_impl.h @@ -16,29 +16,29 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_AVX2__ -__forceinline float8::float8() +__forceinline float8_t::float8_t() { } -__forceinline float8::float8(const float8 &f) : m256(f.m256) +__forceinline float8_t::float8_t(const float8_t &f) : m256(f.m256) { } -__forceinline float8::float8(const __m256 &f) : m256(f) +__forceinline float8_t::float8_t(const __m256 &f) : m256(f) { } -__forceinline float8::operator const __m256 &() const +__forceinline float8_t::operator const __m256 &() const { return m256; } -__forceinline float8::operator __m256 &() +__forceinline float8_t::operator __m256 &() { return m256; } -__forceinline float8 &float8::operator=(const float8 &f) +__forceinline float8_t &float8_t::operator=(const float8_t &f) { m256 = f.m256; return *this; @@ -46,14 +46,14 @@ __forceinline float8 &float8::operator=(const float8 &f) #endif /* __KERNEL_AVX2__ */ #ifndef __KERNEL_GPU__ -__forceinline float float8::operator[](int i) const +__forceinline float float8_t::operator[](int i) const { util_assert(i >= 0); util_assert(i < 8); return *(&a + i); } -__forceinline float &float8::operator[](int i) +__forceinline float &float8_t::operator[](int i) { util_assert(i >= 0); util_assert(i < 8); @@ -61,23 +61,23 @@ __forceinline float &float8::operator[](int i) } #endif -ccl_device_inline float8 make_float8(float f) +ccl_device_inline float8_t make_float8_t(float f) { #ifdef __KERNEL_AVX2__ - float8 r(_mm256_set1_ps(f)); + float8_t r(_mm256_set1_ps(f)); #else - float8 r = {f, f, f, f, f, f, f, f}; + float8_t r = {f, f, f, f, f, f, f, f}; #endif return r; } -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h) +ccl_device_inline float8_t +make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h) { #ifdef __KERNEL_AVX2__ - float8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h)); + float8_t r(_mm256_setr_ps(a, b, c, d, e, f, g, h)); #else - float8 r = {a, b, c, d, e, f, g, h}; + float8_t r = {a, b, c, d, e, f, g, h}; #endif return r; } |