From 5d565062edc25575bbabf173a4e26f184103944b Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 13 Oct 2021 18:19:51 +0200 Subject: Cleanup: refactor OptiX shadow intersection for upcoming changes --- intern/cycles/bvh/bvh_embree.cpp | 2 +- intern/cycles/kernel/bvh/bvh_shadow_all.h | 2 +- intern/cycles/kernel/bvh/bvh_util.h | 35 +++++++++-- intern/cycles/kernel/device/optix/kernel.cu | 68 ++++++++++------------ intern/cycles/kernel/geom/geom_motion_curve.h | 43 +++----------- intern/cycles/kernel/geom/geom_motion_triangle.h | 39 ++----------- .../kernel/geom/geom_motion_triangle_shader.h | 6 +- intern/cycles/kernel/kernel_shader.h | 10 +--- 8 files changed, 79 insertions(+), 126 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp index ae5b7dd426a..76fcdf539ea 100644 --- a/intern/cycles/bvh/bvh_embree.cpp +++ b/intern/cycles/bvh/bvh_embree.cpp @@ -81,7 +81,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); /* If no transparent shadows, all light is blocked. */ - const int flags = intersection_get_shader_flags(kg, ¤t_isect); + const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) { ctx->opaque_hit = true; return; diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index ea1ee26b863..4f2164a86ae 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -197,7 +197,7 @@ ccl_device_inline /* todo: optimize so primitive visibility flag indicates if * the primitive has a transparent shadow shader? */ - const int flags = intersection_get_shader_flags(kg, isect); + const int flags = intersection_get_shader_flags(kg, isect->prim, isect->type); if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) { /* If no transparent shadows, all light is blocked and we can diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index fb546f568f3..31aae389da0 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -140,14 +140,12 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection * /* Utility to quickly get flags from an intersection. */ ccl_device_forceinline int intersection_get_shader_flags( - ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict isect) + ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type) { - const int prim = isect->prim; int shader = 0; #ifdef __HAIR__ - if (isect->type & PRIMITIVE_ALL_TRIANGLE) + if (type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); @@ -195,4 +193,33 @@ ccl_device_forceinline int intersection_get_object_flags( return kernel_tex_fetch(__object_flag, isect->object); } +/* TODO: find a better (faster) solution for this. Maybe store offset per object for + * attributes needed in intersection? */ +ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg, + const int object, + const uint id) +{ + uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + if (UNLIKELY(attr_map.y == 0)) { + return (int)ATTR_STD_NOT_FOUND; + } + else { + /* Chain jump to a different part of the table. */ + attr_offset = attr_map.z; + } + } + else { + attr_offset += ATTR_PRIM_TYPES; + } + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index c9577bb2aa2..e97b25d31a2 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -172,14 +172,12 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() { #ifdef __SHADOW_RECORD_ALL__ - bool ignore_intersection = false; - int prim = optixGetPrimitiveIndex(); const uint object = get_object_id(); # ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { - ignore_intersection = true; + return optixIgnoreIntersection(); } # endif @@ -202,29 +200,39 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() /* Filter out curve endcaps. */ if (u == 0.0f || u == 1.0f) { - ignore_intersection = true; + return optixIgnoreIntersection(); } } # endif - int num_hits = optixGetPayload_2(); - int record_index = num_hits; +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + optixSetPayload_5(true); + return optixTerminateRay(); +# else const int max_hits = optixGetPayload_3(); - if (!ignore_intersection) { - optixSetPayload_2(num_hits + 1); + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (max_hits == 0 || + !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + optixSetPayload_5(true); + return optixTerminateRay(); } + /* Record transparent intersection. */ + const int num_hits = optixGetPayload_2(); + int record_index = num_hits; + + optixSetPayload_2(num_hits + 1); + Intersection *const isect_array = get_payload_ptr_0(); -# ifdef __TRANSPARENT_SHADOWS__ - if (num_hits >= max_hits) { + if (record_index >= max_hits) { /* If maximum number of hits reached, find a hit to replace. */ - const int num_recorded_hits = min(max_hits, num_hits); float max_recorded_t = isect_array[0].t; int max_recorded_hit = 0; - for (int i = 1; i < num_recorded_hits; i++) { + for (int i = 1; i < max_hits; i++) { if (isect_array[i].t > max_recorded_t) { max_recorded_t = isect_array[i].t; max_recorded_hit = i; @@ -232,39 +240,25 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() } if (optixGetRayTmax() >= max_recorded_t) { - /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the current - * hit anymore. */ + /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the + * current hit anymore. */ return; } record_index = max_recorded_hit; } -# endif - if (!ignore_intersection) { - Intersection *const isect = isect_array + record_index; - isect->u = u; - isect->v = v; - isect->t = optixGetRayTmax(); - isect->prim = prim; - isect->object = object; - isect->type = type; - -# ifdef __TRANSPARENT_SHADOWS__ - /* Detect if this surface has a shader with transparent shadows. */ - if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) { -# endif - /* If no transparent shadows, all light is blocked and we can stop immediately. */ - optixSetPayload_5(true); - return optixTerminateRay(); -# ifdef __TRANSPARENT_SHADOWS__ - } -# endif - } + Intersection *const isect = isect_array + record_index; + isect->u = u; + isect->v = v; + isect->t = optixGetRayTmax(); + isect->prim = prim; + isect->object = object; + isect->type = type; - /* Continue tracing. */ optixIgnoreIntersection(); -#endif +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ } extern "C" __global__ void __anyhit__kernel_optix_volume_test() diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 8e32df439cd..5754608a69b 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -27,31 +27,6 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg, - int object, - uint id, - ccl_private AttributeElement *elem) -{ - /* todo: find a better (faster) solution for this, maybe store offset per object. - * - * NOTE: currently it's not a bottleneck because in test scenes the loop below runs - * zero iterations and rendering is really slow with motion curves. For until other - * areas are speed up it's probably not so crucial to optimize this out. - */ - uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY; - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while (attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - *elem = (AttributeElement)attr_map.y; - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; -} - ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg, int offset, int numkeys, @@ -92,13 +67,12 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals * object_motion_info(kg, object, &numsteps, NULL, &numkeys); /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps * 2; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; + const int maxstep = numsteps * 2; + const int step = min((int)(time * maxstep), maxstep - 1); + const float t = time * maxstep - step; /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch key coordinates */ @@ -160,13 +134,12 @@ ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg, object_motion_info(kg, object, &numsteps, NULL, &numkeys); /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps * 2; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; + const int maxstep = numsteps * 2; + const int step = min((int)(time * maxstep), maxstep - 1); + const float t = time * maxstep - step; /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch key coordinates */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 161b358110d..547f03af47c 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -27,41 +27,12 @@ #pragma once +#include "kernel/bvh/bvh_util.h" + CCL_NAMESPACE_BEGIN /* Time interpolation of vertex positions and normals */ -ccl_device_inline int find_attribute_motion(ccl_global const KernelGlobals *kg, - int object, - uint id, - ccl_private AttributeElement *elem) -{ - /* todo: find a better (faster) solution for this, maybe store offset per object */ - uint attr_offset = object_attribute_map_offset(kg, object); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while (attr_map.x != id) { - if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - if (UNLIKELY(attr_map.y == 0)) { - return (int)ATTR_STD_NOT_FOUND; - } - else { - /* Chain jump to a different part of the table. */ - attr_offset = attr_map.z; - } - } - else { - attr_offset += ATTR_PRIM_TYPES; - } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - *elem = (AttributeElement)attr_map.y; - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; -} - ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg, uint4 tri_vindex, int offset, @@ -129,8 +100,7 @@ ccl_device_inline void motion_triangle_vertices( float t = time * maxstep - step; /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch vertex coordinates */ @@ -164,8 +134,7 @@ ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGl float t = time * maxstep - step; /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch normals */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index 03bb1fba2a2..25a68fa7781 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -56,8 +56,7 @@ ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlo int step = min((int)(sd->time * maxstep), maxstep - 1); float t = sd->time * maxstep - step; /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* Fetch vertex coordinates. */ float3 verts[3], next_verts[3]; @@ -96,8 +95,7 @@ ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlo /* Compute smooth normal. */ if (sd->shader & SHADER_SMOOTH_NORMAL) { /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* Fetch vertex coordinates. */ float3 normals[3], next_normals[3]; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 4174a27406b..b5a52ff866d 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -862,15 +862,7 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_privat #endif } -/* Transparent Shadows */ - -#ifdef __TRANSPARENT_SHADOWS__ -ccl_device bool shader_transparent_shadow(ccl_global const KernelGlobals *kg, - ccl_private Intersection *isect) -{ - return (intersection_get_shader_flags(kg, isect) & SD_HAS_TRANSPARENT_SHADOW) != 0; -} -#endif /* __TRANSPARENT_SHADOWS__ */ +/* Cryptomatte */ ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader) { -- cgit v1.2.3