diff options
author | Howard Trickey <howard.trickey@gmail.com> | 2021-10-24 15:31:22 +0300 |
---|---|---|
committer | Howard Trickey <howard.trickey@gmail.com> | 2021-10-24 15:31:22 +0300 |
commit | 1aa953bd1913c81b22c80a00edbf4ad88a32c52f (patch) | |
tree | 7fa65e43d5a9bac6496555b723f37e0031e2737e /intern/cycles/kernel/bvh/bvh_util.h | |
parent | fc171c1be9da36485e892339b86dc8d4251914af (diff) | |
parent | 6ce383a9dfba5c49a48676c3a651804fde3dfe34 (diff) |
Merge branch 'master' into soc-2020-io-performance
Diffstat (limited to 'intern/cycles/kernel/bvh/bvh_util.h')
-rw-r--r-- | intern/cycles/kernel/bvh/bvh_util.h | 110 |
1 file changed, 69 insertions, 41 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index fb546f568f3..8686f887021 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -71,8 +71,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) #endif } -#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__)) -/* TODO: Move to another file? */ +#if defined(__KERNEL_CPU__) ccl_device int intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -87,32 +86,6 @@ ccl_device int intersections_compare(const void *a, const void *b) } #endif -#if defined(__SHADOW_RECORD_ALL__) -ccl_device_inline void sort_intersections(ccl_private Intersection *hits, uint num_hits) -{ - kernel_assert(num_hits > 0); - -# ifdef __KERNEL_GPU__ - /* Use bubble sort which has more friendly memory pattern on GPU. */ - bool swapped; - do { - swapped = false; - for (int j = 0; j < num_hits - 1; ++j) { - if (hits[j].t > hits[j + 1].t) { - struct Intersection tmp = hits[j]; - hits[j] = hits[j + 1]; - hits[j + 1] = tmp; - swapped = true; - } - } - --num_hits; - } while (swapped); -# else - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); -# endif -} -#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ - /* For subsurface scattering, only sorting a small amount of intersections * so bubble sort is fine for CPU and GPU. 
*/ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits, @@ -125,7 +98,7 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection * for (int j = 0; j < num_hits - 1; ++j) { if (hits[j].t > hits[j + 1].t) { struct Intersection tmp_hit = hits[j]; - struct float3 tmp_Ng = Ng[j]; + float3 tmp_Ng = Ng[j]; hits[j] = hits[j + 1]; Ng[j] = Ng[j + 1]; hits[j + 1] = tmp_hit; @@ -139,15 +112,14 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection * /* Utility to quickly get flags from an intersection. */ -ccl_device_forceinline int intersection_get_shader_flags( - ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict isect) +ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, + const int prim, + const int type) { - const int prim = isect->prim; int shader = 0; #ifdef __HAIR__ - if (isect->type & PRIMITIVE_ALL_TRIANGLE) + if (type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); @@ -161,8 +133,9 @@ ccl_device_forceinline int intersection_get_shader_flags( return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; } -ccl_device_forceinline int intersection_get_shader_from_isect_prim( - ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type) +ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, + const int prim, + const int isect_type) { int shader = 0; @@ -181,18 +154,73 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim( return shader & SHADER_MASK; } -ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict - isect) +ccl_device_forceinline int intersection_get_shader( + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); } 
ccl_device_forceinline int intersection_get_object_flags( - ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict isect) + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { return kernel_tex_fetch(__object_flag, isect->object); } +/* TODO: find a better (faster) solution for this. Maybe store offset per object for + * attributes needed in intersection? */ +ccl_device_inline int intersection_find_attribute(KernelGlobals kg, + const int object, + const uint id) +{ + uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + if (UNLIKELY(attr_map.y == 0)) { + return (int)ATTR_STD_NOT_FOUND; + } + else { + /* Chain jump to a different part of the table. */ + attr_offset = attr_map.z; + } + } + else { + attr_offset += ATTR_PRIM_TYPES; + } + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + +/* Transparent Shadows */ + +/* Cut-off value to stop transparent shadow tracing when practically opaque. */ +#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f + +ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, + const int object, + const int prim, + const float u) +{ + /* Find attribute. */ + const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY); + if (offset == ATTR_STD_NOT_FOUND) { + /* If no shadow transparency attribute, assume opaque. */ + return 0.0f; + } + + /* Interpolate transparency between curve keys. 
*/ + const KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); + const int k1 = k0 + 1; + + const float f0 = kernel_tex_fetch(__attributes_float, offset + k0); + const float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + + return (1.0f - u) * f0 + u * f1; +} + CCL_NAMESPACE_END |