Merge branch 'master' into soc-2020-io-performance

author: Howard Trickey <howard.trickey@gmail.com> 2021-10-24 15:31:22 +0300
committer: Howard Trickey <howard.trickey@gmail.com> 2021-10-24 15:31:22 +0300
commit: 1aa953bd1913c81b22c80a00edbf4ad88a32c52f (patch)
tree: 7fa65e43d5a9bac6496555b723f37e0031e2737e /intern/cycles/kernel/bvh/bvh_util.h
parent: fc171c1be9da36485e892339b86dc8d4251914af (diff)
parent: 6ce383a9dfba5c49a48676c3a651804fde3dfe34 (diff)
1 files changed, 69 insertions, 41 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index fb546f568f3..8686f887021 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -71,8 +71,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
 #endif
 }
 
-#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
-/* TODO: Move to another file? */
+#if defined(__KERNEL_CPU__)
 ccl_device int intersections_compare(const void *a, const void *b)
 {
   const Intersection *isect_a = (const Intersection *)a;
@@ -87,32 +86,6 @@ ccl_device int intersections_compare(const void *a, const void *b)
 }
 #endif
 
-#if defined(__SHADOW_RECORD_ALL__)
-ccl_device_inline void sort_intersections(ccl_private Intersection *hits, uint num_hits)
-{
-  kernel_assert(num_hits > 0);
-
-#  ifdef __KERNEL_GPU__
-  /* Use bubble sort which has more friendly memory pattern on GPU. */
-  bool swapped;
-  do {
-    swapped = false;
-    for (int j = 0; j < num_hits - 1; ++j) {
-      if (hits[j].t > hits[j + 1].t) {
-        struct Intersection tmp = hits[j];
-        hits[j] = hits[j + 1];
-        hits[j + 1] = tmp;
-        swapped = true;
-      }
-    }
-    --num_hits;
-  } while (swapped);
-#  else
-  qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-#  endif
-}
-#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
-
 /* For subsurface scattering, only sorting a small amount of intersections
  * so bubble sort is fine for CPU and GPU. */
 ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits,
@@ -125,7 +98,7 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
     for (int j = 0; j < num_hits - 1; ++j) {
       if (hits[j].t > hits[j + 1].t) {
         struct Intersection tmp_hit = hits[j];
-        struct float3 tmp_Ng = Ng[j];
+        float3 tmp_Ng = Ng[j];
         hits[j] = hits[j + 1];
         Ng[j] = Ng[j + 1];
         hits[j + 1] = tmp_hit;
@@ -139,15 +112,14 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
 
 /* Utility to quickly get flags from an intersection. */
 
-ccl_device_forceinline int intersection_get_shader_flags(
-    ccl_global const KernelGlobals *ccl_restrict kg,
-    ccl_private const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg,
+                                                         const int prim,
+                                                         const int type)
 {
-  const int prim = isect->prim;
   int shader = 0;
 
 #ifdef __HAIR__
-  if (isect->type & PRIMITIVE_ALL_TRIANGLE)
+  if (type & PRIMITIVE_ALL_TRIANGLE)
 #endif
   {
     shader = kernel_tex_fetch(__tri_shader, prim);
@@ -161,8 +133,9 @@ ccl_device_forceinline int intersection_get_shader_flags(
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 }
 
-ccl_device_forceinline int intersection_get_shader_from_isect_prim(
-    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
+ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg,
+                                                                   const int prim,
+                                                                   const int isect_type)
 {
   int shader = 0;
 
@@ -181,18 +154,73 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
   return shader & SHADER_MASK;
 }
 
-ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg,
-                                                   ccl_private const Intersection *ccl_restrict
-                                                       isect)
+ccl_device_forceinline int intersection_get_shader(
+    KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect)
 {
   return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
 }
 
 ccl_device_forceinline int intersection_get_object_flags(
-    ccl_global const KernelGlobals *ccl_restrict kg,
-    ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect)
 {
   return kernel_tex_fetch(__object_flag, isect->object);
 }
 
+/* TODO: find a better (faster) solution than searching the attribute map on every call. Maybe
+ * store an offset per object for attributes needed in intersection? */
+ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
+                                                  const int object,
+                                                  const uint id)
+{
+  uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset;
+  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+  while (attr_map.x != id) {
+    if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
+      if (UNLIKELY(attr_map.y == 0)) {
+        return (int)ATTR_STD_NOT_FOUND;
+      }
+      else {
+        /* Chain jump to a different part of the table. */
+        attr_offset = attr_map.z;
+      }
+    }
+    else {
+      attr_offset += ATTR_PRIM_TYPES;
+    }
+    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+  }
+
+  /* Return the attribute offset, or not-found when the entry has no element type. */
+  return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+}
+
+/* Transparent Shadows */
+
+/* Cut-off value to stop transparent shadow tracing when practically opaque. */
+#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f
+
+ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
+                                                               const int object,
+                                                               const int prim,
+                                                               const float u)
+{
+  /* Find attribute. */
+  const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY);
+  if (offset == ATTR_STD_NOT_FOUND) {
+    /* If no shadow transparency attribute, assume opaque. */
+    return 0.0f;
+  }
+
+  /* Linearly interpolate transparency between the two keys of the curve segment. */
+  const KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
+  const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type);
+  const int k1 = k0 + 1;
+
+  const float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
+  const float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
+
+  return (1.0f - u) * f0 + u * f1;
+}
+
 CCL_NAMESPACE_END
author	Howard Trickey <howard.trickey@gmail.com>	2021-10-24 15:31:22 +0300
committer	Howard Trickey <howard.trickey@gmail.com>	2021-10-24 15:31:22 +0300
commit	1aa953bd1913c81b22c80a00edbf4ad88a32c52f (patch)
tree	7fa65e43d5a9bac6496555b723f37e0031e2737e /intern/cycles/kernel/bvh/bvh_util.h
parent	fc171c1be9da36485e892339b86dc8d4251914af (diff)
parent	6ce383a9dfba5c49a48676c3a651804fde3dfe34 (diff)