Merge branch 'master' into temp-pbvh-split

author: Joseph Eagar <joeedh@gmail.com> 2022-10-15 09:22:01 +0300
committer: Joseph Eagar <joeedh@gmail.com> 2022-10-15 09:22:01 +0300
commit: aa1f2f243ddb7ed340856ddf97ec650407ad386b (patch)
tree: 471c95b234e7764ff7368e480308f21dc5bb0ca7 /source/blender/gpu
parent: 278a2137f9a5989f8e9ebb30bbfb761608f0de14 (diff)
parent: ebe9804cfa421b746148f3067797f16e7f460551 (diff)
13 files changed, 385 insertions, 9 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index e2285a3fd3e..f387a4588b6 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -332,6 +332,7 @@ set(GLSL_SRC
   shaders/compositor/compositor_alpha_crop.glsl
   shaders/compositor/compositor_bilateral_blur.glsl
   shaders/compositor/compositor_blur.glsl
+  shaders/compositor/compositor_blur_variable_size.glsl
   shaders/compositor/compositor_bokeh_image.glsl
   shaders/compositor/compositor_box_mask.glsl
   shaders/compositor/compositor_convert.glsl
@@ -346,6 +347,7 @@ set(GLSL_SRC
   shaders/compositor/compositor_morphological_distance_feather.glsl
   shaders/compositor/compositor_morphological_distance_threshold.glsl
   shaders/compositor/compositor_morphological_step.glsl
+  shaders/compositor/compositor_parallel_reduction.glsl
   shaders/compositor/compositor_projector_lens_distortion.glsl
   shaders/compositor/compositor_realize_on_domain.glsl
   shaders/compositor/compositor_screen_lens_distortion.glsl
@@ -612,6 +614,7 @@ set(SRC_SHADER_CREATE_INFOS
   shaders/compositor/infos/compositor_alpha_crop_info.hh
   shaders/compositor/infos/compositor_bilateral_blur_info.hh
   shaders/compositor/infos/compositor_blur_info.hh
+  shaders/compositor/infos/compositor_blur_variable_size_info.hh
   shaders/compositor/infos/compositor_bokeh_image_info.hh
   shaders/compositor/infos/compositor_box_mask_info.hh
   shaders/compositor/infos/compositor_convert_info.hh
@@ -626,6 +629,7 @@ set(SRC_SHADER_CREATE_INFOS
   shaders/compositor/infos/compositor_morphological_distance_info.hh
   shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh
   shaders/compositor/infos/compositor_morphological_step_info.hh
+  shaders/compositor/infos/compositor_parallel_reduction_info.hh
   shaders/compositor/infos/compositor_projector_lens_distortion_info.hh
   shaders/compositor/infos/compositor_realize_on_domain_info.hh
   shaders/compositor/infos/compositor_screen_lens_distortion_info.hh
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 31354585308..3dad2a1a19a 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -162,6 +162,7 @@ GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat,
                                    const char *name,
                                    bool use_dupli,
                                    uint32_t *r_hash);
+GPUNodeLink *GPU_layer_attribute(GPUMaterial *mat, const char *name);
 GPUNodeLink *GPU_image(GPUMaterial *mat,
                        struct Image *ima,
                        struct ImageUser *iuser,
@@ -357,6 +358,20 @@ struct GHash *GPU_uniform_attr_list_hash_new(const char *info);
 void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src);
 void GPU_uniform_attr_list_free(GPUUniformAttrList *set);
 
+typedef struct GPULayerAttr {
+  struct GPULayerAttr *next, *prev;
+
+  /* Name of the requested layer attribute; attributes in a graph are looked up by this name. */
+  char name[64]; /* MAX_CUSTOMDATA_LAYER_NAME */
+  /** Hash of `name`; code generation passes it to the shader to identify the attribute. */
+  uint32_t hash_code;
+
+  /* Number of references from the node graph; entries with no users are pruned from the list. */
+  int users;
+} GPULayerAttr;
+
+const ListBase *GPU_material_layer_attributes(const GPUMaterial *material);
+
 /* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and
  * linking the necessary GPU material nodes. */
 typedef void (*ConstructGPUMaterialFn)(void *thunk, GPUMaterial *material);
diff --git a/source/blender/gpu/GPU_uniform_buffer.h b/source/blender/gpu/GPU_uniform_buffer.h
index f78719d1963..28f06d6071d 100644
--- a/source/blender/gpu/GPU_uniform_buffer.h
+++ b/source/blender/gpu/GPU_uniform_buffer.h
@@ -44,6 +44,7 @@ void GPU_uniformbuf_unbind_all(void);
 
 #define GPU_UBO_BLOCK_NAME "node_tree"
 #define GPU_ATTRIBUTE_UBO_BLOCK_NAME "unf_attrs"
+#define GPU_LAYER_ATTRIBUTE_UBO_BLOCK_NAME "drw_layer_attrs"
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index b02d8a02704..4adeac1b49a 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -183,6 +183,8 @@ static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
       return stream << "var_attrs.v" << input->attr->id;
     case GPU_SOURCE_UNIFORM_ATTR:
       return stream << "unf_attrs[resource_id].attr" << input->uniform_attr->id;
+    case GPU_SOURCE_LAYER_ATTR:
+      return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")";
     case GPU_SOURCE_STRUCT:
       return stream << "strct" << input->id;
     case GPU_SOURCE_TEX:
@@ -432,6 +434,10 @@ void GPUCodegen::generate_resources()
     info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
   }
 
+  if (!BLI_listbase_is_empty(&graph.layer_attrs)) {
+    info.additional_info("draw_layer_attributes");
+  }
+
   info.typedef_source_generated = ss.str();
 }
 
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 0f9dc8be9c5..ca2a9f5cf28 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -291,6 +291,12 @@ const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *mat
   return attrs->count > 0 ? attrs : NULL;
 }
 
+const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
+{
+  const ListBase *layer_attrs = &material->graph.layer_attrs; /* NULL is returned when empty. */
+  return BLI_listbase_is_empty(layer_attrs) ? NULL : layer_attrs;
+}
+
 #if 1 /* End of life code. */
 /* Eevee Subsurface scattering. */
 /* Based on Separable SSS. by Jorge Jimenez and Diego Gutierrez */
diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index e1ae731d49c..c72e7097b33 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -83,6 +83,9 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const eGPUType
         case GPU_SOURCE_UNIFORM_ATTR:
           input->uniform_attr->users++;
           break;
+        case GPU_SOURCE_LAYER_ATTR:
+          input->layer_attr->users++;
+          break;
         case GPU_SOURCE_TEX:
         case GPU_SOURCE_TEX_TILED_MAPPING:
           input->texture->users++;
@@ -133,6 +136,10 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const eGPUType
       input->source = GPU_SOURCE_UNIFORM_ATTR;
       input->uniform_attr = link->uniform_attr;
       break;
+    case GPU_NODE_LINK_LAYER_ATTR:
+      input->source = GPU_SOURCE_LAYER_ATTR;
+      input->layer_attr = link->layer_attr;
+      break;
     case GPU_NODE_LINK_CONSTANT:
       input->source = (type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
       break;
@@ -430,6 +437,34 @@ static GPUUniformAttr *gpu_node_graph_add_uniform_attribute(GPUNodeGraph *graph,
   return attr;
 }
 
+/** Find the layer attribute with the given name, adding it to the graph if it was not requested
+ * before, and increment its user count. */
+static GPULayerAttr *gpu_node_graph_add_layer_attribute(GPUNodeGraph *graph, const char *name)
+{
+  ListBase *layer_attrs = &graph->layer_attrs;
+
+  /* Search the list for an attribute that was already requested under this name. */
+  GPULayerAttr *found = layer_attrs->first;
+  while (found != NULL && !STREQ(found->name, name)) {
+    found = found->next;
+  }
+
+  /* No match: allocate a zeroed entry, fill in its key and append it to the list. */
+  if (found == NULL) {
+    found = MEM_callocN(sizeof(*found), __func__);
+    STRNCPY(found->name, name);
+    found->hash_code = BLI_ghashutil_strhash_p(found->name);
+    BLI_addtail(layer_attrs, found);
+  }
+
+  /* Every request counts as one more user of the attribute. */
+  if (found != NULL) {
+    found->users++;
+  }
+
+  return found;
+}
+
 static GPUMaterialTexture *gpu_node_graph_add_texture(GPUNodeGraph *graph,
                                                       Image *ima,
                                                       ImageUser *iuser,
@@ -546,6 +581,17 @@ GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat,
   return link;
 }
 
+GPUNodeLink *GPU_layer_attribute(GPUMaterial *mat, const char *name)
+{
+  /* Register the attribute on the material's node graph, then build the link that refers to it. */
+  GPUNodeGraph *node_graph = gpu_material_node_graph(mat);
+  GPULayerAttr *layer_attr = gpu_node_graph_add_layer_attribute(node_graph, name);
+  GPUNodeLink *node_link = gpu_node_link_create();
+  node_link->link_type = GPU_NODE_LINK_LAYER_ATTR;
+  node_link->layer_attr = layer_attr;
+  return node_link;
+}
+
 GPUNodeLink *GPU_constant(const float *num)
 {
   GPUNodeLink *link = gpu_node_link_create();
@@ -767,14 +813,22 @@ static void gpu_inputs_free(ListBase *inputs)
   GPUInput *input;
 
   for (input = inputs->first; input; input = input->next) {
-    if (input->source == GPU_SOURCE_ATTR) {
-      input->attr->users--;
-    }
-    else if (input->source == GPU_SOURCE_UNIFORM_ATTR) {
-      input->uniform_attr->users--;
-    }
-    else if (ELEM(input->source, GPU_SOURCE_TEX, GPU_SOURCE_TEX_TILED_MAPPING)) {
-      input->texture->users--;
+    switch (input->source) {
+      case GPU_SOURCE_ATTR:
+        input->attr->users--;
+        break;
+      case GPU_SOURCE_UNIFORM_ATTR:
+        input->uniform_attr->users--;
+        break;
+      case GPU_SOURCE_LAYER_ATTR:
+        input->layer_attr->users--;
+        break;
+      case GPU_SOURCE_TEX:
+      case GPU_SOURCE_TEX_TILED_MAPPING:
+        input->texture->users--;
+        break;
+      default:
+        break;
     }
 
     if (input->link) {
@@ -826,6 +880,7 @@ void gpu_node_graph_free(GPUNodeGraph *graph)
   BLI_freelistN(&graph->textures);
   BLI_freelistN(&graph->attributes);
   GPU_uniform_attr_list_free(&graph->uniform_attrs);
+  BLI_freelistN(&graph->layer_attrs);
 
   if (graph->used_libraries) {
     BLI_gset_free(graph->used_libraries, NULL);
@@ -908,4 +963,10 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
       uattrs->count--;
     }
   }
+
+  LISTBASE_FOREACH_MUTABLE (GPULayerAttr *, attr, &graph->layer_attrs) {
+    if (attr->users == 0) {
+      BLI_freelinkN(&graph->layer_attrs, attr);
+    }
+  }
 }
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 7db22151f86..de0a0687b13 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -31,6 +31,7 @@ typedef enum eGPUDataSource {
   GPU_SOURCE_UNIFORM,
   GPU_SOURCE_ATTR,
   GPU_SOURCE_UNIFORM_ATTR,
+  GPU_SOURCE_LAYER_ATTR,
   GPU_SOURCE_STRUCT,
   GPU_SOURCE_TEX,
   GPU_SOURCE_TEX_TILED_MAPPING,
@@ -42,6 +43,7 @@ typedef enum {
   GPU_NODE_LINK_NONE = 0,
   GPU_NODE_LINK_ATTR,
   GPU_NODE_LINK_UNIFORM_ATTR,
+  GPU_NODE_LINK_LAYER_ATTR,
   GPU_NODE_LINK_COLORBAND,
   GPU_NODE_LINK_CONSTANT,
   GPU_NODE_LINK_IMAGE,
@@ -95,6 +97,8 @@ struct GPUNodeLink {
     struct GPUMaterialAttribute *attr;
     /* GPU_NODE_LINK_UNIFORM_ATTR */
     struct GPUUniformAttr *uniform_attr;
+    /* GPU_NODE_LINK_LAYER_ATTR */
+    struct GPULayerAttr *layer_attr;
     /* GPU_NODE_LINK_IMAGE_BLENDER */
     struct GPUMaterialTexture *texture;
     /* GPU_NODE_LINK_DIFFERENTIATE_FLOAT_FN */
@@ -131,6 +135,8 @@ typedef struct GPUInput {
     struct GPUMaterialAttribute *attr;
     /* GPU_SOURCE_UNIFORM_ATTR */
     struct GPUUniformAttr *uniform_attr;
+    /* GPU_SOURCE_LAYER_ATTR */
+    struct GPULayerAttr *layer_attr;
     /* GPU_SOURCE_FUNCTION_CALL */
     char function_call[64];
   };
@@ -171,6 +177,9 @@ typedef struct GPUNodeGraph {
   /* The list of uniform attributes. */
   GPUUniformAttrList uniform_attrs;
 
+  /* The list of layer attributes. */
+  ListBase layer_attrs;
+
   /** Set of all the GLSL lib code blocks . */
   GSet *used_libraries;
 } GPUNodeGraph;
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
index aaebe7e20f8..4b63a3b1ce2 100644
--- a/source/blender/gpu/metal/mtl_immediate.mm
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -125,7 +125,7 @@ void MTLImmediate::end()
      * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
     for (int i = 0; i < interface->get_total_attributes(); i++) {
 
-      /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as
+      /* NOTE: Attribute in VERTEX FORMAT does not necessarily share the same array index as
        * attributes in shader interface. */
       GPUVertAttr *attr = nullptr;
       const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
diff --git a/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl b/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl
new file mode 100644
index 00000000000..e7e5aac12a5
--- /dev/null
+++ b/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl
@@ -0,0 +1,60 @@
+#pragma BLENDER_REQUIRE(gpu_shader_common_math_utils.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+/* Given the texel in the range [-radius, radius] in both axes, load the appropriate weight from
+ * the weights texture, where the texel (0, 0) is considered the center of the weights texture. */
+vec4 load_weight(ivec2 texel, float radius)
+{
+  /* The center zero texel is always assigned a unit weight regardless of the corresponding weight
+   * in the weights texture. That's to guarantee that at least the center pixel will be accumulated
+   * even if the weights texture is zero at its center. */
+  if (texel == ivec2(0)) {
+    return vec4(1.0);
+  }
+
+  /* Add the radius to transform the texel into the range [0, radius * 2], then divide by the upper
+   * bound plus one to transform the texel into the normalized range [0, 1] needed to sample the
+   * weights sampler. Finally, also add 0.5 to sample at the center of the pixels. */
+  return texture(weights_tx, (texel + vec2(radius + 0.5)) / (radius * 2 + 1));
+}
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* The mask input is treated as a boolean. If it is zero, then no blurring happens for this
+   * pixel. Otherwise, the pixel is blurred normally and the mask value is irrelevant. */
+  float mask = texture_load(mask_tx, texel).x;
+  if (mask == 0.0) {
+    imageStore(output_img, texel, texture_load(input_tx, texel));
+    return;
+  }
+
+  float center_size = texture_load(size_tx, texel).x * base_size;
+
+  /* Go over the window of the given search radius and accumulate the colors multiplied by their
+   * respective weights as well as the weights themselves, but only if both the size of the center
+   * pixel and the size of the candidate pixel are at least as large as both the x and y distances
+   * of the candidate pixel. */
+  vec4 accumulated_color = vec4(0.0);
+  vec4 accumulated_weight = vec4(0.0);
+  for (int y = -search_radius; y <= search_radius; y++) {
+    for (int x = -search_radius; x <= search_radius; x++) {
+      float candidate_size = texture_load(size_tx, texel + ivec2(x, y)).x * base_size;
+
+      /* Skip accumulation if either the x or y distances of the candidate pixel are larger than
+       * either the center or candidate pixel size. Note that the max and min functions here denote
+       * "either" in the aforementioned description. */
+      float size = min(center_size, candidate_size);
+      if (max(abs(x), abs(y)) > size) {
+        continue;
+      }
+
+      vec4 weight = load_weight(ivec2(x, y), size);
+      accumulated_color += texture_load(input_tx, texel + ivec2(x, y)) * weight;
+      accumulated_weight += weight;
+    }
+  }
+
+  imageStore(output_img, texel, safe_divide(accumulated_color, accumulated_weight));
+}
diff --git a/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl b/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl
new file mode 100644
index 00000000000..f6f84aa24c1
--- /dev/null
+++ b/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl
@@ -0,0 +1,98 @@
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+/* This shader reduces the given texture into a smaller texture of a size equal to the number of
+ * work groups. In particular, each work group reduces its contents into a single value and writes
+ * that value to a single pixel in the output image. The shader can be dispatched multiple times to
+ * eventually reduce the image into a single pixel.
+ *
+ * The shader works by loading the whole data of each work group into a linear array, then it
+ * reduces the second half of the array onto the first half of the array, then it reduces the
+ * second quarter of the array onto the first quarter of the array, and so on until only one
+ * element remains. The following figure illustrates the process for sum reduction on 8 elements.
+ *
+ *     .---. .---. .---. .---. .---. .---. .---. .---.
+ *     | 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 |  Original data.
+ *     '---' '---' '---' '---' '---' '---' '---' '---'
+ *       |.____|_____|_____|_____|     |     |     |
+ *       ||    |.____|_____|___________|     |     |
+ *       ||    ||    |.____|_________________|     |
+ *       ||    ||    ||    |.______________________|  <--First reduction. Stride = 4.
+ *       ||    ||    ||    ||
+ *     .---. .---. .---. .----.
+ *     | 4 | | 6 | | 8 | | 10 |                       <--Data after first reduction.
+ *     '---' '---' '---' '----'
+ *       |.____|_____|     |
+ *       ||    |.__________|                          <--Second reduction. Stride = 2.
+ *       ||    ||
+ *     .----. .----.
+ *     | 12 | | 16 |                                  <--Data after second reduction.
+ *     '----' '----'
+ *       |.____|
+ *       ||                                           <--Third reduction. Stride = 1.
+ *     .----.
+ *     | 28 |
+ *     '----'                                         <--Data after third reduction.
+ *
+ *
+ * The shader is generic enough to implement many types of reductions. This is done by using macros
+ * that the developer should define to implement a certain reduction operation. Those include,
+ * TYPE, IDENTITY, INITIALIZE, LOAD, and REDUCE. See the implementation below for more information
+ * as well as compositor_parallel_reduction_info.hh for example reduction operations. */
+
+/* Doing the reduction in shared memory is faster, so create a shared array where the whole data
+ * of the work group will be loaded and reduced. The 2D structure of the work group is irrelevant
+ * for reduction, so we just load the data in a 1D array to simplify reduction. The developer is
+ * expected to define the TYPE macro to be a float or a vec4, depending on the type of data being
+ * reduced. */
+const uint reduction_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y;
+shared TYPE reduction_data[reduction_size];
+
+void main()
+{
+  /* Load the data from the texture, while returning IDENTITY for out of bound coordinates. The
+   * developer is expected to define the IDENTITY macro to be a vec4 that does not affect the
+   * output of the reduction. For instance, sum reductions have an identity of vec4(0.0), while
+   * max value reductions have an identity of vec4(-FLT_MAX), the lowest finite value. */
+  vec4 value = texture_load(input_tx, ivec2(gl_GlobalInvocationID.xy), IDENTITY);
+
+  /* Initialize the shared array given the previously loaded value. This step can be different
+   * depending on whether this is the initial reduction pass or a later one. Indeed, the input
+   * texture for the initial reduction is the source texture itself, while the input texture to a
+   * later reduction pass is an intermediate texture after one or more reductions have happened.
+   * This is significant because the data being reduced might be computed from the original data
+   * and different from it, for instance, when summing the luminance of an image, the original data
+   * is a vec4 color, while the reduced data is a float luminance value. So for the initial
+   * reduction pass, the luminance will be computed from the color, reduced, then stored into an
+   * intermediate float texture. On the other hand, for later reduction passes, the luminance will
+   * be loaded directly and reduced without extra processing. So the developer is expected to
+   * define the INITIALIZE and LOAD macros to be expressions that derive the needed value from the
+   * loaded value for the initial reduction pass and later ones respectively. */
+  reduction_data[gl_LocalInvocationIndex] = is_initial_reduction ? INITIALIZE(value) : LOAD(value);
+
+  /* Reduce the reduction data by half on every iteration until only one element remains. See the
+   * above figure for an intuitive understanding of the stride value. */
+  for (uint stride = reduction_size / 2; stride > 0; stride /= 2) {
+    barrier();
+
+    /* Only the threads up to the current stride should be active as can be seen in the diagram
+     * above. */
+    if (gl_LocalInvocationIndex >= stride) {
+      continue;
+    }
+
+    /* Reduce each two elements that are stride apart, writing the result to the element with the
+     * lower index, as can be seen in the diagram above. The developer is expected to define the
+     * REDUCE macro to be a commutative and associative binary operator suitable for parallel
+     * reduction. */
+    reduction_data[gl_LocalInvocationIndex] = REDUCE(
+        reduction_data[gl_LocalInvocationIndex], reduction_data[gl_LocalInvocationIndex + stride]);
+  }
+
+  /* Finally, the result of the reduction is available as the first element in the reduction data,
+   * write it to the pixel corresponding to the work group, making sure that only one thread writes
+   * it. */
+  barrier();
+  if (gl_LocalInvocationIndex == 0) {
+    imageStore(output_img, ivec2(gl_WorkGroupID.xy), vec4(reduction_data[0]));
+  }
+}
diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh
new file mode 100644
index 00000000000..05b6385fd1e
--- /dev/null
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_blur_variable_size)
+    .local_group_size(16, 16)
+    .push_constant(Type::FLOAT, "base_size")
+    .push_constant(Type::INT, "search_radius")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .sampler(1, ImageType::FLOAT_2D, "weights_tx")
+    .sampler(2, ImageType::FLOAT_2D, "size_tx")
+    .sampler(3, ImageType::FLOAT_2D, "mask_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_blur_variable_size.glsl")
+    .do_static_compilation(true);
diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh
new file mode 100644
index 00000000000..2e661f280af
--- /dev/null
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_shared)
+    .local_group_size(16, 16)
+    .push_constant(Type::BOOL, "is_initial_reduction")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .compute_source("compositor_parallel_reduction.glsl");
+
+/* --------------------------------------------------------------------
+ * Sum Reductions.
+ */
+
+GPU_SHADER_CREATE_INFO(compositor_sum_float_shared)
+    .additional_info("compositor_parallel_reduction_shared")
+    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .define("TYPE", "float")
+    .define("IDENTITY", "vec4(0.0)")
+    .define("LOAD(value)", "value.x")
+    .define("REDUCE(lhs, rhs)", "lhs + rhs");
+
+GPU_SHADER_CREATE_INFO(compositor_sum_red)
+    .additional_info("compositor_sum_float_shared")
+    .define("INITIALIZE(value)", "value.r")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_green)
+    .additional_info("compositor_sum_float_shared")
+    .define("INITIALIZE(value)", "value.g")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_blue)
+    .additional_info("compositor_sum_float_shared")
+    .define("INITIALIZE(value)", "value.b")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_luminance)
+    .additional_info("compositor_sum_float_shared")
+    .push_constant(Type::VEC3, "luminance_coefficients")
+    .define("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)")
+    .do_static_compilation(true);
+
+/* --------------------------------------------------------------------
+ * Sum Of Squared Difference Reductions. abs() keeps the pow() base non-negative as GLSL requires.
+ * IDENTITY is zero after the initial pass because LOAD sums values without subtracting. */
+
+GPU_SHADER_CREATE_INFO(compositor_sum_squared_difference_float_shared)
+    .additional_info("compositor_parallel_reduction_shared")
+    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .push_constant(Type::FLOAT, "subtrahend")
+    .define("TYPE", "float")
+    .define("IDENTITY", "vec4(is_initial_reduction ? subtrahend : 0.0)")
+    .define("LOAD(value)", "value.x")
+    .define("REDUCE(lhs, rhs)", "lhs + rhs");
+
+GPU_SHADER_CREATE_INFO(compositor_sum_red_squared_difference)
+    .additional_info("compositor_sum_squared_difference_float_shared")
+    .define("INITIALIZE(value)", "pow(abs(value.r - subtrahend), 2.0)")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_green_squared_difference)
+    .additional_info("compositor_sum_squared_difference_float_shared")
+    .define("INITIALIZE(value)", "pow(abs(value.g - subtrahend), 2.0)")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_blue_squared_difference)
+    .additional_info("compositor_sum_squared_difference_float_shared")
+    .define("INITIALIZE(value)", "pow(abs(value.b - subtrahend), 2.0)")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_sum_luminance_squared_difference)
+    .additional_info("compositor_sum_squared_difference_float_shared")
+    .push_constant(Type::VEC3, "luminance_coefficients")
+    .define("INITIALIZE(value)", "pow(abs(dot(value.rgb, luminance_coefficients) - subtrahend), 2.0)")
+    .do_static_compilation(true);
diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl
index bacf089deb1..8d0016a2206 100644
--- a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl
+++ b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl
@@ -29,6 +29,31 @@ void node_attribute_uniform(vec4 attr, const float attr_hash, out vec4 out_attr)
   out_attr = attr_load_uniform(attr, floatBitsToUint(attr_hash));
 }
 
+vec4 attr_load_layer(const uint attr_hash)
+{
+#ifdef VLATTR_LIB
+  /* Binary search (assumes records sorted by hash_code). The first record stores the length. */
+  uint left = 0, right = drw_layer_attrs[0].buffer_length;
+
+  while (left < right) {
+    uint mid = (left + right) / 2;
+    uint hash = drw_layer_attrs[mid].hash_code;
+
+    if (hash < attr_hash) {
+      left = mid + 1;
+    }
+    else if (hash > attr_hash) {
+      right = mid;
+    }
+    else {
+      return drw_layer_attrs[mid].data;
+    }
+  }
+#endif
+
+  return vec4(0.0);
+}
+
 void node_attribute(
     vec4 attr, out vec4 outcol, out vec3 outvec, out float outf, out float outalpha)
 {
author	Joseph Eagar <joeedh@gmail.com>	2022-10-15 09:22:01 +0300
committer	Joseph Eagar <joeedh@gmail.com>	2022-10-15 09:22:01 +0300
commit	aa1f2f243ddb7ed340856ddf97ec650407ad386b (patch)
tree	471c95b234e7764ff7368e480308f21dc5bb0ca7 /source/blender/gpu
parent	278a2137f9a5989f8e9ebb30bbfb761608f0de14 (diff)
parent	ebe9804cfa421b746148f3067797f16e7f460551 (diff)