diff options
author | Joseph Eagar <joeedh@gmail.com> | 2022-10-15 09:22:01 +0300 |
---|---|---|
committer | Joseph Eagar <joeedh@gmail.com> | 2022-10-15 09:22:01 +0300 |
commit | aa1f2f243ddb7ed340856ddf97ec650407ad386b (patch) | |
tree | 471c95b234e7764ff7368e480308f21dc5bb0ca7 /source/blender/gpu | |
parent | 278a2137f9a5989f8e9ebb30bbfb761608f0de14 (diff) | |
parent | ebe9804cfa421b746148f3067797f16e7f460551 (diff) |
Merge branch 'master' into temp-pbvh-split
Diffstat (limited to 'source/blender/gpu')
13 files changed, 385 insertions, 9 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index e2285a3fd3e..f387a4588b6 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -332,6 +332,7 @@ set(GLSL_SRC shaders/compositor/compositor_alpha_crop.glsl shaders/compositor/compositor_bilateral_blur.glsl shaders/compositor/compositor_blur.glsl + shaders/compositor/compositor_blur_variable_size.glsl shaders/compositor/compositor_bokeh_image.glsl shaders/compositor/compositor_box_mask.glsl shaders/compositor/compositor_convert.glsl @@ -346,6 +347,7 @@ set(GLSL_SRC shaders/compositor/compositor_morphological_distance_feather.glsl shaders/compositor/compositor_morphological_distance_threshold.glsl shaders/compositor/compositor_morphological_step.glsl + shaders/compositor/compositor_parallel_reduction.glsl shaders/compositor/compositor_projector_lens_distortion.glsl shaders/compositor/compositor_realize_on_domain.glsl shaders/compositor/compositor_screen_lens_distortion.glsl @@ -612,6 +614,7 @@ set(SRC_SHADER_CREATE_INFOS shaders/compositor/infos/compositor_alpha_crop_info.hh shaders/compositor/infos/compositor_bilateral_blur_info.hh shaders/compositor/infos/compositor_blur_info.hh + shaders/compositor/infos/compositor_blur_variable_size_info.hh shaders/compositor/infos/compositor_bokeh_image_info.hh shaders/compositor/infos/compositor_box_mask_info.hh shaders/compositor/infos/compositor_convert_info.hh @@ -626,6 +629,7 @@ set(SRC_SHADER_CREATE_INFOS shaders/compositor/infos/compositor_morphological_distance_info.hh shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh shaders/compositor/infos/compositor_morphological_step_info.hh + shaders/compositor/infos/compositor_parallel_reduction_info.hh shaders/compositor/infos/compositor_projector_lens_distortion_info.hh shaders/compositor/infos/compositor_realize_on_domain_info.hh shaders/compositor/infos/compositor_screen_lens_distortion_info.hh diff --git 
a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h index 31354585308..3dad2a1a19a 100644 --- a/source/blender/gpu/GPU_material.h +++ b/source/blender/gpu/GPU_material.h @@ -162,6 +162,7 @@ GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, const char *name, bool use_dupli, uint32_t *r_hash); +GPUNodeLink *GPU_layer_attribute(GPUMaterial *mat, const char *name); GPUNodeLink *GPU_image(GPUMaterial *mat, struct Image *ima, struct ImageUser *iuser, @@ -357,6 +358,20 @@ struct GHash *GPU_uniform_attr_list_hash_new(const char *info); void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src); void GPU_uniform_attr_list_free(GPUUniformAttrList *set); +typedef struct GPULayerAttr { + struct GPULayerAttr *next, *prev; + + /* Meaningful part of the attribute set key. */ + char name[64]; /* MAX_CUSTOMDATA_LAYER_NAME */ + /** Hash of name[64]. */ + uint32_t hash_code; + + /* Helper fields used by code generation. */ + int users; +} GPULayerAttr; + +const ListBase *GPU_material_layer_attributes(const GPUMaterial *material); + /* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and * linking the necessary GPU material nodes. 
*/ typedef void (*ConstructGPUMaterialFn)(void *thunk, GPUMaterial *material); diff --git a/source/blender/gpu/GPU_uniform_buffer.h b/source/blender/gpu/GPU_uniform_buffer.h index f78719d1963..28f06d6071d 100644 --- a/source/blender/gpu/GPU_uniform_buffer.h +++ b/source/blender/gpu/GPU_uniform_buffer.h @@ -44,6 +44,7 @@ void GPU_uniformbuf_unbind_all(void); #define GPU_UBO_BLOCK_NAME "node_tree" #define GPU_ATTRIBUTE_UBO_BLOCK_NAME "unf_attrs" +#define GPU_LAYER_ATTRIBUTE_UBO_BLOCK_NAME "drw_layer_attrs" #ifdef __cplusplus } diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc index b02d8a02704..4adeac1b49a 100644 --- a/source/blender/gpu/intern/gpu_codegen.cc +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -183,6 +183,8 @@ static std::ostream &operator<<(std::ostream &stream, const GPUInput *input) return stream << "var_attrs.v" << input->attr->id; case GPU_SOURCE_UNIFORM_ATTR: return stream << "unf_attrs[resource_id].attr" << input->uniform_attr->id; + case GPU_SOURCE_LAYER_ATTR: + return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")"; case GPU_SOURCE_STRUCT: return stream << "strct" << input->id; case GPU_SOURCE_TEX: @@ -432,6 +434,10 @@ void GPUCodegen::generate_resources() info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH); } + if (!BLI_listbase_is_empty(&graph.layer_attrs)) { + info.additional_info("draw_layer_attributes"); + } + info.typedef_source_generated = ss.str(); } diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index 0f9dc8be9c5..ca2a9f5cf28 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -291,6 +291,12 @@ const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *mat return attrs->count > 0 ? 
attrs : NULL; } +const ListBase *GPU_material_layer_attributes(const GPUMaterial *material) +{ + const ListBase *attrs = &material->graph.layer_attrs; + return !BLI_listbase_is_empty(attrs) ? attrs : NULL; +} + #if 1 /* End of life code. */ /* Eevee Subsurface scattering. */ /* Based on Separable SSS. by Jorge Jimenez and Diego Gutierrez */ diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c index e1ae731d49c..c72e7097b33 100644 --- a/source/blender/gpu/intern/gpu_node_graph.c +++ b/source/blender/gpu/intern/gpu_node_graph.c @@ -83,6 +83,9 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const eGPUType case GPU_SOURCE_UNIFORM_ATTR: input->uniform_attr->users++; break; + case GPU_SOURCE_LAYER_ATTR: + input->layer_attr->users++; + break; case GPU_SOURCE_TEX: case GPU_SOURCE_TEX_TILED_MAPPING: input->texture->users++; @@ -133,6 +136,10 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const eGPUType input->source = GPU_SOURCE_UNIFORM_ATTR; input->uniform_attr = link->uniform_attr; break; + case GPU_NODE_LINK_LAYER_ATTR: + input->source = GPU_SOURCE_LAYER_ATTR; + input->layer_attr = link->layer_attr; + break; case GPU_NODE_LINK_CONSTANT: input->source = (type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT; break; @@ -430,6 +437,34 @@ static GPUUniformAttr *gpu_node_graph_add_uniform_attribute(GPUNodeGraph *graph, return attr; } +/** Find the layer attribute with the given name, adding it to the graph if it does not exist yet, and register one more user of it. */ +static GPULayerAttr *gpu_node_graph_add_layer_attribute(GPUNodeGraph *graph, const char *name) +{ + /* Find existing attribute. */ + ListBase *attrs = &graph->layer_attrs; + GPULayerAttr *attr = attrs->first; + + for (; attr; attr = attr->next) { + if (STREQ(attr->name, name)) { + break; + } + } + + /* Add new requested attribute to the list. 
*/ + if (attr == NULL) { + attr = MEM_callocN(sizeof(*attr), __func__); + STRNCPY(attr->name, name); + attr->hash_code = BLI_ghashutil_strhash_p(attr->name); + BLI_addtail(attrs, attr); + } + + if (attr != NULL) { + attr->users++; + } + + return attr; +} + static GPUMaterialTexture *gpu_node_graph_add_texture(GPUNodeGraph *graph, Image *ima, ImageUser *iuser, @@ -546,6 +581,17 @@ GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, return link; } +GPUNodeLink *GPU_layer_attribute(GPUMaterial *mat, const char *name) +{ + GPUNodeGraph *graph = gpu_material_node_graph(mat); + GPULayerAttr *attr = gpu_node_graph_add_layer_attribute(graph, name); + + GPUNodeLink *link = gpu_node_link_create(); + link->link_type = GPU_NODE_LINK_LAYER_ATTR; + link->layer_attr = attr; + return link; +} + GPUNodeLink *GPU_constant(const float *num) { GPUNodeLink *link = gpu_node_link_create(); @@ -767,14 +813,22 @@ static void gpu_inputs_free(ListBase *inputs) GPUInput *input; for (input = inputs->first; input; input = input->next) { - if (input->source == GPU_SOURCE_ATTR) { - input->attr->users--; - } - else if (input->source == GPU_SOURCE_UNIFORM_ATTR) { - input->uniform_attr->users--; - } - else if (ELEM(input->source, GPU_SOURCE_TEX, GPU_SOURCE_TEX_TILED_MAPPING)) { - input->texture->users--; + switch (input->source) { + case GPU_SOURCE_ATTR: + input->attr->users--; + break; + case GPU_SOURCE_UNIFORM_ATTR: + input->uniform_attr->users--; + break; + case GPU_SOURCE_LAYER_ATTR: + input->layer_attr->users--; + break; + case GPU_SOURCE_TEX: + case GPU_SOURCE_TEX_TILED_MAPPING: + input->texture->users--; + break; + default: + break; } if (input->link) { @@ -826,6 +880,7 @@ void gpu_node_graph_free(GPUNodeGraph *graph) BLI_freelistN(&graph->textures); BLI_freelistN(&graph->attributes); GPU_uniform_attr_list_free(&graph->uniform_attrs); + BLI_freelistN(&graph->layer_attrs); if (graph->used_libraries) { BLI_gset_free(graph->used_libraries, NULL); @@ -908,4 +963,10 @@ void 
gpu_node_graph_prune_unused(GPUNodeGraph *graph) uattrs->count--; } } + + LISTBASE_FOREACH_MUTABLE (GPULayerAttr *, attr, &graph->layer_attrs) { + if (attr->users == 0) { + BLI_freelinkN(&graph->layer_attrs, attr); + } + } } diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h index 7db22151f86..de0a0687b13 100644 --- a/source/blender/gpu/intern/gpu_node_graph.h +++ b/source/blender/gpu/intern/gpu_node_graph.h @@ -31,6 +31,7 @@ typedef enum eGPUDataSource { GPU_SOURCE_UNIFORM, GPU_SOURCE_ATTR, GPU_SOURCE_UNIFORM_ATTR, + GPU_SOURCE_LAYER_ATTR, GPU_SOURCE_STRUCT, GPU_SOURCE_TEX, GPU_SOURCE_TEX_TILED_MAPPING, @@ -42,6 +43,7 @@ typedef enum { GPU_NODE_LINK_NONE = 0, GPU_NODE_LINK_ATTR, GPU_NODE_LINK_UNIFORM_ATTR, + GPU_NODE_LINK_LAYER_ATTR, GPU_NODE_LINK_COLORBAND, GPU_NODE_LINK_CONSTANT, GPU_NODE_LINK_IMAGE, @@ -95,6 +97,8 @@ struct GPUNodeLink { struct GPUMaterialAttribute *attr; /* GPU_NODE_LINK_UNIFORM_ATTR */ struct GPUUniformAttr *uniform_attr; + /* GPU_NODE_LINK_LAYER_ATTR */ + struct GPULayerAttr *layer_attr; /* GPU_NODE_LINK_IMAGE_BLENDER */ struct GPUMaterialTexture *texture; /* GPU_NODE_LINK_DIFFERENTIATE_FLOAT_FN */ @@ -131,6 +135,8 @@ typedef struct GPUInput { struct GPUMaterialAttribute *attr; /* GPU_SOURCE_UNIFORM_ATTR */ struct GPUUniformAttr *uniform_attr; + /* GPU_SOURCE_LAYER_ATTR */ + struct GPULayerAttr *layer_attr; /* GPU_SOURCE_FUNCTION_CALL */ char function_call[64]; }; @@ -171,6 +177,9 @@ typedef struct GPUNodeGraph { /* The list of uniform attributes. */ GPUUniformAttrList uniform_attrs; + /* The list of layer attributes. */ + ListBase layer_attrs; + /** Set of all the GLSL lib code blocks . 
*/ GSet *used_libraries; } GPUNodeGraph; diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm index aaebe7e20f8..4b63a3b1ce2 100644 --- a/source/blender/gpu/metal/mtl_immediate.mm +++ b/source/blender/gpu/metal/mtl_immediate.mm @@ -125,7 +125,7 @@ void MTLImmediate::end() * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */ for (int i = 0; i < interface->get_total_attributes(); i++) { - /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as + /* NOTE: Attribute in VERTEX FORMAT does not necessarily share the same array index as * attributes in shader interface. */ GPUVertAttr *attr = nullptr; const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i); diff --git a/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl b/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl new file mode 100644 index 00000000000..e7e5aac12a5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_blur_variable_size.glsl @@ -0,0 +1,60 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_math_utils.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* Given the texel in the range [-radius, radius] in both axes, load the appropriate weight from + * the weights texture, where the texel (0, 0) is considered the center of weights texture. */ +vec4 load_weight(ivec2 texel, float radius) +{ + /* The center zero texel is always assigned a unit weight regardless of the corresponding weight + * in the weights texture. That's to guarantee that at least the center pixel will be accumulated + * even if the weights texture is zero at its center. 
*/ + if (texel == ivec2(0)) { + return vec4(1.0); + } + + /* Add the radius to transform the texel into the range [0, radius * 2], then divide by the upper + * bound plus one to transform the texel into the normalized range [0, 1] needed to sample the + * weights sampler. Finally, also add 0.5 to sample at the center of the pixels. */ + return texture(weights_tx, (texel + vec2(radius + 0.5)) / (radius * 2 + 1)); +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* The mask input is treated as a boolean. If it is zero, then no blurring happens for this + * pixel. Otherwise, the pixel is blurred normally and the mask value is irrelevant. */ + float mask = texture_load(mask_tx, texel).x; + if (mask == 0.0) { + imageStore(output_img, texel, texture_load(input_tx, texel)); + return; + } + + float center_size = texture_load(size_tx, texel).x * base_size; + + /* Go over the window of the given search radius and accumulate the colors multiplied by their + * respective weights as well as the weights themselves, but only if both the size of the center + * pixel and the size of the candidate pixel are less than both the x and y distances of the + * candidate pixel. */ + vec4 accumulated_color = vec4(0.0); + vec4 accumulated_weight = vec4(0.0); + for (int y = -search_radius; y <= search_radius; y++) { + for (int x = -search_radius; x <= search_radius; x++) { + float candidate_size = texture_load(size_tx, texel + ivec2(x, y)).x * base_size; + + /* Skip accumulation if either the x or y distances of the candidate pixel are larger than + * either the center or candidate pixel size. Note that the max and min functions here denote + * "either" in the aforementioned description. 
*/ + float size = min(center_size, candidate_size); + if (max(abs(x), abs(y)) > size) { + continue; + } + + vec4 weight = load_weight(ivec2(x, y), size); + accumulated_color += texture_load(input_tx, texel + ivec2(x, y)) * weight; + accumulated_weight += weight; + } + } + + imageStore(output_img, texel, safe_divide(accumulated_color, accumulated_weight)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl b/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl new file mode 100644 index 00000000000..f6f84aa24c1 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_parallel_reduction.glsl @@ -0,0 +1,98 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* This shader reduces the given texture into a smaller texture of a size equal to the number of + * work groups. In particular, each work group reduces its contents into a single value and writes + * that value to a single pixel in the output image. The shader can be dispatched multiple times to + * eventually reduce the image into a single pixel. + * + * The shader works by loading the whole data of each work group into a linear array, then it + * reduces the second half of the array onto the first half of the array, then it reduces the + * second quarter of the array onto the first quarter of the array, and so on until only one + * element remains. The following figure illustrates the process for sum reduction on 8 elements. + * + * .---. .---. .---. .---. .---. .---. .---. .---. + * | 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 | Original data. + * '---' '---' '---' '---' '---' '---' '---' '---' + * |.____|_____|_____|_____| | | | + * || |.____|_____|___________| | | + * || || |.____|_________________| | + * || || || |.______________________| <--First reduction. Stride = 4. + * || || || || + * .---. .---. .---. .----. + * | 4 | | 6 | | 8 | | 10 | <--Data after first reduction. 
+ * '---' '---' '---' '----' + * |.____|_____| | + * || |.__________| <--Second reduction. Stride = 2. + * || || + * .----. .----. + * | 12 | | 16 | <--Data after second reduction. + * '----' '----' + * |.____| + * || <--Third reduction. Stride = 1. + * .----. + * | 28 | + * '----' <--Data after third reduction. + * + * + * The shader is generic enough to implement many types of reductions. This is done by using macros + * that the developer should define to implement a certain reduction operation. Those include, + * TYPE, IDENTITY, INITIALIZE, LOAD, and REDUCE. See the implementation below for more information + * as well as the compositor_parallel_reduction_info.hh for example reductions operations. */ + +/* Doing the reduction in shared memory is faster, so create a shared array where the whole data + * of the work group will be loaded and reduced. The 2D structure of the work group is irrelevant + * for reduction, so we just load the data in a 1D array to simplify reduction. The developer is + * expected to define the TYPE macro to be a float or a vec4, depending on the type of data being + * reduced. */ +const uint reduction_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; +shared TYPE reduction_data[reduction_size]; + +void main() +{ + /* Load the data from the texture, while returning IDENTITY for out of bound coordinates. The + * developer is expected to define the IDENTITY macro to be a vec4 that does not affect the + * output of the reduction. For instance, sum reductions have an identity of vec4(0.0), while + * max value reductions have an identity of vec4(FLT_MIN). */ + vec4 value = texture_load(input_tx, ivec2(gl_GlobalInvocationID.xy), IDENTITY); + + /* Initialize the shared array given the previously loaded value. This step can be different + * depending on whether this is the initial reduction pass or a latter one. 
Indeed, the input + * texture for the initial reduction is the source texture itself, while the input texture to a + * latter reduction pass is an intermediate texture after one or more reductions have happened. + * This is significant because the data being reduced might be computed from the original data + * and different from it, for instance, when summing the luminance of an image, the original data + * is a vec4 color, while the reduced data is a float luminance value. So for the initial + * reduction pass, the luminance will be computed from the color, reduced, then stored into an + * intermediate float texture. On the other hand, for latter reduction passes, the luminance will + * be loaded directly and reduced without extra processing. So the developer is expected to + * define the INITIALIZE and LOAD macros to be expressions that derive the needed value from the + * loaded value for the initial reduction pass and latter ones respectively. */ + reduction_data[gl_LocalInvocationIndex] = is_initial_reduction ? INITIALIZE(value) : LOAD(value); + + /* Reduce the reduction data by half on every iteration until only one element remains. See the + * above figure for an intuitive understanding of the stride value. */ + for (uint stride = reduction_size / 2; stride > 0; stride /= 2) { + barrier(); + + /* Only the threads up to the current stride should be active as can be seen in the diagram + * above. */ + if (gl_LocalInvocationIndex >= stride) { + continue; + } + + /* Reduce each two elements that are stride apart, writing the result to the element with the + * lower index, as can be seen in the diagram above. The developer is expected to define the + * REDUCE macro to be a commutative and associative binary operator suitable for parallel + * reduction. 
*/ + reduction_data[gl_LocalInvocationIndex] = REDUCE( + reduction_data[gl_LocalInvocationIndex], reduction_data[gl_LocalInvocationIndex + stride]); + } + + /* Finally, the result of the reduction is available as the first element in the reduction data, + * write it to the pixel corresponding to the work group, making sure only the one thread writes + * it. */ + barrier(); + if (gl_LocalInvocationIndex == 0) { + imageStore(output_img, ivec2(gl_WorkGroupID.xy), vec4(reduction_data[0])); + } +} diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh new file mode 100644 index 00000000000..05b6385fd1e --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_blur_variable_size_info.hh @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_blur_variable_size) + .local_group_size(16, 16) + .push_constant(Type::FLOAT, "base_size") + .push_constant(Type::INT, "search_radius") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_2D, "weights_tx") + .sampler(2, ImageType::FLOAT_2D, "size_tx") + .sampler(3, ImageType::FLOAT_2D, "mask_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_blur_variable_size.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh new file mode 100644 index 00000000000..2e661f280af --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_parallel_reduction_info.hh @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_shared) + .local_group_size(16, 16) + 
.push_constant(Type::BOOL, "is_initial_reduction") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .compute_source("compositor_parallel_reduction.glsl"); + +/* -------------------------------------------------------------------- + * Sum Reductions. + */ + +GPU_SHADER_CREATE_INFO(compositor_sum_float_shared) + .additional_info("compositor_parallel_reduction_shared") + .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("TYPE", "float") + .define("IDENTITY", "vec4(0.0)") + .define("LOAD(value)", "value.x") + .define("REDUCE(lhs, rhs)", "lhs + rhs"); + +GPU_SHADER_CREATE_INFO(compositor_sum_red) + .additional_info("compositor_sum_float_shared") + .define("INITIALIZE(value)", "value.r") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_green) + .additional_info("compositor_sum_float_shared") + .define("INITIALIZE(value)", "value.g") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_blue) + .additional_info("compositor_sum_float_shared") + .define("INITIALIZE(value)", "value.b") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_luminance) + .additional_info("compositor_sum_float_shared") + .push_constant(Type::VEC3, "luminance_coefficients") + .define("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)") + .do_static_compilation(true); + +/* -------------------------------------------------------------------- + * Sum Of Squared Difference Reductions. 
+ */ + +GPU_SHADER_CREATE_INFO(compositor_sum_squared_difference_float_shared) + .additional_info("compositor_parallel_reduction_shared") + .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .push_constant(Type::FLOAT, "subtrahend") + .define("TYPE", "float") + .define("IDENTITY", "vec4(subtrahend)") + .define("LOAD(value)", "value.x") + .define("REDUCE(lhs, rhs)", "lhs + rhs"); + +GPU_SHADER_CREATE_INFO(compositor_sum_red_squared_difference) + .additional_info("compositor_sum_squared_difference_float_shared") + .define("INITIALIZE(value)", "pow(value.r - subtrahend, 2.0)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_green_squared_difference) + .additional_info("compositor_sum_squared_difference_float_shared") + .define("INITIALIZE(value)", "pow(value.g - subtrahend, 2.0)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_blue_squared_difference) + .additional_info("compositor_sum_squared_difference_float_shared") + .define("INITIALIZE(value)", "pow(value.b - subtrahend, 2.0)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_sum_luminance_squared_difference) + .additional_info("compositor_sum_squared_difference_float_shared") + .push_constant(Type::VEC3, "luminance_coefficients") + .define("INITIALIZE(value)", "pow(dot(value.rgb, luminance_coefficients) - subtrahend, 2.0)") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl index bacf089deb1..8d0016a2206 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl @@ -29,6 +29,31 @@ void node_attribute_uniform(vec4 attr, const float attr_hash, out vec4 out_attr) out_attr = attr_load_uniform(attr, floatBitsToUint(attr_hash)); } +vec4 attr_load_layer(const uint attr_hash) +{ +#ifdef 
VLATTR_LIB + /* The first record of the buffer stores the length. */ + uint left = 0, right = drw_layer_attrs[0].buffer_length; + + while (left < right) { + uint mid = (left + right) / 2; + uint hash = drw_layer_attrs[mid].hash_code; + + if (hash < attr_hash) { + left = mid + 1; + } + else if (hash > attr_hash) { + right = mid; + } + else { + return drw_layer_attrs[mid].data; + } + } +#endif + + return vec4(0.0); +} + void node_attribute( vec4 attr, out vec4 outcol, out vec3 outvec, out float outf, out float outalpha) { |