Welcome to mirror list, hosted at ThFree Co, Russian Federation.

eevee_depth_of_field_reduce_comp.glsl « shaders « eevee_next « engines « draw « blender « source - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 8055536747821fe50ba6dd398621fc6028b4f171 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

/**
 * Reduce copy pass: filter fireflies and split color between scatter and gather input.
 *
 * NOTE: The texture can end up being too big because of the mipmap padding. We correct for
 * that during the convolution phase.
 *
 * Inputs:
 * - Output of setup pass (halfres) and reduce downsample pass (quarter res).
 * Outputs:
 * - Halfres padded to avoid mipmap misalignment (so possibly not matching input size).
 * - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain).
 **/

#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)

/**
 * Scatter factor based on how much `color` differs from its quarter res neighborhood.
 *
 * Compares `color` against 4 taps of `downsample_tx` (positioned using `quad_offsets`) and keeps
 * the largest per-channel absolute difference. Both sides of the comparison are clamped to
 * `dof_buf.scatter_neighbor_max_color` beforehand.
 *
 * Returns 0.0 while the largest difference stays at or below the rejection threshold and
 * reaches 1.0 once it is at least twice the threshold.
 *
 * NOTE: Do not compare alpha as it is not scattered by the scatter pass.
 */
float dof_scatter_neighborhood_rejection(vec3 color)
{
  const float rejection_threshold = 0.7;

  /* Loop invariant: same clamp value for the center color and every neighbor tap. */
  vec3 max_color = vec3(dof_buf.scatter_neighbor_max_color);
  color = min(max_color, color);

  /* Centered in the middle of 4 quarter res texel. */
  vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy);
  vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size;

  float validity = 0.0;
  for (int i = 0; i < 4; i++) {
    vec2 sample_uv = uv + quad_offsets[i] * texel_size;
    vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb;

    ref = min(max_color, ref);
    float diff = max_v3(max(vec3(0.0), abs(ref - color)));

    /* Remap so that [threshold..2*threshold] maps to [0..1]. */
    diff = saturate(diff / rejection_threshold - 1.0);
    validity = max(validity, diff);
  }

  return validity;
}

/**
 * Scatter factor based on the distance between `texel` and the closest border of
 * `inout_color_lod0_img`.
 *
 * This avoids Bokeh sprite popping in and out at the screen border and
 * drawing Bokeh sprites larger than the screen.
 *
 * Returns 1.0 while the halved CoC radius is not larger than the distance to the closest
 * border, and fades linearly to 0.0 once the radius exceeds that distance by 10 pixels.
 */
float dof_scatter_screen_border_rejection(float coc, ivec2 texel)
{
  /* Allow 10px transition. */
  const float transition_slope = 1.0 / 10.0;

  vec2 extent = vec2(imageSize(inout_color_lod0_img));
  /* Texel center, expressed in normalized then in pixel coordinates. */
  vec2 center_uv = (vec2(texel) + 0.5) / extent;
  vec2 center_px = center_uv * extent;
  /* Per axis distance to the nearest edge, then the smallest of both axes. */
  vec2 edge_distance = min(center_px, extent - center_px);
  float nearest_edge = min_v2(edge_distance);
  /* Fullres to halfres CoC. */
  float radius = abs(coc * 0.5);

  return saturate((nearest_edge - radius) * transition_slope + 1.0);
}

/**
 * Scatter factor based on brightness: how far the brightest channel of `color` exceeds
 * `dof_buf.scatter_color_threshold`, clamped to the [0..1] range.
 */
float dof_scatter_luminosity_rejection(vec3 color)
{
  const float hardness = 1.0;

  vec3 above_threshold = color - dof_buf.scatter_color_threshold;
  float brightest = max_v3(above_threshold);
  return saturate(brightest * hardness);
}

/**
 * Scatter factor based on CoC size: how far the absolute CoC exceeds
 * `dof_buf.scatter_coc_threshold`, scaled by 0.3 and clamped to the [0..1] range.
 */
float dof_scatter_coc_radius_rejection(float coc)
{
  const float hardness = 0.3;

  float radius_excess = abs(coc) - dof_buf.scatter_coc_threshold;
  return saturate(radius_excess * hardness);
}

/**
 * Cheap, unnormalized brightness estimate: green counts twice, red and blue once.
 */
float fast_luma(vec3 color)
{
  float luma = 2.0 * color.g;
  luma += color.r;
  luma += color.b;
  return luma;
}

/* Workgroup shared caches, indexed as `[local.y][local.x]` by `main()`.
 * Both dimensions are sized from `gl_WorkGroupSize.x`.
 * NOTE(review): this presumably relies on a square workgroup (size.y <= size.x) -- confirm
 * against the shader's local size declaration. */
const uint cache_size = gl_WorkGroupSize.x;
/* Color of the current mip level. */
shared vec4 color_cache[cache_size][cache_size];
/* Circle of Confusion of the current mip level. */
shared float coc_cache[cache_size][cache_size];
/* Per texel scatter factor, product of the `dof_scatter_*_rejection` results. */
shared float do_scatter[cache_size][cache_size];

/**
 * Compute entry point, one invocation per level 0 texel.
 *
 * Steps:
 * - Load level 0 color and CoC into the shared caches and compute the per texel scatter factor.
 * - For each 2x2 texel quad with any non-zero scatter factor, append a sprite to the foreground
 *   and/or background scatter list, depending on the sign of the CoC.
 * - Remove the scattered part from the level 0 color and write it back.
 * - Downsample the caches in place and write each following mip level.
 */
void main()
{
  /* Clamp so that invocations outside of the image read and write the border texels. */
  ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1);
  uvec2 texel_local = gl_LocalInvocationID.xy;
  /* Increase readability. */
#define LOCAL_INDEX texel_local.y][texel_local.x
#define LOCAL_OFFSET(x_, y_) texel_local.y + (y_)][texel_local.x + (x_)

  /* Load level 0 into cache. */
  color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel);
  coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r;

  /* Only scatter if luminous enough. */
  do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb);
  /* Only scatter if CoC is big enough. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]);
  /* Only scatter if CoC is not too big to avoid performance issues. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel);
  /* Only scatter if neighborhood is different enough. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb);
  /* For debugging. */
  if (no_scatter_pass) {
    do_scatter[LOCAL_INDEX] = 0.0;
  }

  /* Make the caches visible to the whole workgroup before reading neighbors. */
  barrier();

  /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold.
   * Only the invocation with even local coordinates handles the quad. */
  if (all(equal(texel_local & 1u, uvec2(0)))) {
    vec4 do_scatter4;
    /* Follows quad_offsets order. */
    do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)];
    do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)];
    do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)];
    do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)];
    if (any(greaterThan(do_scatter4, vec4(0.0)))) {
      /* Apply energy conservation to anamorphic scattered bokeh. */
      do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv);

      /* Circle of Confusion. */
      vec4 coc4;
      coc4.x = coc_cache[LOCAL_OFFSET(0, 1)];
      coc4.y = coc_cache[LOCAL_OFFSET(1, 1)];
      coc4.z = coc_cache[LOCAL_OFFSET(1, 0)];
      coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];
      /* We are scattering at half resolution, so divide CoC by 2. */
      coc4 *= 0.5;
      /* Sprite center position. Center sprite around the 4 texture taps. */
      vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1.0;
      /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin
       * and because we smooth the bokeh shape a bit in the pixel shader. */
      vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5;
      /* Issue a sprite for each field if any CoC matches. */
      /* Foreground: at least one scattered texel has a negative CoC. */
      if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
        /* Same value for all threads. Not an issue if we don't sync access to it. */
        scatter_fg_indirect_buf.v_count = 4u;
        /* Issue 1 strip instance per sprite. */
        uint rect_id = atomicAdd(scatter_fg_indirect_buf.i_count, 1u);
        /* Drop the sprite if the list is full. The counter is still incremented. */
        if (rect_id < dof_buf.scatter_max_rect) {

          vec4 coc4_fg = max(vec4(0.0), -coc4);
          vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4;
          /* Filter NaNs. */
          fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0)));

          ScatterRect rect_fg;
          rect_fg.offset = offset;
          /* Negate extent to flip the sprite. Mimics optical phenomenon. */
          rect_fg.half_extent = -half_extent;
          /* NOTE: Since we flipped the quad along (1,-1) line, we need to also swap the (1,1) and
           * (0,0) values so that quad_offsets is in the right order in the vertex shader. */

          /* Circle of Confusion absolute radius in halfres pixels. */
          rect_fg.color_and_coc[0].a = coc4_fg[0];
          rect_fg.color_and_coc[1].a = coc4_fg[3];
          rect_fg.color_and_coc[2].a = coc4_fg[2];
          rect_fg.color_and_coc[3].a = coc4_fg[1];
          /* Apply weights. */
          rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0];
          rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3];
          rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2];
          rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1];

          scatter_fg_list_buf[rect_id] = rect_fg;
        }
      }
      /* Background: at least one scattered texel has a positive CoC. */
      if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
        /* Same value for all threads. Not an issue if we don't sync access to it. */
        scatter_bg_indirect_buf.v_count = 4u;
        /* Issue 1 strip instance per sprite. */
        uint rect_id = atomicAdd(scatter_bg_indirect_buf.i_count, 1u);
        /* Drop the sprite if the list is full. The counter is still incremented. */
        if (rect_id < dof_buf.scatter_max_rect) {
          vec4 coc4_bg = max(vec4(0.0), coc4);
          vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4;
          /* Filter NaNs. */
          bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0)));

          ScatterRect rect_bg;
          rect_bg.offset = offset;
          rect_bg.half_extent = half_extent;

          /* Circle of Confusion absolute radius in halfres pixels. */
          rect_bg.color_and_coc[0].a = coc4_bg[0];
          rect_bg.color_and_coc[1].a = coc4_bg[1];
          rect_bg.color_and_coc[2].a = coc4_bg[2];
          rect_bg.color_and_coc[3].a = coc4_bg[3];
          /* Apply weights. */
          rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0];
          rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1];
          rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2];
          rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3];

          scatter_bg_list_buf[rect_id] = rect_bg;
        }
      }
    }
  }

  /* The scatter branch above reads the `color_cache` entries of neighboring invocations.
   * Wait for these reads to finish before any invocation modifies its own entry below,
   * otherwise a sprite could pick up a color that already had its scatter part removed. */
  barrier();

  /* Remove scatter color from gather. */
  color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX];
  imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);

  /* Recursive downsample. Each iteration halves the resolution held in the caches. */
  for (uint i = 1u; i < DOF_MIP_COUNT; i++) {
    /* Wait for the previous level to be fully written to the caches. */
    barrier();
    /* Only invocations whose local coordinates are multiples of `2^i` output a texel. */
    uint mask = ~(~0u << i);
    if (all(equal(gl_LocalInvocationID.xy & mask, uvec2(0)))) {
      /* Distance in cache entries between two texels of the previous level. */
      uint ofs = 1u << (i - 1u);

      /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
      vec4 coc4;
      coc4.x = coc_cache[LOCAL_OFFSET(0, ofs)];
      coc4.y = coc_cache[LOCAL_OFFSET(ofs, ofs)];
      coc4.z = coc_cache[LOCAL_OFFSET(ofs, 0)];
      coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];

      vec4 colors[4];
      colors[0] = color_cache[LOCAL_OFFSET(0, ofs)];
      colors[1] = color_cache[LOCAL_OFFSET(ofs, ofs)];
      colors[2] = color_cache[LOCAL_OFFSET(ofs, 0)];
      colors[3] = color_cache[LOCAL_OFFSET(0, 0)];

      vec4 weights = dof_bilateral_coc_weights(coc4);
      weights *= dof_bilateral_color_weights(colors);
      /* Normalize so that the sum is 1. */
      weights *= safe_rcp(sum(weights));

      color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights);
      coc_cache[LOCAL_INDEX] = dot(coc4, weights);

      /* Destination texel inside mip level `i`. */
      ivec2 texel_lod = ivec2(gl_GlobalInvocationID.xy >> i);

      if (i == 1u) {
        imageStore(out_color_lod1_img, texel_lod, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod1_img, texel_lod, vec4(coc_cache[LOCAL_INDEX]));
      }
      else if (i == 2u) {
        imageStore(out_color_lod2_img, texel_lod, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod2_img, texel_lod, vec4(coc_cache[LOCAL_INDEX]));
      }
      else /* if (i == 3u) */ {
        /* NOTE(review): any level above 2 lands here, presumably DOF_MIP_COUNT <= 4. */
        imageStore(out_color_lod3_img, texel_lod, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod3_img, texel_lod, vec4(coc_cache[LOCAL_INDEX]));
      }
    }
  }
}