source/blender/gpu/metal/mtl_pso_descriptor_state.hh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

/* SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */
#pragma once

#include "GPU_vertex_format.h"

#include <Metal/Metal.h>

namespace blender::gpu {

/** Vertex attribute and buffer descriptor wrappers
 * for use in PSO construction and caching. */
struct MTLVertexAttributeDescriptorPSO {
  MTLVertexFormat format;
  int offset;
  int buffer_index;
  GPUVertFetchMode format_conversion_mode;

  bool operator==(const MTLVertexAttributeDescriptorPSO &other) const
  {
    return (format == other.format) && (offset == other.offset) &&
           (buffer_index == other.buffer_index) &&
           (format_conversion_mode == other.format_conversion_mode);
  }

  uint64_t hash() const
  {
    return uint64_t((uint64_t(this->format) ^ (this->offset << 4) ^ (this->buffer_index << 8) ^
                     (this->format_conversion_mode << 12)));
  }
};

struct MTLVertexBufferLayoutDescriptorPSO {
  MTLVertexStepFunction step_function;
  int step_rate;
  int stride;

  bool operator==(const MTLVertexBufferLayoutDescriptorPSO &other) const
  {
    return (step_function == other.step_function) && (step_rate == other.step_rate) &&
           (stride == other.stride);
  }

  uint64_t hash() const
  {
    return uint64_t(uint64_t(this->step_function) ^ (this->step_rate << 4) ^ (this->stride << 8));
  }
};

/* SSBO attribute state caching. */
struct MTLSSBOAttribute {

  int mtl_attribute_index;
  int vbo_id;
  int attribute_offset;
  int per_vertex_stride;
  int attribute_format;
  bool is_instance;

  MTLSSBOAttribute(){};
  MTLSSBOAttribute(
      int attribute_ind, int vertexbuffer_ind, int offset, int stride, int format, bool instanced)
      : mtl_attribute_index(attribute_ind),
        vbo_id(vertexbuffer_ind),
        attribute_offset(offset),
        per_vertex_stride(stride),
        attribute_format(format),
        is_instance(instanced)
  {
  }

  bool operator==(const MTLSSBOAttribute &other) const
  {
    return (memcmp(this, &other, sizeof(MTLSSBOAttribute)) == 0);
  }
};

struct MTLVertexDescriptor {

  /* Core Vertex Attributes. */
  MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN];
  MTLVertexBufferLayoutDescriptorPSO
      buffer_layouts[GPU_BATCH_VBO_MAX_LEN + GPU_BATCH_INST_VBO_MAX_LEN];
  int num_attributes;
  int num_vert_buffers;
  MTLPrimitiveTopologyClass prim_topology_class;

  /* WORKAROUND: SSBO Vertex-fetch attributes -- These follow the same structure
   * but have slightly different binding rules, passed in via uniform
   * push constant data block. */
  bool uses_ssbo_vertex_fetch;
  MTLSSBOAttribute ssbo_attributes[GPU_VERT_ATTR_MAX_LEN];
  int num_ssbo_attributes;

  bool operator==(const MTLVertexDescriptor &other) const
  {
    if ((this->num_attributes != other.num_attributes) ||
        (this->num_vert_buffers != other.num_vert_buffers)) {
      return false;
    }
    if (this->prim_topology_class != other.prim_topology_class) {
      return false;
    };

    for (const int a : IndexRange(this->num_attributes)) {
      if (!(this->attributes[a] == other.attributes[a])) {
        return false;
      }
    }

    for (const int b : IndexRange(this->num_vert_buffers)) {
      if (!(this->buffer_layouts[b] == other.buffer_layouts[b])) {
        return false;
      }
    }

    /* NOTE: No need to compare SSBO attributes, as these will match attribute bindings for the
     * given shader. These are simply extra pre-resolved properties we want to include in the
     * cache. */
    return true;
  }

  uint64_t hash() const
  {
    uint64_t hash = (uint64_t)(this->num_attributes ^ this->num_vert_buffers);
    for (const int a : IndexRange(this->num_attributes)) {
      hash ^= this->attributes[a].hash() << a;
    }

    for (const int b : IndexRange(this->num_vert_buffers)) {
      hash ^= this->buffer_layouts[b].hash() << (b + 10);
    }

    /* NOTE: SSBO vertex fetch members not hashed as these will match attribute bindings. */
    return hash;
  }
};

/* Metal Render Pipeline State Descriptor -- All unique information which feeds PSO creation. */
struct MTLRenderPipelineStateDescriptor {
  /* This state descriptor will contain ALL parameters which generate a unique PSO.
   * We will then use this state-object to efficiently look-up or create a
   * new PSO for the current shader.
   *
   * Unlike the 'MTLContextGlobalShaderPipelineState', this struct contains a subset of
   * parameters used to distinguish between unique PSOs. This struct is hash-able and only contains
   * those parameters which are required by PSO generation. Non-unique state such as bound
   * resources is not tracked here, as it does not require a unique PSO permutation if changed. */

  /* Input Vertex Descriptor. */
  MTLVertexDescriptor vertex_descriptor;

  /* Render Target attachment state.
   * Assign to #MTLPixelFormatInvalid if not used. */
  int num_color_attachments;
  MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT];
  MTLPixelFormat depth_attachment_format;
  MTLPixelFormat stencil_attachment_format;

  /* Render Pipeline State affecting PSO creation. */
  bool blending_enabled;
  MTLBlendOperation alpha_blend_op;
  MTLBlendOperation rgb_blend_op;
  MTLBlendFactor dest_alpha_blend_factor;
  MTLBlendFactor dest_rgb_blend_factor;
  MTLBlendFactor src_alpha_blend_factor;
  MTLBlendFactor src_rgb_blend_factor;

  /* Global color write mask as this cannot be specified per attachment. */
  MTLColorWriteMask color_write_mask;

  /* Point size required by point primitives. */
  float point_size = 0.0f;

  /* Comparison Operator for caching. */
  bool operator==(const MTLRenderPipelineStateDescriptor &other) const
  {
    if (!(vertex_descriptor == other.vertex_descriptor)) {
      return false;
    }

    if ((num_color_attachments != other.num_color_attachments) ||
        (depth_attachment_format != other.depth_attachment_format) ||
        (stencil_attachment_format != other.stencil_attachment_format) ||
        (color_write_mask != other.color_write_mask) ||
        (blending_enabled != other.blending_enabled) || (alpha_blend_op != other.alpha_blend_op) ||
        (rgb_blend_op != other.rgb_blend_op) ||
        (dest_alpha_blend_factor != other.dest_alpha_blend_factor) ||
        (dest_rgb_blend_factor != other.dest_rgb_blend_factor) ||
        (src_alpha_blend_factor != other.src_alpha_blend_factor) ||
        (src_rgb_blend_factor != other.src_rgb_blend_factor) ||
        (vertex_descriptor.prim_topology_class != other.vertex_descriptor.prim_topology_class) ||
        (point_size != other.point_size)) {
      return false;
    }

    /* Attachments can be skipped, so num_color_attachments will not define the range. */
    for (const int c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
      if (color_attachment_format[c] != other.color_attachment_format[c]) {
        return false;
      }
    }

    return true;
  }

  uint64_t hash() const
  {
    /* NOTE(Metal): Current setup aims to minimize overlap of parameters
     * which are more likely to be different, to ensure earlier hash
     * differences without having to fallback to comparisons.
     * Though this could likely be further improved to remove
     * has collisions. */

    uint64_t hash = this->vertex_descriptor.hash();
    hash ^= uint64_t(this->num_color_attachments) << 16;     /* up to 6 (3 bits). */
    hash ^= uint64_t(this->depth_attachment_format) << 18;   /* up to 555 (9 bits). */
    hash ^= uint64_t(this->stencil_attachment_format) << 20; /* up to 555 (9 bits). */
    hash ^= uint64_t(
        *((uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */

    /* Only include elements in Hash if they are needed - avoids variable null assignments
     * influencing hash. */
    if (this->num_color_attachments > 0) {
      hash ^= uint64_t(this->color_write_mask) << 22;        /* 4 bit bit-mask. */
      hash ^= uint64_t(this->alpha_blend_op) << 26;          /* Up to 4 (3 bits). */
      hash ^= uint64_t(this->rgb_blend_op) << 29;            /* Up to 4 (3 bits). */
      hash ^= uint64_t(this->dest_alpha_blend_factor) << 32; /* Up to 18 (5 bits). */
      hash ^= uint64_t(this->dest_rgb_blend_factor) << 37;   /* Up to 18 (5 bits). */
      hash ^= uint64_t(this->src_alpha_blend_factor) << 42;  /* Up to 18 (5 bits). */
      hash ^= uint64_t(this->src_rgb_blend_factor) << 47;    /* Up to 18 (5 bits). */
    }

    for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
      hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */
    }

    hash |= uint64_t((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) << 62;
    hash ^= uint64_t(this->point_size);

    return hash;
  }

  /* Reset the Vertex Descriptor to default. */
  void reset_vertex_descriptor()
  {
    vertex_descriptor.num_attributes = 0;
    vertex_descriptor.num_vert_buffers = 0;
    for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) {
      vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
      vertex_descriptor.attributes[i].offset = 0;
    }
    vertex_descriptor.uses_ssbo_vertex_fetch = false;
    vertex_descriptor.num_ssbo_attributes = 0;
  }
};

}  // namespace blender::gpu