diff options
24 files changed, 4236 insertions, 19 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 93030d6ab6e..49cfad9d89f 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -186,9 +186,17 @@ set(OPENGL_SRC set(METAL_SRC metal/mtl_backend.mm + metal/mtl_context.mm + metal/mtl_debug.mm + metal/mtl_texture.mm + metal/mtl_texture_util.mm metal/mtl_backend.hh metal/mtl_capabilities.hh + metal/mtl_common.hh + metal/mtl_context.hh + metal/mtl_debug.hh + metal/mtl_texture.hh ) # Select Backend source based on availability @@ -210,6 +218,18 @@ if(NOT WITH_SYSTEM_GLEW) ) endif() +set(MSL_SRC + + metal/kernels/compute_texture_update.msl + metal/kernels/compute_texture_read.msl + metal/kernels/depth_2d_update_float_frag.glsl + metal/kernels/depth_2d_update_int24_frag.glsl + metal/kernels/depth_2d_update_int32_frag.glsl + metal/kernels/depth_2d_update_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_frag.glsl +) + set(GLSL_SRC GPU_shader_shared.h shaders/opengl/glsl_shader_defines.glsl @@ -392,6 +412,15 @@ foreach(GLSL_FILE ${GLSL_SRC}) data_to_c_simple(${GLSL_FILE} GLSL_C) endforeach() + +if(WITH_METAL_BACKEND) + set(MSL_C) + foreach(MSL_FILE ${MSL_SRC}) + data_to_c_simple(${MSL_FILE} MSL_C) + endforeach() + list(APPEND GLSL_C ${MSL_C}) +endif() + blender_add_lib(bf_gpu_shaders "${GLSL_C}" "" "" "") list(APPEND LIB diff --git a/source/blender/gpu/GPU_state.h b/source/blender/gpu/GPU_state.h index 86615145d1e..99b60351dcc 100644 --- a/source/blender/gpu/GPU_state.h +++ b/source/blender/gpu/GPU_state.h @@ -157,6 +157,7 @@ void GPU_stencil_reference_set(uint reference); void GPU_stencil_write_mask_set(uint write_mask); void GPU_stencil_compare_mask_set(uint compare_mask); +eGPUFaceCullTest GPU_face_culling_get(void); eGPUBlend GPU_blend_get(void); eGPUDepthTest GPU_depth_test_get(void); eGPUWriteMask GPU_write_mask_get(void); diff --git a/source/blender/gpu/GPU_texture.h 
b/source/blender/gpu/GPU_texture.h index 37edc2abeb2..bb0912f284b 100644 --- a/source/blender/gpu/GPU_texture.h +++ b/source/blender/gpu/GPU_texture.h @@ -175,8 +175,18 @@ typedef enum eGPUDataFormat { GPU_DATA_UINT_24_8, GPU_DATA_10_11_11_REV, GPU_DATA_2_10_10_10_REV, + GPU_DATA_HALF_FLOAT } eGPUDataFormat; +typedef enum eGPUTextureUsage { + GPU_TEXTURE_USAGE_SHADER_READ = (1 << 0), + GPU_TEXTURE_USAGE_SHADER_WRITE = (1 << 1), + GPU_TEXTURE_USAGE_ATTACHMENT = (1 << 2), + GPU_TEXTURE_USAGE_GENERAL = 0xFF +} eGPUTextureUsage; + +ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL) + unsigned int GPU_texture_memory_usage_get(void); /** diff --git a/source/blender/gpu/intern/gpu_state.cc b/source/blender/gpu/intern/gpu_state.cc index 885ec81f5c7..f74d500340d 100644 --- a/source/blender/gpu/intern/gpu_state.cc +++ b/source/blender/gpu/intern/gpu_state.cc @@ -48,6 +48,12 @@ void GPU_face_culling(eGPUFaceCullTest culling) SET_IMMUTABLE_STATE(culling_test, culling); } +eGPUFaceCullTest GPU_face_culling_get() +{ + GPUState &state = Context::get()->state_manager->state; + return (eGPUFaceCullTest)state.culling_test; +} + void GPU_front_facing(bool invert) { SET_IMMUTABLE_STATE(invert_facing, invert); diff --git a/source/blender/gpu/intern/gpu_texture_private.hh b/source/blender/gpu/intern/gpu_texture_private.hh index 109e60e19a6..00bcc9fac00 100644 --- a/source/blender/gpu/intern/gpu_texture_private.hh +++ b/source/blender/gpu/intern/gpu_texture_private.hh @@ -131,7 +131,6 @@ class Texture { /* TODO(fclem): Legacy. Should be removed at some point. */ virtual uint gl_bindcode_get() const = 0; - int width_get() const { return w_; @@ -458,6 +457,72 @@ inline bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat da } } +/* Ensure valid upload formats. With format conversion support, certain types can be extended to + * allow upload from differing source formats. If these cases are added, amend accordingly. 
*/ +inline bool validate_data_format_mtl(eGPUTextureFormat tex_format, eGPUDataFormat data_format) +{ + switch (tex_format) { + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + case GPU_DEPTH_COMPONENT32F: + return ELEM(data_format, GPU_DATA_FLOAT, GPU_DATA_UINT); + case GPU_DEPTH24_STENCIL8: + case GPU_DEPTH32F_STENCIL8: + /* Data can be provided as a 4-byte UINT. */ + return ELEM(data_format, GPU_DATA_UINT_24_8, GPU_DATA_UINT); + case GPU_R8UI: + case GPU_R16UI: + case GPU_RG16UI: + case GPU_R32UI: + case GPU_RGBA32UI: + case GPU_RGBA16UI: + case GPU_RG8UI: + case GPU_RG32UI: + return data_format == GPU_DATA_UINT; + case GPU_R32I: + case GPU_RG16I: + case GPU_R16I: + case GPU_RGBA8I: + case GPU_RGBA32I: + case GPU_RGBA16I: + case GPU_RG8I: + case GPU_RG32I: + case GPU_R8I: + return data_format == GPU_DATA_INT; + case GPU_R8: + case GPU_RG8: + case GPU_RGBA8: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_RGBA8UI: + case GPU_SRGB8_A8: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + return ELEM(data_format, GPU_DATA_UBYTE, GPU_DATA_FLOAT); + case GPU_RGB10_A2: + return ELEM(data_format, GPU_DATA_2_10_10_10_REV, GPU_DATA_FLOAT); + case GPU_R11F_G11F_B10F: + return ELEM(data_format, GPU_DATA_10_11_11_REV, GPU_DATA_FLOAT); + case GPU_RGBA16F: + return ELEM(data_format, GPU_DATA_HALF_FLOAT, GPU_DATA_FLOAT); + case GPU_RGBA32F: + case GPU_RGBA16: + case GPU_RG32F: + case GPU_RG16F: + case GPU_RG16: + case GPU_R32F: + case GPU_R16F: + case GPU_R16: + case GPU_RGB16F: + return data_format == GPU_DATA_FLOAT; + default: + BLI_assert_msg(0, "Unrecognized data format"); + return data_format == GPU_DATA_FLOAT; + } +} + inline eGPUDataFormat to_data_format(eGPUTextureFormat tex_format) { switch (tex_format) { diff --git a/source/blender/gpu/metal/kernels/compute_texture_read.msl b/source/blender/gpu/metal/kernels/compute_texture_read.msl new file mode 100644 index 00000000000..4bfb48567f9 --- /dev/null 
+++ b/source/blender/gpu/metal/kernels/compute_texture_read.msl @@ -0,0 +1,182 @@ +/* MATCHING eGPUTextureType. */ +#define GPU_TEXTURE_1D (1 << 0) +#define GPU_TEXTURE_2D (1 << 1) +#define GPU_TEXTURE_3D (1 << 2) +#define GPU_TEXTURE_CUBE (1 << 3) +#define GPU_TEXTURE_ARRAY (1 << 4) +#define GPU_TEXTURE_BUFFER (1 << 5) +#define GPU_TEXTURE_1D_ARRAY (GPU_TEXTURE_1D | GPU_TEXTURE_ARRAY) +#define GPU_TEXTURE_2D_ARRAY (GPU_TEXTURE_2D | GPU_TEXTURE_ARRAY) +#define GPU_TEXTURE_CUBE_ARRAY (GPU_TEXTURE_CUBE | GPU_TEXTURE_ARRAY) + +/* Determine input texture type. */ +#if IS_DEPTH_FORMAT == 1 +# define TEX_NAME_BASE depth +#else +# define TEX_NAME_BASE texture +#endif + +#define JOIN(x, y) x##y +#define FUNC_NAME(x, y) JOIN(x, y) + +/* Assign parameters based on texture type. */ +#if TEX_TYPE == GPU_TEXTURE_1D +# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 1d) +# define DIMS 1 +#elif TEX_TYPE == GPU_TEXTURE_2D +# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 2d) +# define DIMS 2 +#elif TEX_TYPE == GPU_TEXTURE_3D +# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 3d) +# define DIMS 3 +#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY +# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 1d_array) +# define DIMS 2 +#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY +# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 2d_array) +# define DIMS 3 +#endif + +/* Position dimensionality for threadgroup. 
*/ +#if DIMS == 1 +# define POSITION_TYPE uint +#elif DIMS == 2 +# define POSITION_TYPE uint2 +#elif DIMS == 3 +# define POSITION_TYPE uint3 +#endif + +using namespace metal; + +template<typename T> T denormalize(float val) +{ + return T(float(DEPTH_SCALE_FACTOR) * val); +}; + +template<> int denormalize<int>(float val) +{ + return int((float(DEPTH_SCALE_FACTOR) * val - 1.0f) / 2.0f); +} +template<> uint denormalize<uint>(float val) +{ + return uint(float(DEPTH_SCALE_FACTOR) * val); +} + +template<typename T> T convert_type(float type) +{ + return T(type); +} + +template<> uchar convert_type<uchar>(float val) +{ + return uchar(val * float(0xFF)); +} + +template<> uint convert_type<uint>(float val) +{ + return uint(val * double(0xFFFFFFFFu)); +} + +struct TextureReadParams { + int mip_index; + int extent[3]; + int offset[3]; +}; + +#if IS_DEPTH_FORMAT == 1 +constexpr sampler pixelSampler = sampler(coord::pixel, address::clamp_to_edge, filter::nearest); +#endif + +kernel void compute_texture_read(constant TextureReadParams ¶ms [[buffer(0)]], + device OUTPUT_DATA_TYPE *output_data [[buffer(1)]], +#if IS_DEPTH_FORMAT == 1 + TEX_TYPE_NAME<float, access::sample> read_tex [[texture(0)]], +#else + TEX_TYPE_NAME<INPUT_DATA_TYPE, access::read> read_tex + [[texture(0)]], +#endif + POSITION_TYPE position [[thread_position_in_grid]]) +{ + /* Read colour. 
*/ + vec<INPUT_DATA_TYPE, 4> read_colour; + +/* 1D TEXTURE */ +#if TEX_TYPE == GPU_TEXTURE_1D + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + int index = (xx)*COMPONENT_COUNT_OUTPUT; + read_colour = read_tex.read(uint(params.offset[0]) + uint(xx)); + +/* 2D TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_2D + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + int index = (yy * params.extent[0] + xx) * COMPONENT_COUNT_OUTPUT; + + /* Read data */ +# if IS_DEPTH_FORMAT == 1 + output_data[index] = denormalize<OUTPUT_DATA_TYPE>( + read_tex.sample(pixelSampler, float2(params.offset[0], params.offset[1]) + float2(xx, yy))); +# else + read_colour = read_tex.read(uint2(params.offset[0], params.offset[1]) + uint2(xx, yy)); +# endif + +/* 3D TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_3D + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + uint zz = position[2]; + int index = (zz * (params.extent[0] * params.extent[1]) + yy * params.extent[0] + xx) * + COMPONENT_COUNT_INPUT; + read_colour = read_tex.read(uint3(params.offset[0], params.offset[1], params.offset[2]) + + uint3(xx, yy, zz)); + +/* 1D ARRAY TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint layer = position[1]; + int index = (layer * params.extent[0] + xx) * COMPONENT_COUNT_OUTPUT; + read_colour = read_tex.read(uint(params.offset[0]) + uint(xx), uint(params.offset[1]) + layer); + +/* 2D ARRAY TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + uint layer = position[2]; + int index = (layer * (params.extent[0] * params.extent[1]) + yy * params.extent[0] + xx) * + COMPONENT_COUNT_INPUT; + + /* Read data */ +# if IS_DEPTH_FORMAT == 1 + output_data[index] = 
denormalize<OUTPUT_DATA_TYPE>( + read_tex.sample(pixelSampler, + float2(params.offset[0], params.offset[1]) + float2(xx, yy), + uint(params.offset[2] + layer))); +# else + read_colour = read_tex.read(uint2(params.offset[0], params.offset[1]) + uint2(xx, yy), + uint(params.offset[2] + layer)); +# endif + +#endif + + /* Output per-component colour data. */ +#if IS_DEPTH_FORMAT != 1 + /* Write data to block */ + for (int i = 0; i < WRITE_COMPONENT_COUNT; i++) { + output_data[index + i] = convert_type<OUTPUT_DATA_TYPE>(read_colour[i]); + } + + /* Fill in empty cells if more components are being read than exist */ + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output_data[index + i] = convert_type<OUTPUT_DATA_TYPE>(0); + } +#endif +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/compute_texture_update.msl b/source/blender/gpu/metal/kernels/compute_texture_update.msl new file mode 100644 index 00000000000..095c495ac54 --- /dev/null +++ b/source/blender/gpu/metal/kernels/compute_texture_update.msl @@ -0,0 +1,165 @@ +using namespace metal; + +/* MATCHING eGPUTextureType. */ +#define GPU_TEXTURE_1D (1 << 0) +#define GPU_TEXTURE_2D (1 << 1) +#define GPU_TEXTURE_3D (1 << 2) +#define GPU_TEXTURE_CUBE (1 << 3) +#define GPU_TEXTURE_ARRAY (1 << 4) +#define GPU_TEXTURE_BUFFER (1 << 5) +#define GPU_TEXTURE_1D_ARRAY (GPU_TEXTURE_1D | GPU_TEXTURE_ARRAY) +#define GPU_TEXTURE_2D_ARRAY (GPU_TEXTURE_2D | GPU_TEXTURE_ARRAY) +#define GPU_TEXTURE_CUBE_ARRAY (GPU_TEXTURE_CUBE | GPU_TEXTURE_ARRAY) + +/* Assign parameters based on texture type. */ +#if TEX_TYPE == GPU_TEXTURE_1D +# define TEX_TYPE_NAME texture1d +# define DIMS 1 +#elif TEX_TYPE == GPU_TEXTURE_2D +# define TEX_TYPE_NAME texture2d +# define DIMS 2 +#elif TEX_TYPE == GPU_TEXTURE_3D +# define TEX_TYPE_NAME texture3d +# define DIMS 3 +#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY +# define TEX_TYPE_NAME texture1d_array +# define DIMS 2 +#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY +# define TEX_TYPE_NAME texture2d_array +# define DIMS 3 +#endif + +/* Position dimensionality for threadgroup. 
*/ +#if DIMS == 1 +# define POSITION_TYPE uint +#elif DIMS == 2 +# define POSITION_TYPE uint2 +#elif DIMS == 3 +# define POSITION_TYPE uint3 +#endif + +float3 mtl_linear_to_srgb_attr(float3 c) +{ + c = max(c, float3(0.0)); + float3 c1 = c * 12.92; + float3 c2 = 1.055 * pow(c, float3(1.0 / 2.4)) - 0.055; + return mix(c1, c2, step(float3(0.0031308), c)); +} + +float3 mtl_srgb_to_linear_attr(float3 c) +{ + c = max(c, float3(0.0)); + float3 c1 = c * (1.0 / 12.92); + float3 c2 = pow((c + 0.055) * (1.0 / 1.055), float3(2.4)); + return mix(c1, c2, step(float3(0.04045), c)); +} + +struct TextureUpdateParams { + int mip_index; + int extent[3]; + int offset[3]; + uint unpack_row_length; +}; + +kernel void compute_texture_update(constant TextureUpdateParams ¶ms [[buffer(0)]], + constant INPUT_DATA_TYPE *input_data [[buffer(1)]], + TEX_TYPE_NAME<OUTPUT_DATA_TYPE, access::write> update_tex + [[texture(0)]], + POSITION_TYPE position [[thread_position_in_grid]]) +{ + +/* 1D TEXTURE */ +#if TEX_TYPE == GPU_TEXTURE_1D + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position; + int index = xx * COMPONENT_COUNT_INPUT; + + vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output; + for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); + } + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(0); + } + update_tex.write(output, uint(params.offset[0]) + uint(xx)); + +/* 2D TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_2D + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + int index = (yy * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; + + vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output; + for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); + } + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output[i] 
= OUTPUT_DATA_TYPE(0); + } + update_tex.write(output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy)); + +/* 3D TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_3D + + /* xx, yy, zz determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + uint zz = position[2]; + int index = (zz * (params.unpack_row_length * params.extent[1]) + yy * params.unpack_row_length + + xx) * + COMPONENT_COUNT_INPUT; + + vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output; + for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); + } + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(0); + } + update_tex.write( + output, uint3(params.offset[0], params.offset[1], params.offset[2]) + uint3(xx, yy, zz)); + +/* 1D ARRAY TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint layer = position[1]; + int index = (layer * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; + + vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output; + for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); + } + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(0); + } + update_tex.write( + output, uint(params.offset[0]) + uint(xx), uint(params.offset[1]) + uint(layer)); + +/* 2D ARRAY TEXTURE */ +#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY + + /* xx, yy, layer determined by kernel invocation pattern */ + uint xx = position[0]; + uint yy = position[1]; + uint layer = position[2]; + int index = (layer * (params.unpack_row_length * params.extent[1]) + + yy * params.unpack_row_length + xx) * + COMPONENT_COUNT_INPUT; + + vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output; + for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(input_data[index + 
i]); + } + for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { + output[i] = OUTPUT_DATA_TYPE(0); + } + update_tex.write( + output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy), params.offset[2] + layer); + +#endif +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl new file mode 100644 index 00000000000..ecb2dddcd63 --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl @@ -0,0 +1,10 @@ + +uniform sampler2D source_data; +uniform int mip; + +in vec2 texCoord_interp; + +void main() +{ + gl_FragDepth = textureLod(source_data, texCoord_interp, mip).r; +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl new file mode 100644 index 00000000000..99661a760f0 --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl @@ -0,0 +1,13 @@ + +uniform isampler2D source_data; +uniform int mip; + +in vec2 texCoord_interp; + +void main() +{ + uint val = uint(textureLod(source_data, texCoord_interp, mip).r); + uint stencil = (val >> 24) & 0xFFu; + uint depth = (val)&0xFFFFFFu; + gl_FragDepth = float(depth) / float(0xFFFFFFu); +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl new file mode 100644 index 00000000000..15271ab2cdd --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl @@ -0,0 +1,12 @@ + +uniform isampler2D source_data; +uniform int mip; + +in vec2 texCoord_interp; + +void main() +{ + uint val = uint(textureLod(source_data, texCoord_interp, mip).r); + uint depth = (val) & (0xFFFFFFFFu); + gl_FragDepth = float(depth) / float(0xFFFFFFFFu); +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl new file mode 100644 index 00000000000..092ae45b719 --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl @@ -0,0 +1,33 @@ + +uniform vec2 extent; +uniform vec2 offset; +uniform vec2 size; +out vec2 texCoord_interp; +in vec2 pos; + +void main() +{ + vec4 rect = vec4(offset.x, offset.y, offset.x + extent.x, offset.y + extent.y); + rect /= vec4(size, size); + vec4 tex = rect; + rect = rect * 2.0 - 1.0; + + /* QUAD */ + if (pos.x == 0.0 && pos.y == 0.0) { + rect.xy = rect.xy; + texCoord_interp = tex.xy; + } + else if (pos.x == 0.0 && pos.y == 1.0) { + rect.xy = rect.xw; + texCoord_interp = tex.xw; + } + else if (pos.x == 1.0 && pos.y == 1.0) { + rect.xy = rect.zw; + texCoord_interp = tex.zw; + } + else { + rect.xy = rect.zy; + texCoord_interp = tex.zy; + } + gl_Position = vec4(rect.xy, 0.0f, 1.0f); +}
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl new file mode 100644 index 00000000000..b1353478593 --- /dev/null +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl @@ -0,0 +1,12 @@ + + +in vec4 uvcoordsvar; +uniform sampler2D imageTexture; +uniform int mip; +out vec4 fragColor; + +void main() +{ + vec4 tex_color = textureLod(imageTexture, uvcoordsvar.xy, mip); + fragColor = tex_color; +} diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl new file mode 100644 index 00000000000..8e52868f67d --- /dev/null +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl @@ -0,0 +1,18 @@ + +out vec4 uvcoordsvar; + +in vec2 pos; +uniform vec2 fullscreen; +uniform vec2 size; +uniform vec2 dst_offset; +uniform vec2 src_offset; + +void main() +{ + /* The position represents a 0-1 square, we first scale it by the size we want to have it on + * screen next we divide by the fullscreen size, this will bring everything in range [0,1]. Next + * we scale to NDC range [-1,1]. 
*/ + gl_Position = vec4((((pos * size + dst_offset) / fullscreen) * 2.0 - 1.0), 1.0, 1.0); + vec2 uvoff = (src_offset / fullscreen); + uvcoordsvar = vec4(pos + uvoff, 0.0, 0.0); +} diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 78f638d23f5..9044d8517ab 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -11,8 +11,7 @@ #include "gpu_backend.hh" #include "mtl_capabilities.hh" -namespace blender { -namespace gpu { +namespace blender::gpu { class Batch; class DrawList; @@ -20,7 +19,6 @@ class FrameBuffer; class IndexBuf; class QueryPool; class Shader; -class Texture; class UniformBuf; class VertBuf; class MTLContext; @@ -32,6 +30,11 @@ class MTLBackend : public GPUBackend { /* Capabilities. */ static MTLCapabilities capabilities; + static MTLCapabilities &get_capabilities() + { + return MTLBackend::capabilities; + } + inline ~MTLBackend() { MTLBackend::platform_exit(); @@ -49,6 +52,11 @@ class MTLBackend : public GPUBackend { /* Placeholder */ } + void compute_dispatch_indirect(StorageBuf *indirect_buf) override + { + /* Placeholder */ + } + /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ Context *context_alloc(void *ghost_window) override; @@ -60,6 +68,7 @@ class MTLBackend : public GPUBackend { Shader *shader_alloc(const char *name) override; Texture *texture_alloc(const char *name) override; UniformBuf *uniformbuf_alloc(int size, const char *name) override; + StorageBuf *storagebuf_alloc(int size, GPUUsageType usage, const char *name) override; VertBuf *vertbuf_alloc() override; /* Render Frame Coordination. 
*/ @@ -75,5 +84,4 @@ class MTLBackend : public GPUBackend { static void capabilities_init(MTLContext *ctx); }; -} // namespace gpu -} // namespace blender +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 00d73ba5d71..8bdec962af0 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -8,6 +8,7 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" +#include "mtl_context.hh" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -16,8 +17,7 @@ #include <Metal/Metal.h> #include <QuartzCore/QuartzCore.h> -namespace blender { -namespace gpu { +namespace blender::gpu { /* Global per-thread AutoReleasePool. */ thread_local NSAutoreleasePool *g_autoreleasepool = nil; @@ -33,8 +33,7 @@ void MTLBackend::samplers_update(){ Context *MTLBackend::context_alloc(void *ghost_window) { - /* TODO(Metal): Implement MTLContext. */ - return nullptr; + return new MTLContext(ghost_window); }; Batch *MTLBackend::batch_alloc() @@ -75,8 +74,7 @@ Shader *MTLBackend::shader_alloc(const char *name) Texture *MTLBackend::texture_alloc(const char *name) { - /* TODO(Metal): Implement MTLTexture. */ - return nullptr; + return new gpu::MTLTexture(name); } UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name) @@ -85,6 +83,12 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name) return nullptr; }; +StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name) +{ + /* TODO(Metal): Implement MTLStorageBuf. */ + return nullptr; +} + VertBuf *MTLBackend::vertbuf_alloc() { /* TODO(Metal): Implement MTLVertBuf. 
*/ @@ -404,5 +408,4 @@ void MTLBackend::capabilities_init(MTLContext *ctx) /** \} */ -} // gpu -} // blender +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh index 5563008e87d..3afa6e31ccb 100644 --- a/source/blender/gpu/metal/mtl_capabilities.hh +++ b/source/blender/gpu/metal/mtl_capabilities.hh @@ -12,11 +12,11 @@ namespace gpu { /*** Derived from: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf ***/ /** Upper Bound/Fixed Limits **/ -#define METAL_MAX_TEXTURE_SLOTS 128 -#define METAL_MAX_SAMPLER_SLOTS METAL_MAX_TEXTURE_SLOTS -#define METAL_MAX_UNIFORM_BUFFER_BINDINGS 31 -#define METAL_MAX_VERTEX_INPUT_ATTRIBUTES 31 -#define METAL_MAX_UNIFORMS_PER_BLOCK 64 +#define MTL_MAX_TEXTURE_SLOTS 128 +#define MTL_MAX_SAMPLER_SLOTS MTL_MAX_TEXTURE_SLOTS +#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 31 +#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31 +#define MTL_MAX_UNIFORMS_PER_BLOCK 64 /* Context-specific limits -- populated in 'MTLBackend::platform_init' */ typedef struct MTLCapabilities { diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh new file mode 100644 index 00000000000..dc32043dc70 --- /dev/null +++ b/source/blender/gpu/metal/mtl_common.hh @@ -0,0 +1,8 @@ +#ifndef __MTL_COMMON +#define __MTL_COMMON + +// -- Renderer Options -- +#define MTL_MAX_SET_BYTES_SIZE 4096 +#define MTL_FORCE_WAIT_IDLE 0 + +#endif diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh new file mode 100644 index 00000000000..e855f9cb91a --- /dev/null +++ b/source/blender/gpu/metal/mtl_context.hh @@ -0,0 +1,185 @@ +/** \file + * \ingroup gpu + */ +#include "MEM_guardedalloc.h" + +#include "gpu_context_private.hh" + +#include "GPU_context.h" + +#include "mtl_texture.hh" + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +@class CAMetalLayer; +@class MTLCommandQueue; +@class 
MTLRenderPipelineState; + +namespace blender::gpu { + +typedef struct MTLContextTextureUtils { + + /* Depth Update Utilities */ + /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot + * use a compute shader to write to depth, so we must instead render to a depth target. + * These processes use vertex/fragment shaders to render texture data from an intermediate + * source, in order to prime the depth buffer*/ + blender::Map<DepthTextureUpdateRoutineSpecialisation, GPUShader *> depth_2d_update_shaders; + GPUShader *fullscreen_blit_shader = nullptr; + + /* Texture Read/Update routines */ + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_1d_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_1d_array_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_2d_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_2d_array_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_3d_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_cube_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_cube_array_read_compute_psos; + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + texture_buffer_read_compute_psos; + + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_1d_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_1d_array_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_2d_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + 
texture_2d_array_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_3d_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_cube_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_cube_array_update_compute_psos; + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + texture_buffer_update_compute_psos; + + template<typename T> + inline void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map) + { + for (typename blender::Map<T, id<MTLComputePipelineState>>::MutableItem item : map.items()) { + [item.value release]; + } + map.clear(); + } + + inline void init() + { + fullscreen_blit_shader = nullptr; + } + + inline void cleanup() + { + if (fullscreen_blit_shader) { + GPU_shader_free(fullscreen_blit_shader); + } + + /* Free Read shader maps */ + free_cached_pso_map(texture_1d_read_compute_psos); + free_cached_pso_map(texture_1d_array_read_compute_psos); + free_cached_pso_map(texture_2d_read_compute_psos); + free_cached_pso_map(texture_2d_array_read_compute_psos); + free_cached_pso_map(texture_3d_read_compute_psos); + free_cached_pso_map(texture_cube_read_compute_psos); + free_cached_pso_map(texture_cube_array_read_compute_psos); + free_cached_pso_map(texture_buffer_read_compute_psos); + free_cached_pso_map(texture_1d_update_compute_psos); + free_cached_pso_map(texture_1d_array_update_compute_psos); + free_cached_pso_map(texture_2d_update_compute_psos); + free_cached_pso_map(texture_2d_array_update_compute_psos); + free_cached_pso_map(texture_3d_update_compute_psos); + free_cached_pso_map(texture_cube_update_compute_psos); + free_cached_pso_map(texture_cube_array_update_compute_psos); + free_cached_pso_map(texture_buffer_update_compute_psos); + } + +} MTLContextTextureUtils; + +typedef struct 
MTLContextGlobalShaderPipelineState { + /* ..TODO(Metal): More elements to be added as backend fleshed out.. */ + + /*** DATA and IMAGE access state ***/ + uint unpack_row_length; +} MTLContextGlobalShaderPipelineState; + +/* Metal Buffer */ +typedef struct MTLTemporaryBufferRange { + id<MTLBuffer> metal_buffer; + void *host_ptr; + unsigned long long buffer_offset; + unsigned long long size; + MTLResourceOptions options; + + void flush(); + bool requires_flush(); +} MTLTemporaryBufferRange; + +/** MTLContext -- Core render loop and state management **/ +/* Note(Metal): Partial MTLContext stub to provide wrapper functionality + * for work-in-progress MTL* classes. */ + +class MTLContext : public Context { + friend class MTLBackend; + + private: + /* Compute and specialisation caches */ + MTLContextTextureUtils texture_utils_; + + public: + /* METAL API Resource Handles. */ + id<MTLCommandQueue> queue = nil; + id<MTLDevice> device = nil; + + /* GPUContext interface. */ + MTLContext(void *ghost_window); + ~MTLContext(); + + static void check_error(const char *info); + + void activate(void) override; + void deactivate(void) override; + + void flush(void) override; + void finish(void) override; + + void memory_statistics_get(int *total_mem, int *free_mem) override; + + void debug_group_begin(const char *name, int index) override; + void debug_group_end(void) override; + + /*** Context Utility functions */ + /* + * All below functions modify the global state for the context, controlling the flow of + * rendering, binding resources, setting global state, resource management etc; + */ + + /* Metal Context Core functions */ + /* Command Buffer Management */ + id<MTLCommandBuffer> get_active_command_buffer(); + + /* Render Pass State and Management */ + void begin_render_pass(); + void end_render_pass(); + + /* Shaders and Pipeline state */ + MTLContextGlobalShaderPipelineState pipeline_state; + + /* Texture utilities */ + MTLContextTextureUtils &get_texture_utils() + { + 
return this->texture_utils_; + } +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm new file mode 100644 index 00000000000..313d928996b --- /dev/null +++ b/source/blender/gpu/metal/mtl_context.mm @@ -0,0 +1,101 @@ +/** \file + * \ingroup gpu + */ +#include "mtl_context.hh" +#include "mtl_debug.hh" + +using namespace blender; +using namespace blender::gpu; + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Memory Management + * \{ */ + +bool MTLTemporaryBufferRange::requires_flush() +{ + /* We do not need to flush shared memory */ + return this->options & MTLResourceStorageModeManaged; +} + +void MTLTemporaryBufferRange::flush() +{ + if (this->requires_flush()) { + BLI_assert(this->metal_buffer); + BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]); + BLI_assert(this->buffer_offset >= 0); + [this->metal_buffer + didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)]; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name MTLContext + * \{ */ + +/* Placeholder functions */ +MTLContext::MTLContext(void *ghost_window) +{ + /* Init debug. */ + debug::mtl_debug_init(); + + /* TODO(Metal): Implement. */ +} + +MTLContext::~MTLContext() +{ + /* TODO(Metal): Implement. */ +} + +void MTLContext::check_error(const char *info) +{ + /* TODO(Metal): Implement. */ +} + +void MTLContext::activate(void) +{ + /* TODO(Metal): Implement. */ +} +void MTLContext::deactivate(void) +{ + /* TODO(Metal): Implement. */ +} + +void MTLContext::flush(void) +{ + /* TODO(Metal): Implement. */ +} +void MTLContext::finish(void) +{ + /* TODO(Metal): Implement. */ +} + +void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) +{ + /* TODO(Metal): Implement. 
*/
  *total_mem = 0;
  *free_mem = 0;
}

id<MTLCommandBuffer> MTLContext::get_active_command_buffer()
{
  /* TODO(Metal): Implement. Stub: no command buffer is tracked yet, so nil is
   * returned; callers (e.g. debug_group_begin/end) must null-check. */
  return nil;
}

/* Render Pass State and Management */
void MTLContext::begin_render_pass()
{
  /* TODO(Metal): Implement. */
}
void MTLContext::end_render_pass()
{
  /* TODO(Metal): Implement. */
}

/** \} */

}  // blender::gpu

/* diff --git a/source/blender/gpu/metal/mtl_debug.hh b/source/blender/gpu/metal/mtl_debug.hh
 * new file mode 100644 index 00000000000..455a6da5ddc --- /dev/null
 * +++ b/source/blender/gpu/metal/mtl_debug.hh @@ -0,0 +1,58 @@ */

/** \file
 * \ingroup gpu
 */

#pragma once

#include "BKE_global.h"
#include "CLG_log.h"

namespace blender {
namespace gpu {
namespace debug {

/* Shared CLOG reference for all Metal-backend debug output
 * (defined in mtl_debug.mm). */
extern CLG_LogRef LOG;

/* Initialise debugging. */
void mtl_debug_init();

/* Using Macro's instead of variadic template due to non-string-literal
 * warning for CLG_logf when indirectly passing format string. */
/* Helper: prepends a comma only when variadic arguments are present. */
#define EXPAND_ARGS(...) , ##__VA_ARGS__

/* Logs via CLOG only when GPU debugging is enabled (G_DEBUG_GPU).
 * MTL_LOG_ERROR additionally asserts unconditionally in debug builds,
 * even when GPU debug logging is disabled. */
#define MTL_LOG_ERROR(info, ...) \
  { \
    if (G.debug & G_DEBUG_GPU) { \
      CLG_logf(debug::LOG.type, \
               CLG_SEVERITY_ERROR, \
               "[Metal Viewport Error]", \
               "", \
               info EXPAND_ARGS(__VA_ARGS__)); \
    } \
    BLI_assert(false); \
  }

#define MTL_LOG_WARNING(info, ...) \
  { \
    if (G.debug & G_DEBUG_GPU) { \
      CLG_logf(debug::LOG.type, \
               CLG_SEVERITY_WARN, \
               "[Metal Viewport Warning]", \
               "", \
               info EXPAND_ARGS(__VA_ARGS__)); \
    } \
  }

#define MTL_LOG_INFO(info, ...)
\ + { \ + if (G.debug & G_DEBUG_GPU) { \ + CLG_logf(debug::LOG.type, \ + CLG_SEVERITY_INFO, \ + "[Metal Viewport Info]", \ + "", \ + info EXPAND_ARGS(__VA_ARGS__)); \ + } \ + } + +} // namespace debug +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/metal/mtl_debug.mm b/source/blender/gpu/metal/mtl_debug.mm new file mode 100644 index 00000000000..70dd8532590 --- /dev/null +++ b/source/blender/gpu/metal/mtl_debug.mm @@ -0,0 +1,66 @@ +/** \file + * \ingroup gpu + * + * Debug features of OpenGL. + */ + +#include "BLI_compiler_attrs.h" +#include "BLI_string.h" +#include "BLI_system.h" +#include "BLI_utildefines.h" + +#include "BKE_global.h" + +#include "GPU_debug.h" +#include "GPU_platform.h" + +#include "mtl_context.hh" +#include "mtl_debug.hh" + +#include "CLG_log.h" + +#include <utility> + +namespace blender::gpu::debug { + +CLG_LogRef LOG = {"gpu.debug.metal"}; + +void mtl_debug_init() +{ + CLOG_ENSURE(&LOG); +} + +} // namespace blender::gpu::debug + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Debug Groups + * + * Useful for debugging through XCode GPU Debugger. This ensures all the API calls grouped into + * "passes". 
+ * \{ */ + +void MTLContext::debug_group_begin(const char *name, int index) +{ + if (G.debug & G_DEBUG_GPU) { + id<MTLCommandBuffer> cmd = this->get_active_command_buffer(); + if (cmd != nil) { + [cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]]; + } + } +} + +void MTLContext::debug_group_end() +{ + if (G.debug & G_DEBUG_GPU) { + id<MTLCommandBuffer> cmd = this->get_active_command_buffer(); + if (cmd != nil) { + [cmd popDebugGroup]; + } + } +} + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh new file mode 100644 index 00000000000..0762470dd88 --- /dev/null +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -0,0 +1,605 @@ +/** \file + * \ingroup gpu + */ + +#pragma once + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +#include "BLI_assert.h" +#include "MEM_guardedalloc.h" +#include "gpu_texture_private.hh" + +#include "BLI_map.hh" +#include "GPU_texture.h" +#include <mutex> +#include <thread> + +@class CAMetalLayer; +@class MTLCommandQueue; +@class MTLRenderPipelineState; + +struct GPUFrameBuffer; + +/* Texture Update system structs. */ +struct TextureUpdateRoutineSpecialisation { + + /* The METAL type of data in input array, e.g. half, float, short, int */ + std::string input_data_type; + + /* The type of the texture data texture2d<T,..>, e.g. T=float, half, int etc. */ + std::string output_data_type; + + /* Number of image channels provided in input texture data array (min=1, max=4). */ + int component_count_input; + + /* Number of channels the destination texture has (min=1, max=4). 
*/ + int component_count_output; + + inline bool operator==(const TextureUpdateRoutineSpecialisation &other) const + { + return ((input_data_type == other.input_data_type) && + (output_data_type == other.output_data_type) && + (component_count_input == other.component_count_input) && + (component_count_output == other.component_count_output)); + } +}; + +template<> struct blender::DefaultHash<TextureUpdateRoutineSpecialisation> { + inline uint64_t operator()(const TextureUpdateRoutineSpecialisation &key) const + { + + DefaultHash<std::string> string_hasher; + return (uint64_t)string_hasher( + key.input_data_type + key.output_data_type + + std::to_string((key.component_count_input << 8) + key.component_count_output)); + } +}; + +/* Type of data is being writen to the depth target: + * 0 = floating point (0.0 - 1.0) + * 1 = 24 bit integer (0 - 2^24) + * 2 = 32 bit integer (0 - 2^32) */ + +typedef enum { + MTL_DEPTH_UPDATE_MODE_FLOAT = 0, + MTL_DEPTH_UPDATE_MODE_INT24 = 1, + MTL_DEPTH_UPDATE_MODE_INT32 = 2 +} DepthTextureUpdateMode; + +struct DepthTextureUpdateRoutineSpecialisation { + DepthTextureUpdateMode data_mode; + + inline bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const + { + return ((data_mode == other.data_mode)); + } +}; + +template<> struct blender::DefaultHash<DepthTextureUpdateRoutineSpecialisation> { + inline uint64_t operator()(const DepthTextureUpdateRoutineSpecialisation &key) const + { + return (uint64_t)(key.data_mode); + } +}; + +/* Texture Read system structs. */ +struct TextureReadRoutineSpecialisation { + std::string input_data_type; + std::string output_data_type; + int component_count_input; + int component_count_output; + + /* Format for depth data. + * 0 = Not a Depth format, + * 1 = FLOAT DEPTH, + * 2 = 24Bit Integer Depth, + * 4 = 32bit unsigned Integer Depth. 
*/ + int depth_format_mode; + + inline bool operator==(const TextureReadRoutineSpecialisation &other) const + { + return ((input_data_type == other.input_data_type) && + (output_data_type == other.output_data_type) && + (component_count_input == other.component_count_input) && + (component_count_output == other.component_count_output) && + (depth_format_mode == other.depth_format_mode)); + } +}; + +template<> struct blender::DefaultHash<TextureReadRoutineSpecialisation> { + inline uint64_t operator()(const TextureReadRoutineSpecialisation &key) const + { + + DefaultHash<std::string> string_hasher; + return (uint64_t)string_hasher(key.input_data_type + key.output_data_type + + std::to_string((key.component_count_input << 8) + + key.component_count_output + + (key.depth_format_mode << 28))); + } +}; + +namespace blender::gpu { + +class MTLContext; +class MTLVertBuf; + +/* Metal Texture internal implementation. */ +static const int MTL_MAX_MIPMAP_COUNT = 15; /* Max: 16384x16384 */ +static const int MTL_MAX_FBO_ATTACHED = 16; + +/* Samplers */ +typedef struct MTLSamplerState { + eGPUSamplerState state; + + /* Mip min and mip max on sampler state always the same. + * Level range now controlled with textureView to be consistent with GL baseLevel. */ + inline bool operator==(const MTLSamplerState &other) const + { + /* Add other parameters as needed. 
*/ + return (this->state == other.state); + } + + operator unsigned int() const + { + return (unsigned int)state; + } + + operator uint64_t() const + { + return (uint64_t)state; + } + +} MTLSamplerState; + +const MTLSamplerState DEFAULT_SAMPLER_STATE = {GPU_SAMPLER_DEFAULT /*, 0, 9999*/}; + +} // namespace blender::gpu + +template<> struct blender::DefaultHash<blender::gpu::MTLSamplerState> { + inline uint64_t operator()(const blender::gpu::MTLSamplerState &key) const + { + const DefaultHash<unsigned int> uint_hasher; + uint64_t main_hash = (uint64_t)uint_hasher((unsigned int)(key.state)); + + /* Hash other parameters as needed. */ + return main_hash; + } +}; + +namespace blender::gpu { + +class MTLTexture : public Texture { + friend class MTLContext; + friend class MTLStateManager; + friend class MTLFrameBuffer; + + private: + /* Where the textures data comes from. */ + enum { + MTL_TEXTURE_MODE_DEFAULT, /* Texture is self-initialised (Standard). */ + MTL_TEXTURE_MODE_EXTERNAL, /* Texture source from external id<MTLTexture> handle */ + MTL_TEXTURE_MODE_VBO, /* Texture source initialised from VBO */ + MTL_TEXTURE_MODE_TEXTURE_VIEW /* Texture is a view into an existing texture. */ + } resource_mode_; + + /* 'baking' refers to the generation of GPU-backed resources. This flag ensures GPU resources are + * ready. Baking is generally deferred until as late as possible, to ensure all associated + * resource state has been specified up-front. */ + bool is_baked_; + MTLTextureDescriptor *texture_descriptor_; + id<MTLTexture> texture_; + MTLTextureUsage usage_; + + /* Texture Storage. */ + id<MTLBuffer> texture_buffer_; + unsigned int aligned_w_ = 0; + + /* Blit Framebuffer. */ + GPUFrameBuffer *blit_fb_ = nullptr; + unsigned int blit_fb_slice_ = 0; + unsigned int blit_fb_mip_ = 0; + + /* Texure view properties */ + /* In Metal, we use texture views to either limit mipmap ranges, + * , apply a swizzle mask, or both. 
+ + * We apply the mip limit in the view rather than in the sampler, as + * certain effects and functionality such as textureSize rely on the base level + * being modified. + * + * Texture views can also point to external textures, rather than the owned + * texture if MTL_TEXTURE_MODE_TEXTURE_VIEW is used. + * If this mode is used, source_texture points to a GPUTexture from which + * we pull their texture handle as a root. + */ + const GPUTexture *source_texture_ = nullptr; + + enum TextureViewDirtyState { + TEXTURE_VIEW_NOT_DIRTY = 0, + TEXTURE_VIEW_SWIZZLE_DIRTY = (1 << 0), + TEXTURE_VIEW_MIP_DIRTY = (1 << 1) + }; + id<MTLTexture> mip_swizzle_view_; + char tex_swizzle_mask_[4]; + MTLTextureSwizzleChannels mtl_swizzle_mask_; + bool mip_range_dirty_ = false; + + int mip_texture_base_level_ = 0; + int mip_texture_max_level_ = 1000; + int mip_texture_base_layer_ = 0; + int texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY; + + /* Max mip-maps for currently allocated texture resource. */ + int mtl_max_mips_ = 1; + + /* VBO. */ + MTLVertBuf *vert_buffer_; + id<MTLBuffer> vert_buffer_mtl_; + int vert_buffer_offset_; + + /* Core parameters and subresources. */ + eGPUTextureUsage gpu_image_usage_flags_; + + /* Whether the texture's properties or state has changed (e.g. mipmap range), and re-baking of + * GPU resource is required. */ + bool is_dirty_; + bool is_bound_; + + public: + MTLTexture(const char *name); + MTLTexture(const char *name, + eGPUTextureFormat format, + eGPUTextureType type, + id<MTLTexture> metal_texture); + ~MTLTexture(); + + void update_sub( + int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) override; + + void generate_mipmap(void) override; + void copy_to(Texture *dst) override; + void clear(eGPUDataFormat format, const void *data) override; + void swizzle_set(const char swizzle_mask[4]) override; + void stencil_texture_mode_set(bool use_stencil) override{ + /* TODO(Metal): implement. 
*/ + }; + void mip_range_set(int min, int max) override; + void *read(int mip, eGPUDataFormat type) override; + + /* Remove once no longer required -- will just return 0 for now in MTL path*/ + uint gl_bindcode_get(void) const override; + + bool texture_is_baked(); + inline const char *get_name() + { + return name_; + } + + protected: + bool init_internal(void) override; + bool init_internal(GPUVertBuf *vbo) override; + bool init_internal(const GPUTexture *src, + int mip_offset, + int layer_offset) override; /* Texture View */ + + private: + /* Common Constructor, default initialisation */ + void mtl_texture_init(); + + /* Post-construction and member initialisation, prior to baking. + * Called during init_internal */ + void prepare_internal(); + + /* Generate Metal GPU resources and upload data if needed */ + void ensure_baked(); + + /* Delete associated Metal GPU resources. */ + void reset(); + void ensure_mipmaps(int miplvl); + + /* Flags a given mip level as being used. */ + void add_subresource(unsigned int level); + + void read_internal(int mip, + int x_off, + int y_off, + int z_off, + int width, + int height, + int depth, + eGPUDataFormat desired_output_format, + int num_output_components, + int debug_data_size, + void *r_data); + void bake_mip_swizzle_view(); + + id<MTLTexture> get_metal_handle(); + id<MTLTexture> get_metal_handle_base(); + MTLSamplerState get_sampler_state(); + void blit(id<MTLBlitCommandEncoder> blit_encoder, + unsigned int src_x_offset, + unsigned int src_y_offset, + unsigned int src_z_offset, + unsigned int src_slice, + unsigned int src_mip, + gpu::MTLTexture *dest, + unsigned int dst_x_offset, + unsigned int dst_y_offset, + unsigned int dst_z_offset, + unsigned int dst_slice, + unsigned int dst_mip, + unsigned int width, + unsigned int height, + unsigned int depth); + void blit(gpu::MTLTexture *dest, + unsigned int src_x_offset, + unsigned int src_y_offset, + unsigned int dst_x_offset, + unsigned int dst_y_offset, + unsigned int 
src_mip, + unsigned int dst_mip, + unsigned int dst_slice, + int width, + int height); + GPUFrameBuffer *get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip); + + MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture") + + /* Texture Update function Utilities. */ + /* Metal texture updating does not provide the same range of functionality for type conversiona + * and format compatibilities as are available in OpenGL. To achieve the same level of + * functionality, we need to instead use compute kernels to perform texture data conversions + * where appropriate. + * There are a number of different inputs which affect permutations and thus require different + * shaders and PSOs, such as: + * - Texture format + * - Texture type (e.g. 2D, 3D, 2D Array, Depth etc;) + * - Source data format and component count (e.g. floating point) + * + * MECHANISM: + * + * blender::map<INPUT DEFINES STRUCT, compute PSO> update_2d_array_kernel_psos; + * - Generate compute shader with configured kernel below with variable parameters depending + * on input/output format configurations. Do not need to keep source or descriptors around, + * just PSO, as same input defines will always generate the same code. + * + * - IF datatype IS an exact match e.g. : + * - Per-component size matches (e.g. GPU_DATA_UBYTE) + * OR GPU_DATA_10_11_11_REV && GPU_R11G11B10 (equiv) + * OR D24S8 and GPU_DATA_UINT_24_8 + * We can Use BLIT ENCODER. + * + * OTHERWISE TRIGGER COMPUTE: + * - Compute sizes will vary. Threads per grid WILL match 'extent'. + * Dimensions will vary depending on texture type. + * - Will use setBytes with 'TextureUpdateParams' struct to pass in useful member params. 
+ */ + struct TextureUpdateParams { + int mip_index; + int extent[3]; /* Width, Height, Slice on 2D Array tex*/ + int offset[3]; /* Width, Height, Slice on 2D Array tex*/ + uint unpack_row_length; /* Number of pixels between bytes in input data */ + }; + + id<MTLComputePipelineState> texture_update_1d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_update_1d_array_get_kernel( + TextureUpdateRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_update_2d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_update_2d_array_get_kernel( + TextureUpdateRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_update_3d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation); + + id<MTLComputePipelineState> mtl_texture_update_impl( + TextureUpdateRoutineSpecialisation specialisation_params, + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + &specialisation_cache, + eGPUTextureType texture_type); + + /* Depth Update Utilities */ + /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot + * use a compute shader to write to depth, so we must instead render to a depth target. 
+ * These processes use vertex/fragment shaders to render texture data from an intermediate + * source, in order to prime the depth buffer*/ + GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialisation); + + void update_sub_depth_2d( + int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data); + + /* Texture Read function utilities -- Follows a similar mechanism to the updating routines */ + struct TextureReadParams { + int mip_index; + int extent[3]; /* Width, Height, Slice on 2D Array tex*/ + int offset[3]; /* Width, Height, Slice on 2D Array tex*/ + }; + + id<MTLComputePipelineState> texture_read_1d_get_kernel( + TextureReadRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_read_1d_array_get_kernel( + TextureReadRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_read_2d_get_kernel( + TextureReadRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_read_2d_array_get_kernel( + TextureReadRoutineSpecialisation specialisation); + id<MTLComputePipelineState> texture_read_3d_get_kernel( + TextureReadRoutineSpecialisation specialisation); + + id<MTLComputePipelineState> mtl_texture_read_impl( + TextureReadRoutineSpecialisation specialisation_params, + blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> + &specialisation_cache, + eGPUTextureType texture_type); + + /* fullscreen blit utilities. */ + GPUShader *fullscreen_blit_sh_get(); +}; + +/* Utility */ +MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format); +int get_mtl_format_bytesize(MTLPixelFormat tex_format); +int get_mtl_format_num_components(MTLPixelFormat tex_format); +bool mtl_format_supports_blending(MTLPixelFormat format); + +/* The type used to define the per-component data in the input buffer. 
*/ +inline std::string tex_data_format_to_msl_type_str(eGPUDataFormat type) +{ + switch (type) { + case GPU_DATA_FLOAT: + return "float"; + case GPU_DATA_HALF_FLOAT: + return "half"; + case GPU_DATA_INT: + return "int"; + case GPU_DATA_UINT: + return "uint"; + case GPU_DATA_UBYTE: + return "uchar"; + case GPU_DATA_UINT_24_8: + return "uint"; /* Problematic type - but will match alignment. */ + case GPU_DATA_10_11_11_REV: + return "float"; /* Problematic type - each component will be read as a float. */ + default: + BLI_assert(false); + break; + } + return ""; +} + +/* The type T which goes into texture2d<T, access>. */ +inline std::string tex_data_format_to_msl_texture_template_type(eGPUDataFormat type) +{ + switch (type) { + case GPU_DATA_FLOAT: + return "float"; + case GPU_DATA_HALF_FLOAT: + return "half"; + case GPU_DATA_INT: + return "int"; + case GPU_DATA_UINT: + return "uint"; + case GPU_DATA_UBYTE: + return "ushort"; + case GPU_DATA_UINT_24_8: + return "uint"; /* Problematic type. */ + case GPU_DATA_10_11_11_REV: + return "float"; /* Problematic type. */ + default: + BLI_assert(false); + break; + } + return ""; +} + +/* Determine whether format is writable or not. Use mtl_format_get_writeable_view_format(..) for + * these. 
*/ +inline bool mtl_format_is_writable(MTLPixelFormat format) +{ + switch (format) { + case MTLPixelFormatRGBA8Unorm_sRGB: + case MTLPixelFormatBGRA8Unorm_sRGB: + case MTLPixelFormatDepth16Unorm: + case MTLPixelFormatDepth32Float: + case MTLPixelFormatDepth32Float_Stencil8: + case MTLPixelFormatBGR10A2Unorm: + case MTLPixelFormatDepth24Unorm_Stencil8: + return false; + default: + return true; + } + return true; +} + +/* For the cases where a texture format is unwritable, we can create a texture view of a similar + * format */ +inline MTLPixelFormat mtl_format_get_writeable_view_format(MTLPixelFormat format) +{ + switch (format) { + case MTLPixelFormatRGBA8Unorm_sRGB: + return MTLPixelFormatRGBA8Unorm; + case MTLPixelFormatBGRA8Unorm_sRGB: + return MTLPixelFormatBGRA8Unorm; + case MTLPixelFormatDepth16Unorm: + return MTLPixelFormatR16Unorm; + case MTLPixelFormatDepth32Float: + return MTLPixelFormatR32Float; + case MTLPixelFormatDepth32Float_Stencil8: + /* return MTLPixelFormatRG32Float; */ + /* No alternative mirror format. This should not be used for + * manual data upload */ + return MTLPixelFormatInvalid; + case MTLPixelFormatBGR10A2Unorm: + /* return MTLPixelFormatBGRA8Unorm; */ + /* No alternative mirror format. This should not be used for + * manual data upload */ + return MTLPixelFormatInvalid; + case MTLPixelFormatDepth24Unorm_Stencil8: + /* No direct format, but we'll just mirror the bytes -- Uint + * should ensure bytes are not re-normalized or manipulated */ + /* return MTLPixelFormatR32Uint; */ + return MTLPixelFormatInvalid; + default: + return format; + } + return format; +} + +/* Returns the associated engine data type with a given texture: + * Definitely not complete, edit according to the METAL specification. 
*/ +inline eGPUDataFormat to_mtl_internal_data_format(eGPUTextureFormat tex_format) +{ + switch (tex_format) { + case GPU_RGBA8: + case GPU_RGBA32F: + case GPU_RGBA16F: + case GPU_RGBA16: + case GPU_RG8: + case GPU_RG32F: + case GPU_RG16F: + case GPU_RG16: + case GPU_R8: + case GPU_R32F: + case GPU_R16F: + case GPU_R16: + case GPU_RGB16F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + case GPU_DEPTH_COMPONENT32F: + case GPU_SRGB8_A8: + return GPU_DATA_FLOAT; + case GPU_DEPTH24_STENCIL8: + case GPU_DEPTH32F_STENCIL8: + return GPU_DATA_UINT_24_8; + case GPU_RGBA8UI: + case GPU_RGBA32UI: + case GPU_RGBA16UI: + case GPU_RG8UI: + case GPU_RG32UI: + case GPU_R8UI: + case GPU_R16UI: + case GPU_RG16UI: + case GPU_R32UI: + return GPU_DATA_UINT; + case GPU_R8I: + case GPU_RG8I: + case GPU_R16I: + case GPU_R32I: + case GPU_RG16I: + case GPU_RGBA8I: + case GPU_RGBA32I: + case GPU_RGBA16I: + case GPU_RG32I: + return GPU_DATA_INT; + case GPU_R11F_G11F_B10F: + return GPU_DATA_10_11_11_REV; + default: + BLI_assert(false && "Texture not yet handled"); + return GPU_DATA_FLOAT; + } +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm new file mode 100644 index 00000000000..117b8850485 --- /dev/null +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -0,0 +1,1879 @@ +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "DNA_userdef_types.h" + +#include "GPU_batch.h" +#include "GPU_batch_presets.h" +#include "GPU_capabilities.h" +#include "GPU_framebuffer.h" +#include "GPU_platform.h" +#include "GPU_state.h" + +#include "mtl_backend.hh" +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_texture.hh" + +#include "GHOST_C-api.h" + +/* Debug assistance. */ +/* Capture texture update routine for analysis in XCode GPU Frame Debugger. 
*/ +#define DEBUG_TEXTURE_UPDATE_CAPTURE false + +/* Capture texture read routine for analysis in XCode GPU Frame Debugger. */ +#define DEBUG_TEXTURE_READ_CAPTURE false + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +void gpu::MTLTexture::mtl_texture_init() +{ + BLI_assert(MTLContext::get() != nullptr); + + /* Status. */ + this->is_baked_ = false; + this->is_dirty_ = false; + this->resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; + this->mtl_max_mips_ = 1; + + /* Metal properties. */ + this->texture_ = nil; + this->texture_buffer_ = nil; + this->mip_swizzle_view_ = nil; + + /* Binding information. */ + this->is_bound_ = false; + + /* VBO. */ + this->vert_buffer_ = nullptr; + this->vert_buffer_mtl_ = nil; + this->vert_buffer_offset_ = -1; + + /* Default Swizzle. */ + this->tex_swizzle_mask_[0] = 'r'; + this->tex_swizzle_mask_[1] = 'g'; + this->tex_swizzle_mask_[2] = 'b'; + this->tex_swizzle_mask_[3] = 'a'; + this->mtl_swizzle_mask_ = MTLTextureSwizzleChannelsMake( + MTLTextureSwizzleRed, MTLTextureSwizzleGreen, MTLTextureSwizzleBlue, MTLTextureSwizzleAlpha); + + /* TODO(Metal): Find a way of specifying texture usage externally. */ + this->gpu_image_usage_flags_ = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT; +} + +gpu::MTLTexture::MTLTexture(const char *name) : Texture(name) +{ + /* Common Initialisation. */ + mtl_texture_init(); +} + +gpu::MTLTexture::MTLTexture(const char *name, + eGPUTextureFormat format, + eGPUTextureType type, + id<MTLTexture> metal_texture) + : Texture(name) +{ + /* Common Initialisation. */ + mtl_texture_init(); + + /* Prep texture from METAL handle. */ + BLI_assert(metal_texture != nil); + BLI_assert(type == GPU_TEXTURE_2D); + this->type_ = type; + init_2D(metal_texture.width, metal_texture.height, 0, 1, format); + + /* Assign MTLTexture. */ + this->texture_ = metal_texture; + [this->texture_ retain]; + + /* Flag as Baked. 
*/ + this->is_baked_ = true; + this->is_dirty_ = false; + this->resource_mode_ = MTL_TEXTURE_MODE_EXTERNAL; +} + +gpu::MTLTexture::~MTLTexture() +{ + /* Unbind if bound. */ + if (this->is_bound_) { + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + if (ctx != nullptr) { + ctx->state_manager->texture_unbind(this); + } + } + + /* Free memory. */ + this->reset(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +void gpu::MTLTexture::bake_mip_swizzle_view() +{ + if (texture_view_dirty_flags_) { + /* if a texture view was previously created we release it. */ + if (this->mip_swizzle_view_ != nil) { + [this->mip_swizzle_view_ release]; + } + + /* Determine num slices */ + int num_slices = 1; + switch (this->type_) { + case GPU_TEXTURE_1D_ARRAY: + num_slices = this->h_; + break; + case GPU_TEXTURE_2D_ARRAY: + num_slices = this->d_; + break; + case GPU_TEXTURE_CUBE: + num_slices = 6; + break; + case GPU_TEXTURE_CUBE_ARRAY: + /* d_ is equal to array levels * 6, including face count. 
*/ + num_slices = this->d_; + break; + default: + num_slices = 1; + break; + } + + int range_len = min_ii((this->mip_texture_max_level_ - this->mip_texture_base_level_) + 1, + this->texture_.mipmapLevelCount); + BLI_assert(range_len > 0); + BLI_assert(mip_texture_base_level_ < this->texture_.mipmapLevelCount); + BLI_assert(this->mip_texture_base_layer_ < num_slices); + this->mip_swizzle_view_ = [this->texture_ + newTextureViewWithPixelFormat:this->texture_.pixelFormat + textureType:this->texture_.textureType + levels:NSMakeRange(this->mip_texture_base_level_, range_len) + slices:NSMakeRange(this->mip_texture_base_layer_, num_slices) + swizzle:this->mtl_swizzle_mask_]; + MTL_LOG_INFO( + "Updating texture view - MIP TEXTURE BASE LEVEL: %d, MAX LEVEL: %d (Range len: %d)\n", + this->mip_texture_base_level_, + min_ii(this->mip_texture_max_level_, this->texture_.mipmapLevelCount), + range_len); + [this->mip_swizzle_view_ retain]; + this->mip_swizzle_view_.label = [this->texture_ label]; + texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY; + } +} + +/** \name Operations + * \{ */ + +id<MTLTexture> gpu::MTLTexture::get_metal_handle() +{ + + /* ensure up to date and baked. */ + this->ensure_baked(); + + /* Verify VBO texture shares same buffer. */ + if (this->resource_mode_ == MTL_TEXTURE_MODE_VBO) { + int r_offset = -1; + + /* TODO(Metal): Fetch buffer from MTLVertBuf when implemented. */ + id<MTLBuffer> buf = nil; /*vert_buffer_->get_metal_buffer(&r_offset);*/ + BLI_assert(this->vert_buffer_mtl_ != nil); + BLI_assert(buf == this->vert_buffer_mtl_ && r_offset == this->vert_buffer_offset_); + + UNUSED_VARS(buf); + UNUSED_VARS_NDEBUG(r_offset); + } + + if (this->is_baked_) { + /* For explicit texture views, ensure we always return the texture view. 
*/ + if (this->resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { + BLI_assert(this->mip_swizzle_view_ && "Texture view should always have a valid handle."); + } + + if (this->mip_swizzle_view_ != nil || texture_view_dirty_flags_) { + bake_mip_swizzle_view(); + return this->mip_swizzle_view_; + } + return this->texture_; + } + return nil; +} + +id<MTLTexture> gpu::MTLTexture::get_metal_handle_base() +{ + + /* ensure up to date and baked. */ + this->ensure_baked(); + + /* For explicit texture views, always return the texture view. */ + if (this->resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { + BLI_assert(this->mip_swizzle_view_ && "Texture view should always have a valid handle."); + if (this->mip_swizzle_view_ != nil || texture_view_dirty_flags_) { + bake_mip_swizzle_view(); + } + return this->mip_swizzle_view_; + } + + /* Return base handle. */ + if (this->is_baked_) { + return this->texture_; + } + return nil; +} + +void gpu::MTLTexture::blit(id<MTLBlitCommandEncoder> blit_encoder, + unsigned int src_x_offset, + unsigned int src_y_offset, + unsigned int src_z_offset, + unsigned int src_slice, + unsigned int src_mip, + gpu::MTLTexture *dest, + unsigned int dst_x_offset, + unsigned int dst_y_offset, + unsigned int dst_z_offset, + unsigned int dst_slice, + unsigned int dst_mip, + unsigned int width, + unsigned int height, + unsigned int depth) +{ + + BLI_assert(this && dest); + BLI_assert(width > 0 && height > 0 && depth > 0); + MTLSize src_size = MTLSizeMake(width, height, depth); + MTLOrigin src_origin = MTLOriginMake(src_x_offset, src_y_offset, src_z_offset); + MTLOrigin dst_origin = MTLOriginMake(dst_x_offset, dst_y_offset, dst_z_offset); + + if (this->format_get() != dest->format_get()) { + MTL_LOG_WARNING( + "[Warning] gpu::MTLTexture: Cannot copy between two textures of different types using a " + "blit encoder. 
TODO: Support this operation\n"); + return; + } + + /* TODO(Metal): Verify if we want to use the one with modified base-level/texture view + * or not. */ + [blit_encoder copyFromTexture:this->get_metal_handle_base() + sourceSlice:src_slice + sourceLevel:src_mip + sourceOrigin:src_origin + sourceSize:src_size + toTexture:dest->get_metal_handle_base() + destinationSlice:dst_slice + destinationLevel:dst_mip + destinationOrigin:dst_origin]; +} + +void gpu::MTLTexture::blit(gpu::MTLTexture *dst, + unsigned int src_x_offset, + unsigned int src_y_offset, + unsigned int dst_x_offset, + unsigned int dst_y_offset, + unsigned int src_mip, + unsigned int dst_mip, + unsigned int dst_slice, + int width, + int height) +{ + BLI_assert(this->type_get() == dst->type_get()); + + GPUShader *shader = fullscreen_blit_sh_get(); + BLI_assert(shader != nullptr); + BLI_assert(GPU_context_active_get()); + + /* Fetch restore framebuffer and blit target framebuffer from destination texture. */ + GPUFrameBuffer *restore_fb = GPU_framebuffer_active_get(); + GPUFrameBuffer *blit_target_fb = dst->get_blit_framebuffer(dst_slice, dst_mip); + BLI_assert(blit_target_fb); + GPU_framebuffer_bind(blit_target_fb); + + /* Execute graphics draw call to perform the blit. */ + GPUBatch *quad = GPU_batch_preset_quad(); + + GPU_batch_set_shader(quad, shader); + + float w = dst->width_get(); + float h = dst->height_get(); + + GPU_shader_uniform_2f(shader, "fullscreen", w, h); + GPU_shader_uniform_2f(shader, "src_offset", src_x_offset, src_y_offset); + GPU_shader_uniform_2f(shader, "dst_offset", dst_x_offset, dst_y_offset); + GPU_shader_uniform_2f(shader, "size", width, height); + + GPU_shader_uniform_1i(shader, "mip", src_mip); + GPU_batch_texture_bind(quad, "imageTexture", wrap(this)); + + /* Caching previous pipeline state. 
*/ + bool depth_write_prev = GPU_depth_mask_get(); + uint stencil_mask_prev = GPU_stencil_mask_get(); + eGPUStencilTest stencil_test_prev = GPU_stencil_test_get(); + eGPUFaceCullTest culling_test_prev = GPU_face_culling_get(); + eGPUBlend blend_prev = GPU_blend_get(); + eGPUDepthTest depth_test_prev = GPU_depth_test_get(); + GPU_scissor_test(false); + + /* Apply state for blit draw call. */ + GPU_stencil_write_mask_set(0xFF); + GPU_stencil_reference_set(0); + GPU_face_culling(GPU_CULL_NONE); + GPU_stencil_test(GPU_STENCIL_ALWAYS); + GPU_depth_mask(false); + GPU_blend(GPU_BLEND_NONE); + GPU_depth_test(GPU_DEPTH_ALWAYS); + + GPU_batch_draw(quad); + + /* restoring old pipeline state. */ + GPU_depth_mask(depth_write_prev); + GPU_stencil_write_mask_set(stencil_mask_prev); + GPU_stencil_test(stencil_test_prev); + GPU_face_culling(culling_test_prev); + GPU_depth_mask(depth_write_prev); + GPU_blend(blend_prev); + GPU_depth_test(depth_test_prev); + + if (restore_fb != nullptr) { + GPU_framebuffer_bind(restore_fb); + } + else { + GPU_framebuffer_restore(); + } +} + +GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip) +{ + + /* Check if layer has changed. */ + bool update_attachments = false; + if (!this->blit_fb_) { + this->blit_fb_ = GPU_framebuffer_create("gpu_blit"); + update_attachments = true; + } + + /* Check if current blit FB has the correct attachment properties. 
*/ + if (this->blit_fb_) { + if (this->blit_fb_slice_ != dst_slice || this->blit_fb_mip_ != dst_mip) { + update_attachments = true; + } + } + + if (update_attachments) { + if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { + /* DEPTH TEX */ + GPU_framebuffer_ensure_config( + &this->blit_fb_, + {GPU_ATTACHMENT_TEXTURE_LAYER_MIP(wrap(static_cast<Texture *>(this)), + static_cast<int>(dst_slice), + static_cast<int>(dst_mip)), + GPU_ATTACHMENT_NONE}); + } + else { + /* COLOR TEX */ + GPU_framebuffer_ensure_config( + &this->blit_fb_, + {GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE_LAYER_MIP(wrap(static_cast<Texture *>(this)), + static_cast<int>(dst_slice), + static_cast<int>(dst_mip))}); + } + this->blit_fb_slice_ = dst_slice; + this->blit_fb_mip_ = dst_mip; + } + + BLI_assert(this->blit_fb_); + return this->blit_fb_; +} + +MTLSamplerState gpu::MTLTexture::get_sampler_state() +{ + MTLSamplerState sampler_state; + sampler_state.state = this->sampler_state; + /* Add more parameters as needed */ + return sampler_state; +} + +void gpu::MTLTexture::update_sub( + int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) +{ + /* Fetch active context. */ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Do not update texture view. */ + BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + + /* Ensure mipmaps. */ + this->ensure_mipmaps(mip); + + /* Ensure texture is baked. */ + this->ensure_baked(); + + /* Safety checks. */ +#if TRUST_NO_ONE + BLI_assert(mip >= this->mip_min_ && mip <= this->mip_max_); + BLI_assert(mip < this->texture_.mipmapLevelCount); + BLI_assert(this->texture_.mipmapLevelCount >= this->mip_max_); +#endif + + /* DEPTH FLAG - Depth formats cannot use direct BLIT - pass off to their own routine which will + * do a depth-only render. 
*/ + bool is_depth_format = (this->format_flag_ & GPU_FORMAT_DEPTH); + if (is_depth_format) { + switch (this->type_) { + + case GPU_TEXTURE_2D: { + update_sub_depth_2d(mip, offset, extent, type, data); + return; + } + default: + MTL_LOG_ERROR( + "[Error] gpu::MTLTexture::update_sub not yet supported for other depth " + "configurations\n"); + return; + return; + } + } + + @autoreleasepool { + /* Determine totalsize of INPUT Data. */ + int num_channels = to_component_len(this->format_); + int input_bytes_per_pixel = num_channels * to_bytesize(type); + int totalsize = 0; + + /* If unpack row length is used, size of input data uses the unpack row length, rather than the + * image lenght */ + int expected_update_w = ((ctx->pipeline_state.unpack_row_length == 0) ? + extent[0] : + ctx->pipeline_state.unpack_row_length); + + /* Ensure calculated total size isn't larger than remaining image data size */ + switch (this->dimensions_count()) { + case 1: + totalsize = input_bytes_per_pixel * max_ii(expected_update_w, 1); + break; + case 2: + totalsize = input_bytes_per_pixel * max_ii(expected_update_w, 1) * max_ii(extent[1], 1); + break; + case 3: + totalsize = input_bytes_per_pixel * max_ii(expected_update_w, 1) * max_ii(extent[1], 1) * + max_ii(extent[2], 1); + break; + default: + BLI_assert(false); + break; + } + + /* When unpack row length is used, provided data does not necessarily contain padding for last + * row, so we only include up to the end of updated data. */ + if (ctx->pipeline_state.unpack_row_length > 0) { + BLI_assert(ctx->pipeline_state.unpack_row_length >= extent[0]); + totalsize -= (ctx->pipeline_state.unpack_row_length - extent[0]) * input_bytes_per_pixel; + } + + /* Check */ + BLI_assert(totalsize > 0); + + /* Determine expected destination data size. 
*/ + MTLPixelFormat destination_format = gpu_texture_format_to_metal(this->format_); + int expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format); + int destination_num_channels = get_mtl_format_num_components(destination_format); + int destination_totalsize = 0; + switch (this->dimensions_count()) { + case 1: + destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1); + break; + case 2: + destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) * + max_ii(extent[1], 1); + break; + case 3: + destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) * + max_ii(extent[1], 1) * max_ii(extent[2], 1); + break; + default: + BLI_assert(false); + break; + } + + /* Prepare specialisation struct (For texture update routine). */ + TextureUpdateRoutineSpecialisation compute_specialisation_kernel = { + tex_data_format_to_msl_type_str(type), /* INPUT DATA FORMAT */ + tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */ + num_channels, + destination_num_channels}; + + /* Determine whether we can do direct BLIT or not. */ + bool can_use_direct_blit = true; + if (expected_dst_bytes_per_pixel != input_bytes_per_pixel || + num_channels != destination_num_channels) { + can_use_direct_blit = false; + } + +#if MTL_VALIDATION_CRASH_DEPTH_1_1_1_WA + if (this->type_ == GPU_TEXTURE_2D || this->type_ == GPU_TEXTURE_2D_ARRAY) { + /* Workaround for crash in validation layer when blitting to depth2D target with + * dimensions (1, 1, 1); */ + if (extent[0] == 1 && extent[1] == 1 && extent[2] == 1 && totalsize == 4) { + can_use_direct_blit = false; + } + } +#endif + + if (this->format_ == GPU_SRGB8_A8 && !can_use_direct_blit) { + MTL_LOG_WARNING( + "SRGB data upload does not work correctly using compute upload. " + "texname '%s'\n", + this->name_); + } + + /* Safety Checks. 
*/ + if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV) { + BLI_assert(can_use_direct_blit && + "Special input data type must be a 1-1 mapping with destination texture as it " + "cannot easily be split"); + } + + /* Debug and verification. */ + if (!can_use_direct_blit) { + MTL_LOG_WARNING( + "gpu::MTLTexture::update_sub supplied bpp is %d bytes (%d components per " + "pixel), but backing texture bpp is %d bytes (%d components per pixel) " + "(TODO(Metal): Channel Conversion needed) (w: %d, h: %d, d: %d)\n", + input_bytes_per_pixel, + num_channels, + expected_dst_bytes_per_pixel, + destination_num_channels, + w_, + h_, + d_); + + /* Check mip compatibility. */ + if (mip != 0) { + MTL_LOG_ERROR( + "[Error]: Updating texture layers other than mip=0 when data is mismatched is not " + "possible in METAL on macOS using texture->write\n"); + return; + } + + /* Check Format writeability. */ + if (mtl_format_get_writeable_view_format(destination_format) == MTLPixelFormatInvalid) { + MTL_LOG_ERROR( + "[Error]: Updating texture -- destination MTLPixelFormat '%d' does not support write " + "operations, and no suitable TextureView format exists.\n", + *(int *)(&destination_format)); + return; + } + } + + /* Debug hook for performing GPU capture of routine. */ + bool DO_CAPTURE = false; +#if DEBUG_TEXTURE_UPDATE_CAPTURE == 1 + DO_CAPTURE = true; + if (DO_CAPTURE) { + MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; + MTLCaptureDescriptor *capture_descriptor = [[MTLCaptureDescriptor alloc] init]; + capture_descriptor.captureObject = ctx->device; + NSError *error; + if (![capture_manager startCaptureWithDescriptor:capture_descriptor error:&error]) { + NSString *error_str = [NSString stringWithFormat:@"%@", error]; + const char *error_c_str = [error_str UTF8String]; + MTL_LOG_ERROR("Failed to start capture. Error: %s\n", error_c_str); + } + } +#endif + + /* Fetch or Create command buffer. 
*/ + id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); + bool own_command_buffer = false; + if (cmd_buffer == nil || DO_CAPTURE) { + cmd_buffer = [ctx->queue commandBuffer]; + own_command_buffer = true; + } + else { + /* Finish graphics work. */ + ctx->end_render_pass(); + } + + /* Prepare staging buffer for data. */ + id<MTLBuffer> staging_buffer = nil; + unsigned long long staging_buffer_offset = 0; + + /* Fetch allocation from scratch buffer. */ + MTLTemporaryBufferRange allocation; /* TODO(Metal): Metal Memory manager. */ + /* = ctx->get_memory_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);*/ + memcpy(allocation.host_ptr, data, totalsize); + staging_buffer = allocation.metal_buffer; + if (own_command_buffer) { + if (allocation.requires_flush()) { + [staging_buffer didModifyRange:NSMakeRange(allocation.buffer_offset, allocation.size)]; + } + } + staging_buffer_offset = allocation.buffer_offset; + + /* Common Properties. */ + MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format( + destination_format); + + /* Some texture formats are not writeable so we need to use a texture view. */ + if (compatible_write_format == MTLPixelFormatInvalid) { + MTL_LOG_ERROR("Cannot use compute update blit with texture-view format: %d\n", + *((int *)&compatible_write_format)); + return; + } + id<MTLTexture> texture_handle = ((compatible_write_format == destination_format)) ? 
+ this->texture_ : + [this->texture_ + newTextureViewWithPixelFormat:compatible_write_format]; + + /* Prepare encoders */ + id<MTLBlitCommandEncoder> blit_encoder = nil; + id<MTLComputeCommandEncoder> compute_encoder = nil; + if (can_use_direct_blit) { + blit_encoder = [cmd_buffer blitCommandEncoder]; + BLI_assert(blit_encoder != nil); + } + else { + compute_encoder = [cmd_buffer computeCommandEncoder]; + BLI_assert(compute_encoder != nil); + } + + switch (this->type_) { + + /* 1D */ + case GPU_TEXTURE_1D: + case GPU_TEXTURE_1D_ARRAY: { + if (can_use_direct_blit) { + /* Use Blit based update. */ + int bytes_per_row = expected_dst_bytes_per_pixel * + ((ctx->pipeline_state.unpack_row_length == 0) ? + extent[0] : + ctx->pipeline_state.unpack_row_length); + int bytes_per_image = bytes_per_row; + int max_array_index = ((this->type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1); + for (int array_index = 0; array_index < max_array_index; array_index++) { + + int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index); + [blit_encoder copyFromBuffer:staging_buffer + sourceOffset:buffer_array_offset + sourceBytesPerRow:bytes_per_row + sourceBytesPerImage:bytes_per_image + sourceSize:MTLSizeMake(extent[0], 1, 1) + toTexture:texture_handle + destinationSlice:((this->type_ == GPU_TEXTURE_1D_ARRAY) ? + (array_index + offset[1]) : + 0) + destinationLevel:mip + destinationOrigin:MTLOriginMake(offset[0], 0, 0)]; + } + } + else { + /* Use Compute Based update. */ + if (this->type_ == GPU_TEXTURE_1D) { + id<MTLComputePipelineState> pso = texture_update_1d_get_kernel( + compute_specialisation_kernel); + TextureUpdateParams params = {mip, + {extent[0], 1, 1}, + {offset[0], 0, 0}, + ((ctx->pipeline_state.unpack_row_length == 0) ? 
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length)};
+ [compute_encoder setComputePipelineState:pso];
+ [compute_encoder setBytes:&params length:sizeof(params) atIndex:0];
+ [compute_encoder setBuffer:staging_buffer offset:staging_buffer_offset atIndex:1];
+ [compute_encoder setTexture:texture_handle atIndex:0];
+ [compute_encoder
+ dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
+ threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
+ }
+ else if (this->type_ == GPU_TEXTURE_1D_ARRAY) {
+ id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
+ compute_specialisation_kernel);
+ TextureUpdateParams params = {mip,
+ {extent[0], extent[1], 1},
+ {offset[0], offset[1], 0},
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length)};
+ [compute_encoder setComputePipelineState:pso];
+ [compute_encoder setBytes:&params length:sizeof(params) atIndex:0];
+ [compute_encoder setBuffer:staging_buffer offset:staging_buffer_offset atIndex:1];
+ [compute_encoder setTexture:texture_handle atIndex:0];
+ [compute_encoder
+ dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
+ threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
+ }
+ }
+ } break;
+
+ /* 2D */
+ case GPU_TEXTURE_2D:
+ case GPU_TEXTURE_2D_ARRAY: {
+ if (can_use_direct_blit) {
+ /* Use Blit encoder update. */
+ int bytes_per_row = expected_dst_bytes_per_pixel *
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length);
+ int bytes_per_image = bytes_per_row * extent[1];
+
+ int texture_array_relative_offset = 0;
+ int base_slice = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
+ int final_slice = base_slice + ((this->type_ == GPU_TEXTURE_2D_ARRAY) ? 
extent[2] : 1);
+
+ for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
+
+ if (array_slice > 0) {
+ BLI_assert(this->type_ == GPU_TEXTURE_2D_ARRAY);
+ BLI_assert(array_slice < this->d_);
+ }
+
+ [blit_encoder copyFromBuffer:staging_buffer
+ sourceOffset:staging_buffer_offset + texture_array_relative_offset
+ sourceBytesPerRow:bytes_per_row
+ sourceBytesPerImage:bytes_per_image
+ sourceSize:MTLSizeMake(extent[0], extent[1], 1)
+ toTexture:texture_handle
+ destinationSlice:array_slice
+ destinationLevel:mip
+ destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
+
+ texture_array_relative_offset += bytes_per_image;
+ }
+ }
+ else {
+ /* Use Compute texture update. */
+ if (this->type_ == GPU_TEXTURE_2D) {
+ id<MTLComputePipelineState> pso = texture_update_2d_get_kernel(
+ compute_specialisation_kernel);
+ TextureUpdateParams params = {mip,
+ {extent[0], extent[1], 1},
+ {offset[0], offset[1], 0},
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length)};
+ [compute_encoder setComputePipelineState:pso];
+ [compute_encoder setBytes:&params length:sizeof(params) atIndex:0];
+ [compute_encoder setBuffer:staging_buffer offset:staging_buffer_offset atIndex:1];
+ [compute_encoder setTexture:texture_handle atIndex:0];
+ [compute_encoder
+ dispatchThreads:MTLSizeMake(
+ extent[0], extent[1], 1) /* Width, Height, Layer */
+ threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
+ }
+ else if (this->type_ == GPU_TEXTURE_2D_ARRAY) {
+ id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel(
+ compute_specialisation_kernel);
+ TextureUpdateParams params = {mip,
+ {extent[0], extent[1], extent[2]},
+ {offset[0], offset[1], offset[2]},
+ ((ctx->pipeline_state.unpack_row_length == 0) ? 
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length)};
+ [compute_encoder setComputePipelineState:pso];
+ [compute_encoder setBytes:&params length:sizeof(params) atIndex:0];
+ [compute_encoder setBuffer:staging_buffer offset:staging_buffer_offset atIndex:1];
+ [compute_encoder setTexture:texture_handle atIndex:0];
+ [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
+ extent[1],
+ extent[2]) /* Width, Height, Layer */
+ threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
+ }
+ }
+
+ } break;
+
+ /* 3D */
+ case GPU_TEXTURE_3D: {
+ if (can_use_direct_blit) {
+ int bytes_per_row = expected_dst_bytes_per_pixel *
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length);
+ int bytes_per_image = bytes_per_row * extent[1];
+ [blit_encoder copyFromBuffer:staging_buffer
+ sourceOffset:staging_buffer_offset
+ sourceBytesPerRow:bytes_per_row
+ sourceBytesPerImage:bytes_per_image
+ sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
+ toTexture:texture_handle
+ destinationSlice:0
+ destinationLevel:mip
+ destinationOrigin:MTLOriginMake(offset[0], offset[1], offset[2])];
+ }
+ else {
+ id<MTLComputePipelineState> pso = texture_update_3d_get_kernel(
+ compute_specialisation_kernel);
+ TextureUpdateParams params = {mip,
+ {extent[0], extent[1], extent[2]},
+ {offset[0], offset[1], offset[2]},
+ ((ctx->pipeline_state.unpack_row_length == 0) ? 
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length)};
+ [compute_encoder setComputePipelineState:pso];
+ [compute_encoder setBytes:&params length:sizeof(params) atIndex:0];
+ [compute_encoder setBuffer:staging_buffer offset:staging_buffer_offset atIndex:1];
+ [compute_encoder setTexture:texture_handle atIndex:0];
+ [compute_encoder
+ dispatchThreads:MTLSizeMake(
+ extent[0], extent[1], extent[2]) /* Width, Height, Depth */
+ threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
+ }
+ } break;
+
+ /* CUBE */
+ case GPU_TEXTURE_CUBE: {
+ if (can_use_direct_blit) {
+ int bytes_per_row = expected_dst_bytes_per_pixel *
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length);
+ int bytes_per_image = bytes_per_row * extent[1];
+
+ int texture_array_relative_offset = 0;
+
+ /* Iterate over all cube faces in range (offset[2], offset[2] + extent[2]). */
+ for (int i = 0; i < extent[2]; i++) {
+ int face_index = offset[2] + i;
+
+ [blit_encoder copyFromBuffer:staging_buffer
+ sourceOffset:staging_buffer_offset + texture_array_relative_offset
+ sourceBytesPerRow:bytes_per_row
+ sourceBytesPerImage:bytes_per_image
+ sourceSize:MTLSizeMake(extent[0], extent[1], 1)
+ toTexture:texture_handle
+ destinationSlice:face_index /* = cubeFace+arrayIndex*6 */
+ destinationLevel:mip
+ destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
+ texture_array_relative_offset += bytes_per_image;
+ }
+ }
+ else {
+ MTL_LOG_ERROR(
+ "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE %d, %d, %d\n",
+ w_,
+ h_,
+ d_);
+ }
+ } break;
+
+ case GPU_TEXTURE_CUBE_ARRAY: {
+ if (can_use_direct_blit) {
+
+ int bytes_per_row = expected_dst_bytes_per_pixel *
+ ((ctx->pipeline_state.unpack_row_length == 0) ?
+ extent[0] :
+ ctx->pipeline_state.unpack_row_length);
+ int bytes_per_image = bytes_per_row * extent[1];
+
+ /* Upload to all faces between offset[2] (which is zero in most cases) AND extent[2]. 
*/ + int texture_array_relative_offset = 0; + for (int i = 0; i < extent[2]; i++) { + int face_index = offset[2] + i; + [blit_encoder copyFromBuffer:staging_buffer + sourceOffset:staging_buffer_offset + texture_array_relative_offset + sourceBytesPerRow:bytes_per_row + sourceBytesPerImage:bytes_per_image + sourceSize:MTLSizeMake(extent[0], extent[1], 1) + toTexture:texture_handle + destinationSlice:face_index /* = cubeFace+arrayIndex*6. */ + destinationLevel:mip + destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)]; + texture_array_relative_offset += bytes_per_image; + } + } + else { + MTL_LOG_ERROR( + "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE_ARRAY %d, %d, " + "%d\n", + w_, + h_, + d_); + } + } break; + + case GPU_TEXTURE_BUFFER: { + /* TODO(Metal): Support Data upload to TEXTURE BUFFER + * Data uploads generally happen via GPUVertBuf instead. */ + BLI_assert(false); + } break; + + case GPU_TEXTURE_ARRAY: + /* Not an actual format - modifier flag for others. */ + return; + } + + /* Finalize Blit Encoder. */ + if (can_use_direct_blit) { + + /* Textures which use MTLStorageModeManaged need to have updated contents + * synced back to CPU to avoid an automatic flush overwriting contents. */ + if (texture_.storageMode == MTLStorageModeManaged) { + [blit_encoder synchronizeResource:texture_buffer_]; + } + + /* End Encoding. */ + [blit_encoder endEncoding]; + } + else { + + /* End Encoding. */ + [compute_encoder endEncoding]; + + /* Textures which use MTLStorageModeManaged need to have updated contents + * synced back to CPU to avoid an automatic flush overwriting contents. 
*/ + if (texture_.storageMode == MTLStorageModeManaged) { + blit_encoder = [cmd_buffer blitCommandEncoder]; + [blit_encoder synchronizeResource:texture_buffer_]; + [blit_encoder endEncoding]; + } + } + + if (own_command_buffer) { + [cmd_buffer commit]; + } + +#if DEBUG_TEXTURE_UPDATE_CAPTURE == 1 + if (DO_CAPTURE) { + [cmd_buffer waitUntilCompleted]; + MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; + [capture_manager stopCapture]; + } +#endif + } +} + +void gpu::MTLTexture::ensure_mipmaps(int miplvl) +{ + + /* Do not update texture view. */ + BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + + /* Clamp level to maximum. */ + int effective_h = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : this->h_; + int effective_d = (this->type_ != GPU_TEXTURE_3D) ? 0 : this->d_; + int max_dimension = max_iii(this->w_, effective_h, effective_d); + int max_miplvl = floor(log2(max_dimension)); + miplvl = min_ii(max_miplvl, miplvl); + + /* Increase mipmap level. */ + if (mipmaps_ < miplvl) { + mipmaps_ = miplvl; + + /* Check if baked. */ + if (this->is_baked_ && mipmaps_ > mtl_max_mips_) { + this->is_dirty_ = true; + MTL_LOG_WARNING("Texture requires regenerating due to increase in mip-count\n"); + } + } + this->mip_range_set(0, mipmaps_); +} + +void gpu::MTLTexture::generate_mipmap(void) +{ + /* Fetch Active Context. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + BLI_assert(ctx); + + if (!ctx->device) { + MTL_LOG_ERROR("Cannot Generate mip-maps -- metal device invalid\n"); + BLI_assert(false); + return; + } + + /* Ensure mipmaps. */ + this->ensure_mipmaps(9999); + + /* Ensure texture is baked. */ + this->ensure_baked(); + BLI_assert(this->is_baked_ && this->texture_ && "MTLTexture is not valid"); + + if (this->mipmaps_ == 1 || this->mtl_max_mips_ == 1) { + MTL_LOG_WARNING("Call to generate mipmaps on texture with 'mipmaps_=1\n'"); + return; + } + + /* Verify if we can perform mipmap generation. 
*/ + if (this->format_ == GPU_DEPTH_COMPONENT32F || this->format_ == GPU_DEPTH_COMPONENT24 || + this->format_ == GPU_DEPTH_COMPONENT16 || this->format_ == GPU_DEPTH32F_STENCIL8 || + this->format_ == GPU_DEPTH24_STENCIL8) { + MTL_LOG_WARNING("Cannot generate mipmaps for textures using DEPTH formats\n"); + return; + } + + @autoreleasepool { + + id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); + bool own_command_buffer = false; + if (cmd_buffer == nil) { + cmd_buffer = [ctx->queue commandBuffer]; + own_command_buffer = true; + } + else { + /* End active graphics work. */ + ctx->end_render_pass(); + } + + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"Generate MipMaps"]; +#endif + [enc generateMipmapsForTexture:this->texture_]; + [enc endEncoding]; + + if (own_command_buffer) { + [cmd_buffer commit]; + } + } + return; +} + +void gpu::MTLTexture::copy_to(Texture *dst) +{ + /* Safety Checks. */ + gpu::MTLTexture *mt_src = this; + gpu::MTLTexture *mt_dst = static_cast<gpu::MTLTexture *>(dst); + BLI_assert((mt_dst->w_ == mt_src->w_) && (mt_dst->h_ == mt_src->h_) && + (mt_dst->d_ == mt_src->d_)); + BLI_assert(mt_dst->format_ == mt_src->format_); + BLI_assert(mt_dst->type_ == mt_src->type_); + + UNUSED_VARS_NDEBUG(mt_src); + + /* Fetch active context. */ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Ensure texture is baked. */ + this->ensure_baked(); + + @autoreleasepool { + /* End render pass. */ + ctx->end_render_pass(); + + /* Setup blit encoder. */ + id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); + BLI_assert(cmd_buffer != nil); + id<MTLBlitCommandEncoder> blit_encoder = [cmd_buffer blitCommandEncoder]; + BLI_assert(blit_encoder != nil); + + /* TODO(Metal): Consider supporting multiple mip levels IF the GL implementation + * follows, currently it does not. 
*/ + int mip = 0; + + /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */ + int extent[3] = {1, 1, 1}; + this->mip_size_get(mip, extent); + + switch (mt_dst->type_) { + case GPU_TEXTURE_2D_ARRAY: + case GPU_TEXTURE_CUBE_ARRAY: + case GPU_TEXTURE_3D: { + /* Do full texture copy for 3D textures */ + BLI_assert(mt_dst->d_ == this->d_); + [blit_encoder copyFromTexture:this->get_metal_handle_base() + toTexture:mt_dst->get_metal_handle_base()]; + } break; + default: { + int slice = 0; + this->blit(blit_encoder, + 0, + 0, + 0, + slice, + mip, + mt_dst, + 0, + 0, + 0, + slice, + mip, + extent[0], + extent[1], + extent[2]); + } break; + } + + /* End encoding */ + [blit_encoder endEncoding]; + } +} + +void gpu::MTLTexture::clear(eGPUDataFormat data_format, const void *data) +{ + /* Ensure texture is baked. */ + this->ensure_baked(); + + /* Create clear framebuffer. */ + GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get(); + FrameBuffer *fb = reinterpret_cast<FrameBuffer *>(this->get_blit_framebuffer(0, 0)); + fb->bind(true); + fb->clear_attachment(this->attachment_type(0), data_format, data); + GPU_framebuffer_bind(prev_fb); +} + +static MTLTextureSwizzle swizzle_to_mtl(const char swizzle) +{ + switch (swizzle) { + default: + case 'x': + case 'r': + return MTLTextureSwizzleRed; + case 'y': + case 'g': + return MTLTextureSwizzleGreen; + case 'z': + case 'b': + return MTLTextureSwizzleBlue; + case 'w': + case 'a': + return MTLTextureSwizzleAlpha; + case '0': + return MTLTextureSwizzleZero; + case '1': + return MTLTextureSwizzleOne; + } +} + +void gpu::MTLTexture::swizzle_set(const char swizzle_mask[4]) +{ + if (memcmp(this->tex_swizzle_mask_, swizzle_mask, 4) != 0) { + memcpy(this->tex_swizzle_mask_, swizzle_mask, 4); + + /* Creating the swizzle mask and flagging as dirty if changed. 
*/ + MTLTextureSwizzleChannels new_swizzle_mask = MTLTextureSwizzleChannelsMake( + swizzle_to_mtl(swizzle_mask[0]), + swizzle_to_mtl(swizzle_mask[1]), + swizzle_to_mtl(swizzle_mask[2]), + swizzle_to_mtl(swizzle_mask[3])); + + this->mtl_swizzle_mask_ = new_swizzle_mask; + this->texture_view_dirty_flags_ |= TEXTURE_VIEW_SWIZZLE_DIRTY; + } +} + +void gpu::MTLTexture::mip_range_set(int min, int max) +{ + BLI_assert(min <= max && min >= 0 && max <= mipmaps_); + + /* Note: + * - mip_min_ and mip_max_ are used to Clamp LODs during sampling. + * - Given functions like Framebuffer::recursive_downsample modifies the mip range + * between each layer, we do not want to be re-baking the texture. + * - For the time being, we are going to just need to generate a FULL mipmap chain + * as we do not know ahead of time whether mipmaps will be used. + * + * TODO(Metal): Add texture initialisation flag to determine whether mipmaps are used + * or not. Will be important for saving memory for big textures. */ + this->mip_min_ = min; + this->mip_max_ = max; + + if ((this->type_ == GPU_TEXTURE_1D || this->type_ == GPU_TEXTURE_1D_ARRAY || + this->type_ == GPU_TEXTURE_BUFFER) && + max > 1) { + + MTL_LOG_ERROR( + " MTLTexture of type TEXTURE_1D_ARRAY or TEXTURE_BUFFER cannot have a mipcount " + "greater than 1\n"); + this->mip_min_ = 0; + this->mip_max_ = 0; + this->mipmaps_ = 0; + BLI_assert(false); + } + + /* Mip range for texture view. */ + this->mip_texture_base_level_ = this->mip_min_; + this->mip_texture_max_level_ = this->mip_max_; + texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY; +} + +void *gpu::MTLTexture::read(int mip, eGPUDataFormat type) +{ + /* Prepare Array for return data. */ + BLI_assert(!(format_flag_ & GPU_FORMAT_COMPRESSED)); + BLI_assert(mip <= mipmaps_); + BLI_assert(validate_data_format_mtl(this->format_, type)); + + /* NOTE: mip_size_get() won't override any dimension that is equal to 0. 
*/ + int extent[3] = {1, 1, 1}; + this->mip_size_get(mip, extent); + + size_t sample_len = extent[0] * extent[1] * extent[2]; + size_t sample_size = to_bytesize(format_, type); + size_t texture_size = sample_len * sample_size; + int num_channels = to_component_len(this->format_); + + void *data = MEM_mallocN(texture_size + 8, "GPU_texture_read"); + + /* Ensure texture is baked. */ + if (this->is_baked_) { + this->read_internal( + mip, 0, 0, 0, extent[0], extent[1], extent[2], type, num_channels, texture_size + 8, data); + } + else { + /* Clear return values? */ + MTL_LOG_WARNING("MTLTexture::read - reading from texture with no image data\n"); + } + + return data; +} + +/* Fetch the raw buffer data from a texture and copy to CPU host ptr. */ +void gpu::MTLTexture::read_internal(int mip, + int x_off, + int y_off, + int z_off, + int width, + int height, + int depth, + eGPUDataFormat desired_output_format, + int num_output_components, + int debug_data_size, + void *r_data) +{ + /* Verify texures are baked. */ + if (!this->is_baked_) { + MTL_LOG_WARNING("gpu::MTLTexture::read_internal - Trying to read from a non-baked texture!\n"); + return; + } + /* Fetch active context. */ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Calculate Desired output size. */ + int num_channels = to_component_len(this->format_); + BLI_assert(num_output_components <= num_channels); + unsigned int desired_output_bpp = num_output_components * to_bytesize(desired_output_format); + + /* Calculate Metal data output for trivial copy. */ + unsigned int image_bpp = get_mtl_format_bytesize(this->texture_.pixelFormat); + unsigned int image_components = get_mtl_format_num_components(this->texture_.pixelFormat); + bool is_depth_format = (this->format_flag_ & GPU_FORMAT_DEPTH); + + /* Verify if we need to use compute read. 
*/ + eGPUDataFormat data_format = to_mtl_internal_data_format(this->format_get()); + bool format_conversion_needed = (data_format != desired_output_format); + bool can_use_simple_read = (desired_output_bpp == image_bpp) && (!format_conversion_needed) && + (num_output_components == image_components); + + /* Depth must be read using the compute shader -- Some safety checks to verify that params are + * correct. */ + if (is_depth_format) { + can_use_simple_read = false; + /* TODO(Metal): Stencil data write not yet supported, so force components to one. */ + image_components = 1; + BLI_assert(num_output_components == 1); + BLI_assert(image_components == 1); + BLI_assert(data_format == GPU_DATA_FLOAT || data_format == GPU_DATA_UINT_24_8); + BLI_assert(validate_data_format_mtl(this->format_, data_format)); + } + + /* SPECIAL Workaround for R11G11B10 textures requesting a read using: GPU_DATA_10_11_11_REV. */ + if (desired_output_format == GPU_DATA_10_11_11_REV) { + BLI_assert(this->format_ == GPU_R11F_G11F_B10F); + + /* override parameters - we'll be able to use simple copy, as bpp will match at 4 bytes. */ + image_bpp = sizeof(int); + image_components = 1; + desired_output_bpp = sizeof(int); + num_output_components = 1; + + data_format = GPU_DATA_INT; + format_conversion_needed = false; + can_use_simple_read = true; + } + + /* Determine size of output data. */ + unsigned int bytes_per_row = desired_output_bpp * width; + unsigned int bytes_per_image = bytes_per_row * height; + unsigned int total_bytes = bytes_per_image * depth; + + if (can_use_simple_read) { + /* DEBUG check that if direct copy is being used, then both the expected output size matches + * the METAL texture size. */ + BLI_assert( + ((num_output_components * to_bytesize(desired_output_format)) == desired_output_bpp) && + (desired_output_bpp == image_bpp)); + } + /* DEBUG check that the allocated data size matches the bytes we expect. 
*/ + BLI_assert(total_bytes <= debug_data_size); + + /* Fetch allocation from scratch buffer. */ + id<MTLBuffer> destination_buffer = nil; + unsigned int destination_offset = 0; + void *destination_buffer_host_ptr = nullptr; + + /* TODO(Metal): Optimise buffer allocation. */ + MTLResourceOptions bufferOptions = MTLResourceStorageModeManaged; + destination_buffer = [ctx->device newBufferWithLength:max_ii(total_bytes, 256) + options:bufferOptions]; + destination_offset = 0; + destination_buffer_host_ptr = (void *)((unsigned char *)([destination_buffer contents]) + + destination_offset); + + /* Prepare specialisation struct (For non-trivial texture read routine). */ + int depth_format_mode = 0; + if (is_depth_format) { + depth_format_mode = 1; + switch (desired_output_format) { + case GPU_DATA_FLOAT: + depth_format_mode = 1; + break; + case GPU_DATA_UINT_24_8: + depth_format_mode = 2; + break; + case GPU_DATA_UINT: + depth_format_mode = 4; + break; + default: + BLI_assert(false && "Unhandled depth read format case"); + break; + } + } + + TextureReadRoutineSpecialisation compute_specialisation_kernel = { + tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */ + tex_data_format_to_msl_type_str(desired_output_format), /* OUTPUT DATA TYPE */ + num_channels, /* TEXTURE COMPONENT COUNT */ + num_output_components, /* OUTPUT DATA COMPONENT COUNT */ + depth_format_mode}; + + bool copy_successful = false; + @autoreleasepool { + + bool DO_CAPTURE = false; +#if DEBUG_TEXTURE_READ_CAPTURE == 1 + DO_CAPTURE = true; + if (DO_CAPTURE) { + MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; + MTLCaptureDescriptor *capture_descriptor = [[MTLCaptureDescriptor alloc] init]; + capture_descriptor.captureObject = ctx->device; + NSError *error; + if (![capture_manager startCaptureWithDescriptor:capture_descriptor error:&error]) { + NSString *error_str = [NSString stringWithFormat:@"%@", error]; + const char *error_c_str = [error_str 
UTF8String]; + MTL_LOG_ERROR("Failed to start capture. Error: %s\n", error_c_str); + } + } +#endif + + /* TODO(Metal): Verify whether we need some form of barrier here to ensure reads + * happen after work with associated texture is finished. */ + GPU_finish(); + + /* Fetch or Create command buffer. */ + id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); + bool own_command_buffer = false; + if (cmd_buffer == nil || DO_CAPTURE || true) { + cmd_buffer = [ctx->queue commandBuffer]; + own_command_buffer = true; + } + else { + /* End any graphics workloads. */ + ctx->end_render_pass(); + } + + /* Texture View for SRGB special case. */ + id<MTLTexture> read_texture = this->texture_; + if (this->format_ == GPU_SRGB8_A8) { + read_texture = [this->texture_ newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm]; + } + + /* Perform per-texture type read. */ + switch (this->type_) { + case GPU_TEXTURE_2D: { + if (can_use_simple_read) { + /* Use Blit Encoder READ. */ + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"GPUTextureRead"]; +#endif + [enc copyFromTexture:read_texture + sourceSlice:0 + sourceLevel:mip + sourceOrigin:MTLOriginMake(x_off, y_off, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:destination_buffer + destinationOffset:destination_offset + destinationBytesPerRow:bytes_per_row + destinationBytesPerImage:bytes_per_image]; + [enc synchronizeResource:destination_buffer]; + [enc endEncoding]; + copy_successful = true; + } + else { + + /* Use Compute READ. 
*/ + id<MTLComputeCommandEncoder> compute_encoder = [cmd_buffer computeCommandEncoder]; + id<MTLComputePipelineState> pso = texture_read_2d_get_kernel( + compute_specialisation_kernel); + TextureReadParams params = { + mip, + {width, height, 1}, + {x_off, y_off, 0}, + }; + [compute_encoder setComputePipelineState:pso]; + [compute_encoder setBytes:¶ms length:sizeof(params) atIndex:0]; + [compute_encoder setBuffer:destination_buffer offset:destination_offset atIndex:1]; + [compute_encoder setTexture:read_texture atIndex:0]; + [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */ + threadsPerThreadgroup:MTLSizeMake(8, 8, 1)]; + [compute_encoder endEncoding]; + + /* Use Blit encoder to synchronize results back to CPU. */ + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; +#endif + [enc synchronizeResource:destination_buffer]; + [enc endEncoding]; + copy_successful = true; + } + } break; + + case GPU_TEXTURE_2D_ARRAY: { + if (can_use_simple_read) { + /* Use Blit Encoder READ. 
*/ + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"GPUTextureRead"]; +#endif + int base_slice = z_off; + int final_slice = base_slice + depth; + int texture_array_relative_offset = 0; + + for (int array_slice = base_slice; array_slice < final_slice; array_slice++) { + [enc copyFromTexture:read_texture + sourceSlice:0 + sourceLevel:mip + sourceOrigin:MTLOriginMake(x_off, y_off, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:destination_buffer + destinationOffset:destination_offset + texture_array_relative_offset + destinationBytesPerRow:bytes_per_row + destinationBytesPerImage:bytes_per_image]; + [enc synchronizeResource:destination_buffer]; + + texture_array_relative_offset += bytes_per_image; + } + [enc endEncoding]; + copy_successful = true; + } + else { + + /* Use Compute READ */ + id<MTLComputeCommandEncoder> compute_encoder = [cmd_buffer computeCommandEncoder]; + id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel( + compute_specialisation_kernel); + TextureReadParams params = { + mip, + {width, height, depth}, + {x_off, y_off, z_off}, + }; + [compute_encoder setComputePipelineState:pso]; + [compute_encoder setBytes:¶ms length:sizeof(params) atIndex:0]; + [compute_encoder setBuffer:destination_buffer offset:destination_offset atIndex:1]; + [compute_encoder setTexture:read_texture atIndex:0]; + [compute_encoder + dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */ + threadsPerThreadgroup:MTLSizeMake(8, 8, 1)]; + [compute_encoder endEncoding]; + + /* Use Blit encoder to synchronize results back to CPU. 
*/ + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; +#endif + [enc synchronizeResource:destination_buffer]; + [enc endEncoding]; + copy_successful = true; + } + } break; + + case GPU_TEXTURE_CUBE_ARRAY: { + if (can_use_simple_read) { + id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; +#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION + [enc insertDebugSignpost:@"GPUTextureRead"]; +#endif + int base_slice = z_off; + int final_slice = base_slice + depth; + int texture_array_relative_offset = 0; + + for (int array_slice = base_slice; array_slice < final_slice; array_slice++) { + [enc copyFromTexture:read_texture + sourceSlice:array_slice + sourceLevel:mip + sourceOrigin:MTLOriginMake(x_off, y_off, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:destination_buffer + destinationOffset:destination_offset + texture_array_relative_offset + destinationBytesPerRow:bytes_per_row + destinationBytesPerImage:bytes_per_image]; + [enc synchronizeResource:destination_buffer]; + + texture_array_relative_offset += bytes_per_image; + } + MTL_LOG_INFO("Copying texture data to buffer GPU_TEXTURE_CUBE_ARRAY\n"); + [enc endEncoding]; + copy_successful = true; + } + else { + MTL_LOG_ERROR("TODO(Metal): unsupported compute copy of texture cube array"); + } + } break; + + default: + MTL_LOG_WARNING( + "[Warning] gpu::MTLTexture::read_internal simple-copy not yet supported for texture " + "type: %d\n", + (int)this->type_); + break; + } + + if (copy_successful) { + /* Ensure GPU copy from texture to host-accessible buffer is complete. */ + if (own_command_buffer) { + [cmd_buffer commit]; + [cmd_buffer waitUntilCompleted]; + } + else { + /* Ensure GPU copy commands have completed. 
*/ + GPU_finish(); + } + +#if DEBUG_TEXTURE_READ_CAPTURE == 1 + if (DO_CAPTURE) { + MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; + [capture_manager stopCapture]; + } +#endif + + /* Copy data from Shared Memory into ptr. */ + memcpy(r_data, destination_buffer_host_ptr, total_bytes); + MTL_LOG_INFO("gpu::MTLTexture::read_internal success! %d bytes read\n", total_bytes); + } + else { + MTL_LOG_WARNING( + "[Warning] gpu::MTLTexture::read_internal not yet supported for this config -- data " + "format different (src %d bytes, dst %d bytes) (src format: %d, dst format: %d), or " + "varying component counts (src %d, dst %d)\n", + image_bpp, + desired_output_bpp, + (int)data_format, + (int)desired_output_format, + image_components, + num_output_components); + } + } +} + +/* Remove once no longer required -- will just return 0 for now in MTL path. */ +uint gpu::MTLTexture::gl_bindcode_get(void) const +{ + return 0; +} + +bool gpu::MTLTexture::init_internal(void) +{ + if (this->format_ == GPU_DEPTH24_STENCIL8) { + /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */ + this->format_ = GPU_DEPTH32F_STENCIL8; + } + + this->prepare_internal(); + return true; +} + +bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) +{ + /* Zero initialise. */ + this->prepare_internal(); + + /* TODO(Metal): Add implementation for GPU Vert buf. */ + return false; +} + +bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int layer_offset) +{ + BLI_assert(src); + + /* Zero initialise. */ + this->prepare_internal(); + + /* Flag as using texture view. */ + this->resource_mode_ = MTL_TEXTURE_MODE_TEXTURE_VIEW; + this->source_texture_ = src; + this->mip_texture_base_level_ = mip_offset; + this->mip_texture_base_layer_ = layer_offset; + + /* Assign texture as view. 
*/ + const gpu::MTLTexture *mtltex = static_cast<const gpu::MTLTexture *>(unwrap(src)); + this->texture_ = mtltex->texture_; + BLI_assert(this->texture_); + [this->texture_ retain]; + + /* Flag texture as baked -- we do not need explicit initialisation. */ + this->is_baked_ = true; + this->is_dirty_ = false; + + /* Bake mip swizzle view. */ + bake_mip_swizzle_view(); + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name METAL Resource creation and management + * \{ */ + +bool gpu::MTLTexture::texture_is_baked() +{ + return this->is_baked_; +} + +/* Prepare texture parameters after initialisation, but before baking. */ +void gpu::MTLTexture::prepare_internal() +{ + + /* Derive implicit usage flags for Depth/Stencil attachments. */ + if (this->format_flag_ & GPU_FORMAT_DEPTH || this->format_flag_ & GPU_FORMAT_STENCIL) { + this->gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; + } + + /* Derive maxmimum number of mip levels by default. + * TODO(Metal): This can be removed if max mip counts are specified upfront. */ + if (this->type_ == GPU_TEXTURE_1D || this->type_ == GPU_TEXTURE_1D_ARRAY || + this->type_ == GPU_TEXTURE_BUFFER) { + this->mtl_max_mips_ = 1; + } + else { + int effective_h = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : this->h_; + int effective_d = (this->type_ != GPU_TEXTURE_3D) ? 0 : this->d_; + int max_dimension = max_iii(this->w_, effective_h, effective_d); + int max_miplvl = max_ii(floor(log2(max_dimension)) + 1, 1); + this->mtl_max_mips_ = max_miplvl; + } +} + +void gpu::MTLTexture::ensure_baked() +{ + + /* If properties have changed, re-bake. 
*/ + bool copy_previous_contents = false; + if (this->is_baked_ && this->is_dirty_) { + copy_previous_contents = true; + id<MTLTexture> previous_texture = this->texture_; + [previous_texture retain]; + + this->reset(); + } + + if (!this->is_baked_) { + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Ensure texture mode is valid. */ + BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_EXTERNAL); + BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_VBO); + + /* Format and mip levels (TODO(Metal): Optimise mipmaps counts, specify up-front). */ + MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); + + /* Create texture descriptor. */ + switch (this->type_) { + + /* 1D */ + case GPU_TEXTURE_1D: + case GPU_TEXTURE_1D_ARRAY: { + BLI_assert(this->w_ > 0); + this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + this->texture_descriptor_.pixelFormat = mtl_format; + this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? + MTLTextureType1DArray : + MTLTextureType1D; + this->texture_descriptor_.width = this->w_; + this->texture_descriptor_.height = 1; + this->texture_descriptor_.depth = 1; + this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? this->h_ : + 1; + this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? + this->mtl_max_mips_ : + 1; + this->texture_descriptor_.usage = + MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimise usage flags. 
*/ + this->texture_descriptor_.storageMode = MTLStorageModePrivate; + this->texture_descriptor_.sampleCount = 1; + this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + } break; + + /* 2D */ + case GPU_TEXTURE_2D: + case GPU_TEXTURE_2D_ARRAY: { + BLI_assert(this->w_ > 0 && this->h_ > 0); + this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + this->texture_descriptor_.pixelFormat = mtl_format; + this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? + MTLTextureType2DArray : + MTLTextureType2D; + this->texture_descriptor_.width = this->w_; + this->texture_descriptor_.height = this->h_; + this->texture_descriptor_.depth = 1; + this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? this->d_ : + 1; + this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? + this->mtl_max_mips_ : + 1; + this->texture_descriptor_.usage = + MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimise usage flags. */ + this->texture_descriptor_.storageMode = MTLStorageModePrivate; + this->texture_descriptor_.sampleCount = 1; + this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + } break; + + /* 3D */ + case GPU_TEXTURE_3D: { + BLI_assert(this->w_ > 0 && this->h_ > 0 && this->d_ > 0); + this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + this->texture_descriptor_.pixelFormat = mtl_format; + this->texture_descriptor_.textureType = MTLTextureType3D; + this->texture_descriptor_.width = this->w_; + this->texture_descriptor_.height = this->h_; + this->texture_descriptor_.depth = this->d_; + this->texture_descriptor_.arrayLength = 1; + this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? 
+ this->mtl_max_mips_ : + 1; + this->texture_descriptor_.usage = + MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimise usage flags. */ + this->texture_descriptor_.storageMode = MTLStorageModePrivate; + this->texture_descriptor_.sampleCount = 1; + this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + } break; + + /* CUBE TEXTURES */ + case GPU_TEXTURE_CUBE: + case GPU_TEXTURE_CUBE_ARRAY: { + /* Note: For a cubemap 'Texture::d_' refers to total number of faces, not just array slices + */ + BLI_assert(this->w_ > 0 && this->h_ > 0); + this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + this->texture_descriptor_.pixelFormat = mtl_format; + this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_CUBE_ARRAY) ? + MTLTextureTypeCubeArray : + MTLTextureTypeCube; + this->texture_descriptor_.width = this->w_; + this->texture_descriptor_.height = this->h_; + this->texture_descriptor_.depth = 1; + this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_CUBE_ARRAY) ? + this->d_ / 6 : + 1; + this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? + this->mtl_max_mips_ : + 1; + this->texture_descriptor_.usage = + MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimise usage flags. 
*/ + this->texture_descriptor_.storageMode = MTLStorageModePrivate; + this->texture_descriptor_.sampleCount = 1; + this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + } break; + + /* GPU_TEXTURE_BUFFER */ + case GPU_TEXTURE_BUFFER: { + this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + this->texture_descriptor_.pixelFormat = mtl_format; + this->texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; + this->texture_descriptor_.width = this->w_; + this->texture_descriptor_.height = 1; + this->texture_descriptor_.depth = 1; + this->texture_descriptor_.arrayLength = 1; + this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? + this->mtl_max_mips_ : + 1; + this->texture_descriptor_.usage = + MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimise usage flags. */ + this->texture_descriptor_.storageMode = MTLStorageModePrivate; + this->texture_descriptor_.sampleCount = 1; + this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + } break; + + default: { + MTL_LOG_ERROR("[METAL] Error: Cannot create texture with unknown type: %d\n", this->type_); + return; + } break; + } + + /* Determine Resource Mode. */ + this->resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; + + /* Create texture. */ + this->texture_ = [ctx->device newTextureWithDescriptor:this->texture_descriptor_]; + + [this->texture_descriptor_ release]; + this->texture_descriptor_ = nullptr; + this->texture_.label = [NSString stringWithUTF8String:this->get_name()]; + BLI_assert(this->texture_); + this->is_baked_ = true; + this->is_dirty_ = false; + } + + /* Re-apply previous contents. 
*/ + if (copy_previous_contents) { + id<MTLTexture> previous_texture; + /* TODO(Metal): May need to copy previous contents of texture into new texture. */ + /*[previous_texture release]; */ + UNUSED_VARS(previous_texture); + } +} + +void gpu::MTLTexture::reset() +{ + + MTL_LOG_INFO("Texture %s reset. Size %d, %d, %d\n", this->get_name(), w_, h_, d_); + /* Delete associated METAL resources. */ + if (this->texture_ != nil) { + [this->texture_ release]; + this->texture_ = nil; + this->is_baked_ = false; + this->is_dirty_ = true; + } + + if (this->mip_swizzle_view_ != nil) { + [this->mip_swizzle_view_ release]; + this->mip_swizzle_view_ = nil; + } + + if (this->texture_buffer_ != nil) { + [this->texture_buffer_ release]; + } + + /* Blit framebuffer. */ + if (this->blit_fb_) { + GPU_framebuffer_free(this->blit_fb_); + this->blit_fb_ = nullptr; + } + + BLI_assert(this->texture_ == nil); + BLI_assert(this->mip_swizzle_view_ == nil); +} + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm new file mode 100644 index 00000000000..c166a9362c8 --- /dev/null +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -0,0 +1,748 @@ +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "DNA_userdef_types.h" + +#include "GPU_batch.h" +#include "GPU_batch_presets.h" +#include "GPU_capabilities.h" +#include "GPU_framebuffer.h" +#include "GPU_platform.h" +#include "GPU_state.h" + +#include "mtl_backend.hh" +#include "mtl_context.hh" +#include "mtl_texture.hh" + +/* Utility file for secondary functionality which supports mtl_texture.mm. 
*/ + +extern char datatoc_compute_texture_update_msl[]; +extern char datatoc_depth_2d_update_vert_glsl[]; +extern char datatoc_depth_2d_update_float_frag_glsl[]; +extern char datatoc_depth_2d_update_int24_frag_glsl[]; +extern char datatoc_depth_2d_update_int32_frag_glsl[]; +extern char datatoc_compute_texture_read_msl[]; +extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[]; +extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[]; + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Texture Utility Functions + * \{ */ + +MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format) +{ + + switch (tex_format) { + /* Formats texture & renderbuffer. */ + case GPU_RGBA8UI: + return MTLPixelFormatRGBA8Uint; + case GPU_RGBA8I: + return MTLPixelFormatRGBA8Sint; + case GPU_RGBA8: + return MTLPixelFormatRGBA8Unorm; + case GPU_RGBA32UI: + return MTLPixelFormatRGBA32Uint; + case GPU_RGBA32I: + return MTLPixelFormatRGBA32Sint; + case GPU_RGBA32F: + return MTLPixelFormatRGBA32Float; + case GPU_RGBA16UI: + return MTLPixelFormatRGBA16Uint; + case GPU_RGBA16I: + return MTLPixelFormatRGBA16Sint; + case GPU_RGBA16F: + return MTLPixelFormatRGBA16Float; + case GPU_RGBA16: + return MTLPixelFormatRGBA16Unorm; + case GPU_RG8UI: + return MTLPixelFormatRG8Uint; + case GPU_RG8I: + return MTLPixelFormatRG8Sint; + case GPU_RG8: + return MTLPixelFormatRG8Unorm; + case GPU_RG32UI: + return MTLPixelFormatRG32Uint; + case GPU_RG32I: + return MTLPixelFormatRG32Sint; + case GPU_RG32F: + return MTLPixelFormatRG32Float; + case GPU_RG16UI: + return MTLPixelFormatRG16Uint; + case GPU_RG16I: + return MTLPixelFormatRG16Sint; + case GPU_RG16F: + return MTLPixelFormatRG16Float; + case GPU_RG16: + return MTLPixelFormatRG16Float; + case GPU_R8UI: + return MTLPixelFormatR8Uint; + case GPU_R8I: + return MTLPixelFormatR8Sint; + case GPU_R8: + return MTLPixelFormatR8Unorm; + case GPU_R32UI: + return MTLPixelFormatR32Uint; + case 
GPU_R32I: + return MTLPixelFormatR32Sint; + case GPU_R32F: + return MTLPixelFormatR32Float; + case GPU_R16UI: + return MTLPixelFormatR16Uint; + case GPU_R16I: + return MTLPixelFormatR16Sint; + case GPU_R16F: + return MTLPixelFormatR16Float; + case GPU_R16: + return MTLPixelFormatR16Snorm; + + /* Special formats texture & renderbuffer. */ + case GPU_R11F_G11F_B10F: + return MTLPixelFormatRG11B10Float; + case GPU_DEPTH32F_STENCIL8: + return MTLPixelFormatDepth32Float_Stencil8; + case GPU_DEPTH24_STENCIL8: { + BLI_assert(false && "GPU_DEPTH24_STENCIL8 not supported by Apple Silicon."); + return MTLPixelFormatDepth24Unorm_Stencil8; + } + case GPU_SRGB8_A8: + return MTLPixelFormatRGBA8Unorm_sRGB; + case GPU_RGB16F: + return MTLPixelFormatRGBA16Float; + + /* Depth Formats. */ + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + return MTLPixelFormatDepth32Float; + case GPU_DEPTH_COMPONENT16: + return MTLPixelFormatDepth16Unorm; + + default: + BLI_assert(!"Unrecognised GPU pixel format!\n"); + return MTLPixelFormatRGBA8Unorm; + } +} + +int get_mtl_format_bytesize(MTLPixelFormat tex_format) +{ + + switch (tex_format) { + case MTLPixelFormatRGBA8Uint: + case MTLPixelFormatRGBA8Sint: + case MTLPixelFormatRGBA8Unorm: + return 4; + case MTLPixelFormatRGBA32Uint: + case MTLPixelFormatRGBA32Sint: + case MTLPixelFormatRGBA32Float: + return 16; + case MTLPixelFormatRGBA16Uint: + case MTLPixelFormatRGBA16Sint: + case MTLPixelFormatRGBA16Float: + case MTLPixelFormatRGBA16Unorm: + return 8; + case MTLPixelFormatRG8Uint: + case MTLPixelFormatRG8Sint: + case MTLPixelFormatRG8Unorm: + return 2; + case MTLPixelFormatRG32Uint: + case MTLPixelFormatRG32Sint: + case MTLPixelFormatRG32Float: + return 8; + case MTLPixelFormatRG16Uint: + case MTLPixelFormatRG16Sint: + case MTLPixelFormatRG16Float: + return 4; + case MTLPixelFormatR8Uint: + case MTLPixelFormatR8Sint: + case MTLPixelFormatR8Unorm: + return 1; + case MTLPixelFormatR32Uint: + case MTLPixelFormatR32Sint: + case 
MTLPixelFormatR32Float: + return 4; + case MTLPixelFormatR16Uint: + case MTLPixelFormatR16Sint: + case MTLPixelFormatR16Float: + case MTLPixelFormatR16Snorm: + return 2; + case MTLPixelFormatRG11B10Float: + return 4; + case MTLPixelFormatDepth32Float_Stencil8: + return 8; + case MTLPixelFormatRGBA8Unorm_sRGB: + case MTLPixelFormatDepth32Float: + case MTLPixelFormatDepth24Unorm_Stencil8: + return 4; + case MTLPixelFormatDepth16Unorm: + return 2; + + default: + BLI_assert(!"Unrecognised GPU pixel format!\n"); + return 1; + } +} + +int get_mtl_format_num_components(MTLPixelFormat tex_format) +{ + + switch (tex_format) { + case MTLPixelFormatRGBA8Uint: + case MTLPixelFormatRGBA8Sint: + case MTLPixelFormatRGBA8Unorm: + case MTLPixelFormatRGBA32Uint: + case MTLPixelFormatRGBA32Sint: + case MTLPixelFormatRGBA32Float: + case MTLPixelFormatRGBA16Uint: + case MTLPixelFormatRGBA16Sint: + case MTLPixelFormatRGBA16Float: + case MTLPixelFormatRGBA16Unorm: + case MTLPixelFormatRGBA8Unorm_sRGB: + return 4; + + case MTLPixelFormatRG11B10Float: + return 3; + + case MTLPixelFormatRG8Uint: + case MTLPixelFormatRG8Sint: + case MTLPixelFormatRG8Unorm: + case MTLPixelFormatRG32Uint: + case MTLPixelFormatRG32Sint: + case MTLPixelFormatRG32Float: + case MTLPixelFormatRG16Uint: + case MTLPixelFormatRG16Sint: + case MTLPixelFormatRG16Float: + case MTLPixelFormatDepth32Float_Stencil8: + return 2; + + case MTLPixelFormatR8Uint: + case MTLPixelFormatR8Sint: + case MTLPixelFormatR8Unorm: + case MTLPixelFormatR32Uint: + case MTLPixelFormatR32Sint: + case MTLPixelFormatR32Float: + case MTLPixelFormatR16Uint: + case MTLPixelFormatR16Sint: + case MTLPixelFormatR16Float: + case MTLPixelFormatR16Snorm: + case MTLPixelFormatDepth32Float: + case MTLPixelFormatDepth16Unorm: + case MTLPixelFormatDepth24Unorm_Stencil8: + /* Treating this format as single-channel for direct data copies -- Stencil component is not + * addressable. 
*/ + return 1; + + default: + BLI_assert(!"Unrecognised GPU pixel format!\n"); + return 1; + } +} + +bool mtl_format_supports_blending(MTLPixelFormat format) +{ + /* Add formats as needed -- Verify platforms. */ + const MTLCapabilities &capabilities = MTLBackend::get_capabilities(); + + if (capabilities.supports_family_mac1 || capabilities.supports_family_mac_catalyst1) { + + switch (format) { + case MTLPixelFormatA8Unorm: + case MTLPixelFormatR8Uint: + case MTLPixelFormatR8Sint: + case MTLPixelFormatR16Uint: + case MTLPixelFormatR16Sint: + case MTLPixelFormatRG32Uint: + case MTLPixelFormatRG32Sint: + case MTLPixelFormatRGBA8Uint: + case MTLPixelFormatRGBA8Sint: + case MTLPixelFormatRGBA32Uint: + case MTLPixelFormatRGBA32Sint: + case MTLPixelFormatDepth16Unorm: + case MTLPixelFormatDepth32Float: + case MTLPixelFormatInvalid: + case MTLPixelFormatBGR10A2Unorm: + case MTLPixelFormatRGB10A2Uint: + return false; + default: + return true; + } + } + else { + switch (format) { + case MTLPixelFormatA8Unorm: + case MTLPixelFormatR8Uint: + case MTLPixelFormatR8Sint: + case MTLPixelFormatR16Uint: + case MTLPixelFormatR16Sint: + case MTLPixelFormatRG32Uint: + case MTLPixelFormatRG32Sint: + case MTLPixelFormatRGBA8Uint: + case MTLPixelFormatRGBA8Sint: + case MTLPixelFormatRGBA32Uint: + case MTLPixelFormatRGBA32Sint: + case MTLPixelFormatRGBA32Float: + case MTLPixelFormatDepth16Unorm: + case MTLPixelFormatDepth32Float: + case MTLPixelFormatInvalid: + case MTLPixelFormatBGR10A2Unorm: + case MTLPixelFormatRGB10A2Uint: + return false; + default: + return true; + } + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Texture data upload routines + * \{ */ + +id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( + TextureUpdateRoutineSpecialisation specialisation_params, + blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> + &specialisation_cache, + eGPUTextureType texture_type) +{ + /* 
Check whether the Kernel exists. */ + id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params); + if (result != nullptr) { + return *result; + } + + id<MTLComputePipelineState> return_pso = nil; + @autoreleasepool { + + /* Fetch active context. */ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /** SOURCE. **/ + NSString *tex_update_kernel_src = [NSString + stringWithUTF8String:datatoc_compute_texture_update_msl]; + + /* Prepare options and specialisations. */ + MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease]; + options.languageVersion = MTLLanguageVersion2_2; + options.preprocessorMacros = @{ + @"INPUT_DATA_TYPE" : + [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()], + @"OUTPUT_DATA_TYPE" : + [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()], + @"COMPONENT_COUNT_INPUT" : + [NSNumber numberWithInt:specialisation_params.component_count_input], + @"COMPONENT_COUNT_OUTPUT" : + [NSNumber numberWithInt:specialisation_params.component_count_output], + @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))] + }; + + /* Prepare shader library for conversion routine. */ + NSError *error = NULL; + id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src + options:options + error:&error] autorelease]; + if (error) { + NSLog(@"Compile Error - Metal Shader Library error %@ ", error); + BLI_assert(false); + return nullptr; + } + + /* Fetch compute function. */ + BLI_assert(temp_lib != nil); + id<MTLFunction> temp_compute_function = [[temp_lib + newFunctionWithName:@"compute_texture_update"] autorelease]; + BLI_assert(temp_compute_function); + + /* Otherwise, bake new Kernel. 
*/ + id<MTLComputePipelineState> compute_pso = [ctx->device + newComputePipelineStateWithFunction:temp_compute_function + error:&error]; + if (error || compute_pso == nil) { + NSLog(@"Failed to prepare texture_update MTLComputePipelineState %@", error); + BLI_assert(false); + } + + /* Store PSO. */ + [compute_pso retain]; + specialisation_cache.add_new(specialisation_params, compute_pso); + return_pso = compute_pso; + } + + BLI_assert(return_pso != nil); + return return_pso; +} + +id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation) +{ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + return mtl_texture_update_impl(specialisation, + mtl_context->get_texture_utils().texture_1d_update_compute_psos, + GPU_TEXTURE_1D); +} + +id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_array_get_kernel( + TextureUpdateRoutineSpecialisation specialisation) +{ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + return mtl_texture_update_impl( + specialisation, + mtl_context->get_texture_utils().texture_1d_array_update_compute_psos, + GPU_TEXTURE_1D_ARRAY); +} + +id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation) +{ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + return mtl_texture_update_impl(specialisation, + mtl_context->get_texture_utils().texture_2d_update_compute_psos, + GPU_TEXTURE_2D); +} + +id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_array_get_kernel( + TextureUpdateRoutineSpecialisation specialisation) +{ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + return 
mtl_texture_update_impl( + specialisation, + mtl_context->get_texture_utils().texture_2d_array_update_compute_psos, + GPU_TEXTURE_2D_ARRAY); +} + +id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel( + TextureUpdateRoutineSpecialisation specialisation) +{ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + return mtl_texture_update_impl(specialisation, + mtl_context->get_texture_utils().texture_3d_update_compute_psos, + GPU_TEXTURE_3D); +} + +/* TODO(Metal): Data upload routine kernel for texture cube and texture cube array. + * Currently does not appear to be hit. */ + +GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( + DepthTextureUpdateRoutineSpecialisation specialisation) +{ + + /* Check whether the Kernel exists. */ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_context != nullptr); + + GPUShader **result = mtl_context->get_texture_utils().depth_2d_update_shaders.lookup_ptr( + specialisation); + if (result != nullptr) { + return *result; + } + + const char *fragment_source = nullptr; + switch (specialisation.data_mode) { + case MTL_DEPTH_UPDATE_MODE_FLOAT: + fragment_source = datatoc_depth_2d_update_float_frag_glsl; + break; + case MTL_DEPTH_UPDATE_MODE_INT24: + fragment_source = datatoc_depth_2d_update_int24_frag_glsl; + break; + case MTL_DEPTH_UPDATE_MODE_INT32: + fragment_source = datatoc_depth_2d_update_int32_frag_glsl; + break; + default: + BLI_assert(false && "Invalid format mode\n"); + return nullptr; + } + + GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl, + fragment_source, + nullptr, + nullptr, + nullptr, + "depth_2d_update_sh_get"); + mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialisation, shader); + return shader; +} + +GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get() +{ + + MTLContext *mtl_context = static_cast<MTLContext 
*>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  /* Compile the blit shader on first use, then return the cached instance. */
  if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
    const char *vert_src = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
    const char *frag_src = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
    GPUShader *blit_shader = GPU_shader_create(
        vert_src, frag_src, nullptr, nullptr, nullptr, "fullscreen_blit");
    mtl_context->get_texture_utils().fullscreen_blit_shader = blit_shader;
  }
  return mtl_context->get_texture_utils().fullscreen_blit_shader;
}

/* Special routine for updating 2D depth textures using the rendering pipeline. */
void gpu::MTLTexture::update_sub_depth_2d(
    int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data)
{
  /* Verify we are in a valid configuration. */
  BLI_assert(ELEM(this->format_,
                  GPU_DEPTH_COMPONENT24,
                  GPU_DEPTH_COMPONENT32F,
                  GPU_DEPTH_COMPONENT16,
                  GPU_DEPTH24_STENCIL8,
                  GPU_DEPTH32F_STENCIL8));
  BLI_assert(validate_data_format_mtl(this->format_, type));
  BLI_assert(ELEM(type, GPU_DATA_FLOAT, GPU_DATA_UINT_24_8, GPU_DATA_UINT));

  /* Determine whether we are in GPU_DATA_UINT_24_8 or GPU_DATA_FLOAT mode. */
  bool is_float = (type == GPU_DATA_FLOAT);
  eGPUTextureFormat format = is_float ? GPU_R32F : GPU_R32I;

  /* Shader key - Add parameters here for different configurations. */
  DepthTextureUpdateRoutineSpecialisation specialisation;
  switch (type) {
    case GPU_DATA_FLOAT:
      specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT;
      break;
    case GPU_DATA_UINT_24_8:
      specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT24;
      break;
    case GPU_DATA_UINT:
      specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT32;
      break;
    default:
      BLI_assert(false && "Unsupported eGPUDataFormat being passed to depth texture update\n");
      return;
  }

  /* Push contents into an r32_tex and render contents to depth using a shader. */
  GPUTexture *r32_tex_tmp = GPU_texture_create_2d(
      "depth_intermediate_copy_tex", this->w_, this->h_, 1, format, nullptr);
  GPU_texture_filter_mode(r32_tex_tmp, false);
  GPU_texture_wrap_mode(r32_tex_tmp, false, true);
  gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(unwrap(r32_tex_tmp));
  mtl_tex->update_sub(mip, offset, extent, type, data);

  /* Bind a temporary framebuffer with this depth texture attached. */
  GPUFrameBuffer *restore_fb = GPU_framebuffer_active_get();
  GPUFrameBuffer *depth_fb_temp = GPU_framebuffer_create("depth_intermediate_copy_fb");
  GPU_framebuffer_texture_attach(depth_fb_temp, wrap(static_cast<Texture *>(this)), 0, mip);
  GPU_framebuffer_bind(depth_fb_temp);
  if (extent[0] == this->w_ && extent[1] == this->h_) {
    /* Skip load if the whole texture is being updated. */
    GPU_framebuffer_clear_depth(depth_fb_temp, 0.0);
    GPU_framebuffer_clear_stencil(depth_fb_temp, 0);
  }

  /* Draw a full-region quad that writes the staged values into depth. */
  GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialisation);
  BLI_assert(depth_2d_update_sh != nullptr);
  GPUBatch *quad = GPU_batch_preset_quad();
  GPU_batch_set_shader(quad, depth_2d_update_sh);

  GPU_batch_texture_bind(quad, "source_data", r32_tex_tmp);
  GPU_batch_uniform_1i(quad, "mip", mip);
  GPU_batch_uniform_2f(quad, "extent", (float)extent[0], (float)extent[1]);
  GPU_batch_uniform_2f(quad, "offset", (float)offset[0], (float)offset[1]);
  GPU_batch_uniform_2f(quad, "size", (float)this->w_, (float)this->h_);

  /* Save GPU state so it can be restored after the draw. */
  bool depth_write_prev = GPU_depth_mask_get();
  uint stencil_mask_prev = GPU_stencil_mask_get();
  eGPUDepthTest depth_test_prev = GPU_depth_test_get();
  eGPUStencilTest stencil_test_prev = GPU_stencil_test_get();
  /* NOTE(review): scissor test/region are set here but not restored afterwards
   * — confirm callers reset scissor state. */
  GPU_scissor_test(true);
  GPU_scissor(offset[0], offset[1], extent[0], extent[1]);

  GPU_stencil_write_mask_set(0xFF);
  GPU_stencil_reference_set(0);
  GPU_stencil_test(GPU_STENCIL_ALWAYS);
  GPU_depth_mask(true);
  GPU_depth_test(GPU_DEPTH_ALWAYS);

  GPU_batch_draw(quad);

  /* Restore the saved state. */
  GPU_depth_mask(depth_write_prev);
  GPU_stencil_write_mask_set(stencil_mask_prev);
GPU_stencil_test(stencil_test_prev);
  GPU_depth_test(depth_test_prev);

  /* Restore the previously bound framebuffer and release temporaries. */
  if (restore_fb != nullptr) {
    GPU_framebuffer_bind(restore_fb);
  }
  else {
    GPU_framebuffer_restore();
  }
  GPU_framebuffer_free(depth_fb_temp);
  GPU_texture_free(r32_tex_tmp);
}
/** \} */

/* -------------------------------------------------------------------- */
/** \name Texture data read routines
 * \{ */

id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
    TextureReadRoutineSpecialisation specialisation_params,
    blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
        &specialisation_cache,
    eGPUTextureType texture_type)
{
  /* Reuse a cached kernel when this specialisation has been compiled before. */
  id<MTLComputePipelineState> *cached_pso = specialisation_cache.lookup_ptr(
      specialisation_params);
  if (cached_pso != nullptr) {
    return *cached_pso;
  }

  id<MTLComputePipelineState> return_pso = nil;
  @autoreleasepool {

    /* Fetch active context. */
    MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
    BLI_assert(ctx);

    /** SOURCE. **/
    NSString *tex_read_kernel_src = [NSString
        stringWithUTF8String:datatoc_compute_texture_read_msl];

    /* Defensive Debug Checks: depth reads are single-channel, with a fixed
     * scale factor per depth format mode. */
    long long int depth_scale_factor = 1;
    if (specialisation_params.depth_format_mode > 0) {
      BLI_assert(specialisation_params.component_count_input == 1);
      BLI_assert(specialisation_params.component_count_output == 1);
      switch (specialisation_params.depth_format_mode) {
        case 1:
          /* FLOAT */
          depth_scale_factor = 1;
          break;
        case 2:
          /* D24 unsigned int */
          depth_scale_factor = 0xFFFFFFu;
          break;
        case 4:
          /* D32 unsigned int */
          depth_scale_factor = 0xFFFFFFFFu;
          break;
        default:
          BLI_assert_msg(0, "Unrecognised mode");
          break;
      }
    }

    /* Prepare options and specialisations. */
    MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
    options.languageVersion = MTLLanguageVersion2_2;
    options.preprocessorMacros = @{
      @"INPUT_DATA_TYPE" :
          [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()],
      @"OUTPUT_DATA_TYPE" :
          [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()],
      @"COMPONENT_COUNT_INPUT" :
          [NSNumber numberWithInt:specialisation_params.component_count_input],
      @"COMPONENT_COUNT_OUTPUT" :
          [NSNumber numberWithInt:specialisation_params.component_count_output],
      @"WRITE_COMPONENT_COUNT" :
          [NSNumber numberWithInt:min_ii(specialisation_params.component_count_input,
                                         specialisation_params.component_count_output)],
      @"IS_DEPTH_FORMAT" :
          [NSNumber numberWithInt:((specialisation_params.depth_format_mode > 0) ? 1 : 0)],
      @"DEPTH_SCALE_FACTOR" : [NSNumber numberWithLongLong:depth_scale_factor],
      @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))]
    };

    /* Prepare shader library for conversion routine. */
    NSError *error = NULL;
    id<MTLLibrary> read_lib = [[ctx->device newLibraryWithSource:tex_read_kernel_src
                                                         options:options
                                                           error:&error] autorelease];
    if (error) {
      NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
      BLI_assert(false);
      return nil;
    }

    /* Fetch compute function. */
    BLI_assert(read_lib != nil);
    id<MTLFunction> read_function = [[read_lib newFunctionWithName:@"compute_texture_read"]
        autorelease];
    BLI_assert(read_function);

    /* Otherwise, bake new Kernel. */
    id<MTLComputePipelineState> compute_pso = [ctx->device
        newComputePipelineStateWithFunction:read_function
                                      error:&error];
    if (error || compute_pso == nil) {
      NSLog(@"Failed to prepare texture_read MTLComputePipelineState %@", error);
      BLI_assert(false);
      return nil;
    }

    /* Store PSO: retained so it outlives the autorelease pool, then cached. */
    [compute_pso retain];
    specialisation_cache.add_new(specialisation_params, compute_pso);
    return_pso = compute_pso;
  }

  BLI_assert(return_pso != nil);
  return return_pso;
}

/* Fetch (or compile and cache) the 2D texture read kernel. */
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_get_kernel(
    TextureReadRoutineSpecialisation specialisation)
{
  MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  return mtl_texture_read_impl(specialisation,
                               mtl_context->get_texture_utils().texture_2d_read_compute_psos,
                               GPU_TEXTURE_2D);
}

/* Fetch (or compile and cache) the 2D array texture read kernel. */
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_array_get_kernel(
    TextureReadRoutineSpecialisation specialisation)
{
  MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  return mtl_texture_read_impl(specialisation,
                               mtl_context->get_texture_utils().texture_2d_array_read_compute_psos,
                               GPU_TEXTURE_2D_ARRAY);
}

/* Fetch (or compile and cache) the 1D texture read kernel. */
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_get_kernel(
    TextureReadRoutineSpecialisation specialisation)
{
  MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  return mtl_texture_read_impl(specialisation,
                               mtl_context->get_texture_utils().texture_1d_read_compute_psos,
                               GPU_TEXTURE_1D);
}

/* Fetch (or compile and cache) the 1D array texture read kernel. */
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_array_get_kernel(
    TextureReadRoutineSpecialisation specialisation)
{
  MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  return mtl_texture_read_impl(specialisation,
                               mtl_context->get_texture_utils().texture_1d_array_read_compute_psos,
                               GPU_TEXTURE_1D_ARRAY);
}

/* Fetch (or compile and cache) the 3D texture read kernel. */
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_3d_get_kernel(
    TextureReadRoutineSpecialisation specialisation)
{
  MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(mtl_context != nullptr);
  return mtl_texture_read_impl(specialisation,
                               mtl_context->get_texture_utils().texture_3d_read_compute_psos,
                               GPU_TEXTURE_3D);
}

/** \} */

} // namespace blender::gpu