using namespace metal; /* MATCHING eGPUTextureType. */ #define GPU_TEXTURE_1D (1 << 0) #define GPU_TEXTURE_2D (1 << 1) #define GPU_TEXTURE_3D (1 << 2) #define GPU_TEXTURE_CUBE (1 << 3) #define GPU_TEXTURE_ARRAY (1 << 4) #define GPU_TEXTURE_BUFFER (1 << 5) #define GPU_TEXTURE_1D_ARRAY (GPU_TEXTURE_1D | GPU_TEXTURE_ARRAY) #define GPU_TEXTURE_2D_ARRAY (GPU_TEXTURE_2D | GPU_TEXTURE_ARRAY) #define GPU_TEXTURE_CUBE_ARRAY (GPU_TEXTURE_CUBE | GPU_TEXTURE_ARRAY) /* Assign parameters based on texture type. */ #if TEX_TYPE == GPU_TEXTURE_1D # define TEX_TYPE_NAME texture1d # define DIMS 1 #elif TEX_TYPE == GPU_TEXTURE_2D # define TEX_TYPE_NAME texture2d # define DIMS 2 #elif TEX_TYPE == GPU_TEXTURE_3D # define TEX_TYPE_NAME texture3d # define DIMS 3 #elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY # define TEX_TYPE_NAME texture1d_array # define DIMS 2 #elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY # define TEX_TYPE_NAME texture2d_array # define DIMS 3 #endif /* Position dimensionality for threadgroup. */ #if DIMS == 1 # define POSITION_TYPE uint #elif DIMS == 2 # define POSITION_TYPE uint2 #elif DIMS == 3 # define POSITION_TYPE uint3 #endif struct TextureUpdateParams { int mip_index; int extent[3]; int offset[3]; uint unpack_row_length; }; kernel void compute_texture_update(constant TextureUpdateParams ¶ms [[buffer(0)]], constant INPUT_DATA_TYPE *input_data [[buffer(1)]], TEX_TYPE_NAME update_tex [[texture(0)]], POSITION_TYPE position [[thread_position_in_grid]]) { /* 1D TEXTURE */ #if TEX_TYPE == GPU_TEXTURE_1D /* xx, yy, layer determined by kernel invocation pattern */ uint xx = position; int index = xx * COMPONENT_COUNT_INPUT; vec output; for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); } for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { output[i] = OUTPUT_DATA_TYPE(0); } update_tex.write(output, uint(params.offset[0]) + uint(xx)); /* 2D TEXTURE */ #elif TEX_TYPE == GPU_TEXTURE_2D /* xx, yy, layer determined by kernel invocation pattern */ uint xx = position[0]; uint yy = position[1]; int index = (yy * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; vec output; for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); } for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { output[i] = OUTPUT_DATA_TYPE(0); } update_tex.write(output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy)); /* 3D TEXTURE */ #elif TEX_TYPE == GPU_TEXTURE_3D /* xx, yy, zz determined by kernel invocation pattern */ uint xx = position[0]; uint yy = position[1]; uint zz = position[2]; int index = (zz * (params.unpack_row_length * params.extent[1]) + yy * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; vec output; for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); } for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { output[i] = OUTPUT_DATA_TYPE(0); } update_tex.write( output, uint3(params.offset[0], params.offset[1], params.offset[2]) + uint3(xx, yy, zz)); /* 1D ARRAY TEXTURE */ #elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY /* xx, yy, layer determined by kernel invocation pattern */ uint xx = position[0]; uint layer = position[1]; int index = (layer * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; vec output; for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); } for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { output[i] = OUTPUT_DATA_TYPE(0); } update_tex.write( output, uint(params.offset[0]) + uint(xx), uint(params.offset[1]) + uint(layer)); /* 2D ARRAY TEXTURE */ #elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY /* xx, yy, layer determined by kernel invocation pattern */ uint xx = position[0]; uint yy = position[1]; uint layer = position[2]; int index = (layer * (params.unpack_row_length * params.extent[1]) + yy * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT; vec output; for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) { output[i] = OUTPUT_DATA_TYPE(input_data[index + i]); } for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) { output[i] = OUTPUT_DATA_TYPE(0); } update_tex.write( output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy), params.offset[2] + layer); #endif }