diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-11-09 14:10:04 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-11-09 14:10:04 +0300 |
commit | 896e138b5e4893d148ea245d94d7989ce789facb (patch) | |
tree | 661be6d8a5a852b8beacd3879ca602711277eff9 | |
parent | bf666302f95e4ac276a5415cdca9861eecc9c901 (diff) |
vkd3d-shader: Add analysis of indexableTemps.
Try to reduce component count as much as possible.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
-rw-r--r-- | libs/vkd3d-shader/spirv.c | 8 | ||||
-rw-r--r-- | libs/vkd3d-shader/vkd3d_shader_main.c | 56 | ||||
-rw-r--r-- | libs/vkd3d-shader/vkd3d_shader_private.h | 2 |
3 files changed, 64 insertions, 2 deletions
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 0fa1e318..eccf6b2e 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -6075,6 +6075,7 @@ static void vkd3d_dxbc_compiler_emit_dcl_indexable_temp(struct vkd3d_dxbc_compil struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; + unsigned int component_count; size_t function_location; uint32_t id; @@ -6088,11 +6089,13 @@ static void vkd3d_dxbc_compiler_emit_dcl_indexable_temp(struct vkd3d_dxbc_compil reg.idx[0].offset = temp->register_idx; reg.idx[1].offset = ~0u; + component_count = vkd3d_shader_scan_get_idxtemp_components(compiler->scan_info, &reg); + function_location = vkd3d_dxbc_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location); id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_TYPE_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); + SpvStorageClassFunction, VKD3D_TYPE_FLOAT, component_count, temp->register_size); vkd3d_dxbc_compiler_emit_register_debug_name(builder, id, &reg); @@ -6100,7 +6103,8 @@ static void vkd3d_dxbc_compiler_emit_dcl_indexable_temp(struct vkd3d_dxbc_compil vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, id, - SpvStorageClassFunction, VKD3D_TYPE_FLOAT, VKD3DSP_WRITEMASK_ALL); + SpvStorageClassFunction, VKD3D_TYPE_FLOAT, + vkd3d_write_mask_from_component_count(component_count)); reg_symbol.info.reg.indexable_count = temp->register_size; vkd3d_dxbc_compiler_put_symbol(compiler, &reg_symbol); } diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index b8192e49..2b4e26ca 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -239,6 +239,7 @@ struct vkd3d_shader_scan_entry struct hash_map_entry entry; struct vkd3d_shader_scan_key key; unsigned 
int flags; + unsigned required_components; }; static uint32_t vkd3d_shader_scan_entry_hash(const void *key) @@ -267,6 +268,19 @@ unsigned int vkd3d_shader_scan_get_register_flags(const struct vkd3d_shader_scan return e ? e->flags : 0u; } +unsigned int vkd3d_shader_scan_get_idxtemp_components(const struct vkd3d_shader_scan_info *scan_info, + const struct vkd3d_shader_register *reg) +{ + const struct vkd3d_shader_scan_entry *e; + struct vkd3d_shader_scan_key key; + + key.register_type = reg->type; + key.register_id = reg->idx[0].offset; + + e = (const struct vkd3d_shader_scan_entry *)hash_map_find(&scan_info->register_map, &key); + return e ? e->required_components : 4u; +} + static void vkd3d_shader_scan_set_register_flags(struct vkd3d_shader_scan_info *scan_info, enum vkd3d_shader_register_type type, unsigned int id, unsigned int flags) { @@ -285,6 +299,30 @@ static void vkd3d_shader_scan_set_register_flags(struct vkd3d_shader_scan_info * { entry.key = key; entry.flags = flags; + entry.required_components = 0; + hash_map_insert(&scan_info->register_map, &key, &entry.entry); + } +} + +static void vkd3d_shader_scan_record_idxtemp_components(struct vkd3d_shader_scan_info *scan_info, + const struct vkd3d_shader_register *reg, unsigned int required_components) +{ + struct vkd3d_shader_scan_entry entry; + struct vkd3d_shader_scan_entry *e; + struct vkd3d_shader_scan_key key; + + key.register_type = reg->type; + key.register_id = reg->idx[0].offset; + + if ((e = (struct vkd3d_shader_scan_entry *)hash_map_find(&scan_info->register_map, &key))) + { + e->required_components = max(required_components, e->required_components); + } + else + { + entry.key = key; + entry.flags = 0; + entry.required_components = required_components; hash_map_insert(&scan_info->register_map, &key, &entry.entry); } } @@ -556,6 +594,24 @@ static void vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_info *scan_in break; } + /* If we do nothing, we will have to assume that IDXTEMP is an array of 
vec4. + * This is problematic for performance if shader only accesses the first 1, 2 or 3 components. + * The dcl_indexableTemp instruction specifies number of components but FXC does not seem to + * care, so we have to analyze write masks instead. */ + for (i = 0; i < instruction->dst_count; ++i) + { + if (instruction->dst[i].reg.type == VKD3DSPR_IDXTEMP) + { + unsigned int write_mask, required_components; + write_mask = instruction->dst[i].write_mask; + write_mask |= write_mask >> 2; + write_mask |= write_mask >> 1; + required_components = vkd3d_write_mask_component_count(write_mask); + vkd3d_shader_scan_record_idxtemp_components(scan_info, + &instruction->dst[i].reg, required_components); + } + } + if (vkd3d_shader_instruction_is_uav_read(instruction)) { is_atomic = vkd3d_shader_instruction_is_uav_atomic(instruction); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index c79f355a..279d7655 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -912,6 +912,8 @@ static inline unsigned int vkd3d_compact_swizzle(unsigned int swizzle, unsigned unsigned int vkd3d_shader_scan_get_register_flags(const struct vkd3d_shader_scan_info *scan_info, enum vkd3d_shader_register_type type, unsigned int id); +unsigned int vkd3d_shader_scan_get_idxtemp_components(const struct vkd3d_shader_scan_info *scan_info, + const struct vkd3d_shader_register *reg); /* DXIL support */ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc, |