diff options
author | Marek Vasut <marex@denx.de> | 2020-08-30 02:23:05 +0300 |
---|---|---|
committer | Marek Vasut <marex@denx.de> | 2020-09-19 04:08:46 +0300 |
commit | a91afda140b0ee9787db9803566169a319886eb9 (patch) | |
tree | 112437bc89109f0d98bb800db8216232988df132 | |
parent | ff37f64325aa6a762fe3f78facc29f8a1ce8d225 (diff) |
aarch64: Implement flags2d
Implement support for .flags 2d by adding code for handling the loop
counter. The implementation is very similar to aarch32.
Signed-off-by: Marek Vasut <marex@denx.de>
-rw-r--r-- | orc/orcprogram-neon.c | 51 |
1 files changed, 42 insertions, 9 deletions
diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c
index ae81810..8d55411 100644
--- a/orc/orcprogram-neon.c
+++ b/orc/orcprogram-neon.c
@@ -753,8 +753,20 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
   orc_neon_load_constants_outer (compiler);
 
   if (compiler->is_64bit) {
-    /** @todo not supported yet */
-    if (compiler->program->is_2d) return;
+    if (compiler->program->is_2d) {
+      if (compiler->program->constant_m > 0) {
+        orc_arm64_emit_mov_imm (compiler, 32, ORC_ARM64_IP1, compiler->program->constant_m);
+        orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+      } else {
+        orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A1]));
+        orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+      }
+
+      orc_arm_emit_label (compiler, LABEL_OUTER_LOOP);
+    }
 
     if (compiler->loop_shift > 0) {
       orc_neon64_loop_shift (compiler);
@@ -781,6 +793,17 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
 
       orc_neon64_loop_caches (compiler);
     }
+
+    if (compiler->program->is_2d) {
+      neon_add_strides (compiler);
+
+      orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+          (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+      orc_arm64_emit_subs_imm (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, 1);
+      orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+          (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+      orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_OUTER_LOOP);
+    }
   } else {
     if (compiler->program->is_2d) {
       if (compiler->program->constant_m > 0) {
@@ -1275,13 +1298,23 @@ neon_add_strides (OrcCompiler *compiler)
         break;
       case ORC_VAR_TYPE_SRC:
       case ORC_VAR_TYPE_DEST:
-        orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
-        orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
-        orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
-        orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+        if (compiler->is_64bit) {
+          orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+          orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_R18, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+          orc_arm64_emit_add (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, ORC_ARM64_R18);
+          orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[i]));
+        } else {
+          orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+          orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+          orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
+          orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+        }
         break;
       case ORC_VAR_TYPE_ACCUMULATOR:
         break;