Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/GStreamer/orc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Vasut <marex@denx.de> 2020-08-30 02:23:05 +0300
committer: Marek Vasut <marex@denx.de> 2020-09-19 04:08:46 +0300
commit: a91afda140b0ee9787db9803566169a319886eb9 (patch)
tree: 112437bc89109f0d98bb800db8216232988df132
parent: ff37f64325aa6a762fe3f78facc29f8a1ce8d225 (diff)
aarch64: Implement flags2d
Implement support for .flags 2d by adding code for handling the loop counter.
The implementation is very similar to aarch32.

Signed-off-by: Marek Vasut <marex@denx.de>
-rw-r--r-- orc/orcprogram-neon.c 51
1 files changed, 42 insertions, 9 deletions
diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c
index ae81810..8d55411 100644
--- a/orc/orcprogram-neon.c
+++ b/orc/orcprogram-neon.c
@@ -753,8 +753,20 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_neon_load_constants_outer (compiler);
if (compiler->is_64bit) {
- /** @todo not supported yet */
- if (compiler->program->is_2d) return;
+ if (compiler->program->is_2d) {
+ if (compiler->program->constant_m > 0) {
+ orc_arm64_emit_mov_imm (compiler, 32, ORC_ARM64_IP1, compiler->program->constant_m);
+ orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+ } else {
+ orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A1]));
+ orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+ }
+
+ orc_arm_emit_label (compiler, LABEL_OUTER_LOOP);
+ }
if (compiler->loop_shift > 0) {
orc_neon64_loop_shift (compiler);
@@ -781,6 +793,17 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
orc_neon64_loop_caches (compiler);
}
+
+ if (compiler->program->is_2d) {
+ neon_add_strides (compiler);
+
+ orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+ orc_arm64_emit_subs_imm (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, 1);
+ orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+ orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_OUTER_LOOP);
+ }
} else {
if (compiler->program->is_2d) {
if (compiler->program->constant_m > 0) {
@@ -1275,13 +1298,23 @@ neon_add_strides (OrcCompiler *compiler)
break;
case ORC_VAR_TYPE_SRC:
case ORC_VAR_TYPE_DEST:
- orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
- orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
- orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
- orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+ if (compiler->is_64bit) {
+ orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+ orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_R18, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+ orc_arm64_emit_add (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, ORC_ARM64_R18);
+ orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[i]));
+ } else {
+ orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+ orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+ orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
+ orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+ }
break;
case ORC_VAR_TYPE_ACCUMULATOR:
break;