Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/GStreamer/orc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Vasut <marex@denx.de>, 2020-06-26 06:08:14 +0300
committer: Marek Vasut <marex@denx.de>, 2020-09-19 03:48:02 +0300
commit: 7bba5e91157a2e50f7b117c31b2595633037c3af (patch)
tree: a294cc9ad3217edc33b2e0b9386d6cbdb06e81b0
parent: d4ad1632c24df5c34a981f54292095867bbcd66a (diff)
aarch64: Implement accX instructions
Fill in aarch64 opcodes for accX instructions.

Signed-off-by: Marek Vasut <marex@denx.de>
-rw-r--r-- orc/orcrules-neon.c 199
1 file changed, 131 insertions, 68 deletions
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c
index 643a13d..666db11 100644
--- a/orc/orcrules-neon.c
+++ b/orc/orcrules-neon.c
@@ -2773,52 +2773,90 @@ orc_neon_rule_sqrtf (OrcCompiler *p, void *user, OrcInstruction *insn)
static void
orc_neon_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn)
{
+ OrcVariable tmpreg = { .alloc = p->tmpreg, .size = p->vars[insn->src_args[0]].size };
unsigned int code;
if (p->insn_shift < 2) {
- ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
- orc_neon_reg_name (p->tmpreg),
- orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), 48);
- code = NEON_BINARY(0xf2a00590, p->tmpreg, 0,
- p->vars[insn->src_args[0]].alloc);
- code |= (48) << 16;
- orc_arm_emit (p, code);
+ if (p->is_64bit) {
+ orc_neon64_emit_unary (p, "shl",
+ 0x0f405400 | (48 << 16),
+ tmpreg, p->vars[insn->src_args[0]],
+ p->insn_shift - 1);
+ orc_neon64_emit_binary (p, "add", 0x0ee08400,
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->dest_args[0]],
+ tmpreg, p->insn_shift - 1);
+ } else {
+ ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
+ orc_neon_reg_name (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), 48);
+ code = NEON_BINARY(0xf2a00590, p->tmpreg, 0,
+ p->vars[insn->src_args[0]].alloc);
+ code |= (48) << 16;
+ orc_arm_emit (p, code);
- orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->dest_args[0]].alloc,
- p->tmpreg);
+ orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ }
} else {
- orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->src_args[0]].alloc);
+ if (p->is_64bit) {
+ orc_neon64_emit_binary (p, "add", 0x0e608400,
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->src_args[0]], p->insn_shift);
+ } else {
+ orc_neon_emit_binary (p, "vadd.i16", 0xf2100800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->src_args[0]].alloc);
+ }
}
}
static void
orc_neon_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
+ OrcVariable tmpreg = { .alloc = p->tmpreg, .size = p->vars[insn->src_args[0]].size };
unsigned int code;
if (p->insn_shift < 1) {
- ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
- orc_neon_reg_name (p->tmpreg),
- orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), 32);
- code = NEON_BINARY(0xf2a00590, p->tmpreg, 0,
- p->vars[insn->src_args[0]].alloc);
- code |= (32) << 16;
- orc_arm_emit (p, code);
+ if (p->is_64bit) {
+ orc_neon64_emit_unary (p, "shl",
+ 0x0f405400 | (32 << 16),
+ tmpreg, p->vars[insn->src_args[0]],
+ p->insn_shift - 1);
+ orc_neon64_emit_binary (p, "add", 0x0ee08400,
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->dest_args[0]],
+ tmpreg, p->insn_shift - 1);
+ } else {
+ ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
+ orc_neon_reg_name (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc), 32);
+ code = NEON_BINARY(0xf2a00590, p->tmpreg, 0,
+ p->vars[insn->src_args[0]].alloc);
+ code |= (32) << 16;
+ orc_arm_emit (p, code);
- orc_neon_emit_binary (p, "vadd.i32", 0xf2200800,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->dest_args[0]].alloc,
- p->tmpreg);
+ orc_neon_emit_binary (p, "vadd.i32", 0xf2200800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ }
} else {
- orc_neon_emit_binary (p, "vadd.i32", 0xf2200800,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->dest_args[0]].alloc,
- p->vars[insn->src_args[0]].alloc);
+ if (p->is_64bit) {
+ orc_neon64_emit_binary (p, "add", 0x0ea08400,
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->dest_args[0]],
+ p->vars[insn->src_args[0]], p->insn_shift);
+ } else {
+ orc_neon_emit_binary (p, "vadd.i32", 0xf2200800,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc,
+ p->vars[insn->src_args[0]].alloc);
+ }
}
}
@@ -3206,50 +3244,75 @@ orc_neon_rule_splatw3q (OrcCompiler *p, void *user, OrcInstruction *insn)
static void
orc_neon_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
+ OrcVariable tmpreg = { .alloc = p->tmpreg, .size = p->vars[insn->src_args[0]].size };
orc_uint32 x;
unsigned int code;
if (p->insn_shift < 2) {
- x = 0xf3800700;
- ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
- orc_neon_reg_name_quad (p->tmpreg),
- orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
- orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
- x |= (p->tmpreg&0xf)<<12;
- x |= ((p->tmpreg>>4)&0x1)<<22;
- x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
- x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
- x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
- x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
- orc_arm_emit (p, x);
-
- ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
- orc_neon_reg_name (p->tmpreg),
- orc_neon_reg_name (p->tmpreg), 64 - (16<<p->insn_shift));
- code = NEON_BINARY(0xf2a00590, p->tmpreg, 0, p->tmpreg);
- code |= (64 - (16<<p->insn_shift)) << 16;
- orc_arm_emit (p, code);
+ if (p->is_64bit) {
+ orc_neon64_emit_binary (p, "uabdl", 0x2e207000,
+ tmpreg,
+ p->vars[insn->src_args[0]],
+ p->vars[insn->src_args[1]], p->insn_shift);
+ orc_neon64_emit_unary (p, "shl",
+ 0x0f405400 | ((64 - (16<<p->insn_shift)) << 16),
+ tmpreg, tmpreg,
+ p->insn_shift - 1);
+ orc_neon64_emit_unary (p, "uadalp", 0x2e606800,
+ p->vars[insn->dest_args[0]],
+ tmpreg, p->insn_shift);
+ } else {
+ x = 0xf3800700;
+ ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
+ orc_neon_reg_name_quad (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
+ orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
+ x |= (p->tmpreg&0xf)<<12;
+ x |= ((p->tmpreg>>4)&0x1)<<22;
+ x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
+ x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
+ x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
+ x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
+ orc_arm_emit (p, x);
+
+ ORC_ASM_CODE(p," vshl.i64 %s, %s, #%d\n",
+ orc_neon_reg_name (p->tmpreg),
+ orc_neon_reg_name (p->tmpreg), 64 - (16<<p->insn_shift));
+ code = NEON_BINARY(0xf2a00590, p->tmpreg, 0, p->tmpreg);
+ code |= (64 - (16<<p->insn_shift)) << 16;
+ orc_arm_emit (p, code);
- orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
- p->vars[insn->dest_args[0]].alloc,
- p->tmpreg);
+ orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ }
} else {
- x = 0xf3800700;
- ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
- orc_neon_reg_name_quad (p->tmpreg),
- orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
- orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
- x |= (p->tmpreg&0xf)<<12;
- x |= ((p->tmpreg>>4)&0x1)<<22;
- x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
- x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
- x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
- x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
- orc_arm_emit (p, x);
-
- orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
- p->vars[insn->dest_args[0]].alloc,
- p->tmpreg);
+ if (p->is_64bit) {
+ orc_neon64_emit_binary (p, "uabdl", 0x2e207000,
+ tmpreg,
+ p->vars[insn->src_args[0]],
+ p->vars[insn->src_args[1]], p->insn_shift);
+ orc_neon64_emit_unary (p, "uadalp", 0x2e606800,
+ p->vars[insn->dest_args[0]],
+ tmpreg, p->insn_shift);
+ } else {
+ x = 0xf3800700;
+ ORC_ASM_CODE(p," vabdl.u8 %s, %s, %s\n",
+ orc_neon_reg_name_quad (p->tmpreg),
+ orc_neon_reg_name (p->vars[insn->src_args[0]].alloc),
+ orc_neon_reg_name (p->vars[insn->src_args[1]].alloc));
+ x |= (p->tmpreg&0xf)<<12;
+ x |= ((p->tmpreg>>4)&0x1)<<22;
+ x |= (p->vars[insn->src_args[0]].alloc&0xf)<<16;
+ x |= ((p->vars[insn->src_args[0]].alloc>>4)&0x1)<<7;
+ x |= (p->vars[insn->src_args[1]].alloc&0xf)<<0;
+ x |= ((p->vars[insn->src_args[1]].alloc>>4)&0x1)<<5;
+ orc_arm_emit (p, x);
+
+ orc_neon_emit_unary (p, "vpadal.u16", 0xf3b40680,
+ p->vars[insn->dest_args[0]].alloc,
+ p->tmpreg);
+ }
}
}