diff options
author | Marek Vasut <marex@denx.de> | 2020-08-30 19:18:38 +0300 |
---|---|---|
committer | Marek Vasut <marex@denx.de> | 2020-09-19 04:08:46 +0300 |
commit | 872da542a1ef77107b8d34de39f275824b085dc8 (patch) | |
tree | 09935743c17906a63cea71f20a3f40ae892bb983 | |
parent | d3e8915b38fb58789aac18e0bad8141d002dced9 (diff) |
aarch64: Implement divf instruction
Fill in aarch64 opcodes for divf instruction.
Signed-off-by: Marek Vasut <marex@denx.de>
-rw-r--r-- | orc/orcrules-neon.c | 102 |
1 file changed, 65 insertions, 37 deletions
```diff
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c
index 9485b13..789f42c 100644
--- a/orc/orcrules-neon.c
+++ b/orc/orcrules-neon.c
@@ -2693,50 +2693,78 @@ static void
 orc_neon_rule_divf (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
   int vec_shift = 1;
-  if (p->insn_shift <= vec_shift) {
+
+  if (p->is_64bit) {
+    OrcVariable tmpreg = { .alloc = p->tmpreg, .size = p->vars[insn->src_args[1]].size };
+    OrcVariable tmpreg2 = { .alloc = p->tmpreg2, .size = p->vars[insn->src_args[1]].size };
     int i;
-    orc_neon_emit_unary (p, "vrecpe.f32", 0xf3bb0500,
-        p->tmpreg,
-        p->vars[insn->src_args[1]].alloc);
+
+    orc_neon64_emit_unary (p, "frecpe", 0x0ea1d800,
+        tmpreg, p->vars[insn->src_args[1]],
+        p->insn_shift);
     for(i = 0; i < NUM_ITERS_DIVF; i++) {
-      orc_neon_emit_binary (p, "vrecps.f32", 0xf2000f10,
-          p->tmpreg2, /* correction factor */
-          p->tmpreg, /* the last estimate */
-          p->vars[insn->src_args[1]].alloc); /* the original number */
-      orc_neon_emit_binary (p, "vmul.f32", 0xf3000d10,
-          p->tmpreg, /* revised estimate */
-          p->tmpreg, /* last estimate */
-          p->tmpreg2); /* correction factor */
+      orc_neon64_emit_binary (p, "frecps", 0x0e20fc00,
+          tmpreg2, /* correction factor */
+          tmpreg, /* the last estimate */
+          p->vars[insn->src_args[1]], /* the original number */
+          p->insn_shift);
+      orc_neon64_emit_binary (p, "fmul", 0x2e20dc00,
+          tmpreg, /* revised estimate */
+          tmpreg, /* last estimate */
+          tmpreg2, /* correction factor */
+          p->insn_shift);
     }
 
-    orc_neon_emit_binary (p, "vmul.f32", 0xf3000d10,
-        p->vars[insn->dest_args[0]].alloc,
-        p->vars[insn->src_args[0]].alloc,
-        p->tmpreg);
+    orc_neon64_emit_binary (p, "fmul", 0x2e20dc00,
+        p->vars[insn->dest_args[0]],
+        p->vars[insn->src_args[0]],
+        tmpreg, p->insn_shift);
+  } else {
+    if (p->insn_shift <= vec_shift) {
+      int i;
+      orc_neon_emit_unary (p, "vrecpe.f32", 0xf3bb0500,
+          p->tmpreg,
+          p->vars[insn->src_args[1]].alloc);
+      for(i = 0; i < NUM_ITERS_DIVF; i++) {
+        orc_neon_emit_binary (p, "vrecps.f32", 0xf2000f10,
+            p->tmpreg2, /* correction factor */
+            p->tmpreg, /* the last estimate */
+            p->vars[insn->src_args[1]].alloc); /* the original number */
+        orc_neon_emit_binary (p, "vmul.f32", 0xf3000d10,
+            p->tmpreg, /* revised estimate */
+            p->tmpreg, /* last estimate */
+            p->tmpreg2); /* correction factor */
+      }
 
-  } else if (p->insn_shift == vec_shift + 1) {
-    int i;
-    orc_neon_emit_unary_quad (p, "vrecpe.f32", 0xf3bb0500,
-        p->tmpreg,
-        p->vars[insn->src_args[1]].alloc);
-    for(i = 0; i < NUM_ITERS_DIVF; i++) {
-      orc_neon_emit_binary_quad (p, "vrecps.f32", 0xf2000f10,
-          p->tmpreg2, /* correction factor */
-          p->tmpreg, /* the last estimate */
-          p->vars[insn->src_args[1]].alloc); /* the original number */
-      orc_neon_emit_binary_quad (p, "vmul.f32", 0xf3000d10,
-          p->tmpreg, /* revised estimate */
-          p->tmpreg, /* last estimate */
-          p->tmpreg2); /* correction factor */
-    }
+      orc_neon_emit_binary (p, "vmul.f32", 0xf3000d10,
+          p->vars[insn->dest_args[0]].alloc,
+          p->vars[insn->src_args[0]].alloc,
+          p->tmpreg);
 
-    orc_neon_emit_binary_quad (p, "vmul.f32", 0xf3000d10,
-        p->vars[insn->dest_args[0]].alloc,
-        p->vars[insn->src_args[0]].alloc,
-        p->tmpreg);
+    } else if (p->insn_shift == vec_shift + 1) {
+      int i;
+      orc_neon_emit_unary_quad (p, "vrecpe.f32", 0xf3bb0500,
+          p->tmpreg,
+          p->vars[insn->src_args[1]].alloc);
+      for(i = 0; i < NUM_ITERS_DIVF; i++) {
+        orc_neon_emit_binary_quad (p, "vrecps.f32", 0xf2000f10,
+            p->tmpreg2, /* correction factor */
+            p->tmpreg, /* the last estimate */
+            p->vars[insn->src_args[1]].alloc); /* the original number */
+        orc_neon_emit_binary_quad (p, "vmul.f32", 0xf3000d10,
+            p->tmpreg, /* revised estimate */
+            p->tmpreg, /* last estimate */
+            p->tmpreg2); /* correction factor */
+      }
 
-  } else {
-    ORC_COMPILER_ERROR(p, "shift too large");
+      orc_neon_emit_binary_quad (p, "vmul.f32", 0xf3000d10,
+          p->vars[insn->dest_args[0]].alloc,
+          p->vars[insn->src_args[0]].alloc,
+          p->tmpreg);
+
+    } else {
+      ORC_COMPILER_ERROR(p, "shift too large");
+    }
   }
 }
 #endif
```