diff options
author | Gaetan Bahl <gaetan.bahl@nxp.com> | 2022-05-20 16:47:53 +0300 |
---|---|---|
committer | Gaetan Bahl <gaetan.bahl@nxp.com> | 2022-06-01 19:39:56 +0300 |
commit | 8b047142903c445d0c634a2c687b3fa967851846 (patch) | |
tree | 082ce15462d35a382788673abab44ffbfb0d0b85 | |
parent | 95a2926fb7bba1febe7c4db661209eb4db4711f6 (diff) |
neon: Fix sqrtf only processing two inputs out of four
This fixes an issue where only two out of four inputs were processed.
The fix passes the correct vector shift (vec_shift instead of p->insn_shift) to the emitted NEON instructions.
Fixes #33, #20.
Signed-off-by: Gaetan Bahl <gaetan.bahl@nxp.com>
-rw-r--r-- | orc/orcrules-neon.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index 44802a5..c4146fa 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -3030,34 +3030,34 @@ orc_neon_rule_sqrtf (OrcCompiler *p, void *user, OrcInstruction *insn) orc_neon64_emit_unary (p, "frsqrte", 0x2ea1d800, tmpreg, p->vars[insn->src_args[0]], - p->insn_shift); + vec_shift); for(i = 0; i < NUM_ITERS_SQRTF; i++) { orc_neon64_emit_binary (p, "fmul", 0x2e20dc00, tmpreg2, tmpreg, p->vars[insn->src_args[0]], - p->insn_shift); + vec_shift); orc_neon64_emit_binary (p, "frsqrts", 0x0ea0fc00, - tmpreg2, tmpreg, tmpreg2, p->insn_shift); + tmpreg2, tmpreg, tmpreg2, vec_shift); orc_neon64_emit_binary (p, "fmul", 0x2e20dc00, tmpreg, tmpreg, tmpreg2, - p->insn_shift); + vec_shift); } orc_neon64_emit_unary (p, "frecpe", 0x0ea1d800, p->vars[insn->dest_args[0]], tmpreg, - p->insn_shift); + vec_shift); for(i = 0; i < NUM_ITERS_DIVF; i++) { orc_neon64_emit_binary (p, "frecps", 0x0e20fc00, tmpreg2, /* correction factor */ p->vars[insn->dest_args[0]], /* the last estimate */ tmpreg, /* the original number */ - p->insn_shift); + vec_shift); orc_neon64_emit_binary (p, "fmul", 0x2e20dc00, p->vars[insn->dest_args[0]], /* revised estimate */ p->vars[insn->dest_args[0]], /* last estimate */ tmpreg2, /* correction factor */ - p->insn_shift); + vec_shift); } } else { if (p->insn_shift <= vec_shift) { |