diff options
author | Knobe, Daniel <daniel-knobe@web.de> | 2021-12-07 12:22:15 +0300 |
---|---|---|
committer | Knobe, Daniel <daniel-knobe@web.de> | 2021-12-07 12:22:15 +0300 |
commit | c228582223978019eb0ac8f3143e8e9e45b999aa (patch) | |
tree | 2a68d13a190aee919f846425a14da5995844506d | |
parent | b8a83aaf62d908b5750627173f37f8a40ec65d00 (diff) |
neon: Fix unsigned-only implementation of loadoffb, loadoffw and loadoffl
This has a direct impact on bayer2rgb performance. Tested on i.MX8mm (aarch64): speed boost of ~17%.
Reason:
The line `loadoffw t, s, -1` results in a silent Orc compile error, because the NEON rule only handled non-negative offsets.
Pipeline:
gst-launch-1.0 -v videotestsrc ! video/x-bayer,width=1920,height=1080 ! bayer2rgb ! fpsdisplaysink video-sink=fakesink sync=0
Average performance with fix: 25.21fps
Average performance without fix: 21.60fps
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/62>
-rw-r--r-- | orc/orcrules-neon.c | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index 726f0d4..a9c6bb0 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -1363,20 +1363,38 @@ neon_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn) if (src->vartype == ORC_VAR_TYPE_DEST) update = FALSE; if (type == 1) { - if (compiler->vars[insn->src_args[1]].vartype != ORC_VAR_TYPE_CONST) { + OrcVariable *src2 = compiler->vars + insn->src_args[1]; + + if (src2->vartype != ORC_VAR_TYPE_CONST) { ORC_PROGRAM_ERROR(compiler,"unimplemented"); return; } ptr_register = compiler->gp_tmpreg; if (compiler->is_64bit) { + if (src2->value.i < 0) { + orc_arm64_emit_sub_imm (compiler, 64, ptr_register, + src->ptr_register, + src2->value.i * src->size * -1); + } + else + { orc_arm64_emit_add_imm (compiler, 64, ptr_register, src->ptr_register, - compiler->vars[insn->src_args[1]].value.i * src->size); + src2->value.i * src->size); + } } else { + if (src2->value.i < 0) { + orc_arm_emit_sub_imm (compiler, ptr_register, + src->ptr_register, + src2->value.i * src->size * -1, TRUE); + } + else + { orc_arm_emit_add_imm (compiler, ptr_register, src->ptr_register, - compiler->vars[insn->src_args[1]].value.i * src->size); + src2->value.i * src->size); + } } update = FALSE; |