From 887392103956a8733a750c4442e2664d5c26527d Mon Sep 17 00:00:00 2001 From: Gaetan Bahl Date: Fri, 20 May 2022 15:41:27 +0200 Subject: neon: Fix testsuite not passing on arm CPUs Set the FPCR.FZ bit before running tests using ARM NEON, in order to make tests pass for most opcodes. Add a way to check for expected failures in the test suite, since ARM NEON does not comply with IEEE 754. Errors are expected when using divf (resp. sqrtf) on large (resp. small) numbers. Fixes #33, #20 Signed-off-by: Gaetan Bahl --- orc-test/orctest.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/orc-test/orctest.c b/orc-test/orctest.c index d9cb9d5..c9a48b3 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -57,6 +57,22 @@ orc_test_init (void) orc_random_init (&rand_context, 0x12345678); _orc_profile_init (); + +#ifdef __aarch64__ + // set the FPCR.FZ bit + // denormalized single-precision and double-precision inputs to, and + // outputs from, floating-point instructions are flushed to zero. + // Otherwise, outputs from neon float ops do not match emulated code. 
+ int64_t fpcr_value = 0x1000000; +#ifdef __GNUC__ + asm("msr FPCR, %0" : : "r" (fpcr_value)); +#elif _MSC_VER + __asm msr FPCR, fpcr_value +#else + #error unsupported compiler for inline assembly +#endif +#endif + } @@ -510,6 +526,15 @@ print_array_val_float (OrcArray *array, int i, int j) } } +static float +get_array_val_float (OrcArray *array, int i, int j) +{ + void *ptr = ORC_PTR_OFFSET (array->data, + i*array->element_size + j*array->stride); + + return *(float *)ptr; +} + int float_compare (OrcArray *array1, OrcArray *array2, int i, int j) { @@ -537,6 +562,45 @@ float_compare (OrcArray *array1, OrcArray *array2, int i, int j) return FALSE; } +int +check_expected_failure (int flags, OrcProgram *p, OrcArray** src, OrcArray** dest_exec, OrcArray** dest_emul, int i, int j) { + + if (flags & ORC_TARGET_NEON_NEON) { + + if (strstr(p->name, "divf")) { + + float src_val = get_array_val_float (src[1], i, j); + float dest_exec_val = get_array_val_float (dest_exec[0], i, j); + + // Dividing by a large number in NEON will result in 0 + if (fabs(src_val) > 7e37 && fabs(dest_exec_val) == 0.0f) { + printf(" NEON divf mismatch expected"); + return TRUE; + } + + } else if (strstr(p->name, "sqrtf")) { + + float src_val = get_array_val_float (src[0], i, j); + float dest_exec_val = get_array_val_float (dest_exec[0], i, j); + float dest_emul_val = get_array_val_float (dest_emul[0], i, j); + + // sqrt of 0 or small numbers in NEON returns NaN because it uses reciprocal estimate + if (fabs(src_val) < 2e-38) { + printf(" NEON sqrtf mismatch expected"); + return TRUE; + } + + // sqrt in NEON will sometimes be imprecise because frecps returns 1 on small numbers + if (fabs(dest_exec_val - dest_emul_val)/dest_emul_val < 2e-7) { + printf(" NEON sqrtf mismatch expected"); + return TRUE; + } + } + } + + return FALSE; +} + OrcTestResult orc_test_compare_output (OrcProgram *program) { @@ -703,6 +767,7 @@ orc_test_compare_output_full (OrcProgram *program, int flags) } } if (bad) { + int 
n_lines_bad = 0; for(j=0;j