diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-11-21 23:54:48 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-11-21 23:54:48 +0300 |
commit | 2640bcb61db4302aa7be900dfa763a0ea5c6fd8d (patch) | |
tree | 08d7e9ca47f1bf0b48458e7d71705a35bf3e9724 | |
parent | 0aa7b6b7632732d1f22f3d8169d3262a7e6b1e9d (diff) |
Update tests
-rw-r--r-- | tests/decoder/intgemm/.gitignore | 2 | ||||
-rw-r--r-- | tests/decoder/intgemm/test_intgemm_8bit.sh | 12 | ||||
-rw-r--r-- | tests/decoder/intgemm/test_intgemm_8bit_avx2.sh | 9 | ||||
-rwxr-xr-x | tools/diff.sh | 18 |
4 files changed, 35 insertions, 6 deletions
```diff
diff --git a/tests/decoder/intgemm/.gitignore b/tests/decoder/intgemm/.gitignore
index 3ab86da..b35e1d8 100644
--- a/tests/decoder/intgemm/.gitignore
+++ b/tests/decoder/intgemm/.gitignore
@@ -2,3 +2,5 @@
 *.out.bleu
 *.src
 *.ref
+*.bleu_score
+*.bleu_score.expected
diff --git a/tests/decoder/intgemm/test_intgemm_8bit.sh b/tests/decoder/intgemm/test_intgemm_8bit.sh
index 1bdec70..d6098a0 100644
--- a/tests/decoder/intgemm/test_intgemm_8bit.sh
+++ b/tests/decoder/intgemm/test_intgemm_8bit.sh
@@ -47,10 +47,16 @@ $MRT_MARIAN/marian-decoder \
 
 # Print current and expected BLEU for debugging
 python3 $MRT_TOOLS/sacrebleu/sacrebleu.py newstest2018.ref < $prefix.out | tee $prefix.out.bleu
-cat $prefix.$suffix.expected.bleu
+# BLEU scores calculated on AVX, AVX2, AVX512 should be very similar, but does not have to be identical
+tail -n1 $prefix.*.expected.bleu || true
 
-# Compare with the expected output
-$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected > $prefix.diff
+# Compare with the expected output, allow up to 20 different lines
+$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected 20 > $prefix.diff
+
+# Compare BLEU scores
+cut -f3 -d' ' $prefix.out.bleu > $prefix.out.bleu_score
+cut -f3 -d' ' $prefix.$suffix.expected.bleu > $prefix.out.bleu_score.expected
+$MRT_TOOLS/diff-nums.py -a -p 0.9 $prefix.out.bleu_score{,.expected}
 
 # Exit with success code
 
diff --git a/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh b/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
index ceaefb6..a42dd6f 100644
--- a/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
+++ b/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
@@ -48,8 +48,13 @@ python3 $MRT_TOOLS/sacrebleu/sacrebleu.py newstest2018.ref < $prefix.out | tee $
 # BLEU scores calculated on AVX, AVX2, AVX512 should be very similar, but does not have to be identical
 tail -n1 $prefix.*.expected.bleu || true
 
-# Compare with the expected output
-$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected > $prefix.diff
+# Compare with the expected output, allow up to 20 different lines
+$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected 20 > $prefix.diff
+
+# Compare BLEU scores
+cut -f3 -d' ' $prefix.out.bleu > $prefix.out.bleu_score
+cut -f3 -d' ' $prefix.$suffix.expected.bleu > $prefix.out.bleu_score.expected
+$MRT_TOOLS/diff-nums.py -a -p 0.9 $prefix.out.bleu_score{,.expected}
 
 # Exit with success code
 
diff --git a/tools/diff.sh b/tools/diff.sh
index ef03374..b09ba54 100755
--- a/tools/diff.sh
+++ b/tools/diff.sh
@@ -1,3 +1,19 @@
 #!/bin/bash
-[[ "$#" -eq 2 ]] && >&2 echo "Command: $(realpath $0) $(realpath -m $1) $(realpath -m $2)"
+#
+# Usage:
+# ./diff.sh file1 file2 [number-of-allowed-diff-lines]
+
+[[ "$#" -ge 2 ]] && >&2 echo "Command: $(realpath $0) $(realpath -m $1) $(realpath -m $2)"
 diff $1 $2
+exitcode=$?
+if [ -z "$3" ]; then
+  exit $exitcode
+else
+  numlines=$(diff -y --suppress-common-lines $1 $2 | wc -l)
+  >&2 echo "Different lines: $numlines, allowed: $3"
+  if [[ "$numlines" -gt "$3" ]]; then
+    exit $exitcode
+  else
+    exit 0
+  fi
+fi
```