Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2021-11-21 23:54:48 +0300
committer Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2021-11-21 23:54:48 +0300
commit 2640bcb61db4302aa7be900dfa763a0ea5c6fd8d (patch)
tree 08d7e9ca47f1bf0b48458e7d71705a35bf3e9724
parent 0aa7b6b7632732d1f22f3d8169d3262a7e6b1e9d (diff)
Update tests
-rw-r--r-- tests/decoder/intgemm/.gitignore 2
-rw-r--r-- tests/decoder/intgemm/test_intgemm_8bit.sh 12
-rw-r--r-- tests/decoder/intgemm/test_intgemm_8bit_avx2.sh 9
-rwxr-xr-x tools/diff.sh 18
4 files changed, 35 insertions, 6 deletions
diff --git a/tests/decoder/intgemm/.gitignore b/tests/decoder/intgemm/.gitignore
index 3ab86da..b35e1d8 100644
--- a/tests/decoder/intgemm/.gitignore
+++ b/tests/decoder/intgemm/.gitignore
@@ -2,3 +2,5 @@
*.out.bleu
*.src
*.ref
+*.bleu_score
+*.bleu_score.expected
diff --git a/tests/decoder/intgemm/test_intgemm_8bit.sh b/tests/decoder/intgemm/test_intgemm_8bit.sh
index 1bdec70..d6098a0 100644
--- a/tests/decoder/intgemm/test_intgemm_8bit.sh
+++ b/tests/decoder/intgemm/test_intgemm_8bit.sh
@@ -47,10 +47,16 @@ $MRT_MARIAN/marian-decoder \
# Print current and expected BLEU for debugging
python3 $MRT_TOOLS/sacrebleu/sacrebleu.py newstest2018.ref < $prefix.out | tee $prefix.out.bleu
-cat $prefix.$suffix.expected.bleu
+# BLEU scores calculated on AVX, AVX2, AVX512 should be very similar, but does not have to be identical
+tail -n1 $prefix.*.expected.bleu || true
-# Compare with the expected output
-$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected > $prefix.diff
+# Compare with the expected output, allow up to 20 different lines
+$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected 20 > $prefix.diff
+
+# Compare BLEU scores
+cut -f3 -d' ' $prefix.out.bleu > $prefix.out.bleu_score
+cut -f3 -d' ' $prefix.$suffix.expected.bleu > $prefix.out.bleu_score.expected
+$MRT_TOOLS/diff-nums.py -a -p 0.9 $prefix.out.bleu_score{,.expected}
# Exit with success code
diff --git a/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh b/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
index ceaefb6..a42dd6f 100644
--- a/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
+++ b/tests/decoder/intgemm/test_intgemm_8bit_avx2.sh
@@ -48,8 +48,13 @@ python3 $MRT_TOOLS/sacrebleu/sacrebleu.py newstest2018.ref < $prefix.out | tee $
# BLEU scores calculated on AVX, AVX2, AVX512 should be very similar, but does not have to be identical
tail -n1 $prefix.*.expected.bleu || true
-# Compare with the expected output
-$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected > $prefix.diff
+# Compare with the expected output, allow up to 20 different lines
+$MRT_TOOLS/diff.sh $prefix.out $prefix.$suffix.expected 20 > $prefix.diff
+
+# Compare BLEU scores
+cut -f3 -d' ' $prefix.out.bleu > $prefix.out.bleu_score
+cut -f3 -d' ' $prefix.$suffix.expected.bleu > $prefix.out.bleu_score.expected
+$MRT_TOOLS/diff-nums.py -a -p 0.9 $prefix.out.bleu_score{,.expected}
# Exit with success code
diff --git a/tools/diff.sh b/tools/diff.sh
index ef03374..b09ba54 100755
--- a/tools/diff.sh
+++ b/tools/diff.sh
@@ -1,3 +1,19 @@
#!/bin/bash
-[[ "$#" -eq 2 ]] && >&2 echo "Command: $(realpath $0) $(realpath -m $1) $(realpath -m $2)"
+#
+# Usage:
+# ./diff.sh file1 file2 [number-of-allowed-diff-lines]
+
+[[ "$#" -ge 2 ]] && >&2 echo "Command: $(realpath $0) $(realpath -m $1) $(realpath -m $2)"
diff $1 $2
+exitcode=$?
+if [ -z "$3" ]; then
+ exit $exitcode
+else
+ numlines=$(diff -y --suppress-common-lines $1 $2 | wc -l)
+ >&2 echo "Different lines: $numlines, allowed: $3"
+ if [[ "$numlines" -gt "$3" ]]; then
+ exit $exitcode
+ else
+ exit 0
+ fi
+fi