Add test for in-validation ChrF metric

author: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-09 18:09:23 +0300
committer: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-09 18:09:23 +0300
commit: b349c6d8c8024a43ab0eaba9ca72038ad8768d1a (patch)
tree: 2f2372c2c290f535b10a5c1704b925d926061f47 /tests
parent: 5b4adeb50b91a0ef62fc348024632330284415b1 (diff)
8 files changed, 99 insertions, 48 deletions
diff --git a/tests/sentencepiece/.gitignore b/tests/sentencepiece/.gitignore
index 7575793..7e5a57d 100644
--- a/tests/sentencepiece/.gitignore
+++ b/tests/sentencepiece/.gitignore
@@ -3,6 +3,8 @@ vocab.joint/
 vocab.maxlines/
 vocab.norm/
 vocab.lm/
-bleu-detok/
+bleu/
+chrf/
 *.bleu
 *.sacrebleu
+*.score
diff --git a/tests/sentencepiece/bleu-detok.sacrebleu.expected b/tests/sentencepiece/bleu.sacrebleu.expected
index c632726..c632726 100644
--- a/tests/sentencepiece/bleu-detok.sacrebleu.expected
+++ b/tests/sentencepiece/bleu.sacrebleu.expected
diff --git a/tests/sentencepiece/bleu-detok.bleu.expected b/tests/sentencepiece/bleu.score.expected
index bf6ea51..bf6ea51 100644
--- a/tests/sentencepiece/bleu-detok.bleu.expected
+++ b/tests/sentencepiece/bleu.score.expected
diff --git a/tests/sentencepiece/chrf.sacrebleu.expected b/tests/sentencepiece/chrf.sacrebleu.expected
new file mode 100644
index 0000000..c632726
--- /dev/null
+++ b/tests/sentencepiece/chrf.sacrebleu.expected
@@ -0,0 +1 @@
+BLEU+case.mixed+numrefs.1+smooth.exp+tok.13a = 28.1 56.4/34.0/21.9/14.9 (BP = 1.000 ratio = 1.133 hyp_len = 612 ref_len = 540)
diff --git a/tests/sentencepiece/chrf.score.expected b/tests/sentencepiece/chrf.score.expected
new file mode 100644
index 0000000..8bc555f
--- /dev/null
+++ b/tests/sentencepiece/chrf.score.expected
@@ -0,0 +1 @@
+59.1572
diff --git a/tests/sentencepiece/test_bleu.sh b/tests/sentencepiece/test_bleu.sh
new file mode 100644
index 0000000..b0b5140
--- /dev/null
+++ b/tests/sentencepiece/test_bleu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Check if the BLEU validation measure equals the SacreBLEU score
+# AUTHOR: snukky
+# TAGS: sentencepiece bleu valid-metrics
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts (bleu.score and bleu.sacrebleu included) and create working directory
+rm -rf bleu bleu.*{log,out,diff,bleu,score}
+mkdir -p bleu
+
+# Copy the model
+cp -r "$MRT_MODELS"/rnn-spm/model.npz bleu/
+test -e bleu/model.npz
+
+# Run marian for a single batch with learning rate 0, validating with BLEU after every update
+"$MRT_MARIAN"/marian \
+    --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
+    -m bleu/model.npz -t "$MRT_DATA"/europarl.de-en/corpus.small.{de,en}.gz -v "$MRT_MODELS"/rnn-spm/vocab.{deen,deen}.spm \
+    --valid-freq 1 --valid-metrics bleu --valid-sets dev.de dev.en --valid-translation-output bleu.out \
+    --beam-size 8 --normalize 1 \
+    --log bleu.log
+
+# Check if files exist
+test -e bleu/model.npz
+test -e bleu.out
+test -e bleu.log
+
+
+# Extract the BLEU score from logs (the text between ' bleu : ' and ' : new best')
+grep ' : bleu : ' bleu.log | sed -r 's/.* bleu : (.*) : new best.*/\1/' > bleu.score
+# Check BLEU from logs
+"$MRT_TOOLS"/diff.sh bleu.score bleu.score.expected > bleu.score.diff
+
+
+# Run sacreBLEU removing the version information
+python3 "$MRT_TOOLS"/sacrebleu/sacrebleu.py dev.en < bleu.out | sed -r 's/.version[^ ]* / /' > bleu.sacrebleu
+# Check BLEU from the validation translation output
+"$MRT_TOOLS"/diff.sh bleu.sacrebleu bleu.sacrebleu.expected > bleu.sacrebleu.diff
+
+
+# Exit with success code
+exit 0
diff --git a/tests/sentencepiece/test_bleu_detok.sh b/tests/sentencepiece/test_bleu_detok.sh
deleted file mode 100644
index 00374ce..0000000
--- a/tests/sentencepiece/test_bleu_detok.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash -x
-
-#####################################################################
-# SUMMARY: Check if the BLEU-detok validation measure equals to the SacreBLEU score
-# AUTHOR: snukky
-# TAGS: sentencepiece bleu-detok
-#####################################################################
-
-# Exit on error
-set -e
-
-# Remove old artifacts and create working directory
-rm -rf bleu-detok bleu-detok.*{log,out,diff,bleu}
-mkdir -p bleu-detok
-
-# Copy the model
-cp -r $MRT_MODELS/rnn-spm/model.npz bleu-detok/
-test -e bleu-detok/model.npz
-
-# Run marian command
-$MRT_MARIAN/marian \
-    --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
-    -m bleu-detok/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
-    --valid-freq 1 --valid-metrics bleu-detok --valid-sets dev.de dev.en --valid-translation-output bleu-detok.out \
-    --beam-size 8 --normalize 1 \
-    --log bleu-detok.log
-
-# Check if files exist
-test -e bleu-detok/model.npz
-test -e bleu-detok.out
-test -e bleu-detok.log
-
-
-# Extract the BLEU score from logs
-cat bleu-detok.log | grep ' : bleu-detok : ' | sed -r 's/.* bleu-detok : (.*) : new best.*/\1/' > bleu-detok.bleu
-# Check BLEU from logs
-$MRT_TOOLS/diff.sh bleu-detok.bleu bleu-detok.bleu.expected > bleu-detok.bleu.diff
-
-
-# Run sacreBLEU removing the version information
-python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < bleu-detok.out | sed -r 's/.version[^ ]* / /' > bleu-detok.sacrebleu
-# Check BLEU from the validation translation output 
-$MRT_TOOLS/diff.sh bleu-detok.sacrebleu bleu-detok.sacrebleu.expected > bleu-detok.sacrebleu.diff
-
-
-# Exit with success code
-exit 0
diff --git a/tests/sentencepiece/test_chrf.sh b/tests/sentencepiece/test_chrf.sh
new file mode 100644
index 0000000..0d928af
--- /dev/null
+++ b/tests/sentencepiece/test_chrf.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Check if the ChrF validation measure equals the SacreBLEU score
+# AUTHOR: snukky
+# TAGS: sentencepiece chrf valid-metrics
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts (chrf.score and chrf.sacrebleu included) and create working directory
+rm -rf chrf chrf.*{log,out,diff,score,sacrebleu}
+mkdir -p chrf
+
+# Copy the model
+cp -r "$MRT_MODELS"/rnn-spm/model.npz chrf/
+test -e chrf/model.npz
+
+# Run marian for a single batch with learning rate 0, validating with ChrF after every update
+"$MRT_MARIAN"/marian \
+    --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
+    -m chrf/model.npz -t "$MRT_DATA"/europarl.de-en/corpus.small.{de,en}.gz -v "$MRT_MODELS"/rnn-spm/vocab.{deen,deen}.spm \
+    --valid-freq 1 --valid-metrics chrf --valid-sets dev.de dev.en --valid-translation-output chrf.out \
+    --beam-size 8 --normalize 1 \
+    --log chrf.log
+
+# Check if files exist
+test -e chrf/model.npz
+test -e chrf.out
+test -e chrf.log
+
+
+# Extract the score from logs (the text between ' chrf : ' and ' : new best')
+grep ' : chrf : ' chrf.log | sed -r 's/.* chrf : (.*) : new best.*/\1/' > chrf.score
+# Check score from logs
+"$MRT_TOOLS"/diff.sh chrf.score chrf.score.expected > chrf.score.diff
+
+
+# Run sacreBLEU (no metric option given; chrf.sacrebleu.expected holds a BLEU line) removing the version information
+python3 "$MRT_TOOLS"/sacrebleu/sacrebleu.py dev.en < chrf.out | sed -r 's/.version[^ ]* / /' > chrf.sacrebleu
+# Check BLEU from the validation translation output; NOTE(review): this does not verify ChrF itself -- confirm intent
+"$MRT_TOOLS"/diff.sh chrf.sacrebleu chrf.sacrebleu.expected > chrf.sacrebleu.diff
+
+
+# Exit with success code
+exit 0
author	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-11-09 18:09:23 +0300
committer	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-11-09 18:09:23 +0300
commit	b349c6d8c8024a43ab0eaba9ca72038ad8768d1a (patch)
tree	2f2372c2c290f535b10a5c1704b925d926061f47 /tests
parent	5b4adeb50b91a0ef62fc348024632330284415b1 (diff)