Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <marcinjd@microsoft.com>, 2020-11-10 18:30:40 +0300
committer: Marcin Junczys-Dowmunt <marcinjd@microsoft.com>, 2020-11-10 18:30:40 +0300
commit: 8acd64aa1e579339db629e605ab46759c4268d50 (patch)
tree: d6a9822c3177833768b976c9491d4306e4f871a9 /tests
parent: 17dacc949bf837d5998cbdcc9edcec60495e6d6c (diff)
parent: a30d3ffbebbe5e5587e6eabbdc6b7963c70bc87d (diff)
merge conflict
Diffstat (limited to 'tests')
-rw-r--r-- tests/sentencepiece/.gitignore | 4
-rw-r--r-- tests/sentencepiece/bleu.sacrebleu.expected (renamed from tests/sentencepiece/bleu-detok.sacrebleu.expected) | 0
-rw-r--r-- tests/sentencepiece/bleu.score.expected (renamed from tests/sentencepiece/bleu-detok.bleu.expected) | 0
-rw-r--r-- tests/sentencepiece/chrf.sacrebleu.expected | 1
-rw-r--r-- tests/sentencepiece/chrf.score.expected | 1
-rw-r--r-- tests/sentencepiece/test_bleu.sh | 47
-rw-r--r-- tests/sentencepiece/test_bleu_detok.sh | 47
-rw-r--r-- tests/sentencepiece/test_chrf.sh | 47
-rw-r--r-- tests/training/scheduler/.gitignore | 2
-rw-r--r-- tests/training/scheduler/log_epoch_e.expected | 20
-rw-r--r-- tests/training/scheduler/log_epoch_t.expected | 12
-rw-r--r-- tests/training/scheduler/log_epoch_u.expected | 15
-rw-r--r-- tests/training/scheduler/setup.sh | 10
-rw-r--r-- tests/training/scheduler/test_logical_epoch.sh | 32
-rw-r--r-- tests/training/scheduler/test_logical_epoch_labels.sh | 32
-rw-r--r-- tests/training/scheduler/test_logical_epoch_updates.sh | 32
16 files changed, 254 insertions, 48 deletions
diff --git a/tests/sentencepiece/.gitignore b/tests/sentencepiece/.gitignore
index 7575793..7e5a57d 100644
--- a/tests/sentencepiece/.gitignore
+++ b/tests/sentencepiece/.gitignore
@@ -3,6 +3,8 @@ vocab.joint/
vocab.maxlines/
vocab.norm/
vocab.lm/
-bleu-detok/
+bleu/
+chrf/
*.bleu
*.sacrebleu
+*.score
diff --git a/tests/sentencepiece/bleu-detok.sacrebleu.expected b/tests/sentencepiece/bleu.sacrebleu.expected
index c632726..c632726 100644
--- a/tests/sentencepiece/bleu-detok.sacrebleu.expected
+++ b/tests/sentencepiece/bleu.sacrebleu.expected
diff --git a/tests/sentencepiece/bleu-detok.bleu.expected b/tests/sentencepiece/bleu.score.expected
index bf6ea51..bf6ea51 100644
--- a/tests/sentencepiece/bleu-detok.bleu.expected
+++ b/tests/sentencepiece/bleu.score.expected
diff --git a/tests/sentencepiece/chrf.sacrebleu.expected b/tests/sentencepiece/chrf.sacrebleu.expected
new file mode 100644
index 0000000..c632726
--- /dev/null
+++ b/tests/sentencepiece/chrf.sacrebleu.expected
@@ -0,0 +1 @@
+BLEU+case.mixed+numrefs.1+smooth.exp+tok.13a = 28.1 56.4/34.0/21.9/14.9 (BP = 1.000 ratio = 1.133 hyp_len = 612 ref_len = 540)
diff --git a/tests/sentencepiece/chrf.score.expected b/tests/sentencepiece/chrf.score.expected
new file mode 100644
index 0000000..8bc555f
--- /dev/null
+++ b/tests/sentencepiece/chrf.score.expected
@@ -0,0 +1 @@
+59.1572
diff --git a/tests/sentencepiece/test_bleu.sh b/tests/sentencepiece/test_bleu.sh
new file mode 100644
index 0000000..b0b5140
--- /dev/null
+++ b/tests/sentencepiece/test_bleu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Check if the BLEU validation measure equals the SacreBLEU score
+# AUTHOR: snukky
+# TAGS: sentencepiece bleu valid-metrics
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf bleu bleu.*{log,out,diff,bleu}
+mkdir -p bleu
+
+# Copy the model
+cp -r $MRT_MODELS/rnn-spm/model.npz bleu/
+test -e bleu/model.npz
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
+ -m bleu/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+ --valid-freq 1 --valid-metrics bleu --valid-sets dev.de dev.en --valid-translation-output bleu.out \
+ --beam-size 8 --normalize 1 \
+ --log bleu.log
+
+# Check if files exist
+test -e bleu/model.npz
+test -e bleu.out
+test -e bleu.log
+
+
+# Extract the BLEU score from logs
+cat bleu.log | grep ' : bleu : ' | sed -r 's/.* bleu : (.*) : new best.*/\1/' > bleu.score
+# Check BLEU from logs
+$MRT_TOOLS/diff.sh bleu.score bleu.score.expected > bleu.score.diff
+
+
+# Run sacreBLEU removing the version information
+python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < bleu.out | sed -r 's/.version[^ ]* / /' > bleu.sacrebleu
+# Check BLEU from the validation translation output
+$MRT_TOOLS/diff.sh bleu.sacrebleu bleu.sacrebleu.expected > bleu.sacrebleu.diff
+
+
+# Exit with success code
+exit 0
diff --git a/tests/sentencepiece/test_bleu_detok.sh b/tests/sentencepiece/test_bleu_detok.sh
deleted file mode 100644
index d0513f6..0000000
--- a/tests/sentencepiece/test_bleu_detok.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash -x
-
-#####################################################################
-# SUMMARY: Check if the BLEU-detok validation measure equals to the SacreBLEU score
-# AUTHOR: snukky
-# TAGS: sentencepiece bleu-detok
-#####################################################################
-
-# Exit on error
-set -e
-
-# Remove old artifacts and create working directory
-rm -rf bleu-detok bleu-detok.*{log,out,diff,bleu}
-mkdir -p bleu-detok
-
-# Copy the model
-cp -r $MRT_MODELS/rnn-spm/model.npz bleu-detok/
-test -e bleu-detok/model.npz
-
-# Run marian command
-$MRT_MARIAN/marian \
- --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
- -m bleu-detok/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
- --valid-freq 1 --valid-metrics bleu-detok --valid-sets dev.de dev.en --valid-translation-output bleu-detok.out \
- --beam-size 8 --normalize 1 \
- --log bleu-detok.log
-
-# Check if files exist
-test -e bleu-detok/model.npz
-test -e bleu-detok.out
-test -e bleu-detok.log
-
-
-# Extract the BLEU score from logs
-cat bleu-detok.log | grep ' : bleu-detok : ' | sed -r 's/.* bleu-detok : (.*) : new best.*/\1/' > bleu-detok.bleu
-# Check BLEU from logs
-$MRT_TOOLS/diff.sh bleu-detok.bleu bleu-detok.bleu.expected > bleu-detok.bleu.diff
-
-
-# Run sacreBLEU removing the version information
-python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < bleu-detok.out | sed -r 's/.version[^ ]* / /' > bleu-detok.sacrebleu
-# Check BLEU from the validation translation output
-$MRT_TOOLS/diff.sh bleu-detok.sacrebleu bleu-detok.sacrebleu.expected > bleu-detok.sacrebleu.diff
-
-
-# Exit with success code
-exit 0
diff --git a/tests/sentencepiece/test_chrf.sh b/tests/sentencepiece/test_chrf.sh
new file mode 100644
index 0000000..0d928af
--- /dev/null
+++ b/tests/sentencepiece/test_chrf.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Check if the ChrF validation measure equals the SacreBLEU score
+# AUTHOR: snukky
+# TAGS: sentencepiece chrf valid-metrics
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf chrf chrf.*{log,out,diff,score}
+mkdir -p chrf
+
+# Copy the model
+cp -r $MRT_MODELS/rnn-spm/model.npz chrf/
+test -e chrf/model.npz
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \
+ -m chrf/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+ --valid-freq 1 --valid-metrics chrf --valid-sets dev.de dev.en --valid-translation-output chrf.out \
+ --beam-size 8 --normalize 1 \
+ --log chrf.log
+
+# Check if files exist
+test -e chrf/model.npz
+test -e chrf.out
+test -e chrf.log
+
+
+# Extract the score from logs
+cat chrf.log | grep ' : chrf : ' | sed -r 's/.* chrf : (.*) : new best.*/\1/' > chrf.score
+# Check score from logs
+$MRT_TOOLS/diff.sh chrf.score chrf.score.expected > chrf.score.diff
+
+
+# Run sacreBLEU removing the version information
+python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < chrf.out | sed -r 's/.version[^ ]* / /' > chrf.sacrebleu
+# Check BLEU from the validation translation output
+$MRT_TOOLS/diff.sh chrf.sacrebleu chrf.sacrebleu.expected > chrf.sacrebleu.diff
+
+
+# Exit with success code
+exit 0
diff --git a/tests/training/scheduler/.gitignore b/tests/training/scheduler/.gitignore
new file mode 100644
index 0000000..7958534
--- /dev/null
+++ b/tests/training/scheduler/.gitignore
@@ -0,0 +1,2 @@
+log_epoch_[etu]
+train.??.gz
diff --git a/tests/training/scheduler/log_epoch_e.expected b/tests/training/scheduler/log_epoch_e.expected
new file mode 100644
index 0000000..87cf167
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_e.expected
@@ -0,0 +1,20 @@
+Training started
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 1.000
+Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315
+Seen 1542 samples
+Starting data epoch 3 in logical epoch 1.500
+Ep. 1.500 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594
+Seen 1542 samples
+Starting data epoch 4 in logical epoch 2.000
+Seen 1542 samples
+Starting data epoch 5 in logical epoch 2.500
+Ep. 2.500 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215
+Seen 1542 samples
+Starting data epoch 6 in logical epoch 3.000
+Ep. 3.000 : Up. 40 : Sen. 1,280 : Cost 9.63199997 * 61,545 after 238,760
+Seen 1542 samples
+Starting data epoch 7 in logical epoch 3.500
+Training finished
+Saving model to log_epoch_e/model.npz
+Saving Adam parameters to log_epoch_e/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/log_epoch_t.expected b/tests/training/scheduler/log_epoch_t.expected
new file mode 100644
index 0000000..1f57c2e
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_t.expected
@@ -0,0 +1,12 @@
+Training started
+Ep. 2.258 : Up. 4 : Sen. 512 : Cost 9.69286919 * 13,547 after 13,547
+Ep. 3.400 : Up. 6 : Sen. 768 : Cost 9.68953419 * 6,851 after 20,398
+Ep. 5.131 : Up. 9 : Sen. 1,152 : Cost 9.68455887 * 10,387 after 30,785
+Ep. 6.793 : Up. 12 : Sen. 1,536 : Cost 9.68291855 * 9,975 after 40,760
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 6.819
+Ep. 8.472 : Up. 16 : Sen. 384 : Cost 9.67040443 * 10,074 after 50,834
+Ep. 10.219 : Up. 19 : Sen. 768 : Cost 9.66528606 * 10,481 after 61,315
+Training finished
+Saving model to log_epoch_t/model.npz
+Saving Adam parameters to log_epoch_t/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/log_epoch_u.expected b/tests/training/scheduler/log_epoch_u.expected
new file mode 100644
index 0000000..a8855f2
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_u.expected
@@ -0,0 +1,15 @@
+Training started
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 0.700
+Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315
+Seen 1542 samples
+Starting data epoch 3 in logical epoch 1.400
+Ep. 2.000 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594
+Seen 1542 samples
+Starting data epoch 4 in logical epoch 2.100
+Seen 1542 samples
+Starting data epoch 5 in logical epoch 2.800
+Ep. 3.000 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215
+Training finished
+Saving model to log_epoch_u/model.npz
+Saving Adam parameters to log_epoch_u/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/setup.sh b/tests/training/scheduler/setup.sh
new file mode 100644
index 0000000..284e7c1
--- /dev/null
+++ b/tests/training/scheduler/setup.sh
@@ -0,0 +1,10 @@
+# Skip if compiled without SentencePiece
+test -n "$MRT_MARIAN_USE_SENTENCEPIECE" || exit 100
+
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1
+
+test -f train.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | sed 's/@@ //g' | head -n 2000 | gzip > train.de.gz
+test -f train.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | sed 's/@@ //g' | head -n 2000 | gzip > train.en.gz
+
+test -f $MRT_MODELS/rnn-spm/vocab.deen.spm || exit 1
diff --git a/tests/training/scheduler/test_logical_epoch.sh b/tests/training/scheduler/test_logical_epoch.sh
new file mode 100644
index 0000000..fca9f52
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via data epoch
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_e log_epoch_e.*{log,out,diff}
+mkdir -p log_epoch_e
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+ -m log_epoch_e/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+ --mini-batch 256 --logical-epoch 2e --log log_epoch_e.log --after 3e \
+ --disp-freq 10u
+
+# Check if files exist
+test -e log_epoch_e/model.npz
+test -e log_epoch_e.log
+
+# Compare actual and expected outputs
+cat log_epoch_e.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_e.out
+$MRT_TOOLS/diff-nums.py log_epoch_e.out log_epoch_e.expected -p 0.01 -o log_epoch_e.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/scheduler/test_logical_epoch_labels.sh b/tests/training/scheduler/test_logical_epoch_labels.sh
new file mode 100644
index 0000000..c37c0fa
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch_labels.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via labels
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_t log_epoch_t.*{log,out,diff}
+mkdir -p log_epoch_t
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+ -m log_epoch_t/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+ --mini-batch 128 --logical-epoch 6kt --log log_epoch_t.log --after 10e \
+ --disp-freq 10kt
+
+# Check if files exist
+test -e log_epoch_t/model.npz
+test -e log_epoch_t.log
+
+# Compare actual and expected outputs
+cat log_epoch_t.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_t.out
+$MRT_TOOLS/diff-nums.py log_epoch_t.out log_epoch_t.expected -p 0.01 -o log_epoch_t.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/scheduler/test_logical_epoch_updates.sh b/tests/training/scheduler/test_logical_epoch_updates.sh
new file mode 100644
index 0000000..8582120
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch_updates.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via updates
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_u log_epoch_u.*{log,out,diff}
+mkdir -p log_epoch_u
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+ -m log_epoch_u/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+ --mini-batch 256 --logical-epoch 10u --log log_epoch_u.log --after 3e \
+ --disp-freq 10u
+
+# Check if files exist
+test -e log_epoch_u/model.npz
+test -e log_epoch_u.log
+
+# Compare actual and expected outputs
+cat log_epoch_u.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_u.out
+$MRT_TOOLS/diff-nums.py log_epoch_u.out log_epoch_u.expected -p 0.01 -o log_epoch_u.diff
+
+# Exit with success code
+exit 0