diff options
author | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2020-11-10 18:30:40 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2020-11-10 18:30:40 +0300 |
commit | 8acd64aa1e579339db629e605ab46759c4268d50 (patch) | |
tree | d6a9822c3177833768b976c9491d4306e4f871a9 /tests | |
parent | 17dacc949bf837d5998cbdcc9edcec60495e6d6c (diff) | |
parent | a30d3ffbebbe5e5587e6eabbdc6b7963c70bc87d (diff) |
merge conflict
Diffstat (limited to 'tests')
-rw-r--r-- | tests/sentencepiece/.gitignore | 4 | ||||
-rw-r--r-- | tests/sentencepiece/bleu.sacrebleu.expected (renamed from tests/sentencepiece/bleu-detok.sacrebleu.expected) | 0 | ||||
-rw-r--r-- | tests/sentencepiece/bleu.score.expected (renamed from tests/sentencepiece/bleu-detok.bleu.expected) | 0 | ||||
-rw-r--r-- | tests/sentencepiece/chrf.sacrebleu.expected | 1 | ||||
-rw-r--r-- | tests/sentencepiece/chrf.score.expected | 1 | ||||
-rw-r--r-- | tests/sentencepiece/test_bleu.sh | 47 | ||||
-rw-r--r-- | tests/sentencepiece/test_bleu_detok.sh | 47 | ||||
-rw-r--r-- | tests/sentencepiece/test_chrf.sh | 47 | ||||
-rw-r--r-- | tests/training/scheduler/.gitignore | 2 | ||||
-rw-r--r-- | tests/training/scheduler/log_epoch_e.expected | 20 | ||||
-rw-r--r-- | tests/training/scheduler/log_epoch_t.expected | 12 | ||||
-rw-r--r-- | tests/training/scheduler/log_epoch_u.expected | 15 | ||||
-rw-r--r-- | tests/training/scheduler/setup.sh | 10 | ||||
-rw-r--r-- | tests/training/scheduler/test_logical_epoch.sh | 32 | ||||
-rw-r--r-- | tests/training/scheduler/test_logical_epoch_labels.sh | 32 | ||||
-rw-r--r-- | tests/training/scheduler/test_logical_epoch_updates.sh | 32 |
16 files changed, 254 insertions, 48 deletions
diff --git a/tests/sentencepiece/.gitignore b/tests/sentencepiece/.gitignore index 7575793..7e5a57d 100644 --- a/tests/sentencepiece/.gitignore +++ b/tests/sentencepiece/.gitignore @@ -3,6 +3,8 @@ vocab.joint/ vocab.maxlines/ vocab.norm/ vocab.lm/ -bleu-detok/ +bleu/ +chrf/ *.bleu *.sacrebleu +*.score diff --git a/tests/sentencepiece/bleu-detok.sacrebleu.expected b/tests/sentencepiece/bleu.sacrebleu.expected index c632726..c632726 100644 --- a/tests/sentencepiece/bleu-detok.sacrebleu.expected +++ b/tests/sentencepiece/bleu.sacrebleu.expected diff --git a/tests/sentencepiece/bleu-detok.bleu.expected b/tests/sentencepiece/bleu.score.expected index bf6ea51..bf6ea51 100644 --- a/tests/sentencepiece/bleu-detok.bleu.expected +++ b/tests/sentencepiece/bleu.score.expected diff --git a/tests/sentencepiece/chrf.sacrebleu.expected b/tests/sentencepiece/chrf.sacrebleu.expected new file mode 100644 index 0000000..c632726 --- /dev/null +++ b/tests/sentencepiece/chrf.sacrebleu.expected @@ -0,0 +1 @@ +BLEU+case.mixed+numrefs.1+smooth.exp+tok.13a = 28.1 56.4/34.0/21.9/14.9 (BP = 1.000 ratio = 1.133 hyp_len = 612 ref_len = 540) diff --git a/tests/sentencepiece/chrf.score.expected b/tests/sentencepiece/chrf.score.expected new file mode 100644 index 0000000..8bc555f --- /dev/null +++ b/tests/sentencepiece/chrf.score.expected @@ -0,0 +1 @@ +59.1572 diff --git a/tests/sentencepiece/test_bleu.sh b/tests/sentencepiece/test_bleu.sh new file mode 100644 index 0000000..b0b5140 --- /dev/null +++ b/tests/sentencepiece/test_bleu.sh @@ -0,0 +1,47 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Check if the BLEU validation measure equals to the SacreBLEU score +# AUTHOR: snukky +# TAGS: sentencepiece bleu valid-metrics +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf bleu bleu.*{log,out,diff,bleu} +mkdir -p bleu + +# Copy the model +cp -r $MRT_MODELS/rnn-spm/model.npz bleu/ +test -e bleu/model.npz + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \ + -m bleu/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ + --valid-freq 1 --valid-metrics bleu --valid-sets dev.de dev.en --valid-translation-output bleu.out \ + --beam-size 8 --normalize 1 \ + --log bleu.log + +# Check if files exist +test -e bleu/model.npz +test -e bleu.out +test -e bleu.log + + +# Extract the BLEU score from logs +cat bleu.log | grep ' : bleu : ' | sed -r 's/.* bleu : (.*) : new best.*/\1/' > bleu.score +# Check BLEU from logs +$MRT_TOOLS/diff.sh bleu.score bleu.score.expected > bleu.score.diff + + +# Run sacreBLEU removing the version information +python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < bleu.out | sed -r 's/.version[^ ]* / /' > bleu.sacrebleu +# Check BLEU from the validation translation output +$MRT_TOOLS/diff.sh bleu.sacrebleu bleu.sacrebleu.expected > bleu.sacrebleu.diff + + +# Exit with success code +exit 0 diff --git a/tests/sentencepiece/test_bleu_detok.sh b/tests/sentencepiece/test_bleu_detok.sh deleted file mode 100644 index d0513f6..0000000 --- a/tests/sentencepiece/test_bleu_detok.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -x - -##################################################################### -# SUMMARY: Check if the BLEU-detok validation measure equals to the SacreBLEU score -# AUTHOR: snukky -# TAGS: sentencepiece bleu-detok -##################################################################### - -# Exit on error -set -e - -# Remove old artifacts and create working directory -rm -rf bleu-detok bleu-detok.*{log,out,diff,bleu} -mkdir -p bleu-detok - -# Copy the model -cp -r $MRT_MODELS/rnn-spm/model.npz bleu-detok/ -test -e bleu-detok/model.npz - -# Run marian command -$MRT_MARIAN/marian \ - --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \ - -m bleu-detok/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ - --valid-freq 1 --valid-metrics bleu-detok --valid-sets dev.de dev.en --valid-translation-output bleu-detok.out \ - --beam-size 8 --normalize 1 \ - --log bleu-detok.log - -# Check if files exist -test -e bleu-detok/model.npz -test -e bleu-detok.out -test -e bleu-detok.log - - -# Extract the BLEU score from logs -cat bleu-detok.log | grep ' : bleu-detok : ' | sed -r 's/.* bleu-detok : (.*) : new best.*/\1/' > bleu-detok.bleu -# Check BLEU from logs -$MRT_TOOLS/diff.sh bleu-detok.bleu bleu-detok.bleu.expected > bleu-detok.bleu.diff - - -# Run sacreBLEU removing the version information -python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < bleu-detok.out | sed -r 's/.version[^ ]* / /' > bleu-detok.sacrebleu -# Check BLEU from the validation translation output -$MRT_TOOLS/diff.sh bleu-detok.sacrebleu bleu-detok.sacrebleu.expected > bleu-detok.sacrebleu.diff - - -# Exit with success code -exit 0 diff --git a/tests/sentencepiece/test_chrf.sh b/tests/sentencepiece/test_chrf.sh new file mode 100644 index 0000000..0d928af --- /dev/null +++ b/tests/sentencepiece/test_chrf.sh @@ -0,0 +1,47 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Check if the ChrF validation measure equals to the SacreBLEU score +# AUTHOR: snukky +# TAGS: sentencepiece chrf valid-metrics +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf chrf chrf.*{log,out,diff,score} +mkdir -p chrf + +# Copy the model +cp -r $MRT_MODELS/rnn-spm/model.npz chrf/ +test -e chrf/model.npz + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --after-batches 1 --maxi-batch 1 --learn-rate 0 --overwrite \ + -m chrf/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ + --valid-freq 1 --valid-metrics chrf --valid-sets dev.de dev.en --valid-translation-output chrf.out \ + --beam-size 8 --normalize 1 \ + --log chrf.log + +# Check if files exist +test -e chrf/model.npz +test -e chrf.out +test -e chrf.log + + +# Extract the score from logs +cat chrf.log | grep ' : chrf : ' | sed -r 's/.* chrf : (.*) : new best.*/\1/' > chrf.score +# Check score from logs +$MRT_TOOLS/diff.sh chrf.score chrf.score.expected > chrf.score.diff + + +# Run sacreBLEU removing the version information +python3 $MRT_TOOLS/sacrebleu/sacrebleu.py dev.en < chrf.out | sed -r 's/.version[^ ]* / /' > chrf.sacrebleu +# Check BLEU from the validation translation output +$MRT_TOOLS/diff.sh chrf.sacrebleu chrf.sacrebleu.expected > chrf.sacrebleu.diff + + +# Exit with success code +exit 0 diff --git a/tests/training/scheduler/.gitignore b/tests/training/scheduler/.gitignore new file mode 100644 index 0000000..7958534 --- /dev/null +++ b/tests/training/scheduler/.gitignore @@ -0,0 +1,2 @@ +log_epoch_[etu] +train.??.gz diff --git a/tests/training/scheduler/log_epoch_e.expected b/tests/training/scheduler/log_epoch_e.expected new file mode 100644 index 0000000..87cf167 --- /dev/null +++ b/tests/training/scheduler/log_epoch_e.expected @@ -0,0 +1,20 @@ +Training started +Seen 1542 samples +Starting data epoch 2 in logical epoch 1.000 +Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315 +Seen 1542 samples +Starting data epoch 3 in logical epoch 1.500 +Ep. 1.500 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594 +Seen 1542 samples +Starting data epoch 4 in logical epoch 2.000 +Seen 1542 samples +Starting data epoch 5 in logical epoch 2.500 +Ep. 2.500 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215 +Seen 1542 samples +Starting data epoch 6 in logical epoch 3.000 +Ep. 3.000 : Up. 40 : Sen. 1,280 : Cost 9.63199997 * 61,545 after 238,760 +Seen 1542 samples +Starting data epoch 7 in logical epoch 3.500 +Training finished +Saving model to log_epoch_e/model.npz +Saving Adam parameters to log_epoch_e/model.npz.optimizer.npz diff --git a/tests/training/scheduler/log_epoch_t.expected b/tests/training/scheduler/log_epoch_t.expected new file mode 100644 index 0000000..1f57c2e --- /dev/null +++ b/tests/training/scheduler/log_epoch_t.expected @@ -0,0 +1,12 @@ +Training started +Ep. 2.258 : Up. 4 : Sen. 512 : Cost 9.69286919 * 13,547 after 13,547 +Ep. 3.400 : Up. 6 : Sen. 768 : Cost 9.68953419 * 6,851 after 20,398 +Ep. 5.131 : Up. 9 : Sen. 1,152 : Cost 9.68455887 * 10,387 after 30,785 +Ep. 6.793 : Up. 12 : Sen. 1,536 : Cost 9.68291855 * 9,975 after 40,760 +Seen 1542 samples +Starting data epoch 2 in logical epoch 6.819 +Ep. 8.472 : Up. 16 : Sen. 384 : Cost 9.67040443 * 10,074 after 50,834 +Ep. 10.219 : Up. 19 : Sen. 768 : Cost 9.66528606 * 10,481 after 61,315 +Training finished +Saving model to log_epoch_t/model.npz +Saving Adam parameters to log_epoch_t/model.npz.optimizer.npz diff --git a/tests/training/scheduler/log_epoch_u.expected b/tests/training/scheduler/log_epoch_u.expected new file mode 100644 index 0000000..a8855f2 --- /dev/null +++ b/tests/training/scheduler/log_epoch_u.expected @@ -0,0 +1,15 @@ +Training started +Seen 1542 samples +Starting data epoch 2 in logical epoch 0.700 +Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315 +Seen 1542 samples +Starting data epoch 3 in logical epoch 1.400 +Ep. 2.000 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594 +Seen 1542 samples +Starting data epoch 4 in logical epoch 2.100 +Seen 1542 samples +Starting data epoch 5 in logical epoch 2.800 +Ep. 3.000 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215 +Training finished +Saving model to log_epoch_u/model.npz +Saving Adam parameters to log_epoch_u/model.npz.optimizer.npz diff --git a/tests/training/scheduler/setup.sh b/tests/training/scheduler/setup.sh new file mode 100644 index 0000000..284e7c1 --- /dev/null +++ b/tests/training/scheduler/setup.sh @@ -0,0 +1,10 @@ +# Skip if compiled without SentencePiece +test -n "$MRT_MARIAN_USE_SENTENCEPIECE" || exit 100 + +test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1 +test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1 + +test -f train.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | sed 's/@@ //g' | head -n 2000 | gzip > train.de.gz +test -f train.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | sed 's/@@ //g' | head -n 2000 | gzip > train.en.gz + +test -f $MRT_MODELS/rnn-spm/vocab.deen.spm || exit 1 diff --git a/tests/training/scheduler/test_logical_epoch.sh b/tests/training/scheduler/test_logical_epoch.sh new file mode 100644 index 0000000..fca9f52 --- /dev/null +++ b/tests/training/scheduler/test_logical_epoch.sh @@ -0,0 +1,32 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Test logical epoch defined via data epoch +# AUTHOR: snukky +# TAGS: sentencepiece stopping after logical-epoch +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf log_epoch_e log_epoch_e.*{log,out,diff} +mkdir -p log_epoch_e + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \ + -m log_epoch_e/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ + --mini-batch 256 --logical-epoch 2e --log log_epoch_e.log --after 3e \ + --disp-freq 10u + +# Check if files exist +test -e log_epoch_e/model.npz +test -e log_epoch_e.log + +# Compare actual and expected outputs +cat log_epoch_e.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_e.out +$MRT_TOOLS/diff-nums.py log_epoch_e.out log_epoch_e.expected -p 0.01 -o log_epoch_e.diff + +# Exit with success code +exit 0 diff --git a/tests/training/scheduler/test_logical_epoch_labels.sh b/tests/training/scheduler/test_logical_epoch_labels.sh new file mode 100644 index 0000000..c37c0fa --- /dev/null +++ b/tests/training/scheduler/test_logical_epoch_labels.sh @@ -0,0 +1,32 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Test logical epoch defined via labels +# AUTHOR: snukky +# TAGS: sentencepiece stopping after logical-epoch +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf log_epoch_t log_epoch_t.*{log,out,diff} +mkdir -p log_epoch_t + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \ + -m log_epoch_t/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ + --mini-batch 128 --logical-epoch 6kt --log log_epoch_t.log --after 10e \ + --disp-freq 10kt + +# Check if files exist +test -e log_epoch_t/model.npz +test -e log_epoch_t.log + +# Compare actual and expected outputs +cat log_epoch_t.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_t.out +$MRT_TOOLS/diff-nums.py log_epoch_t.out log_epoch_t.expected -p 0.01 -o log_epoch_t.diff + +# Exit with success code +exit 0 diff --git a/tests/training/scheduler/test_logical_epoch_updates.sh b/tests/training/scheduler/test_logical_epoch_updates.sh new file mode 100644 index 0000000..8582120 --- /dev/null +++ b/tests/training/scheduler/test_logical_epoch_updates.sh @@ -0,0 +1,32 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Test logical epoch defined via updates +# AUTHOR: snukky +# TAGS: sentencepiece stopping after logical-epoch +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf log_epoch_u log_epoch_u.*{log,out,diff} +mkdir -p log_epoch_u + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \ + -m log_epoch_u/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \ + --mini-batch 256 --logical-epoch 10u --log log_epoch_u.log --after 3e \ + --disp-freq 10u + +# Check if files exist +test -e log_epoch_u/model.npz +test -e log_epoch_u.log + +# Compare actual and expected outputs +cat log_epoch_u.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_u.out +$MRT_TOOLS/diff-nums.py log_epoch_u.out log_epoch_u.expected -p 0.01 -o log_epoch_u.diff + +# Exit with success code +exit 0 |