From 29b686ffe0d34e7e7a094a837eaa5966beb43adc Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz
Date: Mon, 9 Nov 2020 09:39:47 -0800
Subject: Add tests with --logical-epoch and --after

---
 tests/training/scheduler/.gitignore            |  2 ++
 tests/training/scheduler/log_epoch_e.expected  | 20 ++++++++++++++
 tests/training/scheduler/log_epoch_t.expected  | 12 ++++++++
 tests/training/scheduler/log_epoch_u.expected  | 15 ++++++++++
 tests/training/scheduler/setup.sh              | 10 +++++++
 tests/training/scheduler/test_logical_epoch.sh | 32 ++++++++++++++++++++++
 .../scheduler/test_logical_epoch_labels.sh     | 32 ++++++++++++++++++++++
 .../scheduler/test_logical_epoch_updates.sh    | 32 ++++++++++++++++++++++
 8 files changed, 155 insertions(+)
 create mode 100644 tests/training/scheduler/.gitignore
 create mode 100644 tests/training/scheduler/log_epoch_e.expected
 create mode 100644 tests/training/scheduler/log_epoch_t.expected
 create mode 100644 tests/training/scheduler/log_epoch_u.expected
 create mode 100644 tests/training/scheduler/setup.sh
 create mode 100644 tests/training/scheduler/test_logical_epoch.sh
 create mode 100644 tests/training/scheduler/test_logical_epoch_labels.sh
 create mode 100644 tests/training/scheduler/test_logical_epoch_updates.sh

(limited to 'tests')

diff --git a/tests/training/scheduler/.gitignore b/tests/training/scheduler/.gitignore
new file mode 100644
index 0000000..7958534
--- /dev/null
+++ b/tests/training/scheduler/.gitignore
@@ -0,0 +1,2 @@
+log_epoch_[etu]
+train.??.gz
diff --git a/tests/training/scheduler/log_epoch_e.expected b/tests/training/scheduler/log_epoch_e.expected
new file mode 100644
index 0000000..87cf167
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_e.expected
@@ -0,0 +1,20 @@
+Training started
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 1.000
+Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315
+Seen 1542 samples
+Starting data epoch 3 in logical epoch 1.500
+Ep. 1.500 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594
+Seen 1542 samples
+Starting data epoch 4 in logical epoch 2.000
+Seen 1542 samples
+Starting data epoch 5 in logical epoch 2.500
+Ep. 2.500 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215
+Seen 1542 samples
+Starting data epoch 6 in logical epoch 3.000
+Ep. 3.000 : Up. 40 : Sen. 1,280 : Cost 9.63199997 * 61,545 after 238,760
+Seen 1542 samples
+Starting data epoch 7 in logical epoch 3.500
+Training finished
+Saving model to log_epoch_e/model.npz
+Saving Adam parameters to log_epoch_e/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/log_epoch_t.expected b/tests/training/scheduler/log_epoch_t.expected
new file mode 100644
index 0000000..1f57c2e
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_t.expected
@@ -0,0 +1,12 @@
+Training started
+Ep. 2.258 : Up. 4 : Sen. 512 : Cost 9.69286919 * 13,547 after 13,547
+Ep. 3.400 : Up. 6 : Sen. 768 : Cost 9.68953419 * 6,851 after 20,398
+Ep. 5.131 : Up. 9 : Sen. 1,152 : Cost 9.68455887 * 10,387 after 30,785
+Ep. 6.793 : Up. 12 : Sen. 1,536 : Cost 9.68291855 * 9,975 after 40,760
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 6.819
+Ep. 8.472 : Up. 16 : Sen. 384 : Cost 9.67040443 * 10,074 after 50,834
+Ep. 10.219 : Up. 19 : Sen. 768 : Cost 9.66528606 * 10,481 after 61,315
+Training finished
+Saving model to log_epoch_t/model.npz
+Saving Adam parameters to log_epoch_t/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/log_epoch_u.expected b/tests/training/scheduler/log_epoch_u.expected
new file mode 100644
index 0000000..a8855f2
--- /dev/null
+++ b/tests/training/scheduler/log_epoch_u.expected
@@ -0,0 +1,15 @@
+Training started
+Seen 1542 samples
+Starting data epoch 2 in logical epoch 0.700
+Ep. 1.000 : Up. 10 : Sen. 768 : Cost 9.68880177 * 61,315 after 61,315
+Seen 1542 samples
+Starting data epoch 3 in logical epoch 1.400
+Ep. 2.000 : Up. 20 : Sen. 1,536 : Cost 9.67091751 * 61,279 after 122,594
+Seen 1542 samples
+Starting data epoch 4 in logical epoch 2.100
+Seen 1542 samples
+Starting data epoch 5 in logical epoch 2.800
+Ep. 3.000 : Up. 30 : Sen. 512 : Cost 9.65089989 * 54,621 after 177,215
+Training finished
+Saving model to log_epoch_u/model.npz
+Saving Adam parameters to log_epoch_u/model.npz.optimizer.npz
diff --git a/tests/training/scheduler/setup.sh b/tests/training/scheduler/setup.sh
new file mode 100644
index 0000000..284e7c1
--- /dev/null
+++ b/tests/training/scheduler/setup.sh
@@ -0,0 +1,10 @@
+# Skip if compiled without SentencePiece
+test -n "$MRT_MARIAN_USE_SENTENCEPIECE" || exit 100
+
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1
+
+test -f train.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | sed 's/@@ //g' | head -n 2000 | gzip > train.de.gz
+test -f train.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | sed 's/@@ //g' | head -n 2000 | gzip > train.en.gz
+
+test -f $MRT_MODELS/rnn-spm/vocab.deen.spm || exit 1
diff --git a/tests/training/scheduler/test_logical_epoch.sh b/tests/training/scheduler/test_logical_epoch.sh
new file mode 100644
index 0000000..fca9f52
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via data epoch
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_e log_epoch_e.*{log,out,diff}
+mkdir -p log_epoch_e
+
+# Run marian command
+$MRT_MARIAN/marian \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+    -m log_epoch_e/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+    --mini-batch 256 --logical-epoch 2e --log log_epoch_e.log --after 3e \
+    --disp-freq 10u
+
+# Check if files exist
+test -e log_epoch_e/model.npz
+test -e log_epoch_e.log
+
+# Compare actual and expected outputs
+cat log_epoch_e.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_e.out
+$MRT_TOOLS/diff-nums.py log_epoch_e.out log_epoch_e.expected -p 0.01 -o log_epoch_e.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/scheduler/test_logical_epoch_labels.sh b/tests/training/scheduler/test_logical_epoch_labels.sh
new file mode 100644
index 0000000..c37c0fa
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch_labels.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via labels
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_t log_epoch_t.*{log,out,diff}
+mkdir -p log_epoch_t
+
+# Run marian command
+$MRT_MARIAN/marian \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+    -m log_epoch_t/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+    --mini-batch 128 --logical-epoch 6kt --log log_epoch_t.log --after 10e \
+    --disp-freq 10kt
+
+# Check if files exist
+test -e log_epoch_t/model.npz
+test -e log_epoch_t.log
+
+# Compare actual and expected outputs
+cat log_epoch_t.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_t.out
+$MRT_TOOLS/diff-nums.py log_epoch_t.out log_epoch_t.expected -p 0.01 -o log_epoch_t.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/scheduler/test_logical_epoch_updates.sh b/tests/training/scheduler/test_logical_epoch_updates.sh
new file mode 100644
index 0000000..8582120
--- /dev/null
+++ b/tests/training/scheduler/test_logical_epoch_updates.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Test logical epoch defined via updates
+# AUTHOR: snukky
+# TAGS: sentencepiece stopping after logical-epoch
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf log_epoch_u log_epoch_u.*{log,out,diff}
+mkdir -p log_epoch_u
+
+# Run marian command
+$MRT_MARIAN/marian \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
+    -m log_epoch_u/model.npz -t train.{de,en}.gz -v $MRT_MODELS/rnn-spm/vocab.{deen,deen}.spm \
+    --mini-batch 256 --logical-epoch 10u --log log_epoch_u.log --after 3e \
+    --disp-freq 10u
+
+# Check if files exist
+test -e log_epoch_u/model.npz
+test -e log_epoch_u.log
+
+# Compare actual and expected outputs
+cat log_epoch_u.log | $MRT_TOOLS/strip-timestamps.sh | grep -v '^\[' | sed 's/ : Time.*//' > log_epoch_u.out
+$MRT_TOOLS/diff-nums.py log_epoch_u.out log_epoch_u.expected -p 0.01 -o log_epoch_u.diff
+
+# Exit with success code
+exit 0
-- 
cgit v1.2.3