diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-01-25 17:41:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-25 17:41:38 +0300 |
commit | 4c44a8d92090a010d0cedf5fe2c26e692d51d20d (patch) | |
tree | 12d8c9a0dc459c9b6977481d7988e81a621ca0d9 | |
parent | 97b2f95abab6134c1632b286e373e513ecc52020 (diff) | |
parent | 88e775a09ad7326d1e974e150c5febfe06fc80ce (diff) |
Merge pull request #74 from marian-nmt/clip-norm-0
Update tests to set --clip-norm to 0 by default
152 files changed, 950 insertions, 839 deletions
diff --git a/tests/interface/input-tsv/restore_stdin.expected b/tests/interface/input-tsv/restore_stdin.expected index b9cc6d4..d26f7ff 100644 --- a/tests/interface/input-tsv/restore_stdin.expected +++ b/tests/interface/input-tsv/restore_stdin.expected @@ -1,30 +1,30 @@ -Ep. 1 : Up. 2 : Sen. 32 : Cost 214.87408447 -Ep. 1 : Up. 4 : Sen. 64 : Cost 195.88232422 -Ep. 1 : Up. 6 : Sen. 96 : Cost 222.75996399 -Ep. 1 : Up. 8 : Sen. 128 : Cost 232.49481201 -Ep. 1 : Up. 10 : Sen. 160 : Cost 204.78642273 -Ep. 1 : Up. 12 : Sen. 192 : Cost 256.89501953 -Ep. 1 : Up. 14 : Sen. 224 : Cost 237.73818970 -Ep. 1 : Up. 16 : Sen. 256 : Cost 210.21063232 -Ep. 1 : Up. 18 : Sen. 288 : Cost 178.70904541 -Ep. 1 : Up. 20 : Sen. 320 : Cost 224.30038452 -Ep. 1 : Up. 22 : Sen. 352 : Cost 225.22837830 -Ep. 1 : Up. 24 : Sen. 384 : Cost 210.81533813 -Ep. 1 : Up. 26 : Sen. 416 : Cost 202.19320679 -Ep. 1 : Up. 28 : Sen. 448 : Cost 211.53353882 -Ep. 1 : Up. 30 : Sen. 480 : Cost 209.39002991 -Ep. 1 : Up. 32 : Sen. 512 : Cost 206.38954163 -Ep. 1 : Up. 34 : Sen. 544 : Cost 202.88201904 -Ep. 1 : Up. 36 : Sen. 576 : Cost 192.36555481 -Ep. 1 : Up. 38 : Sen. 608 : Cost 179.21670532 -Ep. 1 : Up. 40 : Sen. 640 : Cost 164.29644775 -Ep. 1 : Up. 42 : Sen. 672 : Cost 187.61584473 -Ep. 1 : Up. 44 : Sen. 704 : Cost 244.09938049 -Ep. 1 : Up. 46 : Sen. 736 : Cost 266.25546265 -Ep. 1 : Up. 48 : Sen. 768 : Cost 197.74813843 -Ep. 1 : Up. 50 : Sen. 800 : Cost 187.12585449 -Ep. 1 : Up. 52 : Sen. 832 : Cost 186.14714050 -Ep. 1 : Up. 54 : Sen. 864 : Cost 227.19046021 -Ep. 1 : Up. 56 : Sen. 896 : Cost 210.90580750 -Ep. 1 : Up. 58 : Sen. 928 : Cost 210.68801880 -Ep. 1 : Up. 60 : Sen. 960 : Cost 182.89875793 +Ep. 1 : Up. 2 : Sen. 32 : Cost 214.83363342 +Ep. 1 : Up. 4 : Sen. 64 : Cost 195.70648193 +Ep. 1 : Up. 6 : Sen. 96 : Cost 222.41781616 +Ep. 1 : Up. 8 : Sen. 128 : Cost 231.91462708 +Ep. 1 : Up. 10 : Sen. 160 : Cost 204.08346558 +Ep. 1 : Up. 12 : Sen. 192 : Cost 255.86239624 +Ep. 1 : Up. 14 : Sen. 224 : Cost 236.60090637 +Ep. 1 : Up. 16 : Sen. 256 : Cost 209.00881958 +Ep. 1 : Up. 18 : Sen. 288 : Cost 177.51702881 +Ep. 1 : Up. 20 : Sen. 320 : Cost 222.74383545 +Ep. 1 : Up. 22 : Sen. 352 : Cost 223.34017944 +Ep. 1 : Up. 24 : Sen. 384 : Cost 208.93505859 +Ep. 1 : Up. 26 : Sen. 416 : Cost 200.02706909 +Ep. 1 : Up. 28 : Sen. 448 : Cost 209.29515076 +Ep. 1 : Up. 30 : Sen. 480 : Cost 207.00128174 +Ep. 1 : Up. 32 : Sen. 512 : Cost 203.81817627 +Ep. 1 : Up. 34 : Sen. 544 : Cost 200.10937500 +Ep. 1 : Up. 36 : Sen. 576 : Cost 189.81176758 +Ep. 1 : Up. 38 : Sen. 608 : Cost 176.77787781 +Ep. 1 : Up. 40 : Sen. 640 : Cost 161.60902405 +Ep. 1 : Up. 42 : Sen. 672 : Cost 184.40527344 +Ep. 1 : Up. 44 : Sen. 704 : Cost 239.88012695 +Ep. 1 : Up. 46 : Sen. 736 : Cost 262.33227539 +Ep. 1 : Up. 48 : Sen. 768 : Cost 194.13323975 +Ep. 1 : Up. 50 : Sen. 800 : Cost 183.32736206 +Ep. 1 : Up. 52 : Sen. 832 : Cost 181.78253174 +Ep. 1 : Up. 54 : Sen. 864 : Cost 222.31034851 +Ep. 1 : Up. 56 : Sen. 896 : Cost 206.36886597 +Ep. 1 : Up. 58 : Sen. 928 : Cost 205.69429016 +Ep. 1 : Up. 60 : Sen. 960 : Cost 178.27331543 diff --git a/tests/interface/input-tsv/test_tsv_train.sh b/tests/interface/input-tsv/test_tsv_train.sh index 72b87d8..822b249 100644 --- a/tests/interface/input-tsv/test_tsv_train.sh +++ b/tests/interface/input-tsv/test_tsv_train.sh @@ -14,7 +14,7 @@ mkdir -p train # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train/model.npz --tsv -t train.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 10 --disp-freq 2 \ --log train.log diff --git a/tests/interface/input-tsv/test_tsv_train_assume_stdin.sh b/tests/interface/input-tsv/test_tsv_train_assume_stdin.sh index 87f8c7c..75a2537 100644 --- a/tests/interface/input-tsv/test_tsv_train_assume_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_assume_stdin.sh @@ -14,7 +14,7 @@ mkdir -p train_stdin2 # Run marian command cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_stdin2/model.npz -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 10 --disp-freq 2 \ --log train_stdin2.log diff --git a/tests/interface/input-tsv/test_tsv_train_create_vocab_joint.sh b/tests/interface/input-tsv/test_tsv_train_create_vocab_joint.sh index 88b5b62..7cad7d2 100644 --- a/tests/interface/input-tsv/test_tsv_train_create_vocab_joint.sh +++ b/tests/interface/input-tsv/test_tsv_train_create_vocab_joint.sh @@ -14,7 +14,7 @@ mkdir -p train_vocab # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_vocab/model.npz --tsv -t train.tsv -v train_vocab/vocab.spm train_vocab/vocab.spm --dim-vocabs 2000 2000 -T train_vocab \ --after-batches 20 --disp-freq 2 \ --log train_vocab.log diff --git a/tests/interface/input-tsv/test_tsv_train_create_vocabs.sh b/tests/interface/input-tsv/test_tsv_train_create_vocabs.sh index a23e7ac..0a3da3f 100644 --- a/tests/interface/input-tsv/test_tsv_train_create_vocabs.sh +++ b/tests/interface/input-tsv/test_tsv_train_create_vocabs.sh @@ -14,7 +14,7 @@ mkdir -p train_vocabs # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_vocabs/model.npz --tsv -t train.tsv -v train_vocabs/vocab.de.spm train_vocabs/vocab.en.spm --dim-vocabs 2000 2000 -T train_vocabs \ --after-batches 20 --disp-freq 2 \ --log train_vocabs.log diff --git a/tests/interface/input-tsv/test_tsv_train_create_vocabs_yml.sh b/tests/interface/input-tsv/test_tsv_train_create_vocabs_yml.sh index 26d20a6..b257ea6 100644 --- a/tests/interface/input-tsv/test_tsv_train_create_vocabs_yml.sh +++ b/tests/interface/input-tsv/test_tsv_train_create_vocabs_yml.sh @@ -14,7 +14,7 @@ mkdir -p train_vocabs_yml # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_vocabs_yml/model.npz --tsv -t train.bpe.tsv -v train_vocabs_yml/vocab.de.yml train_vocabs_yml/vocab.en.yml --dim-vocabs 2000 2000 -T train_vocabs_yml \ --after-batches 20 --disp-freq 2 \ --log train_vocabs_yml.log diff --git a/tests/interface/input-tsv/test_tsv_train_inputtypes_stdin.sh b/tests/interface/input-tsv/test_tsv_train_inputtypes_stdin.sh index 07f2eab..0f824ac 100644 --- a/tests/interface/input-tsv/test_tsv_train_inputtypes_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_inputtypes_stdin.sh @@ -14,7 +14,7 @@ mkdir -p train_intypes_stdin # Run marian command cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_intypes_stdin/model.npz --tsv -t stdin --input-types sequence sequence -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 10 --disp-freq 2 \ --log train_intypes_stdin.log diff --git a/tests/interface/input-tsv/test_tsv_train_mini_batch_fit.sh b/tests/interface/input-tsv/test_tsv_train_mini_batch_fit.sh index 42afeac..401c7f9 100644 --- a/tests/interface/input-tsv/test_tsv_train_mini_batch_fit.sh +++ b/tests/interface/input-tsv/test_tsv_train_mini_batch_fit.sh @@ -14,7 +14,7 @@ mkdir -p train_fit # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --mini-batch-fit -w 500 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd --sync-sgd \ + --cost-type ce-mean --mini-batch-fit -w 500 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd --sync-sgd --clip-norm 0 \ -m train_fit/model.npz --tsv -t train.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 20 --disp-freq 4 \ --log train_fit.log diff --git a/tests/interface/input-tsv/test_tsv_train_mini_batch_fit_stdin.sh b/tests/interface/input-tsv/test_tsv_train_mini_batch_fit_stdin.sh index 0b4ba42..81e9410 100644 --- a/tests/interface/input-tsv/test_tsv_train_mini_batch_fit_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_mini_batch_fit_stdin.sh @@ -14,7 +14,7 @@ mkdir -p train_fit_stdin # Run marian command cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --mini-batch-fit -w 500 --seed 2222 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --mini-batch-fit -w 500 --seed 2222 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd \ -m train_fit_stdin/model.npz --tsv -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --disp-freq 4 --log train_fit_stdin.log diff --git a/tests/interface/input-tsv/test_tsv_train_restore_from_stdin.sh b/tests/interface/input-tsv/test_tsv_train_restore_from_stdin.sh index f8953ef..36b7685 100644 --- a/tests/interface/input-tsv/test_tsv_train_restore_from_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_restore_from_stdin.sh @@ -11,7 +11,7 @@ test -e vocab.de.yml || $MRT_MARIAN/marian-vocab < train.bpe.de > vocab.de.yml test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < train.bpe.en > vocab.en.yml # TODO: Weight decaying in Adam is disabled, because it gives unstable results on GPU -extra_opts="--no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --mini-batch 16 --dim-emb 128 --dim-rnn 256 --disp-freq 2 --type s2s --sync-sgd --optimizer sgd --cost-type ce-mean" +extra_opts="--no-shuffle --clip-norm 0 --seed 2222 --maxi-batch 1 --maxi-batch-sort none --mini-batch 16 --dim-emb 128 --dim-rnn 256 --disp-freq 2 --type s2s --sync-sgd --optimizer sgd --cost-type ce-mean" # Step 1: Train a model in one go, up to the update no. 70, and save training logs #$MRT_MARIAN/marian \ diff --git a/tests/interface/input-tsv/test_tsv_train_shuffle.sh b/tests/interface/input-tsv/test_tsv_train_shuffle.sh index e5ca216..9dd4ac9 100644 --- a/tests/interface/input-tsv/test_tsv_train_shuffle.sh +++ b/tests/interface/input-tsv/test_tsv_train_shuffle.sh @@ -14,7 +14,7 @@ mkdir -p train_shuffle # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd --sync-sgd \ + --cost-type ce-mean --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd --sync-sgd --clip-norm 0 \ -m train_shuffle/model.npz --tsv --tsv-fields 2 -t train.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 20 --disp-freq 4 \ --log train_shuffle.log diff --git a/tests/interface/input-tsv/test_tsv_train_shuffle_in_ram.sh b/tests/interface/input-tsv/test_tsv_train_shuffle_in_ram.sh index 8ce67c3..c6e0421 100644 --- a/tests/interface/input-tsv/test_tsv_train_shuffle_in_ram.sh +++ b/tests/interface/input-tsv/test_tsv_train_shuffle_in_ram.sh @@ -14,7 +14,7 @@ mkdir -p train_shuffle_ram # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --shuffle-in-ram --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd \ + --cost-type ce-mean --shuffle-in-ram --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 10 --optimizer sgd \ -m train_shuffle_ram/model.npz --tsv --tsv-fields 2 -t train.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 20 --disp-freq 4 \ --log train_shuffle_ram.log diff --git a/tests/interface/input-tsv/test_tsv_train_stdin.sh b/tests/interface/input-tsv/test_tsv_train_stdin.sh index 2a77516..7050b36 100644 --- a/tests/interface/input-tsv/test_tsv_train_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_stdin.sh @@ -14,7 +14,7 @@ mkdir -p train_stdin # Run marian command cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_stdin/model.npz --tsv -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 10 --disp-freq 2 \ --log train_stdin.log diff --git a/tests/interface/input-tsv/test_tsv_train_stdin_2_epochs.sh b/tests/interface/input-tsv/test_tsv_train_stdin_2_epochs.sh index af35c5c..654f019 100644 --- a/tests/interface/input-tsv/test_tsv_train_stdin_2_epochs.sh +++ b/tests/interface/input-tsv/test_tsv_train_stdin_2_epochs.sh @@ -14,7 +14,7 @@ mkdir -p train_stdin_2e # Train for the 1st epoch cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_stdin_2e/model.npz --tsv -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --disp-freq 5 \ --log train_stdin_2e.log @@ -25,7 +25,7 @@ test -e train_stdin_2e.log # Train for the 2nd epoch cat train.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.002 \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.002 \ -m train_stdin_2e/model.npz --tsv -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --disp-freq 5 \ --log train_stdin_2e.log diff --git a/tests/interface/input-tsv/test_tsv_train_stdin_empty_fields.sh b/tests/interface/input-tsv/test_tsv_train_stdin_empty_fields.sh index e9b0b33..d76ad80 100644 --- a/tests/interface/input-tsv/test_tsv_train_stdin_empty_fields.sh +++ b/tests/interface/input-tsv/test_tsv_train_stdin_empty_fields.sh @@ -20,7 +20,7 @@ paste train.{de,en} \ # Run marian command cat train_empty_lines.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --max-length 200 \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --max-length 200 \ -m train_empty_lines/model.npz --tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-epochs 1 --disp-freq 2 \ --log train_empty_lines.log diff --git a/tests/interface/input-tsv/test_tsv_train_stdin_lm.sh b/tests/interface/input-tsv/test_tsv_train_stdin_lm.sh index e9a25c6..8ebd9ce 100644 --- a/tests/interface/input-tsv/test_tsv_train_stdin_lm.sh +++ b/tests/interface/input-tsv/test_tsv_train_stdin_lm.sh @@ -14,7 +14,7 @@ mkdir -p train_lm # Run marian command cat train.en | $MRT_MARIAN/marian --type lm \ - --cost-type ce-mean --no-shuffle --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ -m train_lm/model.npz -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.spm \ --after-batches 10 --disp-freq 2 \ --log train_lm.log diff --git a/tests/interface/input-tsv/test_tsv_train_with_align.sh b/tests/interface/input-tsv/test_tsv_train_with_align.sh index c101d51..a95ad9d 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align.sh @@ -14,7 +14,7 @@ mkdir -p train_align # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.001 \ -m train_align/model.npz --tsv -t train2.de-en-aln.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --guided-alignment 2 --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh index b02488a..efa1793 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh @@ -14,7 +14,7 @@ mkdir -p train_align_weights # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 1 --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ -m train_align_weights/model.npz --tsv -t train2.de-w-aln-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 60 --disp-freq 4 \ --guided-alignment 2 --guided-alignment-weight 1.0 --data-weighting 1 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights_inputtypes.sh b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights_inputtypes.sh index 8653a67..c0e175e 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights_inputtypes.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights_inputtypes.sh @@ -14,7 +14,7 @@ mkdir -p train_align_weights_intypes # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 1 --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ -m train_align_weights_intypes/model.npz --tsv -t train2.de-w-aln-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 60 --disp-freq 4 \ --input-types sequence weight alignment sequence --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh b/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh index 55f9995..34c829f 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh @@ -14,7 +14,7 @@ mkdir -p train_align0 # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.001 \ -m train_align0/model.npz --tsv -t train2.aln-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --guided-alignment 0 --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_shuffle.sh b/tests/interface/input-tsv/test_tsv_train_with_align_shuffle.sh index d02f8dc..7a0503c 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_shuffle.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_shuffle.sh @@ -14,7 +14,7 @@ mkdir -p train_align_shuffle # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 --sync-sgd \ + --cost-type ce-mean --clip-norm 1 --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 --sync-sgd \ -m train_align_shuffle/model.npz --tsv -t train2.aln-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --guided-alignment 0 --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_shuffle_in_ram.sh b/tests/interface/input-tsv/test_tsv_train_with_align_shuffle_in_ram.sh index 2c4dc1a..49e74de 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_shuffle_in_ram.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_shuffle_in_ram.sh @@ -14,7 +14,7 @@ mkdir -p train_align_shuffle_ram # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --shuffle-in-ram --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --shuffle-in-ram --clip-norm 1 --seed 4444 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ -m train_align_shuffle_ram/model.npz --tsv -t train2.aln-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --guided-alignment 0 --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh index b266f3d..6eb5386 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh @@ -14,7 +14,7 @@ mkdir -p train_align_stdin # Run marian command cat train2.aln-de-en.tsv | $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 0 --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.001 \ -m train_align_stdin/model.npz -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --disp-freq 4 \ --guided-alignment 0 --guided-alignment-weight 1.0 \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_weights.sh b/tests/interface/input-tsv/test_tsv_train_with_weights.sh index 6528713..5b08134 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_weights.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_weights.sh @@ -14,7 +14,7 @@ mkdir -p train_weights # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 1 --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ -m train_weights/model.npz --tsv -t train2.de-en-w.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --data-weighting 2 --data-weighting-type sentence \ diff --git a/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh b/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh index ee4522f..5065dd3 100644 --- a/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh +++ b/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh @@ -14,7 +14,7 @@ mkdir -p train_weights0 # Run marian command $MRT_MARIAN/marian \ - --cost-type ce-mean --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + --cost-type ce-mean --no-shuffle --clip-norm 1 --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ -m train_weights0/model.npz --tsv -t train2.w-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ --after-batches 100 --disp-freq 4 \ --data-weighting 0 --data-weighting-type sentence \ diff --git a/tests/interface/input-tsv/test_tsv_valid.sh b/tests/interface/input-tsv/test_tsv_valid.sh index df72551..94c46f8 100644 --- a/tests/interface/input-tsv/test_tsv_valid.sh +++ b/tests/interface/input-tsv/test_tsv_valid.sh @@ -18,7 +18,7 @@ test -e valid/vocab.spm || cp $MRT_MODELS/rnn-spm/vocab.deen.spm valid/vocab.spm # Train $MRT_MARIAN/marian \ - --seed 2222 --no-shuffle --mini-batch 32 --maxi-batch 1 --optimizer sgd \ + --seed 2222 --no-shuffle --clip-norm 1 --mini-batch 32 --maxi-batch 1 --optimizer sgd \ -m valid/model.npz --tsv -t train.tsv -v valid/vocab.{spm,spm} \ --disp-freq 20 --valid-freq 30 --after-batches 30 \ --valid-metrics cross-entropy translation --valid-translation-output valid.out \ diff --git a/tests/interface/input-tsv/train.expected b/tests/interface/input-tsv/train.expected index 1cf2ecf..ed76575 100644 --- a/tests/interface/input-tsv/train.expected +++ b/tests/interface/input-tsv/train.expected @@ -1,5 +1,5 @@ -261.83258057 -257.78665161 -262.48895264 -253.13388062 -234.16917419 +261.58084106 +256.52697754 +260.31454468 +250.19743347 +230.69422913 diff --git a/tests/interface/input-tsv/train_align.expected b/tests/interface/input-tsv/train_align.expected index 2de51f1..6d1c8b3 100644 --- a/tests/interface/input-tsv/train_align.expected +++ b/tests/interface/input-tsv/train_align.expected @@ -1,25 +1,25 @@ -272.57867432 -267.45211792 -245.10440063 -243.12583923 -254.65167236 -251.95730591 -259.63885498 -243.55068970 -236.45735168 -215.81468201 -212.01930237 -222.31713867 -220.01065063 -230.49443054 -219.36715698 -214.80720520 -198.64233398 -195.14010620 -205.54002380 -204.59991455 -215.78044128 -205.05665588 -201.84078979 -187.56027222 -184.19506836 +267.55108643 +238.90954590 +203.89498901 +194.40493774 +201.26391602 +196.71656799 +205.61053467 +190.75955200 +190.06002808 +175.82437134 +171.81805420 +183.65437317 +183.88174438 +195.11131287 +181.43255615 +182.07211304 +168.76817322 +167.46075439 +175.70928955 +179.71203613 +187.29899597 +175.05770874 +175.59832764 +165.63943481 +163.86834717 diff --git a/tests/interface/input-tsv/train_align_stdin.expected b/tests/interface/input-tsv/train_align_stdin.expected index a468d22..e7a4610 100644 --- a/tests/interface/input-tsv/train_align_stdin.expected +++ b/tests/interface/input-tsv/train_align_stdin.expected @@ -1,7 +1,7 @@ -272.57867432 -267.45211792 -245.10440063 -243.12583923 -254.65167236 -251.95730591 -259.63885498 +267.55108643 +238.90954590 +203.89497375 +194.40493774 +201.26391602 +196.71656799 +205.61053467 diff --git a/tests/interface/input-tsv/train_empty_lines.expected b/tests/interface/input-tsv/train_empty_lines.expected index bf4b72e..acc5403 100644 --- a/tests/interface/input-tsv/train_empty_lines.expected +++ b/tests/interface/input-tsv/train_empty_lines.expected @@ -1,16 +1,16 @@ -270.58209229 -212.28765869 -285.14007568 -286.86123657 -168.05017090 -281.66876221 -267.08026123 -269.00738525 -262.89984131 -224.43609619 -284.56796265 -281.34075928 -231.55950928 -286.07806396 -249.47781372 -264.77264404 +270.44882202 +211.79843140 +284.02545166 +285.28485107 +166.57519531 +279.17941284 +264.18832397 +265.68725586 +259.24093628 +221.06784058 +279.52667236 +276.05969238 +226.25883484 +279.85809326 +242.84822083 +257.72427368 diff --git a/tests/interface/input-tsv/train_fit.expected b/tests/interface/input-tsv/train_fit.expected index 1c2d2eb..c577e77 100644 --- a/tests/interface/input-tsv/train_fit.expected +++ b/tests/interface/input-tsv/train_fit.expected @@ -1,5 +1,5 @@ -251.22476196 -252.45635986 -252.93251038 -258.67086792 -232.73229980 +250.19946289 +249.29103088 +247.88410950 +250.90098572 +222.61479187 diff --git a/tests/interface/input-tsv/train_fit_stdin.expected b/tests/interface/input-tsv/train_fit_stdin.expected index b9ce675..8d06b37 100644 --- a/tests/interface/input-tsv/train_fit_stdin.expected +++ b/tests/interface/input-tsv/train_fit_stdin.expected @@ -1,3 +1,3 @@ -344.08602905 -173.01716614 -248.64839172 +342.83029175 +170.86856079 +244.25839233 diff --git a/tests/interface/input-tsv/train_lm.expected b/tests/interface/input-tsv/train_lm.expected index c768644..fdbed00 100644 --- a/tests/interface/input-tsv/train_lm.expected +++ b/tests/interface/input-tsv/train_lm.expected @@ -1,5 +1,5 @@ -274.50836182 -274.87689209 -266.24481201 -259.36730957 -235.45114136 +274.36938477 +274.21545410 +265.08605957 +257.78823853 +233.71450806 diff --git a/tests/interface/input-tsv/train_shuffle.expected b/tests/interface/input-tsv/train_shuffle.expected index 912fc9a..f9d2a72 100644 --- a/tests/interface/input-tsv/train_shuffle.expected +++ b/tests/interface/input-tsv/train_shuffle.expected @@ -1,5 +1,5 @@ -216.91867065 -261.72125244 -267.90841675 -266.43109131 -300.19824219 +216.37680054 +259.30813599 +263.75015259 +260.62384033 +291.27304077 diff --git a/tests/interface/input-tsv/train_stdin_2e.expected b/tests/interface/input-tsv/train_stdin_2e.expected index 279c4ff..f80f79b 100644 --- a/tests/interface/input-tsv/train_stdin_2e.expected +++ b/tests/interface/input-tsv/train_stdin_2e.expected @@ -1,6 +1,6 @@ -262.65640259 -245.10810852 -248.28816223 -262.62274170 -245.00149536 -248.09956360 +261.66619873 +242.05940247 +243.13801575 +238.52865601 +317.93377686 +355.64865112 diff --git a/tests/interface/input-tsv/train_vocab.expected b/tests/interface/input-tsv/train_vocab.expected index 7e5f15e..2196992 100644 --- a/tests/interface/input-tsv/train_vocab.expected +++ b/tests/interface/input-tsv/train_vocab.expected @@ -1,10 +1,10 @@ -221.76995850 -213.01387024 -217.94252014 -216.67770386 -209.55206299 -233.91926575 -206.07537842 -231.02885437 -206.19801331 -222.06900024 +221.08058167 +211.53454590 +216.04510498 +213.98677063 +207.70535278 +230.34349060 +201.30010986 +224.49655151 +201.09184265 +214.30133057 diff --git a/tests/interface/input-tsv/train_vocabs.expected b/tests/interface/input-tsv/train_vocabs.expected index f003061..18492a4 100644 --- a/tests/interface/input-tsv/train_vocabs.expected +++ b/tests/interface/input-tsv/train_vocabs.expected @@ -1,10 +1,10 @@ -225.92840576 -209.46032715 -223.60330200 -207.56042480 -223.16098022 -198.65243530 -225.91799927 -209.45040894 -223.59326172 -207.55093384 +225.66580200 +208.27442932 +221.54891968 +204.83132935 +219.20014954 +194.64096069 +220.58721924 +203.36322021 +216.46800232 +199.75990295 diff --git a/tests/interface/input-tsv/train_vocabs_yml.expected b/tests/interface/input-tsv/train_vocabs_yml.expected index becf7bf..c6b5967 100644 --- a/tests/interface/input-tsv/train_vocabs_yml.expected +++ b/tests/interface/input-tsv/train_vocabs_yml.expected @@ -1,10 +1,10 @@ -200.10346985 -199.77453613 -196.38256836 -185.44483948 -168.59661865 -192.99969482 -181.77833557 -177.61343384 -200.09066772 -199.76245117 +199.88481140 +198.79373169 +194.57501221 +183.03530884 +165.68594360 +189.14419556 +177.06405640 +172.28703308 +193.72500610 +192.91064453 diff --git a/tests/interface/input-tsv/update_outputs.sh b/tests/interface/input-tsv/update_outputs.sh new file mode 100755 index 0000000..82436fb --- /dev/null +++ b/tests/interface/input-tsv/update_outputs.sh @@ -0,0 +1,27 @@ +#!/bin/sh -x +cp train.out train.expected +cp train_stdin2.out train.expected +cp train_vocab.out train_vocab.expected +cp train_vocabs.out train_vocabs.expected +cp train_vocabs.de.spm.out train_vocabs.de.spm.expected +cp train_vocabs.en.spm.out train_vocabs.en.spm.expected +cp train_vocabs_yml.out train_vocabs_yml.expected +cp train_intypes_stdin.out train.expected +cp train_fit.out train_fit.expected +cp train_fit_stdin.out train_fit_stdin.expected +cp restore_stdin.out restore_stdin.expected +cp train_shuffle.out train_shuffle.expected +cp train_shuffle_ram.out train_shuffle.expected +cp train_stdin.out train.expected +cp train_stdin_2e.out train_stdin_2e.expected +cp train_empty_lines.out train_empty_lines.expected +cp train_lm.out train_lm.expected +cp train_align.out train_align.expected +cp train_align_weights.out train_align_weights.expected +cp train_align_weights_intypes.out train_align_weights.expected +cp train_align0.out train_align.expected +cp train_align_shuffle.out train_align_shuffle.expected +cp train_align_shuffle_ram.out train_align_shuffle.expected +cp train_align_stdin.out train_align_stdin.expected +cp train_weights.out train_weights.expected +cp train_weights0.out train_weights.expected diff --git a/tests/training/basics/.gitignore b/tests/training/basics/.gitignore index 7cdacd2..a169c9f 100644 --- a/tests/training/basics/.gitignore +++ b/tests/training/basics/.gitignore @@ -1,4 +1,5 @@ toy +tiny valid trans sqlite diff --git a/tests/training/basics/test_tiny_vocab.sh b/tests/training/basics/test_tiny_vocab.sh new file mode 100644 index 0000000..f14683b --- /dev/null +++ b/tests/training/basics/test_tiny_vocab.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +##################################################################### +# SUMMARY: Run a basic training command with tiny vocabs +# AUTHOR: snukky +# TAGS: small-vocab +##################################################################### + +# Exit on error +set -e + +# Test code goes here +mkdir -p tiny +rm -f tiny/* tiny.log + +$MRT_MARIAN/marian \ + --seed 1111 --dim-emb 256 --dim-rnn 512 --no-shuffle --clip-norm 0 \ + -m tiny/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v tiny/vocab.de.yml tiny/vocab.en.yml \ + --log tiny.log --disp-freq 5 -e 5 + +test -e tiny/vocab.en.yml +test -e tiny/vocab.de.yml +test -e tiny/model.npz +test -e tiny/model.npz.yml +test -e tiny/model.npz.amun.yml + +cat tiny.log | $MRT_TOOLS/extract-costs.sh > tiny.out +$MRT_TOOLS/diff-nums.py tiny.out tiny.expected -p 0.1 -o tiny.diff + +# Exit with success code +exit 0 diff --git a/tests/training/basics/test_toy_vocab.sh b/tests/training/basics/test_toy_vocab.sh deleted file mode 100644 index 671843f..0000000 --- a/tests/training/basics/test_toy_vocab.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -##################################################################### -# SUMMARY: Run a basic training command with toy vocabs -# AUTHOR: snukky -# TAGS: small-vocab -##################################################################### - -# Exit on error -set -e - -# Test code goes here -mkdir -p toy -rm -f toy/* toy.log - -$MRT_MARIAN/marian \ - --seed 1111 --dim-emb 256 --dim-rnn 512 --no-shuffle \ - -m toy/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v toy/vocab.de.yml toy/vocab.en.yml \ - --log toy.log --disp-freq 5 -e 5 - -test -e toy/vocab.en.yml -test -e toy/vocab.de.yml -test -e toy/model.npz -test -e toy/model.npz.yml -test -e toy/model.npz.amun.yml - -cat toy.log | $MRT_TOOLS/extract-costs.sh > toy.out -$MRT_TOOLS/diff-nums.py toy.out toy.expected -p 0.1 -o toy.diff - -# Exit with success code -exit 0 diff --git a/tests/training/basics/tiny.expected b/tests/training/basics/tiny.expected new file mode 100644 index 0000000..f6c2caf --- /dev/null +++ b/tests/training/basics/tiny.expected @@ -0,0 +1,44 @@ +8.78632832 +8.75309849 +8.69854641 +8.60049725 +8.41462326 +8.04460907 +7.52984953 +7.11023378 +7.05034065 +7.12709856 +6.98178005 +6.85693312 +6.69474936 +6.58481455 +6.51727343 +6.49125957 +6.21844482 +6.46901369 +6.53361320 +6.46509838 +6.43254900 +6.35190487 +6.26485729 +6.21732950 +6.21957254 +5.91266489 +6.35146904 +6.36572838 +6.33053923 +6.28441715 +6.20286036 +6.15570545 +6.08000135 +6.03157282 +5.67149544 +6.35678244 +6.31723213 +6.26349401 +6.21279621 +6.12982273 +6.08621264 +5.94192123 +5.94551659 +5.48049164 diff --git a/tests/training/basics/toy.expected b/tests/training/basics/toy.expected deleted file mode 100644 index f134a62..0000000 --- a/tests/training/basics/toy.expected +++ /dev/null @@ -1,44 +0,0 @@ -8.78630924 -8.75282860 -8.69768810 -8.60030174 -8.42331791 -8.08456516 -7.58919859 -7.15421867 -7.07388210 -7.15911722 -7.00602388 -6.87553406 -6.70337963 -6.58344078 -6.49999046 -6.45548153 -6.15827656 -6.46891832 -6.53488016 -6.44239426 -6.40002155 -6.31497908 -6.22505951 -6.17120123 -6.16692400 -5.82793045 -6.37700939 -6.38672018 -6.30941343 -6.23907804 -6.14631748 -6.09236145 -5.99969482 -5.93795681 -5.50292015 -7.16254091 -6.92708254 -6.59395170 -6.40627527 -6.22969198 -6.13137770 -5.96749878 -5.96595860 -5.42677450 diff --git a/tests/training/cost-functions/ce-mean.expected b/tests/training/cost-functions/ce-mean.expected index 99dcb2e..d2284ad 100644 --- a/tests/training/cost-functions/ce-mean.expected +++ b/tests/training/cost-functions/ce-mean.expected @@ -1,8 +1,8 @@ -150.17980957 -263.68411255 -100.49322510 -253.63926697 -274.96899414 -145.67076111 -207.88955688 -245.54043579 +150.17977905 +263.68469238 +100.50036621 +253.64115906 +274.95046997 +145.69451904 +207.82330322 +245.20465088 diff --git a/tests/training/cost-functions/ce-sum.expected b/tests/training/cost-functions/ce-sum.expected index 211f95e..bf94e75 100644 --- a/tests/training/cost-functions/ce-sum.expected +++ b/tests/training/cost-functions/ce-sum.expected @@ -1,8 +1,8 @@ -9616.72363281 -16924.73828125 -6485.94677734 -16367.00390625 -17824.65820312 -9517.77148438 -13688.55371094 -16464.12500000 +9611.53320312 +16874.39843750 +6437.13916016 +16230.42187500 +17589.32421875 +9352.28906250 +13412.98730469 +16044.14746094 diff --git a/tests/training/cost-functions/perplexity.expected b/tests/training/cost-functions/perplexity.expected index 454d690..e72030a 100644 --- a/tests/training/cost-functions/perplexity.expected +++ b/tests/training/cost-functions/perplexity.expected @@ -1,8 +1,8 @@ -4855.37011719 -4845.00927734 -4863.23437500 -4840.18750000 -4846.01660156 -4849.56250000 -4848.35107422 -4839.06152344 +4833.17675781 +4724.25634766 +4562.26855469 +4509.33154297 +4332.39013672 +4184.27783203 +4086.94750977 +3897.33496094 diff --git a/tests/training/cost-functions/test_ce-mean-words.sh b/tests/training/cost-functions/test_ce-mean-words.sh index 7c875ff..b9bb8f5 100644 --- a/tests/training/cost-functions/test_ce-mean-words.sh +++ b/tests/training/cost-functions/test_ce-mean-words.sh @@ -14,7 +14,7 @@ rm -rf ce-mean-words ce-mean-words.log mkdir -p ce-mean-words $MRT_MARIAN/marian \ - --cost-type ce-mean-words \ + --cost-type ce-mean-words --clip-norm 0 \ --seed 9999 --sync-sgd \ -m ce-mean-words/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 2 --after-epochs 1 \ diff --git a/tests/training/cost-functions/test_ce-mean.sh b/tests/training/cost-functions/test_ce-mean.sh index c4109b1..e1b08d2 100644 --- a/tests/training/cost-functions/test_ce-mean.sh +++ b/tests/training/cost-functions/test_ce-mean.sh @@ -14,7 +14,7 @@ rm -rf ce-mean ce-mean.log mkdir -p ce-mean $MRT_MARIAN/marian \ - --cost-type ce-mean \ + --cost-type ce-mean --clip-norm 0 \ --seed 9999 --sync-sgd \ -m ce-mean/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 2 --after-epochs 1 \ diff --git a/tests/training/cost-functions/test_ce-sum.sh b/tests/training/cost-functions/test_ce-sum.sh index f22f137..a3cfb04 100644 --- a/tests/training/cost-functions/test_ce-sum.sh +++ b/tests/training/cost-functions/test_ce-sum.sh @@ -14,7 +14,7 @@ rm -rf ce-sum ce-sum.log mkdir -p ce-sum $MRT_MARIAN/marian \ - --cost-type ce-sum --disp-label-counts false \ + --cost-type ce-sum --disp-label-counts false --clip-norm 0 \ --seed 9999 --optimizer sgd --sync-sgd \ -m ce-sum/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 2 --after-epochs 1 \ diff --git a/tests/training/cost-functions/test_perplexity.sh b/tests/training/cost-functions/test_perplexity.sh index 7a790fd..83dfb36 100644 --- a/tests/training/cost-functions/test_perplexity.sh +++ b/tests/training/cost-functions/test_perplexity.sh @@ -14,7 +14,7 @@ rm -rf perplexity perplexity.log mkdir -p perplexity $MRT_MARIAN/marian \ - --cost-type perplexity \ + --cost-type perplexity --clip-norm 0 \ --seed 9999 --optimizer sgd --sync-sgd \ -m perplexity/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 2 --after-epochs 1 \ diff --git a/tests/training/features/data-weighting/maxibatch.expected b/tests/training/features/data-weighting/maxibatch.expected index 7c0001c..ab99d76 100644 --- a/tests/training/features/data-weighting/maxibatch.expected +++ b/tests/training/features/data-weighting/maxibatch.expected @@ -1,10 +1,10 @@ -6924.51171875 -5225.91162109 -4136.33691406 -3092.30273438 -2086.31420898 -5388.93750000 -5653.18310547 -4460.88183594 -3398.95581055 -2398.78735352 +6904.40136719 +5177.73974609 +4077.06103516 +3035.06811523 +2036.97106934 +5234.54150391 +5421.84570312 +4205.61328125 +3152.19384766 +2185.01635742 diff --git a/tests/training/features/data-weighting/sqlite.expected b/tests/training/features/data-weighting/sqlite.expected index d2f0f31..05298eb 100644 --- a/tests/training/features/data-weighting/sqlite.expected +++ b/tests/training/features/data-weighting/sqlite.expected @@ -1,100 +1,100 @@ -145.22386169 -408.22799683 -1194.63964844 -233.55360413 -1430.38696289 -126.14705658 -378.98550415 -96.71858215 -757.48663330 -330.56832886 -1604.02294922 -214.05000305 -438.19305420 -96.71667480 -466.88586426 -194.51495361 -1197.85363770 -146.07209778 -933.41638184 -389.17184448 -933.05151367 -622.52008057 -874.42907715 -825.77954102 -437.69128418 -155.24894714 -496.18453979 -232.95338440 -350.40795898 -321.22961426 -610.87152100 -670.83068848 -1778.17529297 -241.61517334 -1370.66113281 -136.27859497 -1510.65393066 -184.96159363 -815.58801270 -136.21365356 -729.38146973 -174.92094421 -408.49716187 -213.90168762 -233.55191040 -136.20471191 -290.47631836 -165.94480896 -584.11578369 -349.23815918 -958.81738281 -253.41488647 -437.03112793 -330.26031494 -435.45944214 -154.90957642 -521.61523438 -563.37170410 -496.03311157 -175.29434204 -438.16888428 -135.96989441 -496.29870605 -87.74061584 -436.65737915 -214.36607361 -58.15562820 -378.97769165 -348.81768799 -48.82063675 -993.13079834 -292.51773071 -524.27105713 -620.84045410 -1228.60827637 -535.02069092 -846.56628418 -291.05285645 -1658.58984375 -145.76992798 -1809.52172852 -680.63385010 -875.07159424 -78.35388947 -1049.29785156 -165.48069763 -611.14044189 -87.67588806 -438.09893799 -126.03882599 -262.80325317 -184.69609070 -262.46411133 -213.61804199 -1341.65759277 -106.89002228 -174.81327820 -116.23052979 -1165.48278809 -612.53643799 +145.22384644 +408.21359253 +1194.58532715 +233.50500488 +1429.99536133 +126.10730743 +378.85754395 +96.65737915 +757.20660400 +330.41281128 +1603.38867188 +213.94171143 +438.01345825 +96.60343933 +466.71673584 +194.41984558 +1197.37438965 +145.95291138 +932.55432129 +388.84637451 +932.01428223 +622.00317383 +873.45208740 +824.53771973 +437.26394653 +155.06787109 +495.64407349 +232.61149597 +349.86016846 +320.80847168 +610.00537109 +670.07312012 +1775.15356445 +241.20024109 +1368.22827148 +135.98722839 +1508.71228027 +184.60949707 +814.25140381 +135.82812500 +728.21057129 +174.76170349 +408.09228516 +213.54101562 +233.23889160 +136.04132080 +290.16552734 +165.65209961 +583.17614746 +348.41030884 +956.42248535 +252.87858582 +435.55230713 +329.51501465 +434.74572754 +154.38674927 +520.62304688 +562.26965332 +494.76245117 +174.66748047 +436.70803833 +135.57254028 +494.28051758 +87.53215790 +435.90051270 +213.83657837 +57.93138885 +377.68255615 +347.87652588 +48.66077423 +989.79504395 +291.43624878 +522.76562500 +619.18884277 +1225.72314453 +533.32568359 +844.25756836 +289.89556885 +1653.40588379 +144.93725586 +1802.95410156 +678.34832764 +870.25048828 +78.17111969 +1046.43676758 +164.60942078 +609.03454590 +87.29292297 +435.67877197 +125.71372223 +261.20431519 +184.11248779 +260.97955322 +212.86184692 +1336.96362305 +106.14862061 +173.34860229 +115.57688904 +1160.16357422 +609.54388428 diff --git a/tests/training/features/data-weighting/sqlite_word.expected b/tests/training/features/data-weighting/sqlite_word.expected index fb557e0..2271c82 100644 --- a/tests/training/features/data-weighting/sqlite_word.expected +++ b/tests/training/features/data-weighting/sqlite_word.expected @@ -1,14 +1,14 @@ -853.40081787 -710.09143066 -610.75262451 -526.56585693 -439.88232422 -345.80377197 -234.99189758 -606.08709717 -734.31378174 -627.71948242 -544.36926270 -456.04959106 -364.86871338 -264.01800537 +846.28295898 +683.08270264 +546.49383545 +436.32812500 +349.99142456 +268.15167236 +178.71995544 +478.48889160 +563.18597412 +471.57501221 +402.95126343 +333.62921143 +264.42492676 +190.03326416 diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh index 58e9e82..bec6015 100644 --- a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh +++ b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh @@ -16,7 +16,7 @@ test -e vocab.de.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml $MRT_MARIAN/marian \ - --seed 3333 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \ + --seed 3333 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd \ -m maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \ --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum --disp-label-counts false \ --data-weighting train.1k.inc.txt --data-weighting-type sentence diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh index 5cb5592..55bf916 100644 --- a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh +++ b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh @@ -16,7 +16,7 @@ test -e vocab.de.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml $MRT_MARIAN/marian \ - --seed 6666 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \ + --seed 6666 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd \ -m word_maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \ --log word_maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-mean \ --data-weighting train.1k.wordinc.txt --data-weighting-type word diff --git a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh index 2295d64..9c85fac 100644 --- a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh +++ b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh @@ -13,7 +13,7 @@ rm -rf sqlite sqlite.log mkdir -p sqlite $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --clip-norm 0 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m sqlite/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \ --log sqlite.log --disp-freq 1 --after-batches 100 --mini-batch 1 \ --data-weighting train.1k.weights.txt --data-weighting-type sentence --sqlite sqlite/corpus.sqlite3 diff --git a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh index fe399c7..4518885 100644 --- a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh +++ b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh @@ -15,7 +15,7 @@ mkdir -p sqlite_word cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > sqlite_word.weights.txt $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m sqlite_word/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \ --log sqlite_word.log --disp-freq 5 -e 2 --mini-batch-fit -w 500 \ --data-weighting sqlite_word.weights.txt --data-weighting-type word --sqlite sqlite_word/corpus.sqlite3 diff --git a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh index c3d7b93..2b7c3dd 100644 --- a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh +++ b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh @@ -17,7 +17,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r -e 's/[^ ]+/2/g' -e 's/$/ 2/' > # Train $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m word_eos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \ --log word_eos.log --disp-freq 5 -e 2 \ --data-weighting word_eos.weights.txt --data-weighting-type word diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh index 0660501..b514b28 100644 --- a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh +++ b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh @@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos.weigh # Train with word weighting $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m word_twos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \ --log word_twos.log --disp-freq 5 -e 2 \ --data-weighting word_twos.weights.txt --data-weighting-type word @@ -41,7 +41,7 @@ echo "data-weighting-type: word" >> word_twos.config.yml # Train with word weighting $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m word_twos_cfg/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \ --log word_twos_cfg.log --disp-freq 5 -e 2 \ -c word_twos.config.yml diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh index 675ae8b..bf6d753 100644 --- a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh +++ b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh @@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos_sync. # Train with word weighting $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ + --seed 1111 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \ -m word_twos_sync/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml --sync-sgd \ --log word_twos_sync.log --disp-freq 5 -e 2 \ --data-weighting word_twos_sync.weights.txt --data-weighting-type word diff --git a/tests/training/features/data-weighting/word_eos.expected b/tests/training/features/data-weighting/word_eos.expected index a4ec027..da2dc29 100644 --- a/tests/training/features/data-weighting/word_eos.expected +++ b/tests/training/features/data-weighting/word_eos.expected @@ -1,17 +1,17 @@ -Ep. 1 : Up. 5 : Sen. 320 : Cost 856.40283203 -Ep. 1 : Up. 10 : Sen. 640 : Cost 705.00976562 -Ep. 1 : Up. 15 : Sen. 960 : Cost 604.22814941 -Ep. 1 : Up. 20 : Sen. 1,280 : Cost 518.43249512 -Ep. 1 : Up. 25 : Sen. 1,600 : Cost 442.49536133 -Ep. 1 : Up. 30 : Sen. 1,920 : Cost 367.35723877 -Ep. 1 : Up. 35 : Sen. 2,240 : Cost 301.55618286 -Ep. 1 : Up. 40 : Sen. 2,560 : Cost 230.20394897 -Ep. 2 : Up. 45 : Sen. 64 : Cost 340.81048584 -Ep. 2 : Up. 50 : Sen. 384 : Cost 818.84265137 -Ep. 2 : Up. 55 : Sen. 704 : Cost 681.78875732 -Ep. 2 : Up. 60 : Sen. 1,024 : Cost 587.13653564 -Ep. 2 : Up. 65 : Sen. 1,344 : Cost 501.12982178 -Ep. 2 : Up. 70 : Sen. 1,664 : Cost 427.36920166 -Ep. 2 : Up. 75 : Sen. 1,984 : Cost 354.46206665 -Ep. 2 : Up. 80 : Sen. 2,304 : Cost 287.68417358 -Ep. 2 : Up. 85 : Sen. 2,624 : Cost 212.97563171 +Ep. 1 : Up. 5 : Sen. 320 : Cost 848.23455811 +Ep. 1 : Up. 10 : Sen. 640 : Cost 672.56451416 +Ep. 1 : Up. 15 : Sen. 960 : Cost 525.11682129 +Ep. 1 : Up. 20 : Sen. 1,280 : Cost 418.14608765 +Ep. 1 : Up. 25 : Sen. 1,600 : Cost 345.36917114 +Ep. 1 : Up. 30 : Sen. 1,920 : Cost 280.52749634 +Ep. 1 : Up. 35 : Sen. 2,240 : Cost 227.94680786 +Ep. 1 : Up. 40 : Sen. 2,560 : Cost 167.04818726 +Ep. 2 : Up. 45 : Sen. 64 : Cost 262.95532227 +Ep. 2 : Up. 50 : Sen. 384 : Cost 660.01922607 +Ep. 2 : Up. 55 : Sen. 704 : Cost 521.61163330 +Ep. 2 : Up. 60 : Sen. 1,024 : Cost 435.67529297 +Ep. 2 : Up. 65 : Sen. 1,344 : Cost 364.16458130 +Ep. 2 : Up. 70 : Sen. 1,664 : Cost 305.08660889 +Ep. 2 : Up. 75 : Sen. 1,984 : Cost 250.36157227 +Ep. 2 : Up. 80 : Sen. 2,304 : Cost 204.07473755 +Ep. 2 : Up. 85 : Sen. 2,624 : Cost 142.30810547 diff --git a/tests/training/features/data-weighting/word_maxibatch.expected b/tests/training/features/data-weighting/word_maxibatch.expected index 542de08..c71c457 100644 --- a/tests/training/features/data-weighting/word_maxibatch.expected +++ b/tests/training/features/data-weighting/word_maxibatch.expected @@ -1,10 +1,10 @@ -493.42471313 -355.55953979 -272.83404541 -199.24537659 -131.18077087 -386.23254395 -388.55151367 -296.93032837 -220.35517883 -152.07803345 +491.56161499 +351.35723877 +267.88531494 +194.60379028 +127.38488770 +371.88699341 +366.62310791 +272.43316650 +197.15826416 +132.87835693 diff --git a/tests/training/features/data-weighting/word_twos.expected b/tests/training/features/data-weighting/word_twos.expected index cc235d0..95d13cc 100644 --- a/tests/training/features/data-weighting/word_twos.expected +++ b/tests/training/features/data-weighting/word_twos.expected @@ -1,17 +1,17 @@ -Ep. 1 : Up. 5 : Sen. 320 : Cost 846.69714355 : -Ep. 1 : Up. 10 : Sen. 640 : Cost 695.30053711 : -Ep. 1 : Up. 15 : Sen. 960 : Cost 594.51928711 : -Ep. 1 : Up. 20 : Sen. 1,280 : Cost 508.72247314 : -Ep. 1 : Up. 25 : Sen. 1,600 : Cost 432.78329468 : -Ep. 1 : Up. 30 : Sen. 1,920 : Cost 357.64947510 : -Ep. 1 : Up. 35 : Sen. 2,240 : Cost 291.84161377 : -Ep. 1 : Up. 40 : Sen. 2,560 : Cost 220.49028015 : -Ep. 2 : Up. 45 : Sen. 64 : Cost 331.08535767 : -Ep. 2 : Up. 50 : Sen. 384 : Cost 809.13928223 : -Ep. 2 : Up. 55 : Sen. 704 : Cost 672.08361816 : -Ep. 2 : Up. 60 : Sen. 1,024 : Cost 577.43341064 : -Ep. 2 : Up. 65 : Sen. 1,344 : Cost 491.42279053 : -Ep. 2 : Up. 70 : Sen. 1,664 : Cost 417.66470337 : -Ep. 2 : Up. 75 : Sen. 1,984 : Cost 344.76025391 : -Ep. 2 : Up. 80 : Sen. 2,304 : Cost 277.97634888 : -Ep. 2 : Up. 85 : Sen. 2,624 : Cost 203.26664734 : +Ep. 1 : Up. 5 : Sen. 320 : Cost 838.97186279 : +Ep. 1 : Up. 10 : Sen. 640 : Cost 665.44097900 : +Ep. 1 : Up. 15 : Sen. 960 : Cost 523.22821045 : +Ep. 1 : Up. 20 : Sen. 1,280 : Cost 417.61639404 : +Ep. 1 : Up. 25 : Sen. 1,600 : Cost 343.39797974 : +Ep. 1 : Up. 30 : Sen. 1,920 : Cost 278.35540771 : +Ep. 1 : Up. 35 : Sen. 2,240 : Cost 225.92178345 : +Ep. 1 : Up. 40 : Sen. 2,560 : Cost 165.37797546 : +Ep. 2 : Up. 45 : Sen. 64 : Cost 257.18948364 : +Ep. 2 : Up. 50 : Sen. 384 : Cost 635.78594971 : +Ep. 2 : Up. 55 : Sen. 704 : Cost 507.77557373 : +Ep. 2 : Up. 60 : Sen. 1,024 : Cost 431.42156982 : +Ep. 2 : Up. 65 : Sen. 1,344 : Cost 361.39825439 : +Ep. 2 : Up. 70 : Sen. 1,664 : Cost 302.86456299 : +Ep. 2 : Up. 75 : Sen. 1,984 : Cost 248.74520874 : +Ep. 2 : Up. 80 : Sen. 2,304 : Cost 203.10728455 : +Ep. 2 : Up. 85 : Sen. 2,624 : Cost 141.87115479 : diff --git a/tests/training/features/data-weighting/word_twos_sync.expected b/tests/training/features/data-weighting/word_twos_sync.expected index df4c0d5..f199178 100644 --- a/tests/training/features/data-weighting/word_twos_sync.expected +++ b/tests/training/features/data-weighting/word_twos_sync.expected @@ -1,17 +1,17 @@ -Ep. 1 : Up. 5 : Sen. 320 : Cost 846.69714355 : -Ep. 1 : Up. 10 : Sen. 640 : Cost 695.30053711 : -Ep. 1 : Up. 15 : Sen. 960 : Cost 594.51928711 : -Ep. 1 : Up. 20 : Sen. 1,280 : Cost 508.72241211 : -Ep. 1 : Up. 25 : Sen. 1,600 : Cost 432.78320312 : -Ep. 1 : Up. 30 : Sen. 1,920 : Cost 357.64950562 : -Ep. 1 : Up. 35 : Sen. 2,240 : Cost 291.84161377 : -Ep. 1 : Up. 40 : Sen. 2,560 : Cost 220.49028015 : -Ep. 2 : Up. 45 : Sen. 64 : Cost 331.08532715 : -Ep. 2 : Up. 50 : Sen. 384 : Cost 809.13928223 : -Ep. 2 : Up. 55 : Sen. 704 : Cost 672.08367920 : -Ep. 2 : Up. 60 : Sen. 1,024 : Cost 577.43341064 : -Ep. 2 : Up. 65 : Sen. 1,344 : Cost 491.42279053 : -Ep. 2 : Up. 70 : Sen. 1,664 : Cost 417.66470337 : -Ep. 2 : Up. 75 : Sen. 1,984 : Cost 344.76025391 : -Ep. 2 : Up. 80 : Sen. 2,304 : Cost 277.97634888 : -Ep. 2 : Up. 85 : Sen. 2,624 : Cost 203.26664734 : +Ep. 1 : Up. 5 : Sen. 320 : Cost 838.97186279 : +Ep. 1 : Up. 10 : Sen. 640 : Cost 665.44097900 : +Ep. 1 : Up. 15 : Sen. 960 : Cost 523.22821045 : +Ep. 1 : Up. 20 : Sen. 1,280 : Cost 417.61639404 : +Ep. 1 : Up. 25 : Sen. 1,600 : Cost 343.39797974 : +Ep. 1 : Up. 30 : Sen. 1,920 : Cost 278.35540771 : +Ep. 1 : Up. 35 : Sen. 2,240 : Cost 225.92178345 : +Ep. 1 : Up. 40 : Sen. 2,560 : Cost 165.37797546 : +Ep. 2 : Up. 45 : Sen. 64 : Cost 257.18945312 : +Ep. 2 : Up. 50 : Sen. 384 : Cost 635.78594971 : +Ep. 2 : Up. 55 : Sen. 704 : Cost 507.77557373 : +Ep. 2 : Up. 60 : Sen. 1,024 : Cost 431.42156982 : +Ep. 2 : Up. 65 : Sen. 1,344 : Cost 361.39825439 : +Ep. 2 : Up. 70 : Sen. 1,664 : Cost 302.86456299 : +Ep. 2 : Up. 75 : Sen. 1,984 : Cost 248.74520874 : +Ep. 2 : Up. 80 : Sen. 2,304 : Cost 203.10728455 : +Ep. 2 : Up. 85 : Sen. 2,624 : Cost 141.87115479 : diff --git a/tests/training/features/exp-smoothing/test_expsmooth.sh b/tests/training/features/exp-smoothing/test_expsmooth.sh index b27cee9..69d7072 100644 --- a/tests/training/features/exp-smoothing/test_expsmooth.sh +++ b/tests/training/features/exp-smoothing/test_expsmooth.sh @@ -1,5 +1,9 @@ #!/bin/bash -x +##################################################################### +# TAGS: clip-norm +##################################################################### + # Exit on error set -e @@ -8,7 +12,7 @@ rm -rf expsmooth expsmooth*.log mkdir -p expsmooth -opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --cost-type ce-mean" +opts="--no-shuffle --clip-norm 1 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --cost-type ce-mean" # No exponential smoothing $MRT_MARIAN/marian \ diff --git a/tests/training/features/exp-smoothing/test_expsmooth_sync.sh b/tests/training/features/exp-smoothing/test_expsmooth_sync.sh index 3bab8ee..29e2978 100644 --- a/tests/training/features/exp-smoothing/test_expsmooth_sync.sh +++ b/tests/training/features/exp-smoothing/test_expsmooth_sync.sh @@ -13,11 +13,11 @@ rm -rf expsmooth_sync expsmooth_sync*.log mkdir -p expsmooth_sync -opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer adam --learn-rate 0.0001 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --devices 0 1 --sync-sgd" +opts="--no-shuffle --clip-norm 0 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer adam --learn-rate 0.0001 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --devices 0 1 --sync-sgd" # No exponential smoothing $MRT_MARIAN/marian \ - -m expsmooth_sync/model.noexp.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml --clip-norm 0 --cost-type ce-mean-words \ + -m expsmooth_sync/model.noexp.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml --cost-type ce-mean-words \ --disp-freq 20 --valid-freq 20 --after-batches 200 $opts \ --log expsmooth_sync_0.log @@ -30,7 +30,7 @@ cat expsmooth_sync_0.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | grep # With exponential smoothing $MRT_MARIAN/marian \ - -m expsmooth_sync/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml --clip-norm 0 --cost-type ce-mean-words \ + -m expsmooth_sync/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml --cost-type ce-mean-words \ --disp-freq 20 --valid-freq 20 --after-batches 200 --exponential-smoothing 0.0001 $opts \ --log expsmooth_sync.log diff --git a/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh b/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh index a022e5c..925f36f 100644 --- a/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh +++ b/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh @@ -3,7 +3,7 @@ ##################################################################### # SUMMARY: Training S2S model with guided alignment # AUTHOR: snukky -# TAGS: align rnn +# TAGS: align rnn clip-norm ##################################################################### # Exit on error @@ -15,7 +15,7 @@ mkdir -p rnn # Run marian command $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ + --no-shuffle --clip-norm 1 --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ -m rnn/model.npz -t corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --after-batches 100 --disp-freq 10 \ --guided-alignment corpus.bpe.align --guided-alignment-weight 1.0 --learn-rate 0.1 \ diff --git a/tests/training/features/guided-alignment/test_guided_alignment_transformer.sh b/tests/training/features/guided-alignment/test_guided_alignment_transformer.sh index f5f18b9..cd28f1c 100644 --- a/tests/training/features/guided-alignment/test_guided_alignment_transformer.sh +++ b/tests/training/features/guided-alignment/test_guided_alignment_transformer.sh @@ -15,7 +15,7 @@ mkdir -p transformer # Run marian command $MRT_MARIAN/marian --type transformer \ - --no-shuffle --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ + --no-shuffle --clip-norm 0 --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ -m transformer/model.npz -t corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --after-batches 100 --disp-freq 10 \ --guided-alignment corpus.bpe.align --guided-alignment-weight 1.0 --learn-rate 0.1 \ diff --git a/tests/training/features/guided-alignment/test_guided_alignment_transformer_sync.sh b/tests/training/features/guided-alignment/test_guided_alignment_transformer_sync.sh index 49675e8..963052d 100644 --- a/tests/training/features/guided-alignment/test_guided_alignment_transformer_sync.sh +++ b/tests/training/features/guided-alignment/test_guided_alignment_transformer_sync.sh @@ -15,7 +15,7 @@ mkdir -p transformer_sync # Run marian command $MRT_MARIAN/marian --type transformer \ - --no-shuffle --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean --sync-sgd \ + --no-shuffle --clip-norm 0 --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean --sync-sgd \ -m transformer_sync/model.npz -t corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --after-batches 100 --disp-freq 10 \ --guided-alignment corpus.bpe.align --guided-alignment-weight 1.0 --learn-rate 0.1 \ diff --git a/tests/training/features/guided-alignment/transformer.expected b/tests/training/features/guided-alignment/transformer.expected index 2513e28..e69de29 100644 --- a/tests/training/features/guided-alignment/transformer.expected +++ b/tests/training/features/guided-alignment/transformer.expected @@ -1,10 +0,0 @@ -244.42282104 -256.56842041 -238.89138794 -233.57333374 -223.44998169 -204.23277283 -232.98970032 -204.28886414 -215.29394531 -201.92327881 diff --git a/tests/training/features/mixed-ensembles/s2s_transf.expected b/tests/training/features/mixed-ensembles/s2s_transf.expected index 1aba12f..3f2ff2d 100644 --- a/tests/training/features/mixed-ensembles/s2s_transf.expected +++ b/tests/training/features/mixed-ensembles/s2s_transf.expected @@ -1,5 +1,5 @@ -herrsch@@ Binnengrenzen gli@@ Borrell nische Millennium nun@@ Millennium nun@@ waren gessen@@ tentei@@ 41 typ@@ rig aufweisen ethn@@ Baum@@ nahe Unter@@ Unterzeichnung teure Wohl itäten ausgewogene Pläne persönliche agieren Meeres@@ persönliche agieren Meeres@@ persönliche Schlußfolgerungen Unterschied Fe@@ Unter@@ Somm@@ Pläne persönliche rain wunder@@ extended persönliche rain -herrsch@@ Binnengrenzen Vorsitzes Pläne Kön@@ unterstützte tei@@ whol@@ Millennium wenngleich Kön@@ unterstützte tei@@ whol@@ VAT operator ethn@@ Baum@@ nahe Hague CI@@ COD rain will Kommissarin aush@@ wecken ASEM Konzep@@ Demokratisierungs@@ abzuwarten Voraussetzungen Kommissionspräsidenten unterbrochen COD Napole@@ Tür@@ log@@ Varela log@@ regulations wecken extreme Woh@@ log@@ Varela log@@ Varela -waren gessen@@ nehme gli@@ cut@@ ethn@@ Baum@@ lich ca. EPL@@ ca. cycle tive ely Pazi@@ eben@@ agents ethn@@ agieren ethn@@ agieren Meeres@@ brachte Umweltverträglichkeitsprü@@ oring Genuss agieren ethn@@ agieren Meeres@@ ethn@@ agieren Meeres@@ nü@@ Instan@@ Geflügel@@ ahn finanziell bund@@ fortführen reform@@ Einklang need extreme agents -herrsch@@ Binnengrenzen Binnengrenzen Binnengrenzen Dele@@ Tan@@ Texten Texten Texten Texten Texten Texten Texten wo nonsense thal@@ Sk@@ ethn@@ Baum@@ nahe nützlichen Konfrontation zielen Positionen mes@@ cor@@ Statistiken herrsch@@ Binnengrenzen nonsense reform just genügt erregend menschliche netz erregend menschliche tbewer@@ maj@@ coa log@@ Varela log@@ Varela log@@ Varela log@@ Varela log@@ Varela Budge@@ alitä@@ fit -Statistiken rig ASEM Papier@@ ethn@@ itäts@@ zusätzlich itäts@@ zusätzlich itäts@@ zusätzlich itäts@@ zusätzlich will Capp@@ break ethn@@ agieren ethn@@ agieren ethn@@ agieren ethn@@ agieren + + + + + diff --git a/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh b/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh index 42e231b..539956d 100644 --- a/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh +++ b/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh @@ -13,7 +13,7 @@ set -e rm -rf two_s2s two_s2s*.log mkdir -p two_s2s -options="--no-shuffle --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --disp-freq 20 --after-batches 100" +options="--no-shuffle --clip-norm 0 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --disp-freq 20 --after-batches 60" # Train model A $MRT_MARIAN/marian \ diff --git a/tests/training/features/mixed-ensembles/test_ensemble_of_s2s_and_transformer.sh b/tests/training/features/mixed-ensembles/test_ensemble_of_s2s_and_transformer.sh index a80fe32..fe33de3 100644 --- a/tests/training/features/mixed-ensembles/test_ensemble_of_s2s_and_transformer.sh +++ b/tests/training/features/mixed-ensembles/test_ensemble_of_s2s_and_transformer.sh @@ -7,7 +7,7 @@ set -e rm -rf s2s_transf s2s_transf*.log mkdir -p s2s_transf -options="--no-shuffle --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --disp-freq 20 --after-batches 100" +options="--no-shuffle --clip-norm 0 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --disp-freq 20 --after-batches 60" # Train model A $MRT_MARIAN/marian \ diff --git a/tests/training/features/mixed-ensembles/two_s2s.expected b/tests/training/features/mixed-ensembles/two_s2s.expected index 9cfe9b0..fe80305 100644 --- a/tests/training/features/mixed-ensembles/two_s2s.expected +++ b/tests/training/features/mixed-ensembles/two_s2s.expected @@ -1,5 +1,5 @@ -umgewandelt Davies Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt -staff@@ thalten Blut@@ beschä@@ trade Entlastungsverfahren Strafgerichtshof versu@@ gla@@ Jose@@ trade Entlastungsverfahren Strafgerichtshof versu@@ gla@@ Jose@@ Einfuhren Jose@@ trade Entlastungsverfahren Koordination ω@@ Hilfest@@ versu@@ gla@@ Jose@@ Einfuhren Jose@@ trade Entlastungsverfahren Koordination ω@@ Hilfest@@ versu@@ gla@@ Jose@@ Einfuhren Jose@@ trade Entlastungsverfahren Koordination ω@@ Hilfest@@ versu@@ gla@@ Jose@@ Einfuhren ärz@@ -Hed@@ Warrant Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt -tests Beihilfen General les gerich@@ verwendet Betrieben verei Prognosen les gerich@@ gerich@@ verwendet Betrieben anhaltenden freue day ismen Schle@@ gewünschte Nahrungsmittelhilfe sur Forscher les gerich@@ gerich@@ verwendet Betrieben verei Prognosen les gerich@@ gerich@@ verwendet Betrieben anhaltenden freue day ismen Schle@@ gewünschte Nahrungsmittelhilfe sur Forscher les gerich@@ gerich@@ verwendet Betrieben verei Prognosen les gerich@@ verwendet -Sal@@ Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ +. +. +. +. +. diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected index 57380ae..954a001 100644 --- a/tests/training/features/quantized-model/model_centers.expected +++ b/tests/training/features/quantized-model/model_centers.expected @@ -1,51 +1,49 @@ -Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0. 0.05892393 0.11784786 - 0.17677179] -Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0. 0.05112316 0.10224632 - 0.15336949] -Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048 0. 0.10682048 0.21364096 - 0.32046145] -Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0. 0.05895937 0.11791874 - 0.17687811] -Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849 0. 0.07256849 0.14513698 - 0.21770547] -Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0. 0.06465847 0.12931694 - 0.19397542] -Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0. 0.08443208 0.16886416 - 0.25329626] -Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732 0. 0.05898732 0.11797463 - 0.17696194] -Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961 0. 0.07298961 0.14597921 - 0.21896881] -Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064 0. 0.05108064 0.10216128 - 0.15324192] -Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0. 0.06064001 0.12128001 - 0.18192002] -Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597 -0.10727985 -0. 0.10727985 0.2145597 - 0.32183957] -Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121 0. 0.08485121 0.16970241 - 0.25455362] -Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0. 0.06622399 0.13244799 - 0.19867198] -Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152 0. 0.1204152 0.24083039 - 0.36124557] -Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653 0. 0.05901653 0.11803307 - 0.17704961] -Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925 0. 0.10402925 0.20805849 - 0.31208774] -Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409 0. 0.05895409 0.11790817 - 0.17686225] -Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911 0. 0.07274911 0.14549822 - 0.21824732] -Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812 0. 0.06467812 0.12935624 - 0.19403435] -Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0. 0.08404578 0.16809157 - 0.25213736] -Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715 0. 0.05899715 0.11799429 - 0.17699143] -Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0. 0.07323782 0.14647564 - 0.21971346] -Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094 0. 0.06470094 0.12940188 - 0.19410282] -Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0. 0.08408453 0.16816907 - 0.25225359] +Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819 0. 0.0608819 0.1217638 + 0.1826457] +Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263 -0.05776315 0. 0.05776315 0.1155263 + 0.17328945] +Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326 0. 0.7877326 1.5754652 + 2.3631978] +Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0. 0.1073667 0.2147334 + 0.3221001] +Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637 0. 0.14607637 0.29215273 + 0.43822908] +Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347 0. 0.07605347 0.15210694 + 0.22816041] +Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0. 0.16543898 0.33087796 + 0.49631694] +Tensor decoder_U_nl unique centers: [-0.3815875 -0.25439167 -0.12719584 -0. 0.12719584 0.25439167 + 0.3815875 ] +Tensor decoder_Ux_nl unique centers: [-0.5111215 -0.34074768 -0.17037384 0. 0.17037384 0.34074768 + 0.5111215 ] +Tensor decoder_Wc unique centers: [-0.42579597 -0.283864 -0.141932 -0. 0.141932 0.283864 + 0.42579597] +Tensor decoder_Wcx unique centers: [-0.8375 -0.55833334 -0.27916667 -0. 0.27916667 0.55833334 + 0.8375 ] +Tensor ff_logit_prev_W unique centers: [-70.87341 -23.624472 0. 23.624472 47.248943] +Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292 -82.02646 0. 82.02646 164.05292 + 246.07938] +Tensor ff_logit_ctx_W unique centers: [-240.9685 -160.64568 -80.32284 0. 80.32284 160.64568 + 240.9685 ] +Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637 -70.750916 -35.375458 -0. 35.375458 70.750916 + 106.12637 ] +Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0. 0.0853039 0.1706078 + 0.2559117] +Tensor Wemb unique centers: [-0.39904252 -0.19952126 0. 0.19952126 0.39904252 0.5985638 ] +Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0. 0.10125127 0.20250255 + 0.30375382] +Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0. 0.15289131 0.30578262 + 0.45867392] +Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657 0. 0.0687657 0.1375314 + 0.2062971] +Tensor encoder_Wx unique centers: [-0.3073737 -0.20491579 -0.1024579 0. 0.1024579 0.20491579 + 0.3073737 ] +Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657 0. 0.11439657 0.22879314 + 0.34318972] +Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0. 0.24097262 0.48194525 + 0.72291785] +Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0. 0.07204568 0.14409137 + 0.21613705] +Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477 -0.13297385 -0. 0.13297385 0.2659477 + 0.39892155] Tensor decoder_c_tt unique centers: [] diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected index 17620ec..2d0638e 100644 --- a/tests/training/features/quantized-model/quantized.expected +++ b/tests/training/features/quantized-model/quantized.expected @@ -1,10 +1,10 @@ -225.10929871 -243.58345032 -229.45071411 -224.28813171 -212.65242004 -204.06596375 -197.81690979 -190.08915710 -193.72299194 -195.20808411 +5296.80419922 +14729.64062500 +14570.66210938 +17166.55859375 +16055.21875000 +16277.48437500 +18673.34765625 +16747.37109375 +17298.72070312 +16335.72949219 diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh index 22dd863..8318c24 100644 --- a/tests/training/features/quantized-model/test_quant_centers.sh +++ b/tests/training/features/quantized-model/test_quant_centers.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3 diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh index 8b55697..67019f2 100644 --- a/tests/training/features/quantized-model/test_quantmodel.sh +++ b/tests/training/features/quantized-model/test_quantmodel.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_log.sh b/tests/training/features/quantized-model/test_quantmodel_log.sh index f79809b..924eb4b 100644 --- a/tests/training/features/quantized-model/test_quantmodel_log.sh +++ b/tests/training/features/quantized-model/test_quantmodel_log.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 4 --quantize-log-based --quantize-optimization-steps 3 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh index de14ffb..8dee56b 100644 --- a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh +++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh @@ -16,14 +16,14 @@ mkdir -p train # training with quantized bias is tricky, so we start by training a normal model first before finetuning it to the quantized space. $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \ --log $PREFIX.log # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh index 510c339..17a72d8 100644 --- a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh +++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/update.sh b/tests/training/features/quantized-model/update.sh new file mode 100755 index 0000000..04be645 --- /dev/null +++ b/tests/training/features/quantized-model/update.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +cp model_centers.out model_centers.expected +cp test-center.out test-center.expected +cp quantized-log4bit.out quantized-log4bit.expected +cp quantized.out quantized.expected +cp quantized-with-bias.out quantized-with-bias.expected +cp quantized-opt.out quantized-opt.expected diff --git a/tests/training/features/right-left/rnn.expected b/tests/training/features/right-left/rnn.expected index c683efb..ff302f0 100644 --- a/tests/training/features/right-left/rnn.expected +++ b/tests/training/features/right-left/rnn.expected @@ -1,10 +1,10 @@ -227.26374817 -251.25552368 -244.43490601 -247.96240234 -242.51679993 -239.25460815 -236.51896667 -231.50540161 -238.35562134 -242.17578125 +226.89152527 +249.98703003 +242.43225098 +245.21345520 +239.08744812 +234.52084351 +230.54391479 +224.25790405 +228.97502136 +230.70504761 diff --git a/tests/training/features/right-left/test_right_left_rnn.sh b/tests/training/features/right-left/test_right_left_rnn.sh index 245125e..ae3976b 100644 --- a/tests/training/features/right-left/test_right_left_rnn.sh +++ b/tests/training/features/right-left/test_right_left_rnn.sh @@ -14,7 +14,7 @@ mkdir -p rnn # Run marian command $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ + --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ -m rnn/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --after-batches 100 --disp-freq 10 \ --right-left --log rnn.log diff --git a/tests/training/features/right-left/test_right_left_transformer.sh b/tests/training/features/right-left/test_right_left_transformer.sh index 8a40bdd..2939c3c 100644 --- a/tests/training/features/right-left/test_right_left_transformer.sh +++ b/tests/training/features/right-left/test_right_left_transformer.sh @@ -14,7 +14,7 @@ mkdir -p transformer # Run marian command $MRT_MARIAN/marian --type transformer \ - --no-shuffle --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ + --no-shuffle --clip-norm 0 --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --cost-type ce-mean \ -m transformer/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --after-batches 100 --disp-freq 10 \ --right-left --log transformer.log diff --git a/tests/training/features/right-left/transformer.expected b/tests/training/features/right-left/transformer.expected index a63420f..0fc2662 100644 --- a/tests/training/features/right-left/transformer.expected +++ b/tests/training/features/right-left/transformer.expected @@ -1,10 +1,10 @@ -237.99105835 -263.23455811 -255.45816040 -259.72146606 -254.48379517 -250.45918274 -248.04586792 -242.33943176 -249.78984070 -253.30130005 +233.16964722 +249.00422668 +235.84651184 +234.45532227 +225.37080383 +216.76550293 +210.73200989 +202.66915894 +207.25146484 +209.57803345 diff --git a/tests/training/models/lm/lm-transformer.expected b/tests/training/models/lm/lm-transformer.expected index a5e9556..593d94e 100644 --- a/tests/training/models/lm/lm-transformer.expected +++ b/tests/training/models/lm/lm-transformer.expected @@ -1,5 +1,5 @@ -405.95352173 -277.85601807 -198.49377441 -135.86233521 -74.85224152 +406.14587402 +279.24975586 +201.01249695 +139.34625244 +78.13222504 diff --git a/tests/training/models/lm/lm-transformer.scores.expected b/tests/training/models/lm/lm-transformer.scores.expected index d40b653..ed3599e 100644 --- a/tests/training/models/lm/lm-transformer.scores.expected +++ b/tests/training/models/lm/lm-transformer.scores.expected @@ -1,10 +1,10 @@ --90.117882 --179.877197 --81.371750 --204.165802 --636.969482 --131.934113 --359.475616 --56.732944 --68.373947 --100.774132 +-94.390747 +-187.140671 +-85.050323 +-211.180054 +-645.849121 +-136.817657 +-366.460815 +-57.487789 +-72.613792 +-102.306747 diff --git a/tests/training/models/lm/lm.expected b/tests/training/models/lm/lm.expected index c6b5c74..dc0ae65 100644 --- a/tests/training/models/lm/lm.expected +++ b/tests/training/models/lm/lm.expected @@ -1,5 +1,5 @@ -410.02645874 -306.52648926 -233.58132935 -167.19117737 -91.86805725 +410.03164673 +306.58309937 +233.76004028 +167.80232239 +92.23210144 diff --git a/tests/training/models/lm/lm.scores.expected b/tests/training/models/lm/lm.scores.expected index 9ea8ae4..b42cd9b 100644 --- a/tests/training/models/lm/lm.scores.expected +++ b/tests/training/models/lm/lm.scores.expected @@ -1,10 +1,10 @@ --114.927658 --208.074463 --102.252083 --244.505508 --677.256836 --154.783279 --411.580017 --58.307816 --89.968994 --111.055710 +-114.125137 +-206.581238 +-101.570534 +-242.844177 +-673.484863 +-153.583893 +-409.248169 +-57.871357 +-89.267410 +-110.574005 diff --git a/tests/training/models/lm/test_lm-transformer.sh b/tests/training/models/lm/test_lm-transformer.sh index 476c2e2..aa2188c 100644 --- a/tests/training/models/lm/test_lm-transformer.sh +++ b/tests/training/models/lm/test_lm-transformer.sh @@ -14,7 +14,7 @@ rm -rf lm-transformer lm-transformer.log mkdir -p lm-transformer $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle \ + --seed 1111 --no-shuffle --clip-norm 0 \ --type lm-transformer --dim-emb 128 --dim-rnn 256 --cost-type ce-mean \ -m lm-transformer/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.en -v vocab.en.yml \ --disp-freq 20 --after-batches 100 \ diff --git a/tests/training/models/lm/test_lm.sh b/tests/training/models/lm/test_lm.sh index 91f94d4..f55e860 100644 --- a/tests/training/models/lm/test_lm.sh +++ b/tests/training/models/lm/test_lm.sh @@ -14,7 +14,7 @@ rm -rf lm lm.log mkdir -p lm $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle \ + --seed 1111 --no-shuffle --clip-norm 0 \ --type lm --dim-emb 128 --dim-rnn 256 --cost-type ce-mean \ -m lm/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.en -v vocab.en.yml \ --disp-freq 20 --after-batches 100 \ diff --git a/tests/training/models/multi-source/multi-s2s.expected b/tests/training/models/multi-source/multi-s2s.expected index 61608c4..2258c02 100644 --- a/tests/training/models/multi-source/multi-s2s.expected +++ b/tests/training/models/multi-source/multi-s2s.expected @@ -1,5 +1,5 @@ -388.14068604 -279.26577759 -198.44155884 -137.92988586 -75.97171021 +388.15350342 +278.90570068 +197.63183594 +137.78120422 +76.32478333 diff --git a/tests/training/models/multi-source/multi-transformer.expected b/tests/training/models/multi-source/multi-transformer.expected index 35c5e4b..538ed15 100644 --- a/tests/training/models/multi-source/multi-transformer.expected +++ b/tests/training/models/multi-source/multi-transformer.expected @@ -1,5 +1,5 @@ -382.23056030 -264.14666748 -193.33871460 -133.58370972 -71.43719482 +382.69680786 +265.52267456 +196.04469299 +138.10417175 +75.06012726 diff --git a/tests/training/models/multi-source/test_multi-s2s.sh b/tests/training/models/multi-source/test_multi-s2s.sh index a1ceef1..52c3ded 100644 --- a/tests/training/models/multi-source/test_multi-s2s.sh +++ b/tests/training/models/multi-source/test_multi-s2s.sh @@ -14,7 +14,7 @@ rm -rf multi-s2s multi-s2s.log mkdir -p multi-s2s $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle \ + --seed 1111 --no-shuffle --clip-norm 0 \ --type multi-s2s --dim-emb 128 --dim-rnn 256 --cost-type ce-mean \ -m multi-s2s/model.npz -t train.bpe.{en,xx,de} -v vocab.en.yml vocab.xx.yml vocab.de.yml \ --disp-freq 20 --after-batches 100 \ diff --git a/tests/training/models/multi-source/test_multi-transformer.sh b/tests/training/models/multi-source/test_multi-transformer.sh index 425ebdc..e41d918 100644 --- a/tests/training/models/multi-source/test_multi-transformer.sh +++ b/tests/training/models/multi-source/test_multi-transformer.sh @@ -14,7 +14,7 @@ rm -rf multi-transformer multi-transformer.log mkdir -p multi-transformer $MRT_MARIAN/marian \ - --seed 1111 --no-shuffle \ + --seed 1111 --no-shuffle --clip-norm 0 \ --type multi-transformer --dim-emb 128 --dim-rnn 256 --cost-type ce-mean \ -m multi-transformer/model.npz -t train.bpe.{en,xx,de} -v vocab.en.yml vocab.xx.yml vocab.de.yml \ --disp-freq 20 --after-batches 100 \ diff --git a/tests/training/models/nematus/encdec_depth.expected b/tests/training/models/nematus/encdec_depth.expected index af2a74a..6cf8ebd 100644 --- a/tests/training/models/nematus/encdec_depth.expected +++ b/tests/training/models/nematus/encdec_depth.expected @@ -1,5 +1,5 @@ -489.13665771 -462.08361816 -439.01745605 -420.90402222 -404.19827271 +488.88616943 +461.44476318 +437.74578857 +419.76626587 +403.67724609 diff --git a/tests/training/models/nematus/test_encdec_depth.sh b/tests/training/models/nematus/test_encdec_depth.sh index ed5276e..0de9026 100644 --- a/tests/training/models/nematus/test_encdec_depth.sh +++ b/tests/training/models/nematus/test_encdec_depth.sh @@ -17,7 +17,7 @@ $MRT_MARIAN/marian \ --type nematus --enc-cell gru-nematus --dec-cell gru-nematus \ --enc-depth 4 --enc-cell-depth 4 --enc-type bidirectional --dec-depth 4 --dec-cell-base-depth 4 --dec-cell-high-depth 1 \ --layer-normalization \ - --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 128 --cost-type ce-mean \ + --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 64 --dim-rnn 128 --cost-type ce-mean \ -m encdec_depth/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} -v vocab.en.yml vocab.de.yml \ --log encdec_depth.log --disp-freq 2 --after-batches 10 @@ -25,7 +25,7 @@ test -e encdec_depth/model.npz test -e encdec_depth/model.npz.yml cat encdec_depth.log | $MRT_TOOLS/extract-costs.sh > encdec_depth.out -$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 3 -o encdec_depth.diff +$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 3.0 -o encdec_depth.diff # Exit with success code exit 0 diff --git a/tests/training/models/nematus/test_wmt17_model.sh b/tests/training/models/nematus/test_wmt17_model.sh index d43ff9c..814630a 100644 --- a/tests/training/models/nematus/test_wmt17_model.sh +++ b/tests/training/models/nematus/test_wmt17_model.sh @@ -11,7 +11,7 @@ $MRT_MARIAN/marian \ --type nematus --enc-cell gru-nematus --dec-cell gru-nematus \ --enc-depth 1 --enc-cell-depth 4 --enc-type bidirectional --dec-depth 1 --dec-cell-base-depth 8 --dec-cell-high-depth 1 \ --layer-normalization \ - --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 128 --cost-type ce-mean \ + --no-shuffle --clip-norm 0 --seed 1111 --dim-emb 64 --dim-rnn 128 --cost-type ce-mean \ -m wmt17/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} -v vocab.en.yml vocab.de.yml \ --log wmt17.log --disp-freq 2 --after-batches 10 @@ -19,7 +19,7 @@ test -e wmt17/model.npz test -e wmt17/model.npz.yml cat wmt17.log | $MRT_TOOLS/extract-costs.sh > wmt17.out -$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 2 -o wmt17.diff +$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.9 -o wmt17.diff # Exit with success code exit 0 diff --git a/tests/training/models/nematus/wmt17.expected b/tests/training/models/nematus/wmt17.expected index c7ffbcd..0a14b91 100644 --- a/tests/training/models/nematus/wmt17.expected +++ b/tests/training/models/nematus/wmt17.expected @@ -1,5 +1,5 @@ -490.30654907 -466.26824951 -442.79544067 -426.92376709 -411.47766113 +490.18170166 +466.03765869 +442.34454346 +426.33612061 +411.51934814 diff --git a/tests/training/models/transformer/test_transformer.sh b/tests/training/models/transformer/test_transformer.sh index b1ad881..41870a4 100644 --- a/tests/training/models/transformer/test_transformer.sh +++ b/tests/training/models/transformer/test_transformer.sh @@ -7,7 +7,7 @@ set -e rm -rf transformer transformer*.log mkdir -p transformer -opts="--no-shuffle --seed 1111 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --dim-emb 64 --dim-rnn 128 --cost-type ce-mean" +opts="--no-shuffle --clip-norm 0 --seed 1111 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --dim-emb 64 --dim-rnn 128 --cost-type ce-mean" $MRT_MARIAN/marian \ --type transformer -m transformer/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ diff --git a/tests/training/models/transformer/transformer.expected b/tests/training/models/transformer/transformer.expected index 50d85ee..28778b1 100644 --- a/tests/training/models/transformer/transformer.expected +++ b/tests/training/models/transformer/transformer.expected @@ -1,10 +1,10 @@ -236.04219055 -260.96929932 -254.12194824 -257.51263428 -253.00631714 -248.38674927 -245.93569946 -240.78047180 -248.01782227 -252.18527222 +226.79606628 +235.31921387 +219.17929077 +216.75386047 +210.01785278 +203.72079468 +198.40823364 +190.92230225 +195.92117310 +199.06428528 diff --git a/tests/training/multi-gpu/sync_sgd_1gpu.expected b/tests/training/multi-gpu/sync_sgd_1gpu.expected index 73c9503..b826610 100644 --- a/tests/training/multi-gpu/sync_sgd_1gpu.expected +++ b/tests/training/multi-gpu/sync_sgd_1gpu.expected @@ -1,4 +1,4 @@ -236.64883423 -197.38874817 -198.74374390 -183.10134888 +230.86734009 +176.34066772 +178.55038452 +161.01515198 diff --git a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected index 0600ba2..b826610 100644 --- a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected +++ b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected @@ -1,4 +1,4 @@ -236.64883423 -197.38874817 -198.74374390 -183.10137939 +230.86734009 +176.34066772 +178.55038452 +161.01515198 diff --git a/tests/training/multi-gpu/test_sync_sgd_1gpu.sh b/tests/training/multi-gpu/test_sync_sgd_1gpu.sh index 2b9c72b..a1a4453 100644 --- a/tests/training/multi-gpu/test_sync_sgd_1gpu.sh +++ b/tests/training/multi-gpu/test_sync_sgd_1gpu.sh @@ -8,8 +8,8 @@ rm -rf sync_sgd_1gpu sync_sgd_1gpu.log mkdir -p sync_sgd_1gpu $MRT_MARIAN/marian \ - --no-shuffle --seed 888 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none \ - --dim-rnn 64 --dim-emb 32 --learn-rate 0.1 \ + --no-shuffle --clip-norm 0 --seed 888 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none \ + --dim-rnn 64 --dim-emb 32 --learn-rate 0.02 \ --devices 0 --sync-sgd --optimizer sgd --cost-type ce-mean \ -m sync_sgd_1gpu/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 5 --save-freq 10 --after-batches 20 \ @@ -19,7 +19,7 @@ test -e sync_sgd_1gpu/model.npz test -e sync_sgd_1gpu.log cat sync_sgd_1gpu.log | $MRT_TOOLS/extract-costs.sh > sync_sgd_1gpu.out -$MRT_TOOLS/diff-nums.py sync_sgd_1gpu.out sync_sgd_1gpu.expected -o sync_sgd_1gpu.diff +$MRT_TOOLS/diff-nums.py -p 0.02 sync_sgd_1gpu.out sync_sgd_1gpu.expected -o sync_sgd_1gpu.diff # Exit with success code exit 0 diff --git a/tests/training/multi-gpu/test_sync_sgd_1gpu_expsmooth.sh b/tests/training/multi-gpu/test_sync_sgd_1gpu_expsmooth.sh index d1d77d3..586a57a 100644 --- a/tests/training/multi-gpu/test_sync_sgd_1gpu_expsmooth.sh +++ b/tests/training/multi-gpu/test_sync_sgd_1gpu_expsmooth.sh @@ -8,8 +8,8 @@ rm -rf sync_sgd_1gpu_expsmooth sync_sgd_1gpu_expsmooth.log mkdir -p sync_sgd_1gpu_expsmooth $MRT_MARIAN/marian \ - --no-shuffle --seed 888 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none \ - --dim-rnn 64 --dim-emb 32 --learn-rate 0.1 \ + --no-shuffle --clip-norm 0 --seed 888 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none \ + --dim-rnn 64 --dim-emb 32 --learn-rate 0.02 \ --devices 0 --sync-sgd --optimizer sgd --exponential-smoothing --cost-type ce-mean \ -m sync_sgd_1gpu_expsmooth/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 5 --save-freq 10 --after-batches 20 \ @@ -19,7 +19,7 @@ test -e sync_sgd_1gpu_expsmooth/model.npz test -e sync_sgd_1gpu_expsmooth.log cat sync_sgd_1gpu_expsmooth.log | $MRT_TOOLS/extract-costs.sh > sync_sgd_1gpu_expsmooth.out -$MRT_TOOLS/diff-nums.py sync_sgd_1gpu_expsmooth.out sync_sgd_1gpu_expsmooth.expected -o sync_sgd_1gpu_expsmooth.diff +$MRT_TOOLS/diff-nums.py -p 0.02 sync_sgd_1gpu_expsmooth.out sync_sgd_1gpu_expsmooth.expected -o sync_sgd_1gpu_expsmooth.diff # Exit with success code exit 0 diff --git a/tests/training/restarting/sgd_2e.expected b/tests/training/restarting/sgd_2e.expected index 2a801f2..15bbe18 100644 --- a/tests/training/restarting/sgd_2e.expected +++ b/tests/training/restarting/sgd_2e.expected @@ -1,16 +1,16 @@ Ep. 1 : Up. 4 : Sen. 128 : Cost 257.99652100 Ep. 1 : Up. 8 : Sen. 256 : Cost 267.93783569 -Ep. 1 : Up. 12 : Sen. 384 : Cost 243.39039612 -Ep. 1 : Up. 16 : Sen. 512 : Cost 235.87208557 +Ep. 1 : Up. 12 : Sen. 384 : Cost 243.39041138 +Ep. 1 : Up. 16 : Sen. 512 : Cost 235.87210083 Ep. 1 : Up. 20 : Sen. 640 : Cost 204.79017639 -Ep. 1 : Up. 24 : Sen. 768 : Cost 240.11624146 -Ep. 1 : Up. 28 : Sen. 896 : Cost 208.47099304 -Ep. 1 : Up. 32 : Sen. 1,024 : Cost 199.36221313 -Ep. 2 : Up. 36 : Sen. 128 : Cost 213.58728027 -Ep. 2 : Up. 40 : Sen. 256 : Cost 220.51084900 -Ep. 2 : Up. 44 : Sen. 384 : Cost 199.10847473 -Ep. 2 : Up. 48 : Sen. 512 : Cost 194.56945801 -Ep. 2 : Up. 52 : Sen. 640 : Cost 171.77461243 -Ep. 2 : Up. 56 : Sen. 768 : Cost 208.75405884 -Ep. 2 : Up. 60 : Sen. 896 : Cost 186.17434692 -Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.63316345 +Ep. 1 : Up. 24 : Sen. 768 : Cost 240.11631775 +Ep. 1 : Up. 28 : Sen. 896 : Cost 208.47109985 +Ep. 1 : Up. 32 : Sen. 1,024 : Cost 199.36233521 +Ep. 2 : Up. 36 : Sen. 128 : Cost 213.58744812 +Ep. 2 : Up. 40 : Sen. 256 : Cost 220.51107788 +Ep. 2 : Up. 44 : Sen. 384 : Cost 199.10870361 +Ep. 2 : Up. 48 : Sen. 512 : Cost 194.56954956 +Ep. 2 : Up. 52 : Sen. 640 : Cost 171.77453613 +Ep. 2 : Up. 56 : Sen. 768 : Cost 208.75399780 +Ep. 2 : Up. 60 : Sen. 896 : Cost 186.17416382 +Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.63301086 diff --git a/tests/training/restarting/sgd_sync_2e.expected b/tests/training/restarting/sgd_sync_2e.expected index 07061db..a83990d 100644 --- a/tests/training/restarting/sgd_sync_2e.expected +++ b/tests/training/restarting/sgd_sync_2e.expected @@ -7,10 +7,10 @@ Ep. 1 : Up. 24 : Sen. 768 : Cost 240.11631775 Ep. 1 : Up. 28 : Sen. 896 : Cost 208.47109985 Ep. 1 : Up. 32 : Sen. 1,024 : Cost 199.36233521 Ep. 2 : Up. 36 : Sen. 128 : Cost 213.58744812 -Ep. 2 : Up. 40 : Sen. 256 : Cost 220.51107788 +Ep. 2 : Up. 40 : Sen. 256 : Cost 220.51104736 Ep. 2 : Up. 44 : Sen. 384 : Cost 199.10870361 Ep. 2 : Up. 48 : Sen. 512 : Cost 194.56954956 Ep. 2 : Up. 52 : Sen. 640 : Cost 171.77453613 -Ep. 2 : Up. 56 : Sen. 768 : Cost 208.75396729 +Ep. 2 : Up. 56 : Sen. 768 : Cost 208.75399780 Ep. 2 : Up. 60 : Sen. 896 : Cost 186.17416382 Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.63301086 diff --git a/tests/training/restarting/test_sgd_for_two_epochs.sh b/tests/training/restarting/test_sgd_for_two_epochs.sh index ad92b5f..11bf76e 100644 --- a/tests/training/restarting/test_sgd_for_two_epochs.sh +++ b/tests/training/restarting/test_sgd_for_two_epochs.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Restaring training after the 1st epoch (async) +# AUTHOR: snukky +# TAGS: optimizer clip-norm +##################################################################### + # Exit on error set -e @@ -7,12 +13,13 @@ set -e rm -rf sgd_2e sgd_1st_epoch.log sgd_2nd_epoch.log mkdir -p sgd_2e -extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --optimizer sgd" +extra_opts="--no-shuffle --clip-norm 1 --seed 1111 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --optimizer sgd" # Added because default options has changes extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" # Uncomment to prepare the expected output +#rm -f sgd_two_epochs.log #$MRT_MARIAN/marian \ #-m sgd_2e/model_2e.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ #--disp-freq 4 --save-freq 32 --after-epochs 2 -l 0.1 $extra_opts \ diff --git a/tests/training/restarting/test_sgd_for_two_epochs_sync.sh b/tests/training/restarting/test_sgd_for_two_epochs_sync.sh index d3ee295..8615e80 100644 --- a/tests/training/restarting/test_sgd_for_two_epochs_sync.sh +++ b/tests/training/restarting/test_sgd_for_two_epochs_sync.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Restaring training after the 1st epoch (sync-sgd) +# AUTHOR: snukky +# TAGS: optimizer clip-norm +##################################################################### + # Exit on error set -e @@ -7,12 +13,13 @@ set -e rm -rf sgd_sync_2e sgd_sync_*_epoch.log mkdir -p sgd_sync_2e -extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --optimizer sgd --sync-sgd" +extra_opts="--no-shuffle --clip-norm 1 --seed 1111 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --optimizer sgd --sync-sgd" # Added because default options has changes extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" # Uncomment to prepare the expected output +#rm -f sgd_sync_two_epochs.log #$MRT_MARIAN/marian \ #-m sgd_sync_2e/model_2e.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ #--disp-freq 4 --save-freq 32 --after-epochs 2 -l 0.1 $extra_opts \ diff --git a/tests/training/restoring/corpus/finetune.expected b/tests/training/restoring/corpus/finetune.expected index 6f4bc95..21f1847 100644 --- a/tests/training/restoring/corpus/finetune.expected +++ b/tests/training/restoring/corpus/finetune.expected @@ -1,15 +1,15 @@ -Ep. 1 : Up. 4 : Sen. 256 : Cost 239.27255249 -Ep. 1 : Up. 8 : Sen. 512 : Cost 246.85655212 -Ep. 1 : Up. 12 : Sen. 768 : Cost 230.16513062 -Ep. 1 : Up. 16 : Sen. 1,024 : Cost 251.03186035 -Ep. 1 : Up. 20 : Sen. 1,280 : Cost 249.74163818 -Ep. 1 : Up. 24 : Sen. 1,536 : Cost 239.31179810 -Ep. 1 : Up. 28 : Sen. 1,792 : Cost 231.93222046 -Ep. 1 : Up. 32 : Sen. 128 : Cost 255.42749023 -Ep. 1 : Up. 36 : Sen. 384 : Cost 250.27011108 -Ep. 1 : Up. 40 : Sen. 640 : Cost 249.66784668 -Ep. 1 : Up. 44 : Sen. 896 : Cost 254.14111328 -Ep. 2 : Up. 48 : Sen. 128 : Cost 237.40222168 -Ep. 2 : Up. 52 : Sen. 384 : Cost 255.97949219 -Ep. 2 : Up. 56 : Sen. 640 : Cost 252.84860229 -Ep. 2 : Up. 60 : Sen. 896 : Cost 244.12496948 +Ep. 1 : Up. 4 : Sen. 256 : Cost 238.82701111 +Ep. 1 : Up. 8 : Sen. 512 : Cost 245.15895081 +Ep. 1 : Up. 12 : Sen. 768 : Cost 227.24861145 +Ep. 1 : Up. 16 : Sen. 1,024 : Cost 246.25918579 +Ep. 1 : Up. 20 : Sen. 1,280 : Cost 243.25015259 +Ep. 1 : Up. 24 : Sen. 1,536 : Cost 230.48197937 +Ep. 1 : Up. 28 : Sen. 1,792 : Cost 219.80914307 +Ep. 1 : Up. 32 : Sen. 128 : Cost 236.07504272 +Ep. 1 : Up. 36 : Sen. 384 : Cost 225.42373657 +Ep. 1 : Up. 40 : Sen. 640 : Cost 218.38552856 +Ep. 1 : Up. 44 : Sen. 896 : Cost 217.53744507 +Ep. 2 : Up. 48 : Sen. 128 : Cost 201.09486389 +Ep. 2 : Up. 52 : Sen. 384 : Cost 215.17204285 +Ep. 2 : Up. 56 : Sen. 640 : Cost 211.10237122 +Ep. 2 : Up. 60 : Sen. 896 : Cost 200.33345032 diff --git a/tests/training/restoring/corpus/test_finetune.sh b/tests/training/restoring/corpus/test_finetune.sh index 1e99645..78099d4 100644 --- a/tests/training/restoring/corpus/test_finetune.sh +++ b/tests/training/restoring/corpus/test_finetune.sh @@ -17,8 +17,7 @@ test -e vocab.de.yml test -e vocab.en.yml extra_opts="--seed 2222 --maxi-batch 1 --maxi-batch-sort none --mini-batch 64 --optimizer sgd --dim-emb 128 --dim-rnn 256 --disp-freq 4" -# Added because default options has changes -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" # Train a model on a training corpus diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth.sh b/tests/training/restoring/exp-smoothing/test_expsmooth.sh index f048018..e7c7b6d 100644 --- a/tests/training/restoring/exp-smoothing/test_expsmooth.sh +++ b/tests/training/restoring/exp-smoothing/test_expsmooth.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass +# AUTHOR: snukky +# TAGS: exp-smooth clip-norm +##################################################################### + # Exit on error set -e @@ -8,7 +14,7 @@ rm -rf expsmooth expsmooth_*.log mkdir -p expsmooth -opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none" +opts="--no-shuffle --clip-norm 1 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none" opts="$opts --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5" opts="$opts --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32" # Added because default options has changes diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh index 1080546..831ebec 100644 --- a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh +++ b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass +# AUTHOR: snukky +# TAGS: exp-smooth clip-norm +##################################################################### + # Exit on error set -e @@ -8,7 +14,7 @@ rm -rf expsmooth_s2s expsmooth_s2s_*.log mkdir -p expsmooth_s2s -opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none" +opts="--no-shuffle --clip-norm 1 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --sync-sgd" opts="$opts --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5" opts="$opts --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --type s2s" # Added because default options has changes diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh index 3e26acf..eafc1cc 100644 --- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh +++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass on 2 GPUs +# AUTHOR: snukky +# TAGS: exp-smooth clip-norm multigpu +##################################################################### + # Exit on error set -e diff --git a/tests/training/restoring/multi-gpu/test_adam_sync.sh b/tests/training/restoring/multi-gpu/test_adam_sync.sh index 84079d3..daf2524 100644 --- a/tests/training/restoring/multi-gpu/test_adam_sync.sh +++ b/tests/training/restoring/multi-gpu/test_adam_sync.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with Adam on 2 GPUs (sync-sgd) +# AUTHOR: snukky +# TAGS: optimizer adam multigpu +##################################################################### + # Exit on error set -e diff --git a/tests/training/restoring/multi-gpu/test_async.sh b/tests/training/restoring/multi-gpu/test_async.sh index 8e22f30..ba13ec6 100644 --- a/tests/training/restoring/multi-gpu/test_async.sh +++ b/tests/training/restoring/multi-gpu/test_async.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with SGD on 2 GPUs (async) +# AUTHOR: snukky +# TAGS: optimizer multigpu clip-norm +##################################################################### + # Exit on error set -e @@ -12,7 +18,7 @@ fi rm -rf async async_*.log async.*out async.*expected mkdir -p async -opts="--no-shuffle --seed 777 --mini-batch 1 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.1 --devices 0 1" +opts="--no-shuffle --clip-norm 0 --seed 777 --mini-batch 1 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.005 --devices 0 1" # Added because default options has changes opts="$opts --cost-type ce-mean --disp-label-counts false" diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh index ff10d23..57fc76d 100644 --- a/tests/training/restoring/multi-gpu/test_sync.sh +++ b/tests/training/restoring/multi-gpu/test_sync.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with SGD on 2 GPUs (sync-sgd) +# AUTHOR: snukky +# TAGS: optimizer multigpu clip-norm +##################################################################### + # Exit on error set -e @@ -12,7 +18,7 @@ fi rm -rf sync sync_*.log mkdir -p sync -opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.1 --devices 0 1 --sync-sgd" +opts="--no-shuffle --clip-norm 0 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.01 --devices 0 1 --sync-sgd" # Added because default options has changes opts="$opts --cost-type ce-mean --disp-label-counts false" @@ -49,7 +55,7 @@ test -e sync_2.log cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out -$MRT_TOOLS/diff-nums.py -p 0.3 sync.out sync.expected -o sync.diff +$MRT_TOOLS/diff-nums.py -p 0.1 sync.out sync.expected -o sync.diff # Exit with success code exit 0 diff --git a/tests/training/restoring/optimizer/adagrad.costs.expected b/tests/training/restoring/optimizer/adagrad.costs.expected index 7b4f7e1..533d10c 100644 --- a/tests/training/restoring/optimizer/adagrad.costs.expected +++ b/tests/training/restoring/optimizer/adagrad.costs.expected @@ -1,10 +1,10 @@ -238.52751160 -245.27938843 -239.83557129 -232.83401489 -238.87149048 -253.74154663 -255.69897461 -243.06086731 -244.85818481 -235.55209351 +238.52250671 +245.26730347 +239.82205200 +232.81472778 +238.84121704 +253.69161987 +255.61422729 +242.94416809 +244.69099426 +235.35519409 diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected index 30a932c..a6c90a5 100644 --- a/tests/training/restoring/optimizer/adagrad.gt.expected +++ b/tests/training/restoring/optimizer/adagrad.gt.expected @@ -1,2 +1,2 @@ -[[ 4.38133684e-05 1.40065049e-06 3.63037943e-05 ..., 1.23982169e-02 - 3.66997421e-02 4.11312692e-02]] +[[8.0574207e+00 1.5418689e-01 4.3262744e+00 ... 4.0905408e+03 + 1.1550205e+04 1.3359570e+04]] diff --git a/tests/training/restoring/optimizer/adam.costs.expected b/tests/training/restoring/optimizer/adam.costs.expected index a6b5f9a..565b1ea 100644 --- a/tests/training/restoring/optimizer/adam.costs.expected +++ b/tests/training/restoring/optimizer/adam.costs.expected @@ -1,10 +1,10 @@ -238.40983582 -244.61091614 -238.22981262 -229.24475098 -230.14970398 -234.50399780 -228.12467957 -210.38107300 -206.17379761 -196.83959961 +238.40853882 +244.59863281 +238.15905762 +228.80813599 +227.96830750 +231.00505066 +225.24502563 +207.64001465 +203.54002380 +194.72296143 diff --git a/tests/training/restoring/optimizer/adam.mt.expected b/tests/training/restoring/optimizer/adam.mt.expected index 3a2de9b..67c9756 100644 --- a/tests/training/restoring/optimizer/adam.mt.expected +++ b/tests/training/restoring/optimizer/adam.mt.expected @@ -1,2 +1,2 @@ -[[ 8.0254285e-06 -5.1497386e-07 3.8298724e-05 ... 1.5516396e-03 - 1.5692838e-03 2.0285486e-03]] +[[-0.00667148 0.00525377 0.0564099 ... 1.5877182 1.6200635 + 2.2804906 ]] diff --git a/tests/training/restoring/optimizer/adam.vt.expected b/tests/training/restoring/optimizer/adam.vt.expected index 6fbbfe5..c54930d 100644 --- a/tests/training/restoring/optimizer/adam.vt.expected +++ b/tests/training/restoring/optimizer/adam.vt.expected @@ -1,2 +1,2 @@ -[[ 9.29374124e-08 4.41528991e-09 3.45339437e-08 ..., 2.22943163e-05 - 2.69053471e-05 5.34869505e-05]] +[[8.1617765e-02 3.0912522e-03 1.2053944e-02 ... 4.4662014e+01 + 3.7031158e+01 7.2262390e+01]] diff --git a/tests/training/restoring/optimizer/adam_load.expected b/tests/training/restoring/optimizer/adam_load.expected index bf5fef4..5dd5164 100644 --- a/tests/training/restoring/optimizer/adam_load.expected +++ b/tests/training/restoring/optimizer/adam_load.expected @@ -1,6 +1,6 @@ Ep. 1 : Up. 1 : Sen. 2 : Cost 223.64685059 -Ep. 1 : Up. 2 : Sen. 4 : Cost 258.80792236 -Ep. 1 : Up. 3 : Sen. 6 : Cost 255.67260742 -Ep. 1 : Up. 4 : Sen. 8 : Cost 346.67749023 -Ep. 1 : Up. 5 : Sen. 10 : Cost 278.72695923 -Ep. 1 : Up. 6 : Sen. 12 : Cost 178.23016357 +Ep. 1 : Up. 2 : Sen. 4 : Cost 258.78131104 +Ep. 1 : Up. 3 : Sen. 6 : Cost 256.86120605 +Ep. 1 : Up. 4 : Sen. 8 : Cost 365.52239990 +Ep. 1 : Up. 5 : Sen. 10 : Cost 281.86376953 +Ep. 1 : Up. 6 : Sen. 12 : Cost 203.98873901 diff --git a/tests/training/restoring/optimizer/adam_sync.costs.expected b/tests/training/restoring/optimizer/adam_sync.costs.expected index d390e92..29a7e61 100644 --- a/tests/training/restoring/optimizer/adam_sync.costs.expected +++ b/tests/training/restoring/optimizer/adam_sync.costs.expected @@ -1,10 +1,10 @@ -7245.93652344 -7990.90771484 -7741.82177734 -7778.60302734 -7445.29589844 -7015.16699219 -6661.45312500 -6346.10888672 -6402.09814453 -6369.64550781 +7245.93505859 +7990.90478516 +7741.81542969 +7778.61621094 +7445.38574219 +7015.21337891 +6661.38769531 +6346.22802734 +6402.10009766 +6369.72216797 diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh index 8fca356..1372071 100644 --- a/tests/training/restoring/optimizer/test_adagrad_params.sh +++ b/tests/training/restoring/optimizer/test_adagrad_params.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with Adagrad optimizer +# AUTHOR: snukky +# TAGS: optimizer adagrad +##################################################################### + # Exit on error set -e @@ -8,7 +14,7 @@ rm -rf adagrad adagrad*.log mkdir -p adagrad $MRT_MARIAN/marian \ - --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \ + --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \ -m adagrad/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 --save-freq 60 --optimizer adagrad --cost-type ce-mean \ --log adagrad.log @@ -24,7 +30,7 @@ python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimi $MRT_TOOLS/diff.sh adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out -$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff +$MRT_TOOLS/diff-nums.py --numpy -p 0.009 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff # Exit with success code exit 0 diff --git a/tests/training/restoring/optimizer/test_adam_params.sh b/tests/training/restoring/optimizer/test_adam_params.sh index e0bd76f..4310850 100644 --- a/tests/training/restoring/optimizer/test_adam_params.sh +++ b/tests/training/restoring/optimizer/test_adam_params.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with Adam +# AUTHOR: snukky +# TAGS: optimizer adam +##################################################################### + # Exit on error set -e @@ -8,7 +14,7 @@ rm -rf adam adam.log mkdir -p adam $MRT_MARIAN/marian \ - --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \ + --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \ -m adam/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 --save-freq 60 --cost-type ce-mean \ --log adam.log diff --git a/tests/training/restoring/optimizer/test_adam_params_async.sh b/tests/training/restoring/optimizer/test_adam_params_async.sh index 2b2c869..6dee216 100644 --- a/tests/training/restoring/optimizer/test_adam_params_async.sh +++ b/tests/training/restoring/optimizer/test_adam_params_async.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with Adam on 2 GPUs with asynchronous SGD +# AUTHOR: snukky +# TAGS: optimizer adam multigpu async clip-norm +##################################################################### + # Exit on error set -e @@ -13,7 +19,7 @@ if (( $MRT_NUM_DEVICES < 2 )); then fi $MRT_MARIAN/marian \ - --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \ + --no-shuffle --clip-norm 1 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \ -m adam_async/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 --save-freq 60 --cost-type ce-sum --disp-label-counts false \ --log adam_async.log --devices 0 1 @@ -33,8 +39,8 @@ $MRT_TOOLS/diff.sh adam_async.keys.out adam.keys.expected > adam_async.keys.diff python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_mt" > adam_async.mt.out python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_vt" > adam_async.vt.out -$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02 adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff -$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff +$MRT_TOOLS/diff-nums.py --numpy -a -p 0.03 adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff +$MRT_TOOLS/diff-nums.py --numpy -p 0.03 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff # Exit with success code exit 0 diff --git a/tests/training/restoring/optimizer/test_adam_params_sync.sh b/tests/training/restoring/optimizer/test_adam_params_sync.sh index 1e2481d..43dcda1 100644 --- a/tests/training/restoring/optimizer/test_adam_params_sync.sh +++ b/tests/training/restoring/optimizer/test_adam_params_sync.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Training with Adam on 2 GPUs with sync-sgd +# AUTHOR: snukky +# TAGS: optimizer adam multigpu +##################################################################### + # Exit on error set -e @@ -13,10 +19,10 @@ if (( $MRT_NUM_DEVICES < 2 )); then fi $MRT_MARIAN/marian \ - --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \ + --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \ -m adam_sync/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 --save-freq 60 \ - --log adam_sync.log --devices 0 1 --sync-sgd --cost-type ce-sum --disp-label-counts false --clip-norm 0 + --log adam_sync.log --devices 0 1 --sync-sgd --cost-type ce-sum --disp-label-counts false test -e adam_sync/model.npz test -e adam_sync/model.npz.optimizer.npz diff --git a/tests/training/restoring/optimizer/test_loading_adam_params.sh b/tests/training/restoring/optimizer/test_loading_adam_params.sh index 95a48c8..2ded056 100644 --- a/tests/training/restoring/optimizer/test_loading_adam_params.sh +++ b/tests/training/restoring/optimizer/test_loading_adam_params.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Loading Adam parameters after restarting training +# AUTHOR: snukky +# TAGS: optimizer adam +##################################################################### + # Exit on error set -e @@ -8,8 +14,7 @@ rm -rf adam_load adam_load_?.log mkdir -p adam_load extra_opts="--no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 2 --dim-rnn 64 --dim-emb 32" -# Added because default options has changes -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" $MRT_MARIAN/marian \ -m adam_load/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \ diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh index ff95d90..6a6f2f3 100644 --- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh +++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh @@ -9,7 +9,7 @@ mkdir -p valid_add extra_opts="--no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd" extra_opts="$extra_opts --dim-emb 128 --dim-rnn 256 --mini-batch 16" -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" #$MRT_MARIAN/marian $extra_opts \ #-m valid_add/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ diff --git a/tests/training/restoring/validation/test_restoring_newbest_validators.sh b/tests/training/restoring/validation/test_restoring_newbest_validators.sh index 444599b..fa8b37a 100644 --- a/tests/training/restoring/validation/test_restoring_newbest_validators.sh +++ b/tests/training/restoring/validation/test_restoring_newbest_validators.sh @@ -14,7 +14,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de # Uncomment to re-generate the expected output #$MRT_MARIAN/marian \ - #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \ + #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \ #--dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \ #-m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ #--disp-freq 5 --valid-freq 10 --after-batches 100 \ @@ -28,7 +28,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de $MRT_MARIAN/marian \ - --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \ + --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \ --dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \ -m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 5 --valid-freq 10 --after-batches 50 \ diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh index a29e534..f700e3c 100644 --- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh +++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh @@ -9,7 +9,7 @@ mkdir -p valid_lowisbet extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none" extra_opts="$extra_opts --dim-emb 64 --dim-rnn 128 --mini-batch 32" -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" # Files for the validation sets are swapped intentionally diff --git a/tests/training/restoring/validation/test_valid_reset_stalled.sh b/tests/training/restoring/validation/test_valid_reset_stalled.sh index e967a1f..c2c7d4b 100644 --- a/tests/training/restoring/validation/test_valid_reset_stalled.sh +++ b/tests/training/restoring/validation/test_valid_reset_stalled.sh @@ -27,7 +27,7 @@ $MRT_MARIAN/marian $extra_opts \ --disp-freq 10 --valid-freq 20 --after-batches 140 --early-stopping 5 \ --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \ --valid-sets valid.mini.bpe.{de,en} \ - --overwrite --keep-best \ + --overwrite --keep-best --clip-norm 0 \ --log valid_reset_stalled_1.log test -e valid_reset_stalled/model.npz @@ -43,7 +43,7 @@ $MRT_MARIAN/marian $extra_opts \ --disp-freq 10 --valid-freq 20 --after-batches 200 --early-stopping 5 --valid-reset-stalled \ --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \ --valid-sets valid.mini.bpe.{de,en} \ - --overwrite --keep-best \ + --overwrite --keep-best --clip-norm 0 \ --log valid_reset_stalled_2.log test -e valid_reset_stalled/model.npz diff --git a/tests/training/restoring/validation/update.sh b/tests/training/restoring/validation/update.sh new file mode 100755 index 0000000..809fbaa --- /dev/null +++ b/tests/training/restoring/validation/update.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh +cp valid_reset_stalled.out valid_reset_stalled.expected +cp valid_add.out valid_add.expected +cp valid_newbest.out valid_newbest.expected +cp valid_stalled.out valid_stalled.expected +cp valid_lowisbet.out valid_lowisbet.expected diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected index fb2d8a5..893a57d 100644 --- a/tests/training/restoring/validation/valid_add.expected +++ b/tests/training/restoring/validation/valid_add.expected @@ -1,15 +1,15 @@ -[valid] Ep. 1 : Up. 20 : cross-entropy : 296.282 : new best -[valid] Ep. 1 : Up. 40 : cross-entropy : 296.269 : new best -[valid] Ep. 1 : Up. 60 : cross-entropy : 296.255 : new best -[valid] Ep. 1 : Up. 80 : cross-entropy : 296.242 : new best -[valid] Ep. 1 : Up. 100 : cross-entropy : 296.229 : new best -[valid] Ep. 1 : Up. 120 : cross-entropy : 296.216 : new best -[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1618 : new best -[valid] Ep. 1 : Up. 140 : cross-entropy : 296.202 : new best -[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1613 : new best -[valid] Ep. 1 : Up. 160 : cross-entropy : 296.189 : new best -[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1609 : new best -[valid] Ep. 1 : Up. 180 : cross-entropy : 296.176 : new best -[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1604 : new best -[valid] Ep. 1 : Up. 200 : cross-entropy : 296.162 : new best -[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1599 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 294.63 : new best +[valid] Ep. 1 : Up. 40 : cross-entropy : 292.643 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 290.224 : new best +[valid] Ep. 1 : Up. 80 : cross-entropy : 286.857 : new best +[valid] Ep. 1 : Up. 100 : cross-entropy : 282.156 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 274.584 : new best +[valid] Ep. 1 : Up. 120 : ce-mean-words : 9.41969 : new best +[valid] Ep. 1 : Up. 140 : cross-entropy : 264.996 : new best +[valid] Ep. 1 : Up. 140 : ce-mean-words : 9.09079 : new best +[valid] Ep. 1 : Up. 160 : cross-entropy : 258.914 : new best +[valid] Ep. 1 : Up. 160 : ce-mean-words : 8.88213 : new best +[valid] Ep. 1 : Up. 180 : cross-entropy : 255.943 : new best +[valid] Ep. 1 : Up. 180 : ce-mean-words : 8.78019 : new best +[valid] Ep. 1 : Up. 200 : cross-entropy : 253.146 : new best +[valid] Ep. 1 : Up. 200 : ce-mean-words : 8.68424 : new best diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected index daa223a..8a2ca20 100644 --- a/tests/training/restoring/validation/valid_lowisbet.expected +++ b/tests/training/restoring/validation/valid_lowisbet.expected @@ -1,7 +1,7 @@ -[valid] Ep. 1 : Up. 30 : cross-entropy : 299.128 : new best -[valid] Ep. 2 : Up. 60 : cross-entropy : 298.528 : new best -[valid] Ep. 3 : Up. 90 : cross-entropy : 296.43 : new best -[valid] Ep. 4 : Up. 120 : cross-entropy : 297.912 : stalled 1 times (last best: 296.43) -[valid] Ep. 5 : Up. 150 : cross-entropy : 297.791 : stalled 2 times (last best: 296.43) -[valid] Ep. 6 : Up. 180 : cross-entropy : 297.654 : stalled 3 times (last best: 296.43) -[valid] Ep. 7 : Up. 210 : cross-entropy : 297.794 : stalled 4 times (last best: 296.43) +[valid] Ep. 1 : Up. 30 : cross-entropy : 299.127 : new best +[valid] Ep. 2 : Up. 60 : cross-entropy : 298.417 : new best +[valid] Ep. 3 : Up. 90 : cross-entropy : 296.252 : new best +[valid] Ep. 4 : Up. 120 : cross-entropy : 298.171 : stalled 1 times (last best: 296.252) +[valid] Ep. 5 : Up. 150 : cross-entropy : 298.057 : stalled 2 times (last best: 296.252) +[valid] Ep. 6 : Up. 180 : cross-entropy : 298.052 : stalled 3 times (last best: 296.252) +[valid] Ep. 7 : Up. 210 : cross-entropy : 298.133 : stalled 4 times (last best: 296.252) diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected index d03d098..22ce219 100644 --- a/tests/training/restoring/validation/valid_newbest.expected +++ b/tests/training/restoring/validation/valid_newbest.expected @@ -1,20 +1,20 @@ -[valid] Ep. 1 : Up. 10 : cross-entropy : 250.506 : new best -[valid] Ep. 1 : Up. 10 : translation : 8 : new best -[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best -[valid] Ep. 1 : Up. 20 : translation : 8 : stalled 1 times (last best: 8) -[valid] Ep. 1 : Up. 30 : cross-entropy : 250.497 : new best -[valid] Ep. 1 : Up. 30 : translation : 8 : stalled 2 times (last best: 8) -[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best -[valid] Ep. 1 : Up. 40 : translation : 9 : new best -[valid] Ep. 1 : Up. 50 : cross-entropy : 250.486 : new best -[valid] Ep. 1 : Up. 50 : translation : 7 : stalled 1 times (last best: 9) -[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best -[valid] Ep. 1 : Up. 60 : translation : 3 : stalled 2 times (last best: 9) -[valid] Ep. 1 : Up. 70 : cross-entropy : 250.476 : new best -[valid] Ep. 1 : Up. 70 : translation : 6 : stalled 3 times (last best: 9) -[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best -[valid] Ep. 1 : Up. 80 : translation : 0 : stalled 4 times (last best: 9) -[valid] Ep. 1 : Up. 90 : cross-entropy : 250.465 : new best -[valid] Ep. 1 : Up. 90 : translation : 9 : stalled 5 times (last best: 9) -[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best -[valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9) +[valid] Ep. 1 : Up. 10 : cross-entropy : 249.884 : new best +[valid] Ep. 1 : Up. 10 : translation : 5 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best +[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 5) +[valid] Ep. 1 : Up. 30 : cross-entropy : 248.804 : new best +[valid] Ep. 1 : Up. 30 : translation : 3 : stalled 2 times (last best: 5) +[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best +[valid] Ep. 1 : Up. 40 : translation : 6 : new best +[valid] Ep. 1 : Up. 50 : cross-entropy : 247.56 : new best +[valid] Ep. 1 : Up. 50 : translation : 9 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best +[valid] Ep. 1 : Up. 60 : translation : 6 : stalled 1 times (last best: 9) +[valid] Ep. 1 : Up. 70 : cross-entropy : 246.112 : new best +[valid] Ep. 1 : Up. 70 : translation : 8 : stalled 2 times (last best: 9) +[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best +[valid] Ep. 1 : Up. 80 : translation : 8 : stalled 3 times (last best: 9) +[valid] Ep. 1 : Up. 90 : cross-entropy : 244.336 : new best +[valid] Ep. 1 : Up. 90 : translation : 8 : stalled 4 times (last best: 9) +[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best +[valid] Ep. 1 : Up. 100 : translation : 8 : stalled 5 times (last best: 9) diff --git a/tests/training/restoring/validation/valid_reset_stalled.expected b/tests/training/restoring/validation/valid_reset_stalled.expected index eed1393..da5b590 100644 --- a/tests/training/restoring/validation/valid_reset_stalled.expected +++ b/tests/training/restoring/validation/valid_reset_stalled.expected @@ -1,30 +1,30 @@ [valid] Ep. 1 : Up. 20 : translation : 333.5 : new best [valid] Ep. 1 : Up. 20 : valid-script : 222.3 : new best -[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best [valid] Ep. 1 : Up. 40 : translation : 333.4 : stalled 1 times (last best: 333.5) [valid] Ep. 1 : Up. 40 : valid-script : 222.2 : stalled 1 times (last best: 222.3) -[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best +[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best [valid] Ep. 1 : Up. 60 : translation : 333.3 : stalled 2 times (last best: 333.5) [valid] Ep. 1 : Up. 60 : valid-script : 222.1 : stalled 2 times (last best: 222.3) -[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best [valid] Ep. 1 : Up. 80 : translation : 333.2 : stalled 3 times (last best: 333.5) [valid] Ep. 1 : Up. 80 : valid-script : 222.6 : new best -[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best +[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best [valid] Ep. 1 : Up. 100 : translation : 333.1 : stalled 4 times (last best: 333.5) [valid] Ep. 1 : Up. 100 : valid-script : 222.5 : stalled 1 times (last best: 222.6) -[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best +[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best [valid] Ep. 1 : Up. 120 : translation : 333.9 : new best [valid] Ep. 1 : Up. 120 : valid-script : 222.4 : stalled 2 times (last best: 222.6) -[valid] Ep. 1 : Up. 120 : cross-entropy : 250.45 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 240.802 : new best [valid] Ep. 1 : Up. 140 : translation : 333.8 : stalled 1 times (last best: 333.9) [valid] Ep. 1 : Up. 140 : valid-script : 222.3 : stalled 3 times (last best: 222.6) -[valid] Ep. 1 : Up. 140 : cross-entropy : 250.441 : new best +[valid] Ep. 1 : Up. 140 : cross-entropy : 237.65 : new best [valid] Ep. 1 : Up. 160 : translation : 333.7 : stalled 1 times (last best: 333.9) [valid] Ep. 1 : Up. 160 : valid-script : 222.2 : stalled 1 times (last best: 222.6) -[valid] Ep. 1 : Up. 160 : cross-entropy : 250.43 : new best +[valid] Ep. 1 : Up. 160 : cross-entropy : 233.833 : new best [valid] Ep. 2 : Up. 180 : translation : 333.6 : stalled 2 times (last best: 333.9) [valid] Ep. 2 : Up. 180 : valid-script : 222.1 : stalled 2 times (last best: 222.6) -[valid] Ep. 2 : Up. 180 : cross-entropy : 250.42 : new best +[valid] Ep. 2 : Up. 180 : cross-entropy : 230.035 : new best [valid] Ep. 2 : Up. 200 : translation : 333.5 : stalled 3 times (last best: 333.9) [valid] Ep. 2 : Up. 200 : valid-script : 222.6 : stalled 3 times (last best: 222.6) -[valid] Ep. 2 : Up. 200 : cross-entropy : 250.41 : new best +[valid] Ep. 2 : Up. 200 : cross-entropy : 227.982 : new best diff --git a/tests/training/validation/final_batch.expected b/tests/training/validation/final_batch.expected index eac5cdc..2e0a8b3 100644 --- a/tests/training/validation/final_batch.expected +++ b/tests/training/validation/final_batch.expected @@ -1,3 +1,3 @@ -[valid] Ep. 1 : Up. 60 : cross-entropy : 240.376 : new best -[valid] Ep. 1 : Up. 120 : cross-entropy : 240.348 : new best -[valid] Ep. 1 : Up. 150 : cross-entropy : 240.332 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 198.667 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 186.536 : new best +[valid] Ep. 1 : Up. 150 : cross-entropy : 181.413 : new best diff --git a/tests/training/validation/final_epoch.expected b/tests/training/validation/final_epoch.expected index ebcb25c..f96a0df 100644 --- a/tests/training/validation/final_epoch.expected +++ b/tests/training/validation/final_epoch.expected @@ -1,3 +1,3 @@ -[valid] Ep. 1 : Up. 40 : cross-entropy : 240.475 : new best -[valid] Ep. 1 : Up. 80 : cross-entropy : 240.459 : new best -[valid] Ep. 2 : Up. 81 : cross-entropy : 240.459 : new best +[valid] Ep. 1 : Up. 40 : cross-entropy : 234.305 : new best +[valid] Ep. 1 : Up. 80 : cross-entropy : 227.512 : new best +[valid] Ep. 2 : Up. 81 : cross-entropy : 227.471 : new best diff --git a/tests/training/validation/final_match.expected b/tests/training/validation/final_match.expected index 87c9cc2..d685062 100644 --- a/tests/training/validation/final_match.expected +++ b/tests/training/validation/final_match.expected @@ -1,3 +1,3 @@ -[valid] Ep. 1 : Up. 60 : cross-entropy : 240.376 : new best -[valid] Ep. 1 : Up. 120 : cross-entropy : 240.348 : new best -[valid] Ep. 1 : Up. 180 : cross-entropy : 240.317 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 198.667 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 186.536 : new best +[valid] Ep. 1 : Up. 180 : cross-entropy : 179.091 : new best diff --git a/tests/training/validation/test_final_validation_after_batches.sh b/tests/training/validation/test_final_validation_after_batches.sh index b4ccc3c..84a3dda 100644 --- a/tests/training/validation/test_final_validation_after_batches.sh +++ b/tests/training/validation/test_final_validation_after_batches.sh @@ -8,10 +8,10 @@ rm -rf final_batch final_batch.log vocab.*.yml mkdir -p final_batch $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ + --no-shuffle --clip-norm 0 --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ -m final_batch/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} \ -v vocab.en.yml vocab.de.yml --dim-vocabs 50000 50000 \ - --disp-freq 30 --valid-freq 60 --after-batches 150 \ + --disp-freq 30 --valid-freq 60 --after 150u \ --valid-metrics cross-entropy --valid-sets valid.bpe.{en,de} \ --valid-log final_batch.log diff --git a/tests/training/validation/test_final_validation_after_batches_match.sh b/tests/training/validation/test_final_validation_after_batches_match.sh index fc676cf..dc64991 100644 --- a/tests/training/validation/test_final_validation_after_batches_match.sh +++ b/tests/training/validation/test_final_validation_after_batches_match.sh @@ -8,10 +8,10 @@ rm -rf final_match final_match.log vocab.*.yml mkdir -p final_match $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ + --no-shuffle --clip-norm 0 --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ -m final_match/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} \ -v vocab.en.yml vocab.de.yml --dim-vocabs 50000 50000 \ - --disp-freq 30 --valid-freq 60 --after-batches 180 \ + --disp-freq 30 --valid-freq 60 --after 180u \ --valid-metrics cross-entropy --valid-sets valid.bpe.{en,de} \ --valid-log final_match.log diff --git a/tests/training/validation/test_final_validation_after_epochs.sh b/tests/training/validation/test_final_validation_after_epochs.sh index e8e606f..e263259 100644 --- a/tests/training/validation/test_final_validation_after_epochs.sh +++ b/tests/training/validation/test_final_validation_after_epochs.sh @@ -11,10 +11,10 @@ test -e train.bpe.en || head -n 3000 $MRT_DATA/europarl.de-en/corpus.bpe.en > tr test -e train.bpe.de || head -n 3000 $MRT_DATA/europarl.de-en/corpus.bpe.de > train.bpe.de $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ + --no-shuffle --clip-norm 0 --seed 1111 --optimizer sgd --dim-emb 64 --dim-rnn 128 \ -m final_epoch/model.npz -t train.bpe.{en,de} \ -v vocab.small.en.yml vocab.small.de.yml --dim-vocabs 50000 50000 \ - --mini-batch 32 --disp-freq 20 --valid-freq 40 --after-epochs 1 \ + --mini-batch 32 --disp-freq 20 --valid-freq 40 --after 1e \ --valid-metrics cross-entropy --valid-sets valid.bpe.{en,de} \ --valid-log final_epoch.log diff --git a/tests/training/validation/test_translation_metric_with_empty_lines.sh b/tests/training/validation/test_translation_metric_with_empty_lines.sh index 95333c2..5ff7115 100644 --- a/tests/training/validation/test_translation_metric_with_empty_lines.sh +++ b/tests/training/validation/test_translation_metric_with_empty_lines.sh @@ -28,9 +28,9 @@ test -e trans_empty_lines/train.en || cat $MRT_DATA/train.max50.en | sed 's/@@ / # Train $MRT_MARIAN/marian \ - --seed 2222 --no-shuffle --mini-batch 32 --maxi-batch 1 --optimizer sgd \ + --seed 2222 --no-shuffle --clip-norm 0 --mini-batch 32 --maxi-batch 1 --optimizer sgd -l 0.00001 \ -m trans_empty_lines/model.npz -t trans_empty_lines/train.{de,en} -v trans_empty_lines/vocab.{spm,spm} \ - --disp-freq 20 --valid-freq 60 --after-batches 60 \ + --disp-freq 10 --valid-freq 30 --after 30u \ --valid-metrics cross-entropy translation --valid-translation-output trans_empty_lines.out \ --valid-sets trans_empty_lines.de trans_empty_lines.en \ --valid-log trans_empty_lines.log diff --git a/tests/training/validation/test_translation_script.sh b/tests/training/validation/test_translation_script.sh index 2bf8648..11d0883 100644 --- a/tests/training/validation/test_translation_script.sh +++ b/tests/training/validation/test_translation_script.sh @@ -14,11 +14,11 @@ rm -rf trans trans.log trans_script.temp mkdir -p trans $MRT_MARIAN/marian \ - --seed 2222 --no-shuffle --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \ + --seed 2222 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \ -m trans/model.npz \ -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.50k.en.yml vocab.50k.de.yml \ --dim-vocabs 50000 50000 \ - --disp-freq 30 --valid-freq 60 --after-batches 150 \ + --disp-freq 30 --valid-freq 60 --after 150u \ --valid-metrics cross-entropy translation --valid-script-path ./trans_script.sh \ --valid-sets trans.bpe.en trans.bpe.de \ --valid-log trans.log diff --git a/tests/training/validation/test_valid_script.sh b/tests/training/validation/test_valid_script.sh index 53cfba5..7cd60e9 100644 --- a/tests/training/validation/test_valid_script.sh +++ b/tests/training/validation/test_valid_script.sh @@ -14,7 +14,7 @@ rm -rf valid valid.log valid_script.temp mkdir -p valid $MRT_MARIAN/marian \ - --seed 2222 --no-shuffle --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \ + --seed 2222 --no-shuffle --clip-norm 0 --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \ -m valid/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} \ -v vocab.50k.en.yml vocab.50k.de.yml --dim-vocabs 50000 50000 \ --disp-freq 5 --valid-freq 15 --after-batches 75 \ diff --git a/tests/training/validation/trans.expected b/tests/training/validation/trans.expected index 68987df..875a980 100644 --- a/tests/training/validation/trans.expected +++ b/tests/training/validation/trans.expected @@ -1,4 +1,4 @@ -[valid] Ep. 1 : Up. 60 : cross-entropy : 218.835 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 215.525 : new best [valid] Ep. 1 : Up. 60 : translation : 1 : new best -[valid] Ep. 1 : Up. 120 : cross-entropy : 187.91 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 186.551 : new best [valid] Ep. 1 : Up. 120 : translation : 2 : new best diff --git a/tests/training/validation/trans_empty_lines.expected b/tests/training/validation/trans_empty_lines.expected index 45ae20b..46a9bae 100644 --- a/tests/training/validation/trans_empty_lines.expected +++ b/tests/training/validation/trans_empty_lines.expected @@ -1,9 +1,9 @@ -That concludes the agenda, ladies and gentlemen. -The Minutes of this sitting will be submitted to plenary at the beginning of the next part-session. -Mr Manders has the floor for a point of order. -Mr President, I would like to take this opportunity to wish you, the Bureau and all Members well for the new year. +, ladies and gentlemen , Parliament has completed the agenda . +The Minutes of this sitting will be submitted to plenary at the beginning of the next part-session . +Mr Manders has the floor for a point of order . +Mr President , I would like to take this opportunity to wish you , the Bureau and all Members well for the new year . -I would even like to allow me to name the Commission and the Council, even if they are not present. +I would even allow me to name the Commission and the Council , even if they are not present . I declare the session of the European Parliament adjourned. diff --git a/tests/training/validation/valid.expected b/tests/training/validation/valid.expected index 66764ec..48227e6 100644 --- a/tests/training/validation/valid.expected +++ b/tests/training/validation/valid.expected @@ -1,10 +1,10 @@ [valid] Ep. 1 : Up. 15 : cross-entropy : 307.647 : new best [valid] Ep. 1 : Up. 15 : valid-script : 1 : new best -[valid] Ep. 1 : Up. 30 : cross-entropy : 305.551 : new best +[valid] Ep. 1 : Up. 30 : cross-entropy : 305.336 : new best [valid] Ep. 1 : Up. 30 : valid-script : 2 : new best -[valid] Ep. 1 : Up. 45 : cross-entropy : 299.442 : new best +[valid] Ep. 1 : Up. 45 : cross-entropy : 297.463 : new best [valid] Ep. 1 : Up. 45 : valid-script : 3 : new best -[valid] Ep. 1 : Up. 60 : cross-entropy : 281.549 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 277.038 : new best [valid] Ep. 1 : Up. 60 : valid-script : 4 : new best -[valid] Ep. 1 : Up. 75 : cross-entropy : 268.403 : new best +[valid] Ep. 1 : Up. 75 : cross-entropy : 265.286 : new best [valid] Ep. 1 : Up. 75 : valid-script : 5 : new best |