Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-25 17:54:26 +0300
committer Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-25 17:54:26 +0300
commit a76b695b11fdb26d19a53132946f869c56d0d7a3 (patch)
tree ed3237eb5f09126d7ad4c6a89d9dbfe0e8831e3e
parent b6596ea3eee62e76029e1f27e93838a2752ffbd4 (diff)
Update missing tests to clip-norm 0
-rw-r--r-- tests/training/features/quantized-model/model_centers.expected 98
-rw-r--r-- tests/training/features/quantized-model/quantized.expected 20
-rw-r--r-- tests/training/features/quantized-model/test_quant_centers.sh 2
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel.sh 2
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel_log.sh 2
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel_with_bias.sh 4
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel_with_optimization.sh 2
-rwxr-xr-x tests/training/features/quantized-model/update.sh 7
-rw-r--r-- tests/training/restoring/validation/test_adding_validator_after_restart.sh 2
-rw-r--r-- tests/training/restoring/validation/test_restoring_newbest_validators.sh 4
-rw-r--r-- tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh 2
-rw-r--r-- tests/training/restoring/validation/test_valid_reset_stalled.sh 4
-rwxr-xr-x tests/training/restoring/validation/update.sh 6
-rw-r--r-- tests/training/restoring/validation/valid_add.expected 30
-rw-r--r-- tests/training/restoring/validation/valid_lowisbet.expected 14
-rw-r--r-- tests/training/restoring/validation/valid_newbest.expected 40
-rw-r--r-- tests/training/restoring/validation/valid_reset_stalled.expected 20
17 files changed, 135 insertions, 124 deletions
diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected
index 57380ae..954a001 100644
--- a/tests/training/features/quantized-model/model_centers.expected
+++ b/tests/training/features/quantized-model/model_centers.expected
@@ -1,51 +1,49 @@
-Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0. 0.05892393 0.11784786
- 0.17677179]
-Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0. 0.05112316 0.10224632
- 0.15336949]
-Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048 0. 0.10682048 0.21364096
- 0.32046145]
-Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0. 0.05895937 0.11791874
- 0.17687811]
-Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849 0. 0.07256849 0.14513698
- 0.21770547]
-Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0. 0.06465847 0.12931694
- 0.19397542]
-Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0. 0.08443208 0.16886416
- 0.25329626]
-Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732 0. 0.05898732 0.11797463
- 0.17696194]
-Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961 0. 0.07298961 0.14597921
- 0.21896881]
-Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064 0. 0.05108064 0.10216128
- 0.15324192]
-Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0. 0.06064001 0.12128001
- 0.18192002]
-Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597 -0.10727985 -0. 0.10727985 0.2145597
- 0.32183957]
-Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121 0. 0.08485121 0.16970241
- 0.25455362]
-Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0. 0.06622399 0.13244799
- 0.19867198]
-Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152 0. 0.1204152 0.24083039
- 0.36124557]
-Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653 0. 0.05901653 0.11803307
- 0.17704961]
-Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925 0. 0.10402925 0.20805849
- 0.31208774]
-Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409 0. 0.05895409 0.11790817
- 0.17686225]
-Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911 0. 0.07274911 0.14549822
- 0.21824732]
-Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812 0. 0.06467812 0.12935624
- 0.19403435]
-Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0. 0.08404578 0.16809157
- 0.25213736]
-Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715 0. 0.05899715 0.11799429
- 0.17699143]
-Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0. 0.07323782 0.14647564
- 0.21971346]
-Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094 0. 0.06470094 0.12940188
- 0.19410282]
-Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0. 0.08408453 0.16816907
- 0.25225359]
+Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819 0. 0.0608819 0.1217638
+ 0.1826457]
+Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263 -0.05776315 0. 0.05776315 0.1155263
+ 0.17328945]
+Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326 0. 0.7877326 1.5754652
+ 2.3631978]
+Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0. 0.1073667 0.2147334
+ 0.3221001]
+Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637 0. 0.14607637 0.29215273
+ 0.43822908]
+Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347 0. 0.07605347 0.15210694
+ 0.22816041]
+Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0. 0.16543898 0.33087796
+ 0.49631694]
+Tensor decoder_U_nl unique centers: [-0.3815875 -0.25439167 -0.12719584 -0. 0.12719584 0.25439167
+ 0.3815875 ]
+Tensor decoder_Ux_nl unique centers: [-0.5111215 -0.34074768 -0.17037384 0. 0.17037384 0.34074768
+ 0.5111215 ]
+Tensor decoder_Wc unique centers: [-0.42579597 -0.283864 -0.141932 -0. 0.141932 0.283864
+ 0.42579597]
+Tensor decoder_Wcx unique centers: [-0.8375 -0.55833334 -0.27916667 -0. 0.27916667 0.55833334
+ 0.8375 ]
+Tensor ff_logit_prev_W unique centers: [-70.87341 -23.624472 0. 23.624472 47.248943]
+Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292 -82.02646 0. 82.02646 164.05292
+ 246.07938]
+Tensor ff_logit_ctx_W unique centers: [-240.9685 -160.64568 -80.32284 0. 80.32284 160.64568
+ 240.9685 ]
+Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637 -70.750916 -35.375458 -0. 35.375458 70.750916
+ 106.12637 ]
+Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0. 0.0853039 0.1706078
+ 0.2559117]
+Tensor Wemb unique centers: [-0.39904252 -0.19952126 0. 0.19952126 0.39904252 0.5985638 ]
+Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0. 0.10125127 0.20250255
+ 0.30375382]
+Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0. 0.15289131 0.30578262
+ 0.45867392]
+Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657 0. 0.0687657 0.1375314
+ 0.2062971]
+Tensor encoder_Wx unique centers: [-0.3073737 -0.20491579 -0.1024579 0. 0.1024579 0.20491579
+ 0.3073737 ]
+Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657 0. 0.11439657 0.22879314
+ 0.34318972]
+Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0. 0.24097262 0.48194525
+ 0.72291785]
+Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0. 0.07204568 0.14409137
+ 0.21613705]
+Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477 -0.13297385 -0. 0.13297385 0.2659477
+ 0.39892155]
Tensor decoder_c_tt unique centers: []
diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected
index 17620ec..2d0638e 100644
--- a/tests/training/features/quantized-model/quantized.expected
+++ b/tests/training/features/quantized-model/quantized.expected
@@ -1,10 +1,10 @@
-225.10929871
-243.58345032
-229.45071411
-224.28813171
-212.65242004
-204.06596375
-197.81690979
-190.08915710
-193.72299194
-195.20808411
+5296.80419922
+14729.64062500
+14570.66210938
+17166.55859375
+16055.21875000
+16277.48437500
+18673.34765625
+16747.37109375
+17298.72070312
+16335.72949219
diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh
index 22dd863..8318c24 100644
--- a/tests/training/features/quantized-model/test_quant_centers.sh
+++ b/tests/training/features/quantized-model/test_quant_centers.sh
@@ -16,7 +16,7 @@ mkdir -p train
# Train an 8-bits model
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3
diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh
index 8b55697..67019f2 100644
--- a/tests/training/features/quantized-model/test_quantmodel.sh
+++ b/tests/training/features/quantized-model/test_quantmodel.sh
@@ -16,7 +16,7 @@ mkdir -p train
# Train an 8-bits model
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \
--log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_log.sh b/tests/training/features/quantized-model/test_quantmodel_log.sh
index f79809b..924eb4b 100644
--- a/tests/training/features/quantized-model/test_quantmodel_log.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_log.sh
@@ -16,7 +16,7 @@ mkdir -p train
# Train an 8-bits model
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 4 --quantize-log-based --quantize-optimization-steps 3 \
--log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
index de14ffb..8dee56b 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
@@ -16,14 +16,14 @@ mkdir -p train
# training with quantized bias is tricky, so we start by training a normal model first before finetuning it to the quantized space.
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \
--log $PREFIX.log
# Train an 8-bits model
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \
--log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
index fe7993e..a768ffc 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
@@ -16,7 +16,7 @@ mkdir -p train
# Train an 8-bits model
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
-m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
--cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \
--log $PREFIX.log
diff --git a/tests/training/features/quantized-model/update.sh b/tests/training/features/quantized-model/update.sh
new file mode 100755
index 0000000..04be645
--- /dev/null
+++ b/tests/training/features/quantized-model/update.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env sh
+cp model_centers.out model_centers.expected
+cp test-center.out test-center.expected
+cp quantized-log4bit.out quantized-log4bit.expected
+cp quantized.out quantized.expected
+cp quantized-with-bias.out quantized-with-bias.expected
+cp quantized-opt.out quantized-opt.expected
diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
index ff95d90..6a6f2f3 100644
--- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh
+++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
@@ -9,7 +9,7 @@ mkdir -p valid_add
extra_opts="--no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd"
extra_opts="$extra_opts --dim-emb 128 --dim-rnn 256 --mini-batch 16"
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
#$MRT_MARIAN/marian $extra_opts \
#-m valid_add/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
diff --git a/tests/training/restoring/validation/test_restoring_newbest_validators.sh b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
index 444599b..fa8b37a 100644
--- a/tests/training/restoring/validation/test_restoring_newbest_validators.sh
+++ b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
@@ -14,7 +14,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de
# Uncomment to re-generate the expected output
#$MRT_MARIAN/marian \
- #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \
+ #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \
#--dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \
#-m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
#--disp-freq 5 --valid-freq 10 --after-batches 100 \
@@ -28,7 +28,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de
$MRT_MARIAN/marian \
- --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \
+ --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \
--dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \
-m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
--disp-freq 5 --valid-freq 10 --after-batches 50 \
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index a29e534..f700e3c 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -9,7 +9,7 @@ mkdir -p valid_lowisbet
extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none"
extra_opts="$extra_opts --dim-emb 64 --dim-rnn 128 --mini-batch 32"
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
# Files for the validation sets are swapped intentionally
diff --git a/tests/training/restoring/validation/test_valid_reset_stalled.sh b/tests/training/restoring/validation/test_valid_reset_stalled.sh
index e967a1f..c2c7d4b 100644
--- a/tests/training/restoring/validation/test_valid_reset_stalled.sh
+++ b/tests/training/restoring/validation/test_valid_reset_stalled.sh
@@ -27,7 +27,7 @@ $MRT_MARIAN/marian $extra_opts \
--disp-freq 10 --valid-freq 20 --after-batches 140 --early-stopping 5 \
--valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \
--valid-sets valid.mini.bpe.{de,en} \
- --overwrite --keep-best \
+ --overwrite --keep-best --clip-norm 0 \
--log valid_reset_stalled_1.log
test -e valid_reset_stalled/model.npz
@@ -43,7 +43,7 @@ $MRT_MARIAN/marian $extra_opts \
--disp-freq 10 --valid-freq 20 --after-batches 200 --early-stopping 5 --valid-reset-stalled \
--valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \
--valid-sets valid.mini.bpe.{de,en} \
- --overwrite --keep-best \
+ --overwrite --keep-best --clip-norm 0 \
--log valid_reset_stalled_2.log
test -e valid_reset_stalled/model.npz
diff --git a/tests/training/restoring/validation/update.sh b/tests/training/restoring/validation/update.sh
new file mode 100755
index 0000000..809fbaa
--- /dev/null
+++ b/tests/training/restoring/validation/update.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env sh
+cp valid_reset_stalled.out valid_reset_stalled.expected
+cp valid_add.out valid_add.expected
+cp valid_newbest.out valid_newbest.expected
+cp valid_stalled.out valid_stalled.expected
+cp valid_lowisbet.out valid_lowisbet.expected
diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected
index fb2d8a5..893a57d 100644
--- a/tests/training/restoring/validation/valid_add.expected
+++ b/tests/training/restoring/validation/valid_add.expected
@@ -1,15 +1,15 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 296.282 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 296.269 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 296.255 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 296.242 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 296.229 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 296.216 : new best
-[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1618 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 296.202 : new best
-[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1613 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 296.189 : new best
-[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1609 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 296.176 : new best
-[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1604 : new best
-[valid] Ep. 1 : Up. 200 : cross-entropy : 296.162 : new best
-[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1599 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 294.63 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 292.643 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 290.224 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 286.857 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 282.156 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 274.584 : new best
+[valid] Ep. 1 : Up. 120 : ce-mean-words : 9.41969 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 264.996 : new best
+[valid] Ep. 1 : Up. 140 : ce-mean-words : 9.09079 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 258.914 : new best
+[valid] Ep. 1 : Up. 160 : ce-mean-words : 8.88213 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 255.943 : new best
+[valid] Ep. 1 : Up. 180 : ce-mean-words : 8.78019 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 253.146 : new best
+[valid] Ep. 1 : Up. 200 : ce-mean-words : 8.68424 : new best
diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected
index daa223a..8a2ca20 100644
--- a/tests/training/restoring/validation/valid_lowisbet.expected
+++ b/tests/training/restoring/validation/valid_lowisbet.expected
@@ -1,7 +1,7 @@
-[valid] Ep. 1 : Up. 30 : cross-entropy : 299.128 : new best
-[valid] Ep. 2 : Up. 60 : cross-entropy : 298.528 : new best
-[valid] Ep. 3 : Up. 90 : cross-entropy : 296.43 : new best
-[valid] Ep. 4 : Up. 120 : cross-entropy : 297.912 : stalled 1 times (last best: 296.43)
-[valid] Ep. 5 : Up. 150 : cross-entropy : 297.791 : stalled 2 times (last best: 296.43)
-[valid] Ep. 6 : Up. 180 : cross-entropy : 297.654 : stalled 3 times (last best: 296.43)
-[valid] Ep. 7 : Up. 210 : cross-entropy : 297.794 : stalled 4 times (last best: 296.43)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 299.127 : new best
+[valid] Ep. 2 : Up. 60 : cross-entropy : 298.417 : new best
+[valid] Ep. 3 : Up. 90 : cross-entropy : 296.252 : new best
+[valid] Ep. 4 : Up. 120 : cross-entropy : 298.171 : stalled 1 times (last best: 296.252)
+[valid] Ep. 5 : Up. 150 : cross-entropy : 298.057 : stalled 2 times (last best: 296.252)
+[valid] Ep. 6 : Up. 180 : cross-entropy : 298.052 : stalled 3 times (last best: 296.252)
+[valid] Ep. 7 : Up. 210 : cross-entropy : 298.133 : stalled 4 times (last best: 296.252)
diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected
index d03d098..22ce219 100644
--- a/tests/training/restoring/validation/valid_newbest.expected
+++ b/tests/training/restoring/validation/valid_newbest.expected
@@ -1,20 +1,20 @@
-[valid] Ep. 1 : Up. 10 : cross-entropy : 250.506 : new best
-[valid] Ep. 1 : Up. 10 : translation : 8 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best
-[valid] Ep. 1 : Up. 20 : translation : 8 : stalled 1 times (last best: 8)
-[valid] Ep. 1 : Up. 30 : cross-entropy : 250.497 : new best
-[valid] Ep. 1 : Up. 30 : translation : 8 : stalled 2 times (last best: 8)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best
-[valid] Ep. 1 : Up. 40 : translation : 9 : new best
-[valid] Ep. 1 : Up. 50 : cross-entropy : 250.486 : new best
-[valid] Ep. 1 : Up. 50 : translation : 7 : stalled 1 times (last best: 9)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best
-[valid] Ep. 1 : Up. 60 : translation : 3 : stalled 2 times (last best: 9)
-[valid] Ep. 1 : Up. 70 : cross-entropy : 250.476 : new best
-[valid] Ep. 1 : Up. 70 : translation : 6 : stalled 3 times (last best: 9)
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best
-[valid] Ep. 1 : Up. 80 : translation : 0 : stalled 4 times (last best: 9)
-[valid] Ep. 1 : Up. 90 : cross-entropy : 250.465 : new best
-[valid] Ep. 1 : Up. 90 : translation : 9 : stalled 5 times (last best: 9)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best
-[valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9)
+[valid] Ep. 1 : Up. 10 : cross-entropy : 249.884 : new best
+[valid] Ep. 1 : Up. 10 : translation : 5 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best
+[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 5)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 248.804 : new best
+[valid] Ep. 1 : Up. 30 : translation : 3 : stalled 2 times (last best: 5)
+[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best
+[valid] Ep. 1 : Up. 40 : translation : 6 : new best
+[valid] Ep. 1 : Up. 50 : cross-entropy : 247.56 : new best
+[valid] Ep. 1 : Up. 50 : translation : 9 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best
+[valid] Ep. 1 : Up. 60 : translation : 6 : stalled 1 times (last best: 9)
+[valid] Ep. 1 : Up. 70 : cross-entropy : 246.112 : new best
+[valid] Ep. 1 : Up. 70 : translation : 8 : stalled 2 times (last best: 9)
+[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best
+[valid] Ep. 1 : Up. 80 : translation : 8 : stalled 3 times (last best: 9)
+[valid] Ep. 1 : Up. 90 : cross-entropy : 244.336 : new best
+[valid] Ep. 1 : Up. 90 : translation : 8 : stalled 4 times (last best: 9)
+[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best
+[valid] Ep. 1 : Up. 100 : translation : 8 : stalled 5 times (last best: 9)
diff --git a/tests/training/restoring/validation/valid_reset_stalled.expected b/tests/training/restoring/validation/valid_reset_stalled.expected
index eed1393..da5b590 100644
--- a/tests/training/restoring/validation/valid_reset_stalled.expected
+++ b/tests/training/restoring/validation/valid_reset_stalled.expected
@@ -1,30 +1,30 @@
[valid] Ep. 1 : Up. 20 : translation : 333.5 : new best
[valid] Ep. 1 : Up. 20 : valid-script : 222.3 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best
[valid] Ep. 1 : Up. 40 : translation : 333.4 : stalled 1 times (last best: 333.5)
[valid] Ep. 1 : Up. 40 : valid-script : 222.2 : stalled 1 times (last best: 222.3)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best
[valid] Ep. 1 : Up. 60 : translation : 333.3 : stalled 2 times (last best: 333.5)
[valid] Ep. 1 : Up. 60 : valid-script : 222.1 : stalled 2 times (last best: 222.3)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best
[valid] Ep. 1 : Up. 80 : translation : 333.2 : stalled 3 times (last best: 333.5)
[valid] Ep. 1 : Up. 80 : valid-script : 222.6 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best
[valid] Ep. 1 : Up. 100 : translation : 333.1 : stalled 4 times (last best: 333.5)
[valid] Ep. 1 : Up. 100 : valid-script : 222.5 : stalled 1 times (last best: 222.6)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best
[valid] Ep. 1 : Up. 120 : translation : 333.9 : new best
[valid] Ep. 1 : Up. 120 : valid-script : 222.4 : stalled 2 times (last best: 222.6)
-[valid] Ep. 1 : Up. 120 : cross-entropy : 250.45 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.802 : new best
[valid] Ep. 1 : Up. 140 : translation : 333.8 : stalled 1 times (last best: 333.9)
[valid] Ep. 1 : Up. 140 : valid-script : 222.3 : stalled 3 times (last best: 222.6)
-[valid] Ep. 1 : Up. 140 : cross-entropy : 250.441 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 237.65 : new best
[valid] Ep. 1 : Up. 160 : translation : 333.7 : stalled 1 times (last best: 333.9)
[valid] Ep. 1 : Up. 160 : valid-script : 222.2 : stalled 1 times (last best: 222.6)
-[valid] Ep. 1 : Up. 160 : cross-entropy : 250.43 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 233.833 : new best
[valid] Ep. 2 : Up. 180 : translation : 333.6 : stalled 2 times (last best: 333.9)
[valid] Ep. 2 : Up. 180 : valid-script : 222.1 : stalled 2 times (last best: 222.6)
-[valid] Ep. 2 : Up. 180 : cross-entropy : 250.42 : new best
+[valid] Ep. 2 : Up. 180 : cross-entropy : 230.035 : new best
[valid] Ep. 2 : Up. 200 : translation : 333.5 : stalled 3 times (last best: 333.9)
[valid] Ep. 2 : Up. 200 : valid-script : 222.6 : stalled 3 times (last best: 222.6)
-[valid] Ep. 2 : Up. 200 : cross-entropy : 250.41 : new best
+[valid] Ep. 2 : Up. 200 : cross-entropy : 227.982 : new best