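Update missing tests to set --clip-norm explicitly (0 in most tests; 1 in the log-based, with-bias and with-optimization quantized-model tests)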

author: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-25 17:54:26 +0300
committer: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-11-25 17:54:26 +0300
commit: a76b695b11fdb26d19a53132946f869c56d0d7a3 (patch)
tree: ed3237eb5f09126d7ad4c6a89d9dbfe0e8831e3e
parent: b6596ea3eee62e76029e1f27e93838a2752ffbd4 (diff)
17 files changed, 135 insertions, 124 deletions
diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected
index 57380ae..954a001 100644
--- a/tests/training/features/quantized-model/model_centers.expected
+++ b/tests/training/features/quantized-model/model_centers.expected
@@ -1,51 +1,49 @@
-Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0.          0.05892393  0.11784786
-  0.17677179]
-Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0.          0.05112316  0.10224632
-  0.15336949]
-Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048  0.          0.10682048  0.21364096
-  0.32046145]
-Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0.          0.05895937  0.11791874
-  0.17687811]
-Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849  0.          0.07256849  0.14513698
-  0.21770547]
-Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0.          0.06465847  0.12931694
-  0.19397542]
-Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0.          0.08443208  0.16886416
-  0.25329626]
-Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732  0.          0.05898732  0.11797463
-  0.17696194]
-Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961  0.          0.07298961  0.14597921
-  0.21896881]
-Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064  0.          0.05108064  0.10216128
-  0.15324192]
-Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0.          0.06064001  0.12128001
-  0.18192002]
-Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597  -0.10727985 -0.          0.10727985  0.2145597
-  0.32183957]
-Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121  0.          0.08485121  0.16970241
-  0.25455362]
-Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0.          0.06622399  0.13244799
-  0.19867198]
-Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152   0.          0.1204152   0.24083039
-  0.36124557]
-Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653  0.          0.05901653  0.11803307
-  0.17704961]
-Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925  0.          0.10402925  0.20805849
-  0.31208774]
-Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409  0.          0.05895409  0.11790817
-  0.17686225]
-Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911  0.          0.07274911  0.14549822
-  0.21824732]
-Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812  0.          0.06467812  0.12935624
-  0.19403435]
-Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0.          0.08404578  0.16809157
-  0.25213736]
-Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715  0.          0.05899715  0.11799429
-  0.17699143]
-Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0.          0.07323782  0.14647564
-  0.21971346]
-Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094  0.          0.06470094  0.12940188
-  0.19410282]
-Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0.          0.08408453  0.16816907
-  0.25225359]
+Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819  0.         0.0608819  0.1217638
+  0.1826457]
+Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263  -0.05776315  0.          0.05776315  0.1155263
+  0.17328945]
+Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326  0.         0.7877326  1.5754652
+  2.3631978]
+Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0.         0.1073667  0.2147334
+  0.3221001]
+Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637  0.          0.14607637  0.29215273
+  0.43822908]
+Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347  0.          0.07605347  0.15210694
+  0.22816041]
+Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0.          0.16543898  0.33087796
+  0.49631694]
+Tensor decoder_U_nl unique centers: [-0.3815875  -0.25439167 -0.12719584 -0.          0.12719584  0.25439167
+  0.3815875 ]
+Tensor decoder_Ux_nl unique centers: [-0.5111215  -0.34074768 -0.17037384  0.          0.17037384  0.34074768
+  0.5111215 ]
+Tensor decoder_Wc unique centers: [-0.42579597 -0.283864   -0.141932   -0.          0.141932    0.283864
+  0.42579597]
+Tensor decoder_Wcx unique centers: [-0.8375     -0.55833334 -0.27916667 -0.          0.27916667  0.55833334
+  0.8375    ]
+Tensor ff_logit_prev_W unique centers: [-70.87341  -23.624472   0.        23.624472  47.248943]
+Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292  -82.02646    0.        82.02646  164.05292
+  246.07938]
+Tensor ff_logit_ctx_W unique centers: [-240.9685  -160.64568  -80.32284    0.        80.32284  160.64568
+  240.9685 ]
+Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637   -70.750916  -35.375458   -0.         35.375458   70.750916
+  106.12637 ]
+Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0.         0.0853039  0.1706078
+  0.2559117]
+Tensor Wemb unique centers: [-0.39904252 -0.19952126  0.          0.19952126  0.39904252  0.5985638 ]
+Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0.          0.10125127  0.20250255
+  0.30375382]
+Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0.          0.15289131  0.30578262
+  0.45867392]
+Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657  0.         0.0687657  0.1375314
+  0.2062971]
+Tensor encoder_Wx unique centers: [-0.3073737  -0.20491579 -0.1024579   0.          0.1024579   0.20491579
+  0.3073737 ]
+Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657  0.          0.11439657  0.22879314
+  0.34318972]
+Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0.          0.24097262  0.48194525
+  0.72291785]
+Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0.          0.07204568  0.14409137
+  0.21613705]
+Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477  -0.13297385 -0.          0.13297385  0.2659477
+  0.39892155]
 Tensor decoder_c_tt unique centers: []
diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected
index 17620ec..2d0638e 100644
--- a/tests/training/features/quantized-model/quantized.expected
+++ b/tests/training/features/quantized-model/quantized.expected
@@ -1,10 +1,10 @@
-225.10929871
-243.58345032
-229.45071411
-224.28813171
-212.65242004
-204.06596375
-197.81690979
-190.08915710
-193.72299194
-195.20808411
+5296.80419922
+14729.64062500
+14570.66210938
+17166.55859375
+16055.21875000
+16277.48437500
+18673.34765625
+16747.37109375
+17298.72070312
+16335.72949219
diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh
index 22dd863..8318c24 100644
--- a/tests/training/features/quantized-model/test_quant_centers.sh
+++ b/tests/training/features/quantized-model/test_quant_centers.sh
@@ -16,7 +16,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3
 
diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh
index 8b55697..67019f2 100644
--- a/tests/training/features/quantized-model/test_quantmodel.sh
+++ b/tests/training/features/quantized-model/test_quantmodel.sh
@@ -16,7 +16,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \
     --log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_log.sh b/tests/training/features/quantized-model/test_quantmodel_log.sh
index f79809b..924eb4b 100644
--- a/tests/training/features/quantized-model/test_quantmodel_log.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_log.sh
@@ -16,7 +16,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 4 --quantize-log-based --quantize-optimization-steps 3 \
     --log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
index de14ffb..8dee56b 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
@@ -16,14 +16,14 @@ mkdir -p train
 
 # training with quantized bias is tricky, so we start by training a normal model first before finetuning it to the quantized space.
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \
     --log $PREFIX.log
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \
     --log $PREFIX.log
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
index fe7993e..a768ffc 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
@@ -16,7 +16,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \
     --log $PREFIX.log
diff --git a/tests/training/features/quantized-model/update.sh b/tests/training/features/quantized-model/update.sh
new file mode 100755
index 0000000..04be645
--- /dev/null
+++ b/tests/training/features/quantized-model/update.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env sh
+cp model_centers.out model_centers.expected
+cp test-center.out test-center.expected
+cp quantized-log4bit.out quantized-log4bit.expected
+cp quantized.out quantized.expected
+cp quantized-with-bias.out quantized-with-bias.expected
+cp quantized-opt.out quantized-opt.expected
diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
index ff95d90..6a6f2f3 100644
--- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh
+++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh
@@ -9,7 +9,7 @@ mkdir -p valid_add
 
 extra_opts="--no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd"
 extra_opts="$extra_opts --dim-emb 128 --dim-rnn 256 --mini-batch 16"
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
 
 #$MRT_MARIAN/marian $extra_opts \
     #-m valid_add/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
diff --git a/tests/training/restoring/validation/test_restoring_newbest_validators.sh b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
index 444599b..fa8b37a 100644
--- a/tests/training/restoring/validation/test_restoring_newbest_validators.sh
+++ b/tests/training/restoring/validation/test_restoring_newbest_validators.sh
@@ -14,7 +14,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de
 # Uncomment to re-generate the expected output
 
 #$MRT_MARIAN/marian \
-    #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \
+    #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \
     #--dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \
     #-m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
     #--disp-freq 5 --valid-freq 10 --after-batches 100 \
@@ -28,7 +28,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de
 
 
 $MRT_MARIAN/marian \
-    --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \
+    --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \
     --dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \
     -m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
     --disp-freq 5 --valid-freq 10 --after-batches 50 \
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index a29e534..f700e3c 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -9,7 +9,7 @@ mkdir -p valid_lowisbet
 
 extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none"
 extra_opts="$extra_opts --dim-emb 64 --dim-rnn 128 --mini-batch 32"
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
 
 
 # Files for the validation sets are swapped intentionally
diff --git a/tests/training/restoring/validation/test_valid_reset_stalled.sh b/tests/training/restoring/validation/test_valid_reset_stalled.sh
index e967a1f..c2c7d4b 100644
--- a/tests/training/restoring/validation/test_valid_reset_stalled.sh
+++ b/tests/training/restoring/validation/test_valid_reset_stalled.sh
@@ -27,7 +27,7 @@ $MRT_MARIAN/marian $extra_opts \
     --disp-freq 10 --valid-freq 20 --after-batches 140 --early-stopping 5 \
     --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \
     --valid-sets valid.mini.bpe.{de,en} \
-    --overwrite --keep-best \
+    --overwrite --keep-best --clip-norm 0 \
     --log valid_reset_stalled_1.log
 
 test -e valid_reset_stalled/model.npz
@@ -43,7 +43,7 @@ $MRT_MARIAN/marian $extra_opts \
     --disp-freq 10 --valid-freq 20 --after-batches 200 --early-stopping 5 --valid-reset-stalled \
     --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \
     --valid-sets valid.mini.bpe.{de,en} \
-    --overwrite --keep-best \
+    --overwrite --keep-best --clip-norm 0 \
     --log valid_reset_stalled_2.log
 
 test -e valid_reset_stalled/model.npz
diff --git a/tests/training/restoring/validation/update.sh b/tests/training/restoring/validation/update.sh
new file mode 100755
index 0000000..809fbaa
--- /dev/null
+++ b/tests/training/restoring/validation/update.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env sh
+cp valid_reset_stalled.out valid_reset_stalled.expected
+cp valid_add.out valid_add.expected
+cp valid_newbest.out valid_newbest.expected
+cp valid_stalled.out valid_stalled.expected
+cp valid_lowisbet.out valid_lowisbet.expected
diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected
index fb2d8a5..893a57d 100644
--- a/tests/training/restoring/validation/valid_add.expected
+++ b/tests/training/restoring/validation/valid_add.expected
@@ -1,15 +1,15 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 296.282 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 296.269 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 296.255 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 296.242 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 296.229 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 296.216 : new best
-[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1618 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 296.202 : new best
-[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1613 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 296.189 : new best
-[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1609 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 296.176 : new best
-[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1604 : new best
-[valid] Ep. 1 : Up. 200 : cross-entropy : 296.162 : new best
-[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1599 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 294.63 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 292.643 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 290.224 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 286.857 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 282.156 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 274.584 : new best
+[valid] Ep. 1 : Up. 120 : ce-mean-words : 9.41969 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 264.996 : new best
+[valid] Ep. 1 : Up. 140 : ce-mean-words : 9.09079 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 258.914 : new best
+[valid] Ep. 1 : Up. 160 : ce-mean-words : 8.88213 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 255.943 : new best
+[valid] Ep. 1 : Up. 180 : ce-mean-words : 8.78019 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 253.146 : new best
+[valid] Ep. 1 : Up. 200 : ce-mean-words : 8.68424 : new best
diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected
index daa223a..8a2ca20 100644
--- a/tests/training/restoring/validation/valid_lowisbet.expected
+++ b/tests/training/restoring/validation/valid_lowisbet.expected
@@ -1,7 +1,7 @@
-[valid] Ep. 1 : Up. 30 : cross-entropy : 299.128 : new best
-[valid] Ep. 2 : Up. 60 : cross-entropy : 298.528 : new best
-[valid] Ep. 3 : Up. 90 : cross-entropy : 296.43 : new best
-[valid] Ep. 4 : Up. 120 : cross-entropy : 297.912 : stalled 1 times (last best: 296.43)
-[valid] Ep. 5 : Up. 150 : cross-entropy : 297.791 : stalled 2 times (last best: 296.43)
-[valid] Ep. 6 : Up. 180 : cross-entropy : 297.654 : stalled 3 times (last best: 296.43)
-[valid] Ep. 7 : Up. 210 : cross-entropy : 297.794 : stalled 4 times (last best: 296.43)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 299.127 : new best
+[valid] Ep. 2 : Up. 60 : cross-entropy : 298.417 : new best
+[valid] Ep. 3 : Up. 90 : cross-entropy : 296.252 : new best
+[valid] Ep. 4 : Up. 120 : cross-entropy : 298.171 : stalled 1 times (last best: 296.252)
+[valid] Ep. 5 : Up. 150 : cross-entropy : 298.057 : stalled 2 times (last best: 296.252)
+[valid] Ep. 6 : Up. 180 : cross-entropy : 298.052 : stalled 3 times (last best: 296.252)
+[valid] Ep. 7 : Up. 210 : cross-entropy : 298.133 : stalled 4 times (last best: 296.252)
diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected
index d03d098..22ce219 100644
--- a/tests/training/restoring/validation/valid_newbest.expected
+++ b/tests/training/restoring/validation/valid_newbest.expected
@@ -1,20 +1,20 @@
-[valid] Ep. 1 : Up. 10 : cross-entropy : 250.506 : new best
-[valid] Ep. 1 : Up. 10 : translation : 8 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best
-[valid] Ep. 1 : Up. 20 : translation : 8 : stalled 1 times (last best: 8)
-[valid] Ep. 1 : Up. 30 : cross-entropy : 250.497 : new best
-[valid] Ep. 1 : Up. 30 : translation : 8 : stalled 2 times (last best: 8)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best
-[valid] Ep. 1 : Up. 40 : translation : 9 : new best
-[valid] Ep. 1 : Up. 50 : cross-entropy : 250.486 : new best
-[valid] Ep. 1 : Up. 50 : translation : 7 : stalled 1 times (last best: 9)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best
-[valid] Ep. 1 : Up. 60 : translation : 3 : stalled 2 times (last best: 9)
-[valid] Ep. 1 : Up. 70 : cross-entropy : 250.476 : new best
-[valid] Ep. 1 : Up. 70 : translation : 6 : stalled 3 times (last best: 9)
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best
-[valid] Ep. 1 : Up. 80 : translation : 0 : stalled 4 times (last best: 9)
-[valid] Ep. 1 : Up. 90 : cross-entropy : 250.465 : new best
-[valid] Ep. 1 : Up. 90 : translation : 9 : stalled 5 times (last best: 9)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best
-[valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9)
+[valid] Ep. 1 : Up. 10 : cross-entropy : 249.884 : new best
+[valid] Ep. 1 : Up. 10 : translation : 5 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best
+[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 5)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 248.804 : new best
+[valid] Ep. 1 : Up. 30 : translation : 3 : stalled 2 times (last best: 5)
+[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best
+[valid] Ep. 1 : Up. 40 : translation : 6 : new best
+[valid] Ep. 1 : Up. 50 : cross-entropy : 247.56 : new best
+[valid] Ep. 1 : Up. 50 : translation : 9 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best
+[valid] Ep. 1 : Up. 60 : translation : 6 : stalled 1 times (last best: 9)
+[valid] Ep. 1 : Up. 70 : cross-entropy : 246.112 : new best
+[valid] Ep. 1 : Up. 70 : translation : 8 : stalled 2 times (last best: 9)
+[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best
+[valid] Ep. 1 : Up. 80 : translation : 8 : stalled 3 times (last best: 9)
+[valid] Ep. 1 : Up. 90 : cross-entropy : 244.336 : new best
+[valid] Ep. 1 : Up. 90 : translation : 8 : stalled 4 times (last best: 9)
+[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best
+[valid] Ep. 1 : Up. 100 : translation : 8 : stalled 5 times (last best: 9)
diff --git a/tests/training/restoring/validation/valid_reset_stalled.expected b/tests/training/restoring/validation/valid_reset_stalled.expected
index eed1393..da5b590 100644
--- a/tests/training/restoring/validation/valid_reset_stalled.expected
+++ b/tests/training/restoring/validation/valid_reset_stalled.expected
@@ -1,30 +1,30 @@
 [valid] Ep. 1 : Up. 20 : translation : 333.5 : new best
 [valid] Ep. 1 : Up. 20 : valid-script : 222.3 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best
 [valid] Ep. 1 : Up. 40 : translation : 333.4 : stalled 1 times (last best: 333.5)
 [valid] Ep. 1 : Up. 40 : valid-script : 222.2 : stalled 1 times (last best: 222.3)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best
 [valid] Ep. 1 : Up. 60 : translation : 333.3 : stalled 2 times (last best: 333.5)
 [valid] Ep. 1 : Up. 60 : valid-script : 222.1 : stalled 2 times (last best: 222.3)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best
 [valid] Ep. 1 : Up. 80 : translation : 333.2 : stalled 3 times (last best: 333.5)
 [valid] Ep. 1 : Up. 80 : valid-script : 222.6 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best
 [valid] Ep. 1 : Up. 100 : translation : 333.1 : stalled 4 times (last best: 333.5)
 [valid] Ep. 1 : Up. 100 : valid-script : 222.5 : stalled 1 times (last best: 222.6)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best
 [valid] Ep. 1 : Up. 120 : translation : 333.9 : new best
 [valid] Ep. 1 : Up. 120 : valid-script : 222.4 : stalled 2 times (last best: 222.6)
-[valid] Ep. 1 : Up. 120 : cross-entropy : 250.45 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.802 : new best
 [valid] Ep. 1 : Up. 140 : translation : 333.8 : stalled 1 times (last best: 333.9)
 [valid] Ep. 1 : Up. 140 : valid-script : 222.3 : stalled 3 times (last best: 222.6)
-[valid] Ep. 1 : Up. 140 : cross-entropy : 250.441 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 237.65 : new best
 [valid] Ep. 1 : Up. 160 : translation : 333.7 : stalled 1 times (last best: 333.9)
 [valid] Ep. 1 : Up. 160 : valid-script : 222.2 : stalled 1 times (last best: 222.6)
-[valid] Ep. 1 : Up. 160 : cross-entropy : 250.43 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 233.833 : new best
 [valid] Ep. 2 : Up. 180 : translation : 333.6 : stalled 2 times (last best: 333.9)
 [valid] Ep. 2 : Up. 180 : valid-script : 222.1 : stalled 2 times (last best: 222.6)
-[valid] Ep. 2 : Up. 180 : cross-entropy : 250.42 : new best
+[valid] Ep. 2 : Up. 180 : cross-entropy : 230.035 : new best
 [valid] Ep. 2 : Up. 200 : translation : 333.5 : stalled 3 times (last best: 333.9)
 [valid] Ep. 2 : Up. 200 : valid-script : 222.6 : stalled 3 times (last best: 222.6)
-[valid] Ep. 2 : Up. 200 : cross-entropy : 250.41 : new best
+[valid] Ep. 2 : Up. 200 : cross-entropy : 227.982 : new best
author	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-11-25 17:54:26 +0300
committer	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-11-25 17:54:26 +0300
commit	a76b695b11fdb26d19a53132946f869c56d0d7a3 (patch)
tree	ed3237eb5f09126d7ad4c6a89d9dbfe0e8831e3e
parent	b6596ea3eee62e76029e1f27e93838a2752ffbd4 (diff)