diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2020-11-25 17:54:26 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2020-11-25 17:54:26 +0300 |
commit | a76b695b11fdb26d19a53132946f869c56d0d7a3 (patch) | |
tree | ed3237eb5f09126d7ad4c6a89d9dbfe0e8831e3e | |
parent | b6596ea3eee62e76029e1f27e93838a2752ffbd4 (diff) |
Update missing tests to clip-norm 0
17 files changed, 135 insertions, 124 deletions
diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected index 57380ae..954a001 100644 --- a/tests/training/features/quantized-model/model_centers.expected +++ b/tests/training/features/quantized-model/model_centers.expected @@ -1,51 +1,49 @@ -Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0. 0.05892393 0.11784786 - 0.17677179] -Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0. 0.05112316 0.10224632 - 0.15336949] -Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048 0. 0.10682048 0.21364096 - 0.32046145] -Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0. 0.05895937 0.11791874 - 0.17687811] -Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849 0. 0.07256849 0.14513698 - 0.21770547] -Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0. 0.06465847 0.12931694 - 0.19397542] -Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0. 0.08443208 0.16886416 - 0.25329626] -Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732 0. 0.05898732 0.11797463 - 0.17696194] -Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961 0. 0.07298961 0.14597921 - 0.21896881] -Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064 0. 0.05108064 0.10216128 - 0.15324192] -Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0. 0.06064001 0.12128001 - 0.18192002] -Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597 -0.10727985 -0. 0.10727985 0.2145597 - 0.32183957] -Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121 0. 0.08485121 0.16970241 - 0.25455362] -Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0. 0.06622399 0.13244799 - 0.19867198] -Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152 0. 0.1204152 0.24083039 - 0.36124557] -Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653 0. 0.05901653 0.11803307 - 0.17704961] -Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925 0. 0.10402925 0.20805849 - 0.31208774] -Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409 0. 0.05895409 0.11790817 - 0.17686225] -Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911 0. 0.07274911 0.14549822 - 0.21824732] -Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812 0. 0.06467812 0.12935624 - 0.19403435] -Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0. 0.08404578 0.16809157 - 0.25213736] -Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715 0. 0.05899715 0.11799429 - 0.17699143] -Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0. 0.07323782 0.14647564 - 0.21971346] -Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094 0. 0.06470094 0.12940188 - 0.19410282] -Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0. 0.08408453 0.16816907 - 0.25225359] +Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819 0. 0.0608819 0.1217638 + 0.1826457] +Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263 -0.05776315 0. 0.05776315 0.1155263 + 0.17328945] +Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326 0. 0.7877326 1.5754652 + 2.3631978] +Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0. 0.1073667 0.2147334 + 0.3221001] +Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637 0. 0.14607637 0.29215273 + 0.43822908] +Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347 0. 0.07605347 0.15210694 + 0.22816041] +Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0. 0.16543898 0.33087796 + 0.49631694] +Tensor decoder_U_nl unique centers: [-0.3815875 -0.25439167 -0.12719584 -0. 0.12719584 0.25439167 + 0.3815875 ] +Tensor decoder_Ux_nl unique centers: [-0.5111215 -0.34074768 -0.17037384 0. 0.17037384 0.34074768 + 0.5111215 ] +Tensor decoder_Wc unique centers: [-0.42579597 -0.283864 -0.141932 -0. 0.141932 0.283864 + 0.42579597] +Tensor decoder_Wcx unique centers: [-0.8375 -0.55833334 -0.27916667 -0. 0.27916667 0.55833334 + 0.8375 ] +Tensor ff_logit_prev_W unique centers: [-70.87341 -23.624472 0. 23.624472 47.248943] +Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292 -82.02646 0. 82.02646 164.05292 + 246.07938] +Tensor ff_logit_ctx_W unique centers: [-240.9685 -160.64568 -80.32284 0. 80.32284 160.64568 + 240.9685 ] +Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637 -70.750916 -35.375458 -0. 35.375458 70.750916 + 106.12637 ] +Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0. 0.0853039 0.1706078 + 0.2559117] +Tensor Wemb unique centers: [-0.39904252 -0.19952126 0. 0.19952126 0.39904252 0.5985638 ] +Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0. 0.10125127 0.20250255 + 0.30375382] +Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0. 0.15289131 0.30578262 + 0.45867392] +Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657 0. 0.0687657 0.1375314 + 0.2062971] +Tensor encoder_Wx unique centers: [-0.3073737 -0.20491579 -0.1024579 0. 0.1024579 0.20491579 + 0.3073737 ] +Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657 0. 0.11439657 0.22879314 + 0.34318972] +Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0. 0.24097262 0.48194525 + 0.72291785] +Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0. 0.07204568 0.14409137 + 0.21613705] +Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477 -0.13297385 -0. 0.13297385 0.2659477 + 0.39892155] Tensor decoder_c_tt unique centers: [] diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected index 17620ec..2d0638e 100644 --- a/tests/training/features/quantized-model/quantized.expected +++ b/tests/training/features/quantized-model/quantized.expected @@ -1,10 +1,10 @@ -225.10929871 -243.58345032 -229.45071411 -224.28813171 -212.65242004 -204.06596375 -197.81690979 -190.08915710 -193.72299194 -195.20808411 +5296.80419922 +14729.64062500 +14570.66210938 +17166.55859375 +16055.21875000 +16277.48437500 +18673.34765625 +16747.37109375 +17298.72070312 +16335.72949219 diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh index 22dd863..8318c24 100644 --- a/tests/training/features/quantized-model/test_quant_centers.sh +++ b/tests/training/features/quantized-model/test_quant_centers.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3 diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh index 8b55697..67019f2 100644 --- a/tests/training/features/quantized-model/test_quantmodel.sh +++ b/tests/training/features/quantized-model/test_quantmodel.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_log.sh b/tests/training/features/quantized-model/test_quantmodel_log.sh index f79809b..924eb4b 100644 --- a/tests/training/features/quantized-model/test_quantmodel_log.sh +++ b/tests/training/features/quantized-model/test_quantmodel_log.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 4 --quantize-log-based --quantize-optimization-steps 3 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh index de14ffb..8dee56b 100644 --- a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh +++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh @@ -16,14 +16,14 @@ mkdir -p train # training with quantized bias is tricky, so we start by training a normal model first before finetuning it to the quantized space. $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \ --log $PREFIX.log # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh index fe7993e..a768ffc 100644 --- a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh +++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh @@ -16,7 +16,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \ --log $PREFIX.log diff --git a/tests/training/features/quantized-model/update.sh b/tests/training/features/quantized-model/update.sh new file mode 100755 index 0000000..04be645 --- /dev/null +++ b/tests/training/features/quantized-model/update.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +cp model_centers.out model_centers.expected +cp test-center.out test-center.expected +cp quantized-log4bit.out quantized-log4bit.expected +cp quantized.out quantized.expected +cp quantized-with-bias.out quantized-with-bias.expected +cp quantized-opt.out quantized-opt.expected diff --git a/tests/training/restoring/validation/test_adding_validator_after_restart.sh b/tests/training/restoring/validation/test_adding_validator_after_restart.sh index ff95d90..6a6f2f3 100644 --- a/tests/training/restoring/validation/test_adding_validator_after_restart.sh +++ b/tests/training/restoring/validation/test_adding_validator_after_restart.sh @@ -9,7 +9,7 @@ mkdir -p valid_add extra_opts="--no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd" extra_opts="$extra_opts --dim-emb 128 --dim-rnn 256 --mini-batch 16" -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" #$MRT_MARIAN/marian $extra_opts \ #-m valid_add/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ diff --git a/tests/training/restoring/validation/test_restoring_newbest_validators.sh b/tests/training/restoring/validation/test_restoring_newbest_validators.sh index 444599b..fa8b37a 100644 --- a/tests/training/restoring/validation/test_restoring_newbest_validators.sh +++ b/tests/training/restoring/validation/test_restoring_newbest_validators.sh @@ -14,7 +14,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de # Uncomment to re-generate the expected output #$MRT_MARIAN/marian \ - #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \ + #--type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \ #--dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \ #-m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ #--disp-freq 5 --valid-freq 10 --after-batches 100 \ @@ -28,7 +28,7 @@ head -n 8 $MRT_DATA/europarl.de-en/toy.bpe.de > valid.mini.bpe.de $MRT_MARIAN/marian \ - --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation \ + --type s2s --no-shuffle --seed 2222 --maxi-batch 1 --maxi-batch-sort none --quiet-translation --clip-norm 0 \ --dim-emb 64 --dim-rnn 128 --mini-batch 16 --optimizer sgd --cost-type ce-mean \ -m valid_newbest/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 5 --valid-freq 10 --after-batches 50 \ diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh index a29e534..f700e3c 100644 --- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh +++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh @@ -9,7 +9,7 @@ mkdir -p valid_lowisbet extra_opts="--no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none" extra_opts="$extra_opts --dim-emb 64 --dim-rnn 128 --mini-batch 32" -extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false" +extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0" # Files for the validation sets are swapped intentionally diff --git a/tests/training/restoring/validation/test_valid_reset_stalled.sh b/tests/training/restoring/validation/test_valid_reset_stalled.sh index e967a1f..c2c7d4b 100644 --- a/tests/training/restoring/validation/test_valid_reset_stalled.sh +++ b/tests/training/restoring/validation/test_valid_reset_stalled.sh @@ -27,7 +27,7 @@ $MRT_MARIAN/marian $extra_opts \ --disp-freq 10 --valid-freq 20 --after-batches 140 --early-stopping 5 \ --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \ --valid-sets valid.mini.bpe.{de,en} \ - --overwrite --keep-best \ + --overwrite --keep-best --clip-norm 0 \ --log valid_reset_stalled_1.log test -e valid_reset_stalled/model.npz @@ -43,7 +43,7 @@ $MRT_MARIAN/marian $extra_opts \ --disp-freq 10 --valid-freq 20 --after-batches 200 --early-stopping 5 --valid-reset-stalled \ --valid-metrics translation valid-script cross-entropy --valid-script-path ./valid_script_ab.sh \ --valid-sets valid.mini.bpe.{de,en} \ - --overwrite --keep-best \ + --overwrite --keep-best --clip-norm 0 \ --log valid_reset_stalled_2.log test -e valid_reset_stalled/model.npz diff --git a/tests/training/restoring/validation/update.sh b/tests/training/restoring/validation/update.sh new file mode 100755 index 0000000..809fbaa --- /dev/null +++ b/tests/training/restoring/validation/update.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh +cp valid_reset_stalled.out valid_reset_stalled.expected +cp valid_add.out valid_add.expected +cp valid_newbest.out valid_newbest.expected +cp valid_stalled.out valid_stalled.expected +cp valid_lowisbet.out valid_lowisbet.expected diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected index fb2d8a5..893a57d 100644 --- a/tests/training/restoring/validation/valid_add.expected +++ b/tests/training/restoring/validation/valid_add.expected @@ -1,15 +1,15 @@ -[valid] Ep. 1 : Up. 20 : cross-entropy : 296.282 : new best -[valid] Ep. 1 : Up. 40 : cross-entropy : 296.269 : new best -[valid] Ep. 1 : Up. 60 : cross-entropy : 296.255 : new best -[valid] Ep. 1 : Up. 80 : cross-entropy : 296.242 : new best -[valid] Ep. 1 : Up. 100 : cross-entropy : 296.229 : new best -[valid] Ep. 1 : Up. 120 : cross-entropy : 296.216 : new best -[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1618 : new best -[valid] Ep. 1 : Up. 140 : cross-entropy : 296.202 : new best -[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1613 : new best -[valid] Ep. 1 : Up. 160 : cross-entropy : 296.189 : new best -[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1609 : new best -[valid] Ep. 1 : Up. 180 : cross-entropy : 296.176 : new best -[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1604 : new best -[valid] Ep. 1 : Up. 200 : cross-entropy : 296.162 : new best -[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1599 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 294.63 : new best +[valid] Ep. 1 : Up. 40 : cross-entropy : 292.643 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 290.224 : new best +[valid] Ep. 1 : Up. 80 : cross-entropy : 286.857 : new best +[valid] Ep. 1 : Up. 100 : cross-entropy : 282.156 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 274.584 : new best +[valid] Ep. 1 : Up. 120 : ce-mean-words : 9.41969 : new best +[valid] Ep. 1 : Up. 140 : cross-entropy : 264.996 : new best +[valid] Ep. 1 : Up. 140 : ce-mean-words : 9.09079 : new best +[valid] Ep. 1 : Up. 160 : cross-entropy : 258.914 : new best +[valid] Ep. 1 : Up. 160 : ce-mean-words : 8.88213 : new best +[valid] Ep. 1 : Up. 180 : cross-entropy : 255.943 : new best +[valid] Ep. 1 : Up. 180 : ce-mean-words : 8.78019 : new best +[valid] Ep. 1 : Up. 200 : cross-entropy : 253.146 : new best +[valid] Ep. 1 : Up. 200 : ce-mean-words : 8.68424 : new best diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected index daa223a..8a2ca20 100644 --- a/tests/training/restoring/validation/valid_lowisbet.expected +++ b/tests/training/restoring/validation/valid_lowisbet.expected @@ -1,7 +1,7 @@ -[valid] Ep. 1 : Up. 30 : cross-entropy : 299.128 : new best -[valid] Ep. 2 : Up. 60 : cross-entropy : 298.528 : new best -[valid] Ep. 3 : Up. 90 : cross-entropy : 296.43 : new best -[valid] Ep. 4 : Up. 120 : cross-entropy : 297.912 : stalled 1 times (last best: 296.43) -[valid] Ep. 5 : Up. 150 : cross-entropy : 297.791 : stalled 2 times (last best: 296.43) -[valid] Ep. 6 : Up. 180 : cross-entropy : 297.654 : stalled 3 times (last best: 296.43) -[valid] Ep. 7 : Up. 210 : cross-entropy : 297.794 : stalled 4 times (last best: 296.43) +[valid] Ep. 1 : Up. 30 : cross-entropy : 299.127 : new best +[valid] Ep. 2 : Up. 60 : cross-entropy : 298.417 : new best +[valid] Ep. 3 : Up. 90 : cross-entropy : 296.252 : new best +[valid] Ep. 4 : Up. 120 : cross-entropy : 298.171 : stalled 1 times (last best: 296.252) +[valid] Ep. 5 : Up. 150 : cross-entropy : 298.057 : stalled 2 times (last best: 296.252) +[valid] Ep. 6 : Up. 180 : cross-entropy : 298.052 : stalled 3 times (last best: 296.252) +[valid] Ep. 7 : Up. 210 : cross-entropy : 298.133 : stalled 4 times (last best: 296.252) diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected index d03d098..22ce219 100644 --- a/tests/training/restoring/validation/valid_newbest.expected +++ b/tests/training/restoring/validation/valid_newbest.expected @@ -1,20 +1,20 @@ -[valid] Ep. 1 : Up. 10 : cross-entropy : 250.506 : new best -[valid] Ep. 1 : Up. 10 : translation : 8 : new best -[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best -[valid] Ep. 1 : Up. 20 : translation : 8 : stalled 1 times (last best: 8) -[valid] Ep. 1 : Up. 30 : cross-entropy : 250.497 : new best -[valid] Ep. 1 : Up. 30 : translation : 8 : stalled 2 times (last best: 8) -[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best -[valid] Ep. 1 : Up. 40 : translation : 9 : new best -[valid] Ep. 1 : Up. 50 : cross-entropy : 250.486 : new best -[valid] Ep. 1 : Up. 50 : translation : 7 : stalled 1 times (last best: 9) -[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best -[valid] Ep. 1 : Up. 60 : translation : 3 : stalled 2 times (last best: 9) -[valid] Ep. 1 : Up. 70 : cross-entropy : 250.476 : new best -[valid] Ep. 1 : Up. 70 : translation : 6 : stalled 3 times (last best: 9) -[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best -[valid] Ep. 1 : Up. 80 : translation : 0 : stalled 4 times (last best: 9) -[valid] Ep. 1 : Up. 90 : cross-entropy : 250.465 : new best -[valid] Ep. 1 : Up. 90 : translation : 9 : stalled 5 times (last best: 9) -[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best -[valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9) +[valid] Ep. 1 : Up. 10 : cross-entropy : 249.884 : new best +[valid] Ep. 1 : Up. 10 : translation : 5 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best +[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 5) +[valid] Ep. 1 : Up. 30 : cross-entropy : 248.804 : new best +[valid] Ep. 1 : Up. 30 : translation : 3 : stalled 2 times (last best: 5) +[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best +[valid] Ep. 1 : Up. 40 : translation : 6 : new best +[valid] Ep. 1 : Up. 50 : cross-entropy : 247.56 : new best +[valid] Ep. 1 : Up. 50 : translation : 9 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best +[valid] Ep. 1 : Up. 60 : translation : 6 : stalled 1 times (last best: 9) +[valid] Ep. 1 : Up. 70 : cross-entropy : 246.112 : new best +[valid] Ep. 1 : Up. 70 : translation : 8 : stalled 2 times (last best: 9) +[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best +[valid] Ep. 1 : Up. 80 : translation : 8 : stalled 3 times (last best: 9) +[valid] Ep. 1 : Up. 90 : cross-entropy : 244.336 : new best +[valid] Ep. 1 : Up. 90 : translation : 8 : stalled 4 times (last best: 9) +[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best +[valid] Ep. 1 : Up. 100 : translation : 8 : stalled 5 times (last best: 9) diff --git a/tests/training/restoring/validation/valid_reset_stalled.expected b/tests/training/restoring/validation/valid_reset_stalled.expected index eed1393..da5b590 100644 --- a/tests/training/restoring/validation/valid_reset_stalled.expected +++ b/tests/training/restoring/validation/valid_reset_stalled.expected @@ -1,30 +1,30 @@ [valid] Ep. 1 : Up. 20 : translation : 333.5 : new best [valid] Ep. 1 : Up. 20 : valid-script : 222.3 : new best -[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best +[valid] Ep. 1 : Up. 20 : cross-entropy : 249.337 : new best [valid] Ep. 1 : Up. 40 : translation : 333.4 : stalled 1 times (last best: 333.5) [valid] Ep. 1 : Up. 40 : valid-script : 222.2 : stalled 1 times (last best: 222.3) -[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best +[valid] Ep. 1 : Up. 40 : cross-entropy : 248.218 : new best [valid] Ep. 1 : Up. 60 : translation : 333.3 : stalled 2 times (last best: 333.5) [valid] Ep. 1 : Up. 60 : valid-script : 222.1 : stalled 2 times (last best: 222.3) -[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best +[valid] Ep. 1 : Up. 60 : cross-entropy : 246.856 : new best [valid] Ep. 1 : Up. 80 : translation : 333.2 : stalled 3 times (last best: 333.5) [valid] Ep. 1 : Up. 80 : valid-script : 222.6 : new best -[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best +[valid] Ep. 1 : Up. 80 : cross-entropy : 245.247 : new best [valid] Ep. 1 : Up. 100 : translation : 333.1 : stalled 4 times (last best: 333.5) [valid] Ep. 1 : Up. 100 : valid-script : 222.5 : stalled 1 times (last best: 222.6) -[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best +[valid] Ep. 1 : Up. 100 : cross-entropy : 243.37 : new best [valid] Ep. 1 : Up. 120 : translation : 333.9 : new best [valid] Ep. 1 : Up. 120 : valid-script : 222.4 : stalled 2 times (last best: 222.6) -[valid] Ep. 1 : Up. 120 : cross-entropy : 250.45 : new best +[valid] Ep. 1 : Up. 120 : cross-entropy : 240.802 : new best [valid] Ep. 1 : Up. 140 : translation : 333.8 : stalled 1 times (last best: 333.9) [valid] Ep. 1 : Up. 140 : valid-script : 222.3 : stalled 3 times (last best: 222.6) -[valid] Ep. 1 : Up. 140 : cross-entropy : 250.441 : new best +[valid] Ep. 1 : Up. 140 : cross-entropy : 237.65 : new best [valid] Ep. 1 : Up. 160 : translation : 333.7 : stalled 1 times (last best: 333.9) [valid] Ep. 1 : Up. 160 : valid-script : 222.2 : stalled 1 times (last best: 222.6) -[valid] Ep. 1 : Up. 160 : cross-entropy : 250.43 : new best +[valid] Ep. 1 : Up. 160 : cross-entropy : 233.833 : new best [valid] Ep. 2 : Up. 180 : translation : 333.6 : stalled 2 times (last best: 333.9) [valid] Ep. 2 : Up. 180 : valid-script : 222.1 : stalled 2 times (last best: 222.6) -[valid] Ep. 2 : Up. 180 : cross-entropy : 250.42 : new best +[valid] Ep. 2 : Up. 180 : cross-entropy : 230.035 : new best [valid] Ep. 2 : Up. 200 : translation : 333.5 : stalled 3 times (last best: 333.9) [valid] Ep. 2 : Up. 200 : valid-script : 222.6 : stalled 3 times (last best: 222.6) -[valid] Ep. 2 : Up. 200 : cross-entropy : 250.41 : new best +[valid] Ep. 2 : Up. 200 : cross-entropy : 227.982 : new best |