Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <Marcin.Junczys-Dowmunt@microsoft.com>2019-01-25 04:19:25 +0300
committerMarcin Junczys-Dowmunt <Marcin.Junczys-Dowmunt@microsoft.com>2019-01-25 04:19:25 +0300
commit14d1cd1169e3b2ade4894ac6013ec63deca1c35d (patch)
treee91f48c73dda5adda25f3340b586bfee500f5201 /tests/training
parentb4c6ac10d2b98a06fd27bef2bb9b96ecb2e6c8e1 (diff)
adjust expected values to reflect changes in embeddings initialization
Diffstat (limited to 'tests/training')
-rw-r--r--tests/training/basics/gzip.expected10
-rw-r--r--tests/training/basics/toy.expected88
-rw-r--r--tests/training/basics/trans.expected4
-rw-r--r--tests/training/basics/valid.expected10
-rw-r--r--tests/training/cost-functions/ce-mean-words.expected16
-rw-r--r--tests/training/cost-functions/ce-mean.expected16
-rw-r--r--tests/training/cost-functions/ce-sum.expected16
-rw-r--r--tests/training/cost-functions/perplexity.expected16
-rw-r--r--tests/training/features/data-weighting/maxibatch.expected20
-rw-r--r--tests/training/features/data-weighting/sqlite.expected200
-rw-r--r--tests/training/features/data-weighting/sqlite_word.expected30
-rw-r--r--tests/training/features/data-weighting/train.expected20
-rw-r--r--tests/training/features/data-weighting/valid.expected8
-rw-r--r--tests/training/features/data-weighting/word_eos.expected34
-rw-r--r--tests/training/features/data-weighting/word_maxibatch.expected20
-rw-r--r--tests/training/features/data-weighting/word_twos.expected34
-rw-r--r--tests/training/features/exp-smoothing/expsmooth.expected20
-rw-r--r--tests/training/features/exp-smoothing/expsmooth.valid.expected20
-rw-r--r--tests/training/features/exp-smoothing/expsmooth_sync.expected20
-rw-r--r--tests/training/features/exp-smoothing/expsmooth_sync.valid.expected20
-rw-r--r--tests/training/features/guided-alignment/rnn.expected20
-rw-r--r--tests/training/features/guided-alignment/transformer.expected20
-rw-r--r--tests/training/features/mixed-ensembles/s2s_transf.expected10
-rw-r--r--tests/training/features/mixed-ensembles/two_s2s.expected10
-rw-r--r--tests/training/models/lm/lm-transformer.expected10
-rw-r--r--tests/training/models/lm/lm-transformer.scores.expected20
-rw-r--r--tests/training/models/lm/lm.expected10
-rw-r--r--tests/training/models/lm/lm.scores.expected20
-rw-r--r--tests/training/models/multi-source/multi-s2s.expected10
-rw-r--r--tests/training/models/multi-source/multi-transformer.expected10
-rw-r--r--tests/training/models/nematus/encdec_depth.expected10
-rw-r--r--tests/training/models/nematus/wmt17.expected10
-rw-r--r--tests/training/models/transformer/transformer.expected20
-rw-r--r--tests/training/multi-gpu/async_sgd.expected40
-rw-r--r--tests/training/multi-gpu/sync_sgd.expected20
-rw-r--r--tests/training/multi-gpu/sync_sgd_1gpu.expected8
-rw-r--r--tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected8
-rw-r--r--tests/training/restarting/sgd_2e.expected32
-rw-r--r--tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh2
-rw-r--r--tests/training/restoring/optimizer/adagrad.costs.expected20
-rw-r--r--tests/training/restoring/optimizer/adagrad.gt.expected4
-rw-r--r--tests/training/restoring/optimizer/adam.costs.expected20
-rw-r--r--tests/training/restoring/optimizer/adam.mt.expected4
-rw-r--r--tests/training/restoring/optimizer/adam.vt.expected4
-rw-r--r--tests/training/restoring/optimizer/adam_async.costs.expected20
-rw-r--r--tests/training/restoring/optimizer/adam_async.mt.expected2
-rw-r--r--tests/training/restoring/optimizer/adam_async.vt.expected2
-rw-r--r--tests/training/restoring/optimizer/adam_load.expected12
-rw-r--r--tests/training/restoring/optimizer/adam_sync.costs.expected20
-rw-r--r--tests/training/restoring/optimizer/adam_sync.mt.expected2
-rw-r--r--tests/training/restoring/optimizer/adam_sync.vt.expected2
-rw-r--r--tests/training/restoring/validation/valid_add.expected30
-rw-r--r--tests/training/restoring/validation/valid_lowisbet.expected21
-rw-r--r--tests/training/restoring/validation/valid_newbest.expected40
-rw-r--r--tests/training/validation/final_batch.expected6
-rw-r--r--tests/training/validation/final_epoch.expected6
-rw-r--r--tests/training/validation/final_match.expected6
57 files changed, 567 insertions, 566 deletions
diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected
index cc069b6..c6960ce 100644
--- a/tests/training/basics/gzip.expected
+++ b/tests/training/basics/gzip.expected
@@ -1,5 +1,5 @@
-447.89
-374.88
-324.98
-284.49
-248.72
+447.84643555
+374.76788330
+324.78485107
+284.22189331
+248.39517212
diff --git a/tests/training/basics/toy.expected b/tests/training/basics/toy.expected
index 1c2ef85..f5094ef 100644
--- a/tests/training/basics/toy.expected
+++ b/tests/training/basics/toy.expected
@@ -1,44 +1,44 @@
-208.03
-179.47
-212.82
-237.45
-186.91
-234.73
-209.09
-214.79
-180.34
-198.19
-145.59
-263.72
-213.26
-127.88
-118.83
-131.60
-174.33
-141.15
-178.26
-99.79
-124.64
-139.91
-211.91
-243.59
-168.14
-101.86
-115.36
-175.50
-128.12
-138.18
-123.40
-213.24
-148.44
-198.26
-135.91
-173.55
-181.04
-187.23
-129.47
-126.50
-162.25
-95.58
-133.95
-206.49
+207.97480774
+179.29321289
+212.48278809
+236.76557922
+185.84127808
+233.23265076
+208.95817566
+217.90177917
+183.83541870
+201.72744751
+148.79904175
+269.31390381
+217.85786438
+130.89492798
+121.50901794
+134.44161987
+177.97956848
+143.88101196
+181.38346863
+101.61656189
+126.42018890
+141.65258789
+213.97846985
+245.28518677
+169.17837524
+102.50040436
+115.88985443
+176.02175903
+128.50570679
+138.32833862
+123.53054047
+213.36453247
+148.53483582
+198.26582336
+135.98811340
+173.58190918
+181.12872314
+187.30583191
+129.51103210
+126.53293610
+162.27357483
+95.55107117
+133.98471069
+206.47187805
diff --git a/tests/training/basics/trans.expected b/tests/training/basics/trans.expected
index 767041c..6b7678b 100644
--- a/tests/training/basics/trans.expected
+++ b/tests/training/basics/trans.expected
@@ -1,4 +1,4 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 215.723 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 221.035 : new best
[valid] Ep. 1 : Up. 60 : translation : 1 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 188.591 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 195.984 : new best
[valid] Ep. 1 : Up. 120 : translation : 2 : new best
diff --git a/tests/training/basics/valid.expected b/tests/training/basics/valid.expected
index 3d6efdd..63aac29 100644
--- a/tests/training/basics/valid.expected
+++ b/tests/training/basics/valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 15 : cross-entropy : 308.492 : new best
+[valid] Ep. 1 : Up. 15 : cross-entropy : 308.376 : new best
[valid] Ep. 1 : Up. 15 : valid-script : 1 : new best
-[valid] Ep. 1 : Up. 30 : cross-entropy : 307.656 : new best
+[valid] Ep. 1 : Up. 30 : cross-entropy : 307.337 : new best
[valid] Ep. 1 : Up. 30 : valid-script : 2 : new best
-[valid] Ep. 1 : Up. 45 : cross-entropy : 298.9 : new best
+[valid] Ep. 1 : Up. 45 : cross-entropy : 301.132 : new best
[valid] Ep. 1 : Up. 45 : valid-script : 3 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 278.869 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 285.37 : new best
[valid] Ep. 1 : Up. 60 : valid-script : 4 : new best
-[valid] Ep. 1 : Up. 75 : cross-entropy : 268.069 : new best
+[valid] Ep. 1 : Up. 75 : cross-entropy : 275.612 : new best
[valid] Ep. 1 : Up. 75 : valid-script : 5 : new best
diff --git a/tests/training/cost-functions/ce-mean-words.expected b/tests/training/cost-functions/ce-mean-words.expected
index b0ddd93..f8e42ef 100644
--- a/tests/training/cost-functions/ce-mean-words.expected
+++ b/tests/training/cost-functions/ce-mean-words.expected
@@ -1,8 +1,8 @@
-8.48
-8.48
-8.47
-8.46
-8.44
-8.43
-8.39
-8.34
+8.48374081
+8.47330475
+8.45676517
+8.45006180
+8.42407513
+8.39919662
+8.34003067
+8.26019096
diff --git a/tests/training/cost-functions/ce-mean.expected b/tests/training/cost-functions/ce-mean.expected
index df3f498..9649b45 100644
--- a/tests/training/cost-functions/ce-mean.expected
+++ b/tests/training/cost-functions/ce-mean.expected
@@ -1,8 +1,8 @@
-205.88
-203.88
-113.27
-291.75
-151.29
-261.22
-182.64
-249.88
+205.86325073
+203.75646973
+113.10920715
+291.26306152
+150.84355164
+260.24383545
+181.52583313
+247.80560303
diff --git a/tests/training/cost-functions/ce-sum.expected b/tests/training/cost-functions/ce-sum.expected
index 14fdcca..b108bbe 100644
--- a/tests/training/cost-functions/ce-sum.expected
+++ b/tests/training/cost-functions/ce-sum.expected
@@ -1,8 +1,8 @@
-13176.05
-13048.07
-7249.52
-18671.83
-9682.47
-16718.13
-11689.00
-15992.25
+13175.24804688
+13040.41406250
+7238.98876953
+18640.82226562
+9653.98242188
+16655.56445312
+11617.62695312
+15859.46289062
diff --git a/tests/training/cost-functions/perplexity.expected b/tests/training/cost-functions/perplexity.expected
index 7840946..c97e54c 100644
--- a/tests/training/cost-functions/perplexity.expected
+++ b/tests/training/cost-functions/perplexity.expected
@@ -1,8 +1,8 @@
-4838.01416016
-4809.17578125
-4765.07958984
-4741.49853516
-4670.07226562
-4585.83740234
-4408.33154297
-4143.53173828
+4835.50488281
+4785.30371094
+4706.80761719
+4675.36181641
+4555.41650391
+4443.47412109
+4188.16210938
+3866.76611328
diff --git a/tests/training/features/data-weighting/maxibatch.expected b/tests/training/features/data-weighting/maxibatch.expected
index a192062..72fbb73 100644
--- a/tests/training/features/data-weighting/maxibatch.expected
+++ b/tests/training/features/data-weighting/maxibatch.expected
@@ -1,10 +1,10 @@
-6921.27832031
-5224.02587891
-4133.76660156
-3090.43554688
-2086.00024414
-5386.83349609
-5651.46093750
-4458.89208984
-3396.67822266
-2398.48266602
+6921.77050781
+5224.29541016
+4134.13769531
+3090.62255859
+2086.21093750
+5387.22900391
+5651.76074219
+4459.25537109
+3396.93627930
+2398.67187500
diff --git a/tests/training/features/data-weighting/sqlite.expected b/tests/training/features/data-weighting/sqlite.expected
index f572cd2..e5dc23a 100644
--- a/tests/training/features/data-weighting/sqlite.expected
+++ b/tests/training/features/data-weighting/sqlite.expected
@@ -1,100 +1,100 @@
-145.71
-407.97
-1194.81
-233.12
-1427.90
-126.27
-378.83
-97.13
-757.66
-330.25
-1602.69
-213.70
-437.12
-97.13
-466.23
-194.27
-1194.74
-145.70
-932.48
-388.53
-932.45
-621.65
-874.20
-825.62
-437.12
-155.41
-495.40
-233.12
-349.69
-320.55
-611.94
-670.22
-1777.47
-242.83
-1369.54
-135.99
-1515.31
-184.55
-815.88
-135.98
-728.46
-174.84
-407.95
-213.68
-233.13
-135.98
-291.39
-165.13
-582.84
-349.68
-961.59
-252.55
-437.11
-330.26
-437.10
-155.41
-524.51
-563.36
-495.39
-174.84
-437.06
-135.98
-495.34
-87.42
-437.08
-213.68
-58.28
-378.81
-349.66
-48.56
-990.65
-291.40
-524.50
-621.64
-1223.85
-534.22
-844.99
-291.38
-1660.95
-145.69
-1806.65
-679.91
-874.14
-77.71
-1049.04
-165.14
-611.91
-87.43
-437.06
-126.26
-262.22
-184.54
-262.25
-213.68
-1340.38
-106.84
-174.82
-116.55
-1165.53
-611.90
+145.76625061
+407.85418701
+1194.96923828
+233.20181274
+1427.79321289
+126.29446411
+378.78878784
+97.17931366
+757.44409180
+330.21224976
+1602.50830078
+213.69992065
+437.10656738
+97.17844391
+466.32504272
+194.22283936
+1194.76013184
+145.61857605
+932.43957520
+388.58657837
+932.55883789
+621.67999268
+873.99902344
+825.64160156
+437.16613770
+155.39439392
+495.47885132
+233.18109131
+349.39471436
+320.56796265
+612.07836914
+670.26416016
+1777.50268555
+242.84895325
+1369.95361328
+136.01496887
+1515.54931641
+184.50100708
+815.76287842
+135.94961548
+728.80718994
+174.92292786
+408.04016113
+213.76396179
+232.92919922
+135.99600220
+291.41558838
+165.11375427
+582.64404297
+349.64080811
+961.79394531
+252.50859070
+437.06246948
+330.19354248
+436.77810669
+155.42767334
+524.70916748
+563.38171387
+495.48129272
+174.89411926
+436.91708374
+135.98927307
+495.24850464
+87.39509583
+437.12753296
+213.65098572
+58.25393295
+378.78936768
+349.33700562
+48.53756332
+990.68878174
+291.55676270
+524.49810791
+621.72137451
+1223.91223145
+534.35400391
+845.23986816
+291.38967896
+1661.09838867
+145.75514221
+1806.95227051
+680.00848389
+874.08618164
+77.69504547
+1048.91845703
+165.15805054
+611.82751465
+87.41978455
+437.08914185
+126.29270172
+262.40588379
+184.46842957
+262.17800903
+213.70729065
+1340.37475586
+106.82720947
+174.82476807
+116.54352570
+1165.54418945
+611.97637939
diff --git a/tests/training/features/data-weighting/sqlite_word.expected b/tests/training/features/data-weighting/sqlite_word.expected
index ac21f4e..3f43848 100644
--- a/tests/training/features/data-weighting/sqlite_word.expected
+++ b/tests/training/features/data-weighting/sqlite_word.expected
@@ -1,15 +1,15 @@
-861.56
-727.99
-630.17
-553.85
-469.53
-387.44
-299.74
-257.14
-828.24
-706.26
-615.01
-536.82
-454.13
-369.41
-274.20
+861.55938721
+727.98687744
+630.16650391
+553.84191895
+469.53985596
+387.43038940
+299.73706055
+257.13076782
+828.24053955
+706.25305176
+615.00897217
+536.81494141
+454.13830566
+369.39624023
+274.19537354
diff --git a/tests/training/features/data-weighting/train.expected b/tests/training/features/data-weighting/train.expected
index 6d1a7ec..cd3f965 100644
--- a/tests/training/features/data-weighting/train.expected
+++ b/tests/training/features/data-weighting/train.expected
@@ -1,10 +1,10 @@
-473.66
-495.60
-504.09
-498.92
-494.94
-475.42
-518.79
-495.73
-467.42
-523.65
+473.66577148
+495.55804443
+504.03683472
+498.77752686
+494.78051758
+475.22576904
+518.50439453
+495.43505859
+467.06631470
+523.22912598
diff --git a/tests/training/features/data-weighting/valid.expected b/tests/training/features/data-weighting/valid.expected
index 6c8f80f..6afb154 100644
--- a/tests/training/features/data-weighting/valid.expected
+++ b/tests/training/features/data-weighting/valid.expected
@@ -1,8 +1,8 @@
-[valid] Ep. 2 : Up. 15 : cross-entropy : 277.05 : new best
+[valid] Ep. 2 : Up. 15 : cross-entropy : 277.031 : new best
[valid] Ep. 2 : Up. 15 : valid-script : 1 : new best
-[valid] Ep. 3 : Up. 30 : cross-entropy : 277.023 : new best
+[valid] Ep. 3 : Up. 30 : cross-entropy : 276.978 : new best
[valid] Ep. 3 : Up. 30 : valid-script : 2 : new best
-[valid] Ep. 4 : Up. 45 : cross-entropy : 276.973 : new best
+[valid] Ep. 4 : Up. 45 : cross-entropy : 276.894 : new best
[valid] Ep. 4 : Up. 45 : valid-script : 3 : new best
-[valid] Ep. 4 : Up. 50 : cross-entropy : 276.948 : new best
+[valid] Ep. 4 : Up. 50 : cross-entropy : 276.855 : new best
[valid] Ep. 4 : Up. 50 : valid-script : 4 : new best
diff --git a/tests/training/features/data-weighting/word_eos.expected b/tests/training/features/data-weighting/word_eos.expected
index 646aa2d..6965f3a 100644
--- a/tests/training/features/data-weighting/word_eos.expected
+++ b/tests/training/features/data-weighting/word_eos.expected
@@ -1,17 +1,17 @@
-Ep. 1 : Up. 5 : Sen. 320 : Cost 855.93
-Ep. 1 : Up. 10 : Sen. 640 : Cost 704.64
-Ep. 1 : Up. 15 : Sen. 960 : Cost 603.98
-Ep. 1 : Up. 20 : Sen. 1280 : Cost 518.20
-Ep. 1 : Up. 25 : Sen. 1600 : Cost 442.31
-Ep. 1 : Up. 30 : Sen. 1920 : Cost 367.15
-Ep. 1 : Up. 35 : Sen. 2240 : Cost 301.41
-Ep. 1 : Up. 40 : Sen. 2560 : Cost 230.01
-Ep. 2 : Up. 45 : Sen. 64 : Cost 340.51
-Ep. 2 : Up. 50 : Sen. 384 : Cost 818.49
-Ep. 2 : Up. 55 : Sen. 704 : Cost 681.48
-Ep. 2 : Up. 60 : Sen. 1024 : Cost 586.95
-Ep. 2 : Up. 65 : Sen. 1344 : Cost 500.93
-Ep. 2 : Up. 70 : Sen. 1664 : Cost 427.23
-Ep. 2 : Up. 75 : Sen. 1984 : Cost 354.26
-Ep. 2 : Up. 80 : Sen. 2304 : Cost 287.55
-Ep. 2 : Up. 85 : Sen. 2624 : Cost 212.82
+Ep. 1 : Up. 5 : Sen. 320 : Cost 855.91491699
+Ep. 1 : Up. 10 : Sen. 640 : Cost 704.62048340
+Ep. 1 : Up. 15 : Sen. 960 : Cost 603.96533203
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 518.18493652
+Ep. 1 : Up. 25 : Sen. 1,600 : Cost 442.29742432
+Ep. 1 : Up. 30 : Sen. 1,920 : Cost 367.12915039
+Ep. 1 : Up. 35 : Sen. 2,240 : Cost 301.38732910
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 229.98681641
+Ep. 2 : Up. 45 : Sen. 64 : Cost 340.48742676
+Ep. 2 : Up. 50 : Sen. 384 : Cost 818.48626709
+Ep. 2 : Up. 55 : Sen. 704 : Cost 681.45458984
+Ep. 2 : Up. 60 : Sen. 1,024 : Cost 586.93054199
+Ep. 2 : Up. 65 : Sen. 1,344 : Cost 500.92144775
+Ep. 2 : Up. 70 : Sen. 1,664 : Cost 427.21176147
+Ep. 2 : Up. 75 : Sen. 1,984 : Cost 354.24258423
+Ep. 2 : Up. 80 : Sen. 2,304 : Cost 287.52239990
+Ep. 2 : Up. 85 : Sen. 2,624 : Cost 212.79341125
diff --git a/tests/training/features/data-weighting/word_maxibatch.expected b/tests/training/features/data-weighting/word_maxibatch.expected
index 5a0b271..72a1404 100644
--- a/tests/training/features/data-weighting/word_maxibatch.expected
+++ b/tests/training/features/data-weighting/word_maxibatch.expected
@@ -1,10 +1,10 @@
-493.09
-355.25
-272.68
-199.14
-131.11
-385.98
-388.26
-296.74
-220.32
-151.99
+493.10702515
+355.26889038
+272.68411255
+199.14933777
+131.11528015
+386.00247192
+388.27536011
+296.74649048
+220.32846069
+151.99040222
diff --git a/tests/training/features/data-weighting/word_twos.expected b/tests/training/features/data-weighting/word_twos.expected
index 9525f5e..24ed38d 100644
--- a/tests/training/features/data-weighting/word_twos.expected
+++ b/tests/training/features/data-weighting/word_twos.expected
@@ -1,17 +1,17 @@
-Ep. 1 : Up. 5 : Sen. 320 : Cost 846.21 :
-Ep. 1 : Up. 10 : Sen. 640 : Cost 694.93 :
-Ep. 1 : Up. 15 : Sen. 960 : Cost 594.27 :
-Ep. 1 : Up. 20 : Sen. 1280 : Cost 508.49 :
-Ep. 1 : Up. 25 : Sen. 1600 : Cost 432.60 :
-Ep. 1 : Up. 30 : Sen. 1920 : Cost 357.44 :
-Ep. 1 : Up. 35 : Sen. 2240 : Cost 291.70 :
-Ep. 1 : Up. 40 : Sen. 2560 : Cost 220.30 :
-Ep. 2 : Up. 45 : Sen. 64 : Cost 330.81 :
-Ep. 2 : Up. 50 : Sen. 384 : Cost 808.78 :
-Ep. 2 : Up. 55 : Sen. 704 : Cost 671.76 :
-Ep. 2 : Up. 60 : Sen. 1024 : Cost 577.24 :
-Ep. 2 : Up. 65 : Sen. 1344 : Cost 491.22 :
-Ep. 2 : Up. 70 : Sen. 1664 : Cost 417.52 :
-Ep. 2 : Up. 75 : Sen. 1984 : Cost 344.56 :
-Ep. 2 : Up. 80 : Sen. 2304 : Cost 277.84 :
-Ep. 2 : Up. 85 : Sen. 2624 : Cost 203.11 :
+Ep. 1 : Up. 5 : Sen. 320 : Cost 846.21844482 :
+Ep. 1 : Up. 10 : Sen. 640 : Cost 694.92529297 :
+Ep. 1 : Up. 15 : Sen. 960 : Cost 594.27014160 :
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 508.49014282 :
+Ep. 1 : Up. 25 : Sen. 1,600 : Cost 432.60327148 :
+Ep. 1 : Up. 30 : Sen. 1,920 : Cost 357.43505859 :
+Ep. 1 : Up. 35 : Sen. 2,240 : Cost 291.69354248 :
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 220.29316711 :
+Ep. 2 : Up. 45 : Sen. 64 : Cost 330.79293823 :
+Ep. 2 : Up. 50 : Sen. 384 : Cost 808.79095459 :
+Ep. 2 : Up. 55 : Sen. 704 : Cost 671.76074219 :
+Ep. 2 : Up. 60 : Sen. 1,024 : Cost 577.23736572 :
+Ep. 2 : Up. 65 : Sen. 1,344 : Cost 491.22949219 :
+Ep. 2 : Up. 70 : Sen. 1,664 : Cost 417.52026367 :
+Ep. 2 : Up. 75 : Sen. 1,984 : Cost 344.55108643 :
+Ep. 2 : Up. 80 : Sen. 2,304 : Cost 277.83230591 :
+Ep. 2 : Up. 85 : Sen. 2,624 : Cost 203.10263062 :
diff --git a/tests/training/features/exp-smoothing/expsmooth.expected b/tests/training/features/exp-smoothing/expsmooth.expected
index 4f8d8ab..39b5a3f 100644
--- a/tests/training/features/exp-smoothing/expsmooth.expected
+++ b/tests/training/features/exp-smoothing/expsmooth.expected
@@ -1,10 +1,10 @@
-Ep. 1 : Up. 20 : Sen. 80 : Cost 187.23
-Ep. 1 : Up. 40 : Sen. 160 : Cost 158.39
-Ep. 1 : Up. 60 : Sen. 240 : Cost 187.37
-Ep. 1 : Up. 80 : Sen. 320 : Cost 189.81
-Ep. 1 : Up. 100 : Sen. 400 : Cost 195.45
-Ep. 1 : Up. 120 : Sen. 480 : Cost 201.01
-Ep. 1 : Up. 140 : Sen. 560 : Cost 192.11
-Ep. 1 : Up. 160 : Sen. 640 : Cost 140.86
-Ep. 1 : Up. 180 : Sen. 720 : Cost 181.89
-Ep. 1 : Up. 200 : Sen. 800 : Cost 176.45
+Ep. 1 : Up. 20 : Sen. 80 : Cost 188.28628540
+Ep. 1 : Up. 40 : Sen. 160 : Cost 158.74415588
+Ep. 1 : Up. 60 : Sen. 240 : Cost 186.73446655
+Ep. 1 : Up. 80 : Sen. 320 : Cost 187.43354797
+Ep. 1 : Up. 100 : Sen. 400 : Cost 191.98048401
+Ep. 1 : Up. 120 : Sen. 480 : Cost 197.26301575
+Ep. 1 : Up. 140 : Sen. 560 : Cost 188.48814392
+Ep. 1 : Up. 160 : Sen. 640 : Cost 138.19192505
+Ep. 1 : Up. 180 : Sen. 720 : Cost 178.13829041
+Ep. 1 : Up. 200 : Sen. 800 : Cost 172.75942993
diff --git a/tests/training/features/exp-smoothing/expsmooth.valid.expected b/tests/training/features/exp-smoothing/expsmooth.valid.expected
index 9bb3c8e..c9cf296 100644
--- a/tests/training/features/exp-smoothing/expsmooth.valid.expected
+++ b/tests/training/features/exp-smoothing/expsmooth.valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 220.959 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 204.698 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 198.919 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 195.038 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 192.552 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 191.119 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 190.45 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 188.617 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 188.614 : new best
-[valid] Ep. 1 : Up. 200 : cross-entropy : 187.96 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 222.122 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 204.974 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 198.165 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 193.006 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 189.999 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 188.414 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 187.68 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 185.726 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 185.776 : stalled 1 times (last best: 185.726)
+[valid] Ep. 1 : Up. 200 : cross-entropy : 185.164 : new best
diff --git a/tests/training/features/exp-smoothing/expsmooth_sync.expected b/tests/training/features/exp-smoothing/expsmooth_sync.expected
index 4c3f145..60c7b0f 100644
--- a/tests/training/features/exp-smoothing/expsmooth_sync.expected
+++ b/tests/training/features/exp-smoothing/expsmooth_sync.expected
@@ -1,10 +1,10 @@
-Ep. 1 : Up. 20 : Sen. 80 : Cost 181.71638489
-Ep. 1 : Up. 40 : Sen. 160 : Cost 156.81752014
-Ep. 1 : Up. 60 : Sen. 240 : Cost 186.68283081
-Ep. 1 : Up. 80 : Sen. 320 : Cost 189.41621399
-Ep. 1 : Up. 100 : Sen. 400 : Cost 195.21343994
-Ep. 1 : Up. 120 : Sen. 480 : Cost 200.84815979
-Ep. 1 : Up. 140 : Sen. 560 : Cost 191.96876526
-Ep. 1 : Up. 160 : Sen. 640 : Cost 140.72854614
-Ep. 1 : Up. 180 : Sen. 720 : Cost 181.82040405
-Ep. 1 : Up. 200 : Sen. 800 : Cost 176.37911987
+Ep. 1 : Up. 20 : Sen. 80 : Cost 182.65203857
+Ep. 1 : Up. 40 : Sen. 160 : Cost 155.88166809
+Ep. 1 : Up. 60 : Sen. 240 : Cost 184.18322754
+Ep. 1 : Up. 80 : Sen. 320 : Cost 186.25080872
+Ep. 1 : Up. 100 : Sen. 400 : Cost 191.62405396
+Ep. 1 : Up. 120 : Sen. 480 : Cost 197.04354858
+Ep. 1 : Up. 140 : Sen. 560 : Cost 188.32565308
+Ep. 1 : Up. 160 : Sen. 640 : Cost 138.54452515
+Ep. 1 : Up. 180 : Sen. 720 : Cost 178.15878296
+Ep. 1 : Up. 200 : Sen. 800 : Cost 172.65933228
diff --git a/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected b/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
index af8a1cd..990b5bd 100644
--- a/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
+++ b/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 216.966 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 203.05 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 197.959 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 194.409 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 192.099 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 190.768 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 190.158 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 188.364 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 188.391 : stalled 1 times (last best: 188.364)
-[valid] Ep. 1 : Up. 200 : cross-entropy : 187.76 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 217.366 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 201.761 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 195.803 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 191.88 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 189.405 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 188.01 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 187.335 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 185.499 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 185.617 : stalled 1 times (last best: 185.499)
+[valid] Ep. 1 : Up. 200 : cross-entropy : 184.961 : new best
diff --git a/tests/training/features/guided-alignment/rnn.expected b/tests/training/features/guided-alignment/rnn.expected
index e83b9c0..7c0ba57 100644
--- a/tests/training/features/guided-alignment/rnn.expected
+++ b/tests/training/features/guided-alignment/rnn.expected
@@ -1,10 +1,10 @@
-237.24304199
-258.95599365
-244.84423828
-237.89193726
-223.12332153
-201.88925171
-230.43579102
-200.95161438
-210.63932800
-197.50350952
+237.17997742
+258.60153198
+244.89590454
+238.79472351
+224.54911804
+202.88813782
+231.55581665
+201.81591797
+211.45260620
+198.07586670
diff --git a/tests/training/features/guided-alignment/transformer.expected b/tests/training/features/guided-alignment/transformer.expected
index d9b602c..54b713d 100644
--- a/tests/training/features/guided-alignment/transformer.expected
+++ b/tests/training/features/guided-alignment/transformer.expected
@@ -1,10 +1,10 @@
-233.14053345
-244.46212769
-223.61355591
-218.63639832
-208.97558594
-189.97067261
-216.38546753
-189.31729126
-197.90573120
-185.54168701
+235.15219116
+248.11796570
+226.96292114
+220.31608582
+210.20214844
+190.91659546
+216.90063477
+189.43588257
+197.92109680
+185.81344604
diff --git a/tests/training/features/mixed-ensembles/s2s_transf.expected b/tests/training/features/mixed-ensembles/s2s_transf.expected
index 6f6b65e..4509e3d 100644
--- a/tests/training/features/mixed-ensembles/s2s_transf.expected
+++ b/tests/training/features/mixed-ensembles/s2s_transf.expected
@@ -1,5 +1,5 @@
-zwec@@ Briti@@ angelegt utter@@ utter@@ utter@@ sage@@ sage@@ sage@@ sage@@ sage@@ sage@@ sage@@ wasser Ob@@ feste feste feste feste feste feste feste feste feste seine seine Mensch@@ cau@@ Berücksichtigung Berücksichtigung Berücksichtigung Corn@@ Briti@@ Corn@@ Briti@@ Briti@@ Corn@@ Briti@@ Corn@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@
-seine seine seine Guant@@ Guant@@ Guant@@ Gestaltung sage@@ sage@@ sage@@ sage@@ sage@@ seine Guant@@ Guant@@ wasser Gestaltung Gestaltung Gestaltung feste feste feste feste seine seine seine seine cau@@ cau@@ cau@@ cau@@ cau@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@ Corn@@ Briti@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ Briti@@
-zwec@@ seine seine Guant@@ Guant@@ Guant@@ Guant@@ seine seine seine seine seine seine Guant@@ Guant@@ Guant@@ feste Gestaltung feste feste feste feste feste seine seine seine seine seine cau@@ cau@@ cau@@ Briti@@ Corn@@ Briti@@ Briti@@ Briti@@ Briti@@ Corn@@ Briti@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@
-cau@@ Briti@@ Briti@@ Guant@@ Guant@@ Guant@@ Gestaltung sage@@ sage@@ sage@@ sage@@ sage@@ Guant@@ Guant@@ Guant@@ wasser Gestaltung Gestaltung Gestaltung feste feste feste feste feste feste seine cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ cau@@ Briti@@ cau@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@ Briti@@
-lich lich lich lich Milosevic Thomas Thomas lich lich lich lich lich sprüch@@ laufen strong wasser Last lich lich lich Bösch Bösch Bösch lich
+Euroraum ichtungs@@ Betra@@ Ausgabe Herkunfts@@ absor@@ trieb utter@@ Ehre Berücksichtigung mental Elek@@ apo@@ Ob@@ angenommene Arch@@ ärz@@ Krise cau@@ Gestaltung Ärmsten aga@@ lich Matthe@@ seine Mensch@@ Matthe@@ seine saving Delors cable cau@@ Ok@@ begeg@@ gau Institutionen klau@@ Monate Krise technologie lich Matthe@@ seine ambiguity Monate
+kleiner nei@@ übt Stoffen sage@@ Stoffen Caudron sage@@ shi@@ sage@@ shi@@ sage@@ electronic sage@@ ssion Kind Gestaltung angelegt genug berechtigt konsequ@@ klau@@ angelegt genug Himmel sage@@ Agrarausgaben seine Reding Juncker öpft Bese@@ sage@@ Agrarausgaben seine cau@@ Briti@@ vig@@ begann DG schlüssi@@ unglaub@@ cau@@ Briti@@ cap@@ Arch@@ chtnis chtnis
+ffenen trou@@ illegaler wasser χ@@ festgesetzt serve Thomas ssion Arbeitnehmern pus@@ child Erinn@@ laufen Arbeitszeit@@ Erd@@ will@@ wasser Arbeitszeit@@ Erd@@ will@@ omen verbessert mental Russ@@ berechtigt verbessert Stadium tolerance Russ@@ berechtigt Angel@@ mining schlüssi@@ Erd@@ hochwertigen seine Dayton Demonstranten Arch@@ unglaub@@ cau@@ Erinn@@ Erd@@ hochwertigen
+cau@@ Briti@@ Mutterschafts@@ vergeben anspru@@ geforderten gigan@@ Gestaltung heik@@ See@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ öder@@ herunter@@ Vorgänger öder@@ herunter@@ Vorgänger Arbeitnehmern cau@@ Ausgabe Herkunfts@@ cau@@ Ausgabe Herkunfts@@ cau@@ Ausgabe völk@@ gro@@ See@@ dezentr@@ Gestaltung
+festgesetzt gezogen finden monisierung disku@@ wasser tief@@ Gestaltung Herkunfts@@ Beklei@@ bewußt wasser tief@@ Gestaltung Herkunfts@@ Beklei@@ seine Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@
diff --git a/tests/training/features/mixed-ensembles/two_s2s.expected b/tests/training/features/mixed-ensembles/two_s2s.expected
index bbda134..ac69148 100644
--- a/tests/training/features/mixed-ensembles/two_s2s.expected
+++ b/tests/training/features/mixed-ensembles/two_s2s.expected
@@ -1,5 +1,5 @@
-cla@@ cam@@ implementation Son@@ goti@@ Spezial@@ Mitbürger entwicklungen character@@ Kür@@ 280 exclude Mitbürger out asser@@ Mon@@ Mon@@ spartei thodo@@ Mitbürger entwicklungen character@@ Kür@@ 280 exclude Mitbürger out asser@@ Mon@@ Mon@@ spartei thodo@@ Mitbürger entwicklungen character@@ gemel@@ Main@@ Spezial@@ Mitbürger entwicklungen character@@ Kür@@ 280 exclude Mitbürger
-Net@@ ph@@ Tajani mid Stresst@@ sobald ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ ies Tajani mid Stresst@@ sobald ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ patenti@@ spätestens verletz@@ vent@@ entwicklungen
-PE@@ wohnen Vide@@ 280 280 280 280 Temper@@ ers relevanten bar modernisieren Verursacher@@ öhnung rückgängig ers danach lition breiten tel@@ tel@@ holders Temper@@ Eigentumsrechte danach lition breiten Eigentumsrechte danach lition breiten Eigentumsrechte breiten tel@@ tel@@ tel@@ holders öhnung 280 280 280 Temper@@ ers relevanten bar
-Vertra@@ Vertra@@ Vertra@@ Vertra@@ Vertra@@ Vertra@@ Vertra@@ live commer@@ wettbewerbsfähig@@ ausgezeichnet l carriers Embargo ion@@ stoff@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@ luxembur@@ Vertra@@ Ängste track fingerprin@@
-Trieb@@ wählen te Geste zukommen herrschte Optionen justizi@@ Weiter@@ Image destab@@ Bekannt@@ herrschte Optionen justizi@@ Weiter@@ Image destab@@ Bekannt@@ herrschte Optionen ähnliches Kie@@ öhnung
+cla@@ cam@@ Berichterstatter again sad Son@@ goti@@ Spezial@@ Mitbürger entwicklungen character@@ Kür@@ 280 exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude SLIM Summit natural
+Net@@ ph@@ Tajani mid Stresst@@ sobald ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ zurückzuziehen
+PE@@ wohnen Vide@@ 280 280 280 Temper@@ ers relevanten bar modernisieren Verursacher@@ öhnung rückgängig ers danach lition breiten Eigentumsrechte danach lition breiten Eigentumsrechte breiten tel@@ tel@@ tel@@ holders öhnung gentle@@ Verzö@@ Machth@@ möchte ers relevanten bar modernisieren Verursacher@@ öhnung rückgängig ers danach lition breiten tel@@
+initiative track fingerprin@@ digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit
+Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil Unternehmer@@ auch plat@@ eingesetzten austausch womöglich Sprache
diff --git a/tests/training/models/lm/lm-transformer.expected b/tests/training/models/lm/lm-transformer.expected
index 30093eb..8db5d5c 100644
--- a/tests/training/models/lm/lm-transformer.expected
+++ b/tests/training/models/lm/lm-transformer.expected
@@ -1,5 +1,5 @@
-403.40
-291.95
-215.86
-151.40
-86.24
+405.37481689
+294.14898682
+217.30960083
+152.40402222
+86.83746338
diff --git a/tests/training/models/lm/lm-transformer.scores.expected b/tests/training/models/lm/lm-transformer.scores.expected
index c3cc9e9..a996d13 100644
--- a/tests/training/models/lm/lm-transformer.scores.expected
+++ b/tests/training/models/lm/lm-transformer.scores.expected
@@ -1,10 +1,10 @@
--111.429642
--202.879257
--99.488770
--232.767487
--648.426636
--151.236145
--385.781769
--60.357460
--89.177971
--104.630791
+-112.025002
+-202.339981
+-99.826500
+-232.753708
+-643.735596
+-151.965912
+-383.419220
+-60.465172
+-89.117729
+-105.327499
diff --git a/tests/training/models/lm/lm.expected b/tests/training/models/lm/lm.expected
index ad2275c..dd06692 100644
--- a/tests/training/models/lm/lm.expected
+++ b/tests/training/models/lm/lm.expected
@@ -1,5 +1,5 @@
-410.28
-307.48
-234.20
-159.79
-89.71
+410.21221924
+307.32592773
+234.65556335
+165.87971497
+93.67483521
diff --git a/tests/training/models/lm/lm.scores.expected b/tests/training/models/lm/lm.scores.expected
index f36053b..53bf2c6 100644
--- a/tests/training/models/lm/lm.scores.expected
+++ b/tests/training/models/lm/lm.scores.expected
@@ -1,10 +1,10 @@
--110.248581
--196.816528
--99.821030
--227.068649
--617.119629
--148.625153
--366.640015
--59.929665
--89.517532
--107.458366
+-119.056870
+-209.617035
+-104.773262
+-245.013489
+-663.487915
+-160.193527
+-396.427094
+-62.445087
+-94.592773
+-110.077751
diff --git a/tests/training/models/multi-source/multi-s2s.expected b/tests/training/models/multi-source/multi-s2s.expected
index abad07e..517f88e 100644
--- a/tests/training/models/multi-source/multi-s2s.expected
+++ b/tests/training/models/multi-source/multi-s2s.expected
@@ -1,5 +1,5 @@
-389.91
-286.00
-204.95
-142.47
-80.15
+389.64587402
+284.92941284
+207.10832214
+145.49560547
+82.26767731
diff --git a/tests/training/models/multi-source/multi-transformer.expected b/tests/training/models/multi-source/multi-transformer.expected
index d6a7c6b..09c3608 100644
--- a/tests/training/models/multi-source/multi-transformer.expected
+++ b/tests/training/models/multi-source/multi-transformer.expected
@@ -1,5 +1,5 @@
-383.53
-276.46
-205.33
-145.79
-82.39
+383.92016602
+276.54284668
+205.36067200
+145.78311157
+82.33700562
diff --git a/tests/training/models/nematus/encdec_depth.expected b/tests/training/models/nematus/encdec_depth.expected
index cac301e..94c4672 100644
--- a/tests/training/models/nematus/encdec_depth.expected
+++ b/tests/training/models/nematus/encdec_depth.expected
@@ -1,5 +1,5 @@
-475.32
-452.59
-431.17
-414.60
-401.03
+475.38677979
+452.63513184
+431.22705078
+414.68640137
+401.01882935
diff --git a/tests/training/models/nematus/wmt17.expected b/tests/training/models/nematus/wmt17.expected
index 0e1dd9d..59e15b5 100644
--- a/tests/training/models/nematus/wmt17.expected
+++ b/tests/training/models/nematus/wmt17.expected
@@ -1,5 +1,5 @@
-475.17675781
-452.62014771
-431.33758545
-414.96319580
-401.58618164
+475.19924927
+452.67333984
+431.39782715
+415.06884766
+401.69699097
diff --git a/tests/training/models/transformer/transformer.expected b/tests/training/models/transformer/transformer.expected
index 5bc6a78..c181f85 100644
--- a/tests/training/models/transformer/transformer.expected
+++ b/tests/training/models/transformer/transformer.expected
@@ -1,10 +1,10 @@
-226.90
-250.85
-244.19
-247.59
-242.19
-238.91
-236.10
-231.21
-237.86
-241.79
+226.87113953
+250.82170105
+244.17037964
+247.56513977
+242.15576172
+238.88996887
+236.09550476
+231.19555664
+237.84509277
+241.79611206
diff --git a/tests/training/multi-gpu/async_sgd.expected b/tests/training/multi-gpu/async_sgd.expected
index 009aae1..5578a20 100644
--- a/tests/training/multi-gpu/async_sgd.expected
+++ b/tests/training/multi-gpu/async_sgd.expected
@@ -1,20 +1,20 @@
-Ep. 1 : Up. 20 : Sen. 1280 : Cost 242.20
-Ep. 1 : Up. 40 : Sen. 2560 : Cost 236.00
-Ep. 1 : Up. 60 : Sen. 3840 : Cost 233.30
-Ep. 1 : Up. 80 : Sen. 5120 : Cost 221.12
-Ep. 1 : Up. 100 : Sen. 6400 : Cost 205.75
-Ep. 1 : Up. 120 : Sen. 7680 : Cost 211.64
-Ep. 1 : Up. 140 : Sen. 8960 : Cost 201.38
-Ep. 1 : Up. 160 : Sen. 10240 : Cost 191.39
-Ep. 1 : Up. 180 : Sen. 11520 : Cost 188.53
-Ep. 1 : Up. 200 : Sen. 12800 : Cost 189.84
-Ep. 1 : Up. 220 : Sen. 14080 : Cost 195.90
-Ep. 1 : Up. 240 : Sen. 15360 : Cost 187.79
-Ep. 1 : Up. 260 : Sen. 16640 : Cost 187.15
-Ep. 1 : Up. 280 : Sen. 17920 : Cost 180.46
-Ep. 1 : Up. 300 : Sen. 19200 : Cost 169.73
-Ep. 1 : Up. 320 : Sen. 20480 : Cost 179.74
-Ep. 1 : Up. 340 : Sen. 21760 : Cost 177.60
-Ep. 1 : Up. 360 : Sen. 23040 : Cost 179.01
-Ep. 1 : Up. 380 : Sen. 24320 : Cost 177.33
-Ep. 1 : Up. 400 : Sen. 25600 : Cost 176.74
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 242.11746216
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 235.72708130
+Ep. 1 : Up. 60 : Sen. 3,840 : Cost 236.84497070
+Ep. 1 : Up. 80 : Sen. 5,120 : Cost 226.40872192
+Ep. 1 : Up. 100 : Sen. 6,400 : Cost 210.93917847
+Ep. 1 : Up. 120 : Sen. 7,680 : Cost 216.88357544
+Ep. 1 : Up. 140 : Sen. 8,960 : Cost 206.62683105
+Ep. 1 : Up. 160 : Sen. 10,240 : Cost 196.11888123
+Ep. 1 : Up. 180 : Sen. 11,520 : Cost 193.01203918
+Ep. 1 : Up. 200 : Sen. 12,800 : Cost 194.11386108
+Ep. 1 : Up. 220 : Sen. 14,080 : Cost 199.61235046
+Ep. 1 : Up. 240 : Sen. 15,360 : Cost 191.05105591
+Ep. 1 : Up. 260 : Sen. 16,640 : Cost 190.15347290
+Ep. 1 : Up. 280 : Sen. 17,920 : Cost 182.99386597
+Ep. 1 : Up. 300 : Sen. 19,200 : Cost 172.30728149
+Ep. 1 : Up. 320 : Sen. 20,480 : Cost 181.16923523
+Ep. 1 : Up. 340 : Sen. 21,760 : Cost 179.05541992
+Ep. 1 : Up. 360 : Sen. 23,040 : Cost 180.18623352
+Ep. 1 : Up. 380 : Sen. 24,320 : Cost 178.12226868
+Ep. 1 : Up. 400 : Sen. 25,600 : Cost 177.11029053
diff --git a/tests/training/multi-gpu/sync_sgd.expected b/tests/training/multi-gpu/sync_sgd.expected
index 4722434..4167667 100644
--- a/tests/training/multi-gpu/sync_sgd.expected
+++ b/tests/training/multi-gpu/sync_sgd.expected
@@ -1,10 +1,10 @@
-216.84594727
-188.32144165
-177.22477722
-191.40454102
-205.81669617
-211.58016968
-200.58798218
-221.50346375
-210.13241577
-219.71794128
+216.98922729
+189.01902771
+178.41192627
+192.66134644
+206.70254517
+212.32287598
+201.16146851
+222.01089478
+210.61257935
+220.22055054
diff --git a/tests/training/multi-gpu/sync_sgd_1gpu.expected b/tests/training/multi-gpu/sync_sgd_1gpu.expected
index 20a7e2a..7940fff 100644
--- a/tests/training/multi-gpu/sync_sgd_1gpu.expected
+++ b/tests/training/multi-gpu/sync_sgd_1gpu.expected
@@ -1,4 +1,4 @@
-236.77311707
-197.57308960
-199.66569519
-184.37145996
+236.82699585
+197.64729309
+199.82775879
+184.65493774
diff --git a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
index 20a7e2a..7940fff 100644
--- a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
+++ b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
@@ -1,4 +1,4 @@
-236.77311707
-197.57308960
-199.66569519
-184.37145996
+236.82699585
+197.64729309
+199.82775879
+184.65493774
diff --git a/tests/training/restarting/sgd_2e.expected b/tests/training/restarting/sgd_2e.expected
index dbfaa7d..9dacfd0 100644
--- a/tests/training/restarting/sgd_2e.expected
+++ b/tests/training/restarting/sgd_2e.expected
@@ -1,16 +1,16 @@
-Ep. 1 : Up. 4 : Sen. 128 : Cost 258.61
-Ep. 1 : Up. 8 : Sen. 256 : Cost 270.33
-Ep. 1 : Up. 12 : Sen. 384 : Cost 247.52
-Ep. 1 : Up. 16 : Sen. 512 : Cost 242.14
-Ep. 1 : Up. 20 : Sen. 640 : Cost 212.45
-Ep. 1 : Up. 24 : Sen. 768 : Cost 252.00
-Ep. 1 : Up. 28 : Sen. 896 : Cost 219.76
-Ep. 1 : Up. 32 : Sen. 1024 : Cost 207.43
-Ep. 2 : Up. 36 : Sen. 128 : Cost 219.43
-Ep. 2 : Up. 40 : Sen. 256 : Cost 225.69
-Ep. 2 : Up. 44 : Sen. 384 : Cost 202.95
-Ep. 2 : Up. 48 : Sen. 512 : Cost 197.49
-Ep. 2 : Up. 52 : Sen. 640 : Cost 173.57
-Ep. 2 : Up. 56 : Sen. 768 : Cost 209.77
-Ep. 2 : Up. 60 : Sen. 896 : Cost 186.90
-Ep. 2 : Up. 64 : Sen. 1024 : Cost 179.69
+Ep. 1 : Up. 4 : Sen. 128 : Cost 258.60427856
+Ep. 1 : Up. 8 : Sen. 256 : Cost 270.30130005
+Ep. 1 : Up. 12 : Sen. 384 : Cost 247.52519226
+Ep. 1 : Up. 16 : Sen. 512 : Cost 242.22946167
+Ep. 1 : Up. 20 : Sen. 640 : Cost 212.66212463
+Ep. 1 : Up. 24 : Sen. 768 : Cost 252.52357483
+Ep. 1 : Up. 28 : Sen. 896 : Cost 220.55186462
+Ep. 1 : Up. 32 : Sen. 1,024 : Cost 208.12597656
+Ep. 2 : Up. 36 : Sen. 128 : Cost 219.92158508
+Ep. 2 : Up. 40 : Sen. 256 : Cost 226.15286255
+Ep. 2 : Up. 44 : Sen. 384 : Cost 203.31124878
+Ep. 2 : Up. 48 : Sen. 512 : Cost 197.85574341
+Ep. 2 : Up. 52 : Sen. 640 : Cost 173.79647827
+Ep. 2 : Up. 56 : Sen. 768 : Cost 209.79650879
+Ep. 2 : Up. 60 : Sen. 896 : Cost 186.76977539
+Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.61027527
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
index dd440c8..17c3416 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
@@ -13,7 +13,7 @@ rm -rf expsmooth_sync expsmooth_sync_*.log
mkdir -p expsmooth_sync
-opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --devices 0 1 --sync-sgd"
+opts="--no-shuffle --seed 777 --cost-type ce-sum --disp-label-counts --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.001 --valid-sets valid.bpe.en valid.bpe.de --valid-metrics ce-mean-words --valid-mini-batch 32 --devices 0 1 --sync-sgd"
opt_disp=20
opt_valid=20
diff --git a/tests/training/restoring/optimizer/adagrad.costs.expected b/tests/training/restoring/optimizer/adagrad.costs.expected
index 3e7dafc..31f312e 100644
--- a/tests/training/restoring/optimizer/adagrad.costs.expected
+++ b/tests/training/restoring/optimizer/adagrad.costs.expected
@@ -1,10 +1,10 @@
-238.75
-245.75
-240.41
-233.51
-239.67
-254.63
-256.65
-244.04
-245.92
-236.57
+238.72998047
+245.70014954
+240.35234070
+233.43530273
+239.59432983
+254.54069519
+256.56338501
+243.96914673
+245.85603333
+236.53524780
diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected
index 38d8547..227cd1a 100644
--- a/tests/training/restoring/optimizer/adagrad.gt.expected
+++ b/tests/training/restoring/optimizer/adagrad.gt.expected
@@ -1,2 +1,2 @@
-[[1.7865219e-04 6.0295035e-05 1.4555028e-05 ... 0.0000000e+00
- 0.0000000e+00 0.0000000e+00]]
+[[ 2.13038606e-06 2.62594858e-05 9.69232133e-06 ..., 0.00000000e+00
+ 0.00000000e+00 0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam.costs.expected b/tests/training/restoring/optimizer/adam.costs.expected
index 82e89a7..7272a33 100644
--- a/tests/training/restoring/optimizer/adam.costs.expected
+++ b/tests/training/restoring/optimizer/adam.costs.expected
@@ -1,10 +1,10 @@
-238.74
-245.64
-239.94
-231.45
-229.88
-233.53
-229.18
-212.25
-209.13
-199.82
+238.70394897
+245.51347351
+239.71450806
+231.26594543
+231.99667358
+237.87966919
+234.03823853
+217.62667847
+215.01562500
+205.33581543
diff --git a/tests/training/restoring/optimizer/adam.mt.expected b/tests/training/restoring/optimizer/adam.mt.expected
index 29822a9..64bcee9 100644
--- a/tests/training/restoring/optimizer/adam.mt.expected
+++ b/tests/training/restoring/optimizer/adam.mt.expected
@@ -1,2 +1,2 @@
-[[ 0.00075354 0.00025602 -0.00038422 ... 0. 0.
- 0. ]]
+[[ -5.87761315e-05 1.77891372e-04 -1.15002964e-04 ..., 0.00000000e+00
+ 0.00000000e+00 0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam.vt.expected b/tests/training/restoring/optimizer/adam.vt.expected
index b9e12af..e08fe13 100644
--- a/tests/training/restoring/optimizer/adam.vt.expected
+++ b/tests/training/restoring/optimizer/adam.vt.expected
@@ -1,2 +1,2 @@
-[[4.5075168e-07 5.6444804e-08 1.0361377e-07 ... 0.0000000e+00
- 0.0000000e+00 0.0000000e+00]]
+[[ 3.27316441e-09 9.30779294e-08 4.11611119e-08 ..., 0.00000000e+00
+ 0.00000000e+00 0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam_async.costs.expected b/tests/training/restoring/optimizer/adam_async.costs.expected
index fb3742c..2c2bcce 100644
--- a/tests/training/restoring/optimizer/adam_async.costs.expected
+++ b/tests/training/restoring/optimizer/adam_async.costs.expected
@@ -1,10 +1,10 @@
-7256.50878906
-8020.36572266
-7798.46972656
-7876.51171875
-7549.89990234
-7100.47753906
-6763.89208984
-6482.65332031
-6560.43603516
-6543.25537109
+7255.70458984
+8017.33105469
+7792.87353516
+7871.14453125
+7605.37109375
+7275.98925781
+6925.38037109
+6636.69628906
+6718.36572266
+6711.51171875
diff --git a/tests/training/restoring/optimizer/adam_async.mt.expected b/tests/training/restoring/optimizer/adam_async.mt.expected
index 15454ce..36442fa 100644
--- a/tests/training/restoring/optimizer/adam_async.mt.expected
+++ b/tests/training/restoring/optimizer/adam_async.mt.expected
@@ -1 +1 @@
-[[ 0.03076001 0.00486175 -0.01256138 ..., 0. 0. 0. ]]
+[[ 0.00348229 0.00959761 -0.00544088 ..., 0. 0. 0. ]]
diff --git a/tests/training/restoring/optimizer/adam_async.vt.expected b/tests/training/restoring/optimizer/adam_async.vt.expected
index f023b1f..147b02c 100644
--- a/tests/training/restoring/optimizer/adam_async.vt.expected
+++ b/tests/training/restoring/optimizer/adam_async.vt.expected
@@ -1,2 +1,2 @@
-[[ 4.70023151e-05 1.37165273e-06 6.96373081e-06 ..., 0.00000000e+00
+[[ 8.52484561e-07 4.59003832e-06 1.47568892e-06 ..., 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam_load.expected b/tests/training/restoring/optimizer/adam_load.expected
index 3865d92..ee730d5 100644
--- a/tests/training/restoring/optimizer/adam_load.expected
+++ b/tests/training/restoring/optimizer/adam_load.expected
@@ -1,6 +1,6 @@
-Ep. 1 : Up. 1 : Sen. 2 : Cost 223.48526001
-Ep. 1 : Up. 2 : Sen. 4 : Cost 255.67684937
-Ep. 1 : Up. 3 : Sen. 6 : Cost 211.85998535
-Ep. 1 : Up. 4 : Sen. 8 : Cost 351.33468628
-Ep. 1 : Up. 5 : Sen. 10 : Cost 239.30172729
-Ep. 1 : Up. 6 : Sen. 12 : Cost 171.99931335
+Ep. 1 : Up. 1 : Sen. 2 : Cost 223.47967529
+Ep. 1 : Up. 2 : Sen. 4 : Cost 255.49520874
+Ep. 1 : Up. 3 : Sen. 6 : Cost 213.80761719
+Ep. 1 : Up. 4 : Sen. 8 : Cost 352.16754150
+Ep. 1 : Up. 5 : Sen. 10 : Cost 367.21719360
+Ep. 1 : Up. 6 : Sen. 12 : Cost 196.68669128
diff --git a/tests/training/restoring/optimizer/adam_sync.costs.expected b/tests/training/restoring/optimizer/adam_sync.costs.expected
index c322edb..2faf27e 100644
--- a/tests/training/restoring/optimizer/adam_sync.costs.expected
+++ b/tests/training/restoring/optimizer/adam_sync.costs.expected
@@ -1,10 +1,10 @@
-7256.27343750
-8019.53662109
-7795.20312500
-7861.81884766
-7480.13671875
-7036.00000000
-6732.01171875
-6452.51220703
-6534.94628906
-6518.54003906
+7255.24365234
+8016.10302734
+7789.29394531
+7859.13769531
+7564.06005859
+7214.78515625
+6903.62011719
+6621.17675781
+6706.66259766
+6701.24316406
diff --git a/tests/training/restoring/optimizer/adam_sync.mt.expected b/tests/training/restoring/optimizer/adam_sync.mt.expected
index 55b2528..5cdf31d 100644
--- a/tests/training/restoring/optimizer/adam_sync.mt.expected
+++ b/tests/training/restoring/optimizer/adam_sync.mt.expected
@@ -1 +1 @@
-[[ 0.01112643 0.00158976 -0.00600797 ..., 0. 0. 0. ]]
+[[ 0.00084951 0.00929706 -0.00577381 ..., 0. 0. 0. ]]
diff --git a/tests/training/restoring/optimizer/adam_sync.vt.expected b/tests/training/restoring/optimizer/adam_sync.vt.expected
index f0a3fbb..7de36bb 100644
--- a/tests/training/restoring/optimizer/adam_sync.vt.expected
+++ b/tests/training/restoring/optimizer/adam_sync.vt.expected
@@ -1,2 +1,2 @@
-[[ 8.61441822e-06 4.59146378e-07 2.09469931e-06 ..., 0.00000000e+00
+[[ 2.50489421e-07 4.21463801e-06 1.65782251e-06 ..., 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected
index 17a7804..397895f 100644
--- a/tests/training/restoring/validation/valid_add.expected
+++ b/tests/training/restoring/validation/valid_add.expected
@@ -1,15 +1,15 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 296.113 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 296.108 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 296.103 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 296.098 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 296.093 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 296.088 : new best
-[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1574 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 296.082 : new best
-[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1572 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 296.077 : new best
-[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.157 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 296.073 : new best
-[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1569 : new best
-[valid] Ep. 1 : Up. 200 : cross-entropy : 296.067 : new best
-[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1567 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 296.108 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 296.103 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 296.097 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 296.092 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 296.088 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 296.082 : new best
+[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1572 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 296.078 : new best
+[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.157 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 296.072 : new best
+[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1569 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 296.068 : new best
+[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1567 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 296.062 : new best
+[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1565 : new best
diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected
index 84573ba..7ddb2d1 100644
--- a/tests/training/restoring/validation/valid_lowisbet.expected
+++ b/tests/training/restoring/validation/valid_lowisbet.expected
@@ -1,10 +1,11 @@
-[valid] Ep. 1 : Up. 30 : cross-entropy : 299.663 : new best
-[valid] Ep. 2 : Up. 60 : cross-entropy : 298.728 : new best
-[valid] Ep. 3 : Up. 90 : cross-entropy : 294.158 : new best
-[valid] Ep. 4 : Up. 120 : cross-entropy : 292.71 : new best
-[valid] Ep. 5 : Up. 150 : cross-entropy : 292.194 : new best
-[valid] Ep. 5 : Up. 160 : cross-entropy : 292.132 : new best
-[valid] Ep. 6 : Up. 180 : cross-entropy : 290.975 : new best
-[valid] Ep. 7 : Up. 210 : cross-entropy : 290.719 : new best
-[valid] Ep. 8 : Up. 240 : cross-entropy : 291.122 : stalled 1 times (last best: 290.719)
-[valid] Ep. 9 : Up. 270 : cross-entropy : 291.832 : stalled 2 times (last best: 290.719)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 299.666 : new best
+[valid] Ep. 2 : Up. 60 : cross-entropy : 299.155 : new best
+[valid] Ep. 3 : Up. 90 : cross-entropy : 295.698 : new best
+[valid] Ep. 4 : Up. 120 : cross-entropy : 293.975 : new best
+[valid] Ep. 5 : Up. 150 : cross-entropy : 293.203 : new best
+[valid] Ep. 5 : Up. 160 : cross-entropy : 293.042 : new best
+[valid] Ep. 6 : Up. 180 : cross-entropy : 291.761 : new best
+[valid] Ep. 7 : Up. 210 : cross-entropy : 291.089 : new best
+[valid] Ep. 8 : Up. 240 : cross-entropy : 291.087 : new best
+[valid] Ep. 9 : Up. 270 : cross-entropy : 291.422 : stalled 1 times (last best: 291.087)
+[valid] Ep. 10 : Up. 300 : cross-entropy : 291.984 : stalled 2 times (last best: 291.087)
diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected
index 132334b..ec21cb2 100644
--- a/tests/training/restoring/validation/valid_newbest.expected
+++ b/tests/training/restoring/validation/valid_newbest.expected
@@ -1,20 +1,20 @@
-[valid] Ep. 1 : Up. 10 : cross-entropy : 250.144 : new best
-[valid] Ep. 1 : Up. 10 : translation : 8 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.143 : new best
-[valid] Ep. 1 : Up. 20 : translation : 9 : new best
-[valid] Ep. 1 : Up. 30 : cross-entropy : 250.142 : new best
-[valid] Ep. 1 : Up. 30 : translation : 1 : stalled 1 times (last best: 9)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.14 : new best
-[valid] Ep. 1 : Up. 40 : translation : 5 : stalled 2 times (last best: 9)
-[valid] Ep. 1 : Up. 50 : cross-entropy : 250.139 : new best
-[valid] Ep. 1 : Up. 50 : translation : 2 : stalled 3 times (last best: 9)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.137 : new best
-[valid] Ep. 1 : Up. 60 : translation : 8 : stalled 4 times (last best: 9)
-[valid] Ep. 1 : Up. 70 : cross-entropy : 250.136 : new best
-[valid] Ep. 1 : Up. 70 : translation : 8 : stalled 5 times (last best: 9)
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.134 : new best
-[valid] Ep. 1 : Up. 80 : translation : 8 : stalled 6 times (last best: 9)
-[valid] Ep. 1 : Up. 90 : cross-entropy : 250.133 : new best
-[valid] Ep. 1 : Up. 90 : translation : 8 : stalled 7 times (last best: 9)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.131 : new best
-[valid] Ep. 1 : Up. 100 : translation : 8 : stalled 8 times (last best: 9)
+[valid] Ep. 1 : Up. 10 : cross-entropy : 250.085 : new best
+[valid] Ep. 1 : Up. 10 : translation : 4 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 250.084 : new best
+[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 4)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 250.083 : new best
+[valid] Ep. 1 : Up. 30 : translation : 4 : stalled 2 times (last best: 4)
+[valid] Ep. 1 : Up. 40 : cross-entropy : 250.081 : new best
+[valid] Ep. 1 : Up. 40 : translation : 9 : new best
+[valid] Ep. 1 : Up. 50 : cross-entropy : 250.08 : new best
+[valid] Ep. 1 : Up. 50 : translation : 0 : stalled 1 times (last best: 9)
+[valid] Ep. 1 : Up. 60 : cross-entropy : 250.078 : new best
+[valid] Ep. 1 : Up. 60 : translation : 0 : stalled 2 times (last best: 9)
+[valid] Ep. 1 : Up. 70 : cross-entropy : 250.077 : new best
+[valid] Ep. 1 : Up. 70 : translation : 5 : stalled 3 times (last best: 9)
+[valid] Ep. 1 : Up. 80 : cross-entropy : 250.075 : new best
+[valid] Ep. 1 : Up. 80 : translation : 6 : stalled 4 times (last best: 9)
+[valid] Ep. 1 : Up. 90 : cross-entropy : 250.074 : new best
+[valid] Ep. 1 : Up. 90 : translation : 6 : stalled 5 times (last best: 9)
+[valid] Ep. 1 : Up. 100 : cross-entropy : 250.072 : new best
+[valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9)
diff --git a/tests/training/validation/final_batch.expected b/tests/training/validation/final_batch.expected
index 79f3542..893f451 100644
--- a/tests/training/validation/final_batch.expected
+++ b/tests/training/validation/final_batch.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 240.186 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 240.174 : new best
-[valid] Ep. 1 : Up. 150 : cross-entropy : 240.167 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 240.178 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.164 : new best
+[valid] Ep. 1 : Up. 150 : cross-entropy : 240.157 : new best
diff --git a/tests/training/validation/final_epoch.expected b/tests/training/validation/final_epoch.expected
index a2cd5a4..decf6d3 100644
--- a/tests/training/validation/final_epoch.expected
+++ b/tests/training/validation/final_epoch.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 40 : cross-entropy : 240.19 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 240.182 : new best
-[valid] Ep. 2 : Up. 81 : cross-entropy : 240.182 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 240.203 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 240.194 : new best
+[valid] Ep. 2 : Up. 81 : cross-entropy : 240.194 : new best
diff --git a/tests/training/validation/final_match.expected b/tests/training/validation/final_match.expected
index a33d988..29ae2c8 100644
--- a/tests/training/validation/final_match.expected
+++ b/tests/training/validation/final_match.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 240.186 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 240.174 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 240.161 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 240.178 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.164 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 240.15 : new best