Merge branch 'master' of https://github.com/marian-nmt/marian-regression-tests

author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2019-02-06 23:38:15 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2019-02-06 23:38:15 +0300
commit: 1419e20ddcab91b49c200d892c0149ab9d7003a2 (patch)
tree: 7782e6825d36c2a8d448d3b374d070576c10c337
parent: 5fafdf1eefd14d08e61ce9917d050c4c119ae48f (diff)
parent: 142eadddbe04493c1024b42586030b72e9cb7ea2 (diff)
62 files changed, 570 insertions, 569 deletions
diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected
index c6960ce..47c8765 100644
--- a/tests/training/basics/gzip.expected
+++ b/tests/training/basics/gzip.expected
@@ -1,5 +1,5 @@
-447.84643555
-374.76788330
-324.78485107
-284.22189331
-248.39517212
+448.35955811
+374.86718750
+324.67779541
+283.97842407
+248.12561035
diff --git a/tests/training/basics/toy.expected b/tests/training/basics/toy.expected
index f5094ef..4e03c75 100644
--- a/tests/training/basics/toy.expected
+++ b/tests/training/basics/toy.expected
@@ -1,44 +1,44 @@
-207.97480774
-179.29321289
-212.48278809
-236.76557922
-185.84127808
-233.23265076
-208.95817566
-217.90177917
-183.83541870
-201.72744751
-148.79904175
-269.31390381
-217.85786438
-130.89492798
-121.50901794
-134.44161987
-177.97956848
-143.88101196
-181.38346863
-101.61656189
-126.42018890
-141.65258789
-213.97846985
-245.28518677
-169.17837524
-102.50040436
-115.88985443
-176.02175903
-128.50570679
-138.32833862
-123.53054047
-213.36453247
-148.53483582
-198.26582336
-135.98811340
-173.58190918
-181.12872314
-187.30583191
-129.51103210
-126.53293610
-162.27357483
-95.55107117
-133.98471069
-206.47187805
+207.82556152
+178.71255493
+211.33903503
+234.55387878
+182.80172729
+227.75459290
+200.35102844
+207.02507019
+174.51194763
+191.75926208
+141.59402466
+256.85244751
+208.20707703
+125.15594482
+116.06490326
+129.00891113
+172.10009766
+138.76477051
+176.41941833
+98.28771210
+123.42759705
+138.35072327
+210.30610657
+241.78837585
+166.28385925
+99.98839569
+113.32307434
+173.32617188
+125.69969177
+135.87945557
+120.28639984
+209.76863098
+145.23466492
+195.52755737
+132.37802124
+169.65127563
+177.24984741
+182.49871826
+125.44963074
+122.40126038
+157.54493713
+91.15261841
+129.87872314
+200.61141968
diff --git a/tests/training/basics/trans.expected b/tests/training/basics/trans.expected
index 6b7678b..68987df 100644
--- a/tests/training/basics/trans.expected
+++ b/tests/training/basics/trans.expected
@@ -1,4 +1,4 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 221.035 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 218.835 : new best
 [valid] Ep. 1 : Up. 60 : translation : 1 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 195.984 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 187.91 : new best
 [valid] Ep. 1 : Up. 120 : translation : 2 : new best
diff --git a/tests/training/basics/valid.expected b/tests/training/basics/valid.expected
index 63aac29..66764ec 100644
--- a/tests/training/basics/valid.expected
+++ b/tests/training/basics/valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 15 : cross-entropy : 308.376 : new best
+[valid] Ep. 1 : Up. 15 : cross-entropy : 307.647 : new best
 [valid] Ep. 1 : Up. 15 : valid-script : 1 : new best
-[valid] Ep. 1 : Up. 30 : cross-entropy : 307.337 : new best
+[valid] Ep. 1 : Up. 30 : cross-entropy : 305.551 : new best
 [valid] Ep. 1 : Up. 30 : valid-script : 2 : new best
-[valid] Ep. 1 : Up. 45 : cross-entropy : 301.132 : new best
+[valid] Ep. 1 : Up. 45 : cross-entropy : 299.442 : new best
 [valid] Ep. 1 : Up. 45 : valid-script : 3 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 285.37 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 281.549 : new best
 [valid] Ep. 1 : Up. 60 : valid-script : 4 : new best
-[valid] Ep. 1 : Up. 75 : cross-entropy : 275.612 : new best
+[valid] Ep. 1 : Up. 75 : cross-entropy : 268.403 : new best
 [valid] Ep. 1 : Up. 75 : valid-script : 5 : new best
diff --git a/tests/training/cost-functions/ce-mean-words.expected b/tests/training/cost-functions/ce-mean-words.expected
index f8e42ef..1962514 100644
--- a/tests/training/cost-functions/ce-mean-words.expected
+++ b/tests/training/cost-functions/ce-mean-words.expected
@@ -1,8 +1,8 @@
-8.48374081
-8.47330475
-8.45676517
-8.45006180
-8.42407513
-8.39919662
-8.34003067
-8.26019096
+8.48090363
+8.45937634
+8.42510605
+8.40673161
+8.35613060
+8.30123329
+8.18988895
+8.04098606
diff --git a/tests/training/cost-functions/ce-mean.expected b/tests/training/cost-functions/ce-mean.expected
index 9649b45..10a808e 100644
--- a/tests/training/cost-functions/ce-mean.expected
+++ b/tests/training/cost-functions/ce-mean.expected
@@ -1,8 +1,8 @@
-205.86325073
-203.75646973
-113.10920715
-291.26306152
-150.84355164
-260.24383545
-181.52583313
-247.80560303
+205.79443359
+203.42153931
+112.68572235
+289.76901245
+149.62661743
+257.20617676
+178.25546265
+241.22378540
diff --git a/tests/training/cost-functions/ce-sum.expected b/tests/training/cost-functions/ce-sum.expected
index b108bbe..ccbb4ca 100644
--- a/tests/training/cost-functions/ce-sum.expected
+++ b/tests/training/cost-functions/ce-sum.expected
@@ -1,8 +1,8 @@
-13175.24804688
-13040.41406250
-7238.98876953
-18640.82226562
-9653.98242188
-16655.56445312
-11617.62695312
-15859.46289062
+13170.84375000
+13018.97851562
+7211.88525391
+18545.24804688
+9576.11328125
+16461.33593750
+11408.51269531
+15438.73730469
diff --git a/tests/training/cost-functions/perplexity.expected b/tests/training/cost-functions/perplexity.expected
index c97e54c..04cbf84 100644
--- a/tests/training/cost-functions/perplexity.expected
+++ b/tests/training/cost-functions/perplexity.expected
@@ -1,8 +1,8 @@
-4835.50488281
-4785.30371094
-4706.80761719
-4675.36181641
-4555.41650391
-4443.47412109
-4188.16210938
-3866.76611328
+4821.80517578
+4719.09570312
+4560.10205078
+4477.03125000
+4256.11279297
+4028.61523438
+3604.00585938
+3105.27124023
diff --git a/tests/training/cost-functions/test_ce-sum.sh b/tests/training/cost-functions/test_ce-sum.sh
index 8d8acaa..735ebbc 100644
--- a/tests/training/cost-functions/test_ce-sum.sh
+++ b/tests/training/cost-functions/test_ce-sum.sh
@@ -18,7 +18,7 @@ test -e ce-sum/model.npz
 test -e ce-sum.log
 
 cat ce-sum.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > ce-sum.out
-$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 0.2 -o ce-sum.diff
+$MRT_TOOLS/diff-nums.py ce-sum.out ce-sum.expected -p 2 -o ce-sum.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/features/data-weighting/maxibatch.expected b/tests/training/features/data-weighting/maxibatch.expected
index 72fbb73..7c0001c 100644
--- a/tests/training/features/data-weighting/maxibatch.expected
+++ b/tests/training/features/data-weighting/maxibatch.expected
@@ -1,10 +1,10 @@
-6921.77050781
-5224.29541016
-4134.13769531
-3090.62255859
-2086.21093750
-5387.22900391
-5651.76074219
-4459.25537109
-3396.93627930
-2398.67187500
+6924.51171875
+5225.91162109
+4136.33691406
+3092.30273438
+2086.31420898
+5388.93750000
+5653.18310547
+4460.88183594
+3398.95581055
+2398.78735352
diff --git a/tests/training/features/data-weighting/sqlite.expected b/tests/training/features/data-weighting/sqlite.expected
index e5dc23a..d2f0f31 100644
--- a/tests/training/features/data-weighting/sqlite.expected
+++ b/tests/training/features/data-weighting/sqlite.expected
@@ -1,100 +1,100 @@
-145.76625061
-407.85418701
-1194.96923828
-233.20181274
-1427.79321289
-126.29446411
-378.78878784
-97.17931366
-757.44409180
-330.21224976
-1602.50830078
-213.69992065
-437.10656738
-97.17844391
-466.32504272
-194.22283936
-1194.76013184
-145.61857605
-932.43957520
-388.58657837
-932.55883789
-621.67999268
-873.99902344
-825.64160156
-437.16613770
-155.39439392
-495.47885132
-233.18109131
-349.39471436
-320.56796265
-612.07836914
-670.26416016
-1777.50268555
-242.84895325
-1369.95361328
-136.01496887
-1515.54931641
-184.50100708
-815.76287842
-135.94961548
-728.80718994
-174.92292786
-408.04016113
-213.76396179
-232.92919922
-135.99600220
-291.41558838
-165.11375427
-582.64404297
-349.64080811
-961.79394531
-252.50859070
-437.06246948
-330.19354248
-436.77810669
-155.42767334
-524.70916748
-563.38171387
-495.48129272
-174.89411926
-436.91708374
-135.98927307
-495.24850464
-87.39509583
-437.12753296
-213.65098572
-58.25393295
-378.78936768
-349.33700562
-48.53756332
-990.68878174
-291.55676270
-524.49810791
-621.72137451
-1223.91223145
-534.35400391
-845.23986816
-291.38967896
-1661.09838867
-145.75514221
-1806.95227051
-680.00848389
-874.08618164
-77.69504547
-1048.91845703
-165.15805054
-611.82751465
-87.41978455
-437.08914185
-126.29270172
-262.40588379
-184.46842957
-262.17800903
-213.70729065
-1340.37475586
-106.82720947
-174.82476807
-116.54352570
-1165.54418945
-611.97637939
+145.22386169
+408.22799683
+1194.63964844
+233.55360413
+1430.38696289
+126.14705658
+378.98550415
+96.71858215
+757.48663330
+330.56832886
+1604.02294922
+214.05000305
+438.19305420
+96.71667480
+466.88586426
+194.51495361
+1197.85363770
+146.07209778
+933.41638184
+389.17184448
+933.05151367
+622.52008057
+874.42907715
+825.77954102
+437.69128418
+155.24894714
+496.18453979
+232.95338440
+350.40795898
+321.22961426
+610.87152100
+670.83068848
+1778.17529297
+241.61517334
+1370.66113281
+136.27859497
+1510.65393066
+184.96159363
+815.58801270
+136.21365356
+729.38146973
+174.92094421
+408.49716187
+213.90168762
+233.55191040
+136.20471191
+290.47631836
+165.94480896
+584.11578369
+349.23815918
+958.81738281
+253.41488647
+437.03112793
+330.26031494
+435.45944214
+154.90957642
+521.61523438
+563.37170410
+496.03311157
+175.29434204
+438.16888428
+135.96989441
+496.29870605
+87.74061584
+436.65737915
+214.36607361
+58.15562820
+378.97769165
+348.81768799
+48.82063675
+993.13079834
+292.51773071
+524.27105713
+620.84045410
+1228.60827637
+535.02069092
+846.56628418
+291.05285645
+1658.58984375
+145.76992798
+1809.52172852
+680.63385010
+875.07159424
+78.35388947
+1049.29785156
+165.48069763
+611.14044189
+87.67588806
+438.09893799
+126.03882599
+262.80325317
+184.69609070
+262.46411133
+213.61804199
+1341.65759277
+106.89002228
+174.81327820
+116.23052979
+1165.48278809
+612.53643799
diff --git a/tests/training/features/data-weighting/sqlite_word.expected b/tests/training/features/data-weighting/sqlite_word.expected
index 3f43848..037b678 100644
--- a/tests/training/features/data-weighting/sqlite_word.expected
+++ b/tests/training/features/data-weighting/sqlite_word.expected
@@ -1,15 +1,15 @@
-861.55938721
-727.98687744
-630.16650391
-553.84191895
-469.53985596
-387.43038940
-299.73706055
-257.13076782
-828.24053955
-706.25305176
-615.00897217
-536.81494141
-454.13830566
-369.39624023
-274.19537354
+862.09051514
+728.33001709
+630.45318604
+554.12792969
+469.74276733
+387.57510376
+299.93112183
+257.35052490
+828.63208008
+706.60028076
+615.24633789
+537.03155518
+454.32131958
+369.54190063
+274.37387085
diff --git a/tests/training/features/data-weighting/train.expected b/tests/training/features/data-weighting/train.expected
index cd3f965..32e8451 100644
--- a/tests/training/features/data-weighting/train.expected
+++ b/tests/training/features/data-weighting/train.expected
@@ -1,10 +1,10 @@
-473.66577148
-495.55804443
-504.03683472
-498.77752686
-494.78051758
-475.22576904
-518.50439453
-495.43505859
-467.06631470
-523.22912598
+474.05703735
+495.60043335
+504.07754517
+498.34414673
+493.95330811
+474.32269287
+517.29327393
+493.80380249
+465.55847168
+521.18237305
diff --git a/tests/training/features/data-weighting/valid.expected b/tests/training/features/data-weighting/valid.expected
index 6afb154..c2937c9 100644
--- a/tests/training/features/data-weighting/valid.expected
+++ b/tests/training/features/data-weighting/valid.expected
@@ -1,8 +1,8 @@
-[valid] Ep. 2 : Up. 15 : cross-entropy : 277.031 : new best
+[valid] Ep. 2 : Up. 15 : cross-entropy : 277.417 : new best
 [valid] Ep. 2 : Up. 15 : valid-script : 1 : new best
-[valid] Ep. 3 : Up. 30 : cross-entropy : 276.978 : new best
+[valid] Ep. 3 : Up. 30 : cross-entropy : 276.641 : new best
 [valid] Ep. 3 : Up. 30 : valid-script : 2 : new best
-[valid] Ep. 4 : Up. 45 : cross-entropy : 276.894 : new best
+[valid] Ep. 4 : Up. 45 : cross-entropy : 275.773 : new best
 [valid] Ep. 4 : Up. 45 : valid-script : 3 : new best
-[valid] Ep. 4 : Up. 50 : cross-entropy : 276.855 : new best
+[valid] Ep. 4 : Up. 50 : cross-entropy : 275.441 : new best
 [valid] Ep. 4 : Up. 50 : valid-script : 4 : new best
diff --git a/tests/training/features/data-weighting/word_eos.expected b/tests/training/features/data-weighting/word_eos.expected
index 6965f3a..a4ec027 100644
--- a/tests/training/features/data-weighting/word_eos.expected
+++ b/tests/training/features/data-weighting/word_eos.expected
@@ -1,17 +1,17 @@
-Ep. 1 : Up. 5 : Sen. 320 : Cost 855.91491699
-Ep. 1 : Up. 10 : Sen. 640 : Cost 704.62048340
-Ep. 1 : Up. 15 : Sen. 960 : Cost 603.96533203
-Ep. 1 : Up. 20 : Sen. 1,280 : Cost 518.18493652
-Ep. 1 : Up. 25 : Sen. 1,600 : Cost 442.29742432
-Ep. 1 : Up. 30 : Sen. 1,920 : Cost 367.12915039
-Ep. 1 : Up. 35 : Sen. 2,240 : Cost 301.38732910
-Ep. 1 : Up. 40 : Sen. 2,560 : Cost 229.98681641
-Ep. 2 : Up. 45 : Sen. 64 : Cost 340.48742676
-Ep. 2 : Up. 50 : Sen. 384 : Cost 818.48626709
-Ep. 2 : Up. 55 : Sen. 704 : Cost 681.45458984
-Ep. 2 : Up. 60 : Sen. 1,024 : Cost 586.93054199
-Ep. 2 : Up. 65 : Sen. 1,344 : Cost 500.92144775
-Ep. 2 : Up. 70 : Sen. 1,664 : Cost 427.21176147
-Ep. 2 : Up. 75 : Sen. 1,984 : Cost 354.24258423
-Ep. 2 : Up. 80 : Sen. 2,304 : Cost 287.52239990
-Ep. 2 : Up. 85 : Sen. 2,624 : Cost 212.79341125
+Ep. 1 : Up. 5 : Sen. 320 : Cost 856.40283203
+Ep. 1 : Up. 10 : Sen. 640 : Cost 705.00976562
+Ep. 1 : Up. 15 : Sen. 960 : Cost 604.22814941
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 518.43249512
+Ep. 1 : Up. 25 : Sen. 1,600 : Cost 442.49536133
+Ep. 1 : Up. 30 : Sen. 1,920 : Cost 367.35723877
+Ep. 1 : Up. 35 : Sen. 2,240 : Cost 301.55618286
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 230.20394897
+Ep. 2 : Up. 45 : Sen. 64 : Cost 340.81048584
+Ep. 2 : Up. 50 : Sen. 384 : Cost 818.84265137
+Ep. 2 : Up. 55 : Sen. 704 : Cost 681.78875732
+Ep. 2 : Up. 60 : Sen. 1,024 : Cost 587.13653564
+Ep. 2 : Up. 65 : Sen. 1,344 : Cost 501.12982178
+Ep. 2 : Up. 70 : Sen. 1,664 : Cost 427.36920166
+Ep. 2 : Up. 75 : Sen. 1,984 : Cost 354.46206665
+Ep. 2 : Up. 80 : Sen. 2,304 : Cost 287.68417358
+Ep. 2 : Up. 85 : Sen. 2,624 : Cost 212.97563171
diff --git a/tests/training/features/data-weighting/word_maxibatch.expected b/tests/training/features/data-weighting/word_maxibatch.expected
index 72a1404..542de08 100644
--- a/tests/training/features/data-weighting/word_maxibatch.expected
+++ b/tests/training/features/data-weighting/word_maxibatch.expected
@@ -1,10 +1,10 @@
-493.10702515
-355.26889038
-272.68411255
-199.14933777
-131.11528015
-386.00247192
-388.27536011
-296.74649048
-220.32846069
-151.99040222
+493.42471313
+355.55953979
+272.83404541
+199.24537659
+131.18077087
+386.23254395
+388.55151367
+296.93032837
+220.35517883
+152.07803345
diff --git a/tests/training/features/data-weighting/word_twos.expected b/tests/training/features/data-weighting/word_twos.expected
index 24ed38d..cc235d0 100644
--- a/tests/training/features/data-weighting/word_twos.expected
+++ b/tests/training/features/data-weighting/word_twos.expected
@@ -1,17 +1,17 @@
-Ep. 1 : Up. 5 : Sen. 320 : Cost 846.21844482 :
-Ep. 1 : Up. 10 : Sen. 640 : Cost 694.92529297 :
-Ep. 1 : Up. 15 : Sen. 960 : Cost 594.27014160 :
-Ep. 1 : Up. 20 : Sen. 1,280 : Cost 508.49014282 :
-Ep. 1 : Up. 25 : Sen. 1,600 : Cost 432.60327148 :
-Ep. 1 : Up. 30 : Sen. 1,920 : Cost 357.43505859 :
-Ep. 1 : Up. 35 : Sen. 2,240 : Cost 291.69354248 :
-Ep. 1 : Up. 40 : Sen. 2,560 : Cost 220.29316711 :
-Ep. 2 : Up. 45 : Sen. 64 : Cost 330.79293823 :
-Ep. 2 : Up. 50 : Sen. 384 : Cost 808.79095459 :
-Ep. 2 : Up. 55 : Sen. 704 : Cost 671.76074219 :
-Ep. 2 : Up. 60 : Sen. 1,024 : Cost 577.23736572 :
-Ep. 2 : Up. 65 : Sen. 1,344 : Cost 491.22949219 :
-Ep. 2 : Up. 70 : Sen. 1,664 : Cost 417.52026367 :
-Ep. 2 : Up. 75 : Sen. 1,984 : Cost 344.55108643 :
-Ep. 2 : Up. 80 : Sen. 2,304 : Cost 277.83230591 :
-Ep. 2 : Up. 85 : Sen. 2,624 : Cost 203.10263062 :
+Ep. 1 : Up. 5 : Sen. 320 : Cost 846.69714355 :
+Ep. 1 : Up. 10 : Sen. 640 : Cost 695.30053711 :
+Ep. 1 : Up. 15 : Sen. 960 : Cost 594.51928711 :
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 508.72247314 :
+Ep. 1 : Up. 25 : Sen. 1,600 : Cost 432.78329468 :
+Ep. 1 : Up. 30 : Sen. 1,920 : Cost 357.64947510 :
+Ep. 1 : Up. 35 : Sen. 2,240 : Cost 291.84161377 :
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 220.49028015 :
+Ep. 2 : Up. 45 : Sen. 64 : Cost 331.08535767 :
+Ep. 2 : Up. 50 : Sen. 384 : Cost 809.13928223 :
+Ep. 2 : Up. 55 : Sen. 704 : Cost 672.08361816 :
+Ep. 2 : Up. 60 : Sen. 1,024 : Cost 577.43341064 :
+Ep. 2 : Up. 65 : Sen. 1,344 : Cost 491.42279053 :
+Ep. 2 : Up. 70 : Sen. 1,664 : Cost 417.66470337 :
+Ep. 2 : Up. 75 : Sen. 1,984 : Cost 344.76025391 :
+Ep. 2 : Up. 80 : Sen. 2,304 : Cost 277.97634888 :
+Ep. 2 : Up. 85 : Sen. 2,624 : Cost 203.26664734 :
diff --git a/tests/training/features/exp-smoothing/expsmooth.expected b/tests/training/features/exp-smoothing/expsmooth.expected
index 39b5a3f..0a332e7 100644
--- a/tests/training/features/exp-smoothing/expsmooth.expected
+++ b/tests/training/features/exp-smoothing/expsmooth.expected
@@ -1,10 +1,10 @@
-Ep. 1 : Up. 20 : Sen. 80 : Cost 188.28628540
-Ep. 1 : Up. 40 : Sen. 160 : Cost 158.74415588
-Ep. 1 : Up. 60 : Sen. 240 : Cost 186.73446655
-Ep. 1 : Up. 80 : Sen. 320 : Cost 187.43354797
-Ep. 1 : Up. 100 : Sen. 400 : Cost 191.98048401
-Ep. 1 : Up. 120 : Sen. 480 : Cost 197.26301575
-Ep. 1 : Up. 140 : Sen. 560 : Cost 188.48814392
-Ep. 1 : Up. 160 : Sen. 640 : Cost 138.19192505
-Ep. 1 : Up. 180 : Sen. 720 : Cost 178.13829041
-Ep. 1 : Up. 200 : Sen. 800 : Cost 172.75942993
+Ep. 1 : Up. 20 : Sen. 80 : Cost 189.12686157
+Ep. 1 : Up. 40 : Sen. 160 : Cost 164.48492432
+Ep. 1 : Up. 60 : Sen. 240 : Cost 191.08549500
+Ep. 1 : Up. 80 : Sen. 320 : Cost 193.08412170
+Ep. 1 : Up. 100 : Sen. 400 : Cost 197.98199463
+Ep. 1 : Up. 120 : Sen. 480 : Cost 203.05249023
+Ep. 1 : Up. 140 : Sen. 560 : Cost 194.73191833
+Ep. 1 : Up. 160 : Sen. 640 : Cost 141.65765381
+Ep. 1 : Up. 180 : Sen. 720 : Cost 182.43969727
+Ep. 1 : Up. 200 : Sen. 800 : Cost 175.64430237
diff --git a/tests/training/features/exp-smoothing/expsmooth.valid.expected b/tests/training/features/exp-smoothing/expsmooth.valid.expected
index c9cf296..5eb3194 100644
--- a/tests/training/features/exp-smoothing/expsmooth.valid.expected
+++ b/tests/training/features/exp-smoothing/expsmooth.valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 222.122 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 204.974 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 198.165 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 193.006 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 189.999 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 188.414 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 187.68 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 185.726 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 185.776 : stalled 1 times (last best: 185.726)
-[valid] Ep. 1 : Up. 200 : cross-entropy : 185.164 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 227.393 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 210.417 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 203.053 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 198.014 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 194.883 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 192.912 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 191.532 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 189.221 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 188.993 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 188.035 : new best
diff --git a/tests/training/features/exp-smoothing/expsmooth_sync.expected b/tests/training/features/exp-smoothing/expsmooth_sync.expected
index 60c7b0f..48f75b5 100644
--- a/tests/training/features/exp-smoothing/expsmooth_sync.expected
+++ b/tests/training/features/exp-smoothing/expsmooth_sync.expected
@@ -1,10 +1,10 @@
-Ep. 1 : Up. 20 : Sen. 80 : Cost 182.65203857
-Ep. 1 : Up. 40 : Sen. 160 : Cost 155.88166809
-Ep. 1 : Up. 60 : Sen. 240 : Cost 184.18322754
-Ep. 1 : Up. 80 : Sen. 320 : Cost 186.25080872
-Ep. 1 : Up. 100 : Sen. 400 : Cost 191.62405396
-Ep. 1 : Up. 120 : Sen. 480 : Cost 197.04354858
-Ep. 1 : Up. 140 : Sen. 560 : Cost 188.32565308
-Ep. 1 : Up. 160 : Sen. 640 : Cost 138.54452515
-Ep. 1 : Up. 180 : Sen. 720 : Cost 178.15878296
-Ep. 1 : Up. 200 : Sen. 800 : Cost 172.65933228
+Ep. 1 : Up. 20 : Sen. 80 : Cost 187.71237183
+Ep. 1 : Up. 40 : Sen. 160 : Cost 163.85215759
+Ep. 1 : Up. 60 : Sen. 240 : Cost 190.63551331
+Ep. 1 : Up. 80 : Sen. 320 : Cost 192.62811279
+Ep. 1 : Up. 100 : Sen. 400 : Cost 197.13417053
+Ep. 1 : Up. 120 : Sen. 480 : Cost 202.37889099
+Ep. 1 : Up. 140 : Sen. 560 : Cost 193.67512512
+Ep. 1 : Up. 160 : Sen. 640 : Cost 140.65829468
+Ep. 1 : Up. 180 : Sen. 720 : Cost 180.93316650
+Ep. 1 : Up. 200 : Sen. 800 : Cost 174.50163269
diff --git a/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected b/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
index 990b5bd..9185b54 100644
--- a/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
+++ b/tests/training/features/exp-smoothing/expsmooth_sync.valid.expected
@@ -1,10 +1,10 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 217.366 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 201.761 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 195.803 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 191.88 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 189.405 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 188.01 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 187.335 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 185.499 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 185.617 : stalled 1 times (last best: 185.499)
-[valid] Ep. 1 : Up. 200 : cross-entropy : 184.961 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 226.312 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 209.256 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 202.403 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 197.33 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 194.074 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 191.846 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 190.357 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 187.689 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 187.172 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 186.016 : new best
diff --git a/tests/training/features/guided-alignment/rnn.expected b/tests/training/features/guided-alignment/rnn.expected
index 7c0ba57..0cefbcf 100644
--- a/tests/training/features/guided-alignment/rnn.expected
+++ b/tests/training/features/guided-alignment/rnn.expected
@@ -1,10 +1,10 @@
-237.17997742
-258.60153198
-244.89590454
-238.79472351
-224.54911804
-202.88813782
-231.55581665
-201.81591797
-211.45260620
-198.07586670
+236.62251282
+256.09661865
+241.58700562
+236.70104980
+224.99372864
+204.95254517
+235.38345337
+205.25239563
+215.87429810
+202.22142029
diff --git a/tests/training/features/guided-alignment/transformer.expected b/tests/training/features/guided-alignment/transformer.expected
index 54b713d..2513e28 100644
--- a/tests/training/features/guided-alignment/transformer.expected
+++ b/tests/training/features/guided-alignment/transformer.expected
@@ -1,10 +1,10 @@
-235.15219116
-248.11796570
-226.96292114
-220.31608582
-210.20214844
-190.91659546
-216.90063477
-189.43588257
-197.92109680
-185.81344604
+244.42282104
+256.56842041
+238.89138794
+233.57333374
+223.44998169
+204.23277283
+232.98970032
+204.28886414
+215.29394531
+201.92327881
diff --git a/tests/training/features/mixed-ensembles/s2s_transf.expected b/tests/training/features/mixed-ensembles/s2s_transf.expected
index 4509e3d..1aba12f 100644
--- a/tests/training/features/mixed-ensembles/s2s_transf.expected
+++ b/tests/training/features/mixed-ensembles/s2s_transf.expected
@@ -1,5 +1,5 @@
-Euroraum ichtungs@@ Betra@@ Ausgabe Herkunfts@@ absor@@ trieb utter@@ Ehre Berücksichtigung mental Elek@@ apo@@ Ob@@ angenommene Arch@@ ärz@@ Krise cau@@ Gestaltung Ärmsten aga@@ lich Matthe@@ seine Mensch@@ Matthe@@ seine saving Delors cable cau@@ Ok@@ begeg@@ gau Institutionen klau@@ Monate Krise technologie lich Matthe@@ seine ambiguity Monate
-kleiner nei@@ übt Stoffen sage@@ Stoffen Caudron sage@@ shi@@ sage@@ shi@@ sage@@ electronic sage@@ ssion Kind Gestaltung angelegt genug berechtigt konsequ@@ klau@@ angelegt genug Himmel sage@@ Agrarausgaben seine Reding Juncker öpft Bese@@ sage@@ Agrarausgaben seine cau@@ Briti@@ vig@@ begann DG schlüssi@@ unglaub@@ cau@@ Briti@@ cap@@ Arch@@ chtnis chtnis
-ffenen trou@@ illegaler wasser χ@@ festgesetzt serve Thomas ssion Arbeitnehmern pus@@ child Erinn@@ laufen Arbeitszeit@@ Erd@@ will@@ wasser Arbeitszeit@@ Erd@@ will@@ omen verbessert mental Russ@@ berechtigt verbessert Stadium tolerance Russ@@ berechtigt Angel@@ mining schlüssi@@ Erd@@ hochwertigen seine Dayton Demonstranten Arch@@ unglaub@@ cau@@ Erinn@@ Erd@@ hochwertigen
-cau@@ Briti@@ Mutterschafts@@ vergeben anspru@@ geforderten gigan@@ Gestaltung heik@@ See@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ öder@@ herunter@@ Vorgänger öder@@ herunter@@ Vorgänger Arbeitnehmern cau@@ Ausgabe Herkunfts@@ cau@@ Ausgabe Herkunfts@@ cau@@ Ausgabe völk@@ gro@@ See@@ dezentr@@ Gestaltung
-festgesetzt gezogen finden monisierung disku@@ wasser tief@@ Gestaltung Herkunfts@@ Beklei@@ bewußt wasser tief@@ Gestaltung Herkunfts@@ Beklei@@ seine Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@ Guant@@
+herrsch@@ Binnengrenzen gli@@ Borrell nische Millennium nun@@ Millennium nun@@ waren gessen@@ tentei@@ 41 typ@@ rig aufweisen ethn@@ Baum@@ nahe Unter@@ Unterzeichnung teure Wohl itäten ausgewogene Pläne persönliche agieren Meeres@@ persönliche agieren Meeres@@ persönliche Schlußfolgerungen Unterschied Fe@@ Unter@@ Somm@@ Pläne persönliche rain wunder@@ extended persönliche rain
+herrsch@@ Binnengrenzen Vorsitzes Pläne Kön@@ unterstützte tei@@ whol@@ Millennium wenngleich Kön@@ unterstützte tei@@ whol@@ VAT operator ethn@@ Baum@@ nahe Hague CI@@ COD rain will Kommissarin aush@@ wecken ASEM Konzep@@ Demokratisierungs@@ abzuwarten Voraussetzungen Kommissionspräsidenten unterbrochen COD Napole@@ Tür@@ log@@ Varela log@@ regulations wecken extreme Woh@@ log@@ Varela log@@ Varela
+waren gessen@@ nehme gli@@ cut@@ ethn@@ Baum@@ lich ca. EPL@@ ca. cycle tive ely Pazi@@ eben@@ agents ethn@@ agieren ethn@@ agieren Meeres@@ brachte Umweltverträglichkeitsprü@@ oring Genuss agieren ethn@@ agieren Meeres@@ ethn@@ agieren Meeres@@ nü@@ Instan@@ Geflügel@@ ahn finanziell bund@@ fortführen reform@@ Einklang need extreme agents
+herrsch@@ Binnengrenzen Binnengrenzen Binnengrenzen Dele@@ Tan@@ Texten Texten Texten Texten Texten Texten Texten wo nonsense thal@@ Sk@@ ethn@@ Baum@@ nahe nützlichen Konfrontation zielen Positionen mes@@ cor@@ Statistiken herrsch@@ Binnengrenzen nonsense reform just genügt erregend menschliche netz erregend menschliche tbewer@@ maj@@ coa log@@ Varela log@@ Varela log@@ Varela log@@ Varela log@@ Varela Budge@@ alitä@@ fit
+Statistiken rig ASEM Papier@@ ethn@@ itäts@@ zusätzlich itäts@@ zusätzlich itäts@@ zusätzlich itäts@@ zusätzlich will Capp@@ break ethn@@ agieren ethn@@ agieren ethn@@ agieren ethn@@ agieren
diff --git a/tests/training/features/mixed-ensembles/two_s2s.expected b/tests/training/features/mixed-ensembles/two_s2s.expected
index ac69148..b1d4e9e 100644
--- a/tests/training/features/mixed-ensembles/two_s2s.expected
+++ b/tests/training/features/mixed-ensembles/two_s2s.expected
@@ -1,5 +1,5 @@
-cla@@ cam@@ Berichterstatter again sad Son@@ goti@@ Spezial@@ Mitbürger entwicklungen character@@ Kür@@ 280 exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude gehin@@ verifi@@ buch exclude SLIM Summit natural
-Net@@ ph@@ Tajani mid Stresst@@ sobald ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ geordne@@ ifi@@ zurückzuziehen ahme einfach Bereitschaft öhnung ifi@@ geordne@@ ifi@@ zurückzuziehen
-PE@@ wohnen Vide@@ 280 280 280 Temper@@ ers relevanten bar modernisieren Verursacher@@ öhnung rückgängig ers danach lition breiten Eigentumsrechte danach lition breiten Eigentumsrechte breiten tel@@ tel@@ tel@@ holders öhnung gentle@@ Verzö@@ Machth@@ möchte ers relevanten bar modernisieren Verursacher@@ öhnung rückgängig ers danach lition breiten tel@@
-initiative track fingerprin@@ digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit aktualisiert digkeit digkeit
-Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil kriterien Gil Unternehmer@@ auch plat@@ eingesetzten austausch womöglich Sprache
+umgewandelt Davies Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt Standpunkt
+konzep@@ Zana Behauptungen Koordination ω@@ Hilfest@@ versu@@ gla@@ Jose@@ Einfuhren Jose@@ trade Entlastungsverfahren Strafgerichtshof Fle@@ wär@@ ω@@ Hilfest@@ gegenüber Jose@@ trade Entlastungsverfahren Koordination ω@@ französische etwas Wortmel@@ Bank risk@@ Modernis@@ beschä@@ Abstimmungs@@ Betriebe trade zweck Montre@@ ω@@ trade widerspiegelt photo@@ fortschritt@@ ω@@ Hilfest@@ versu@@ gla@@ Jose@@ Einfuhren ärz@@
+Hed@@ Warrant Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt schriftlichen Hed@@ GM@@ Lebensunterhalt
+kleiner Gon@@ gnügen Mindestnormen Forscher les gerich@@ verwendet Betrieben verei Prognosen les gerich@@ gerich@@ verwendet Betrieben anhaltenden freue day Ru@@ bedienen Forscher les gerich@@ verwendet Betrieben verei Prognosen day 30@@ classes ganda bewilli@@ wechselt gestimm@@ terungen les Wochenende Dig@@ uses endgültig gekostet tungen expressly ğ@@ Materie Beschlüssen there voneinander Forscher zweck Alzheimer day 30@@
+si@@ Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@ fil Υ@@
diff --git a/tests/training/models/lm/lm-transformer.expected b/tests/training/models/lm/lm-transformer.expected
index 8db5d5c..a5e9556 100644
--- a/tests/training/models/lm/lm-transformer.expected
+++ b/tests/training/models/lm/lm-transformer.expected
@@ -1,5 +1,5 @@
-405.37481689
-294.14898682
-217.30960083
-152.40402222
-86.83746338
+405.95352173
+277.85601807
+198.49377441
+135.86233521
+74.85224152
diff --git a/tests/training/models/lm/lm-transformer.scores.expected b/tests/training/models/lm/lm-transformer.scores.expected
index a996d13..d40b653 100644
--- a/tests/training/models/lm/lm-transformer.scores.expected
+++ b/tests/training/models/lm/lm-transformer.scores.expected
@@ -1,10 +1,10 @@
--112.025002
--202.339981
--99.826500
--232.753708
--643.735596
--151.965912
--383.419220
--60.465172
--89.117729
--105.327499
+-90.117882
+-179.877197
+-81.371750
+-204.165802
+-636.969482
+-131.934113
+-359.475616
+-56.732944
+-68.373947
+-100.774132
diff --git a/tests/training/models/lm/lm.expected b/tests/training/models/lm/lm.expected
index dd06692..c6b5c74 100644
--- a/tests/training/models/lm/lm.expected
+++ b/tests/training/models/lm/lm.expected
@@ -1,5 +1,5 @@
-410.21221924
-307.32592773
-234.65556335
-165.87971497
-93.67483521
+410.02645874
+306.52648926
+233.58132935
+167.19117737
+91.86805725
diff --git a/tests/training/models/lm/lm.scores.expected b/tests/training/models/lm/lm.scores.expected
index 53bf2c6..9ea8ae4 100644
--- a/tests/training/models/lm/lm.scores.expected
+++ b/tests/training/models/lm/lm.scores.expected
@@ -1,10 +1,10 @@
--119.056870
--209.617035
--104.773262
--245.013489
--663.487915
--160.193527
--396.427094
--62.445087
--94.592773
--110.077751
+-114.927658
+-208.074463
+-102.252083
+-244.505508
+-677.256836
+-154.783279
+-411.580017
+-58.307816
+-89.968994
+-111.055710
diff --git a/tests/training/models/multi-source/multi-s2s.expected b/tests/training/models/multi-source/multi-s2s.expected
index 517f88e..61608c4 100644
--- a/tests/training/models/multi-source/multi-s2s.expected
+++ b/tests/training/models/multi-source/multi-s2s.expected
@@ -1,5 +1,5 @@
-389.64587402
-284.92941284
-207.10832214
-145.49560547
-82.26767731
+388.14068604
+279.26577759
+198.44155884
+137.92988586
+75.97171021
diff --git a/tests/training/models/multi-source/multi-transformer.expected b/tests/training/models/multi-source/multi-transformer.expected
index 09c3608..35c5e4b 100644
--- a/tests/training/models/multi-source/multi-transformer.expected
+++ b/tests/training/models/multi-source/multi-transformer.expected
@@ -1,5 +1,5 @@
-383.92016602
-276.54284668
-205.36067200
-145.78311157
-82.33700562
+382.23056030
+264.14666748
+193.33871460
+133.58370972
+71.43719482
diff --git a/tests/training/models/nematus/encdec_depth.expected b/tests/training/models/nematus/encdec_depth.expected
index 94c4672..af2a74a 100644
--- a/tests/training/models/nematus/encdec_depth.expected
+++ b/tests/training/models/nematus/encdec_depth.expected
@@ -1,5 +1,5 @@
-475.38677979
-452.63513184
-431.22705078
-414.68640137
-401.01882935
+489.13665771
+462.08361816
+439.01745605
+420.90402222
+404.19827271
diff --git a/tests/training/models/nematus/test_encdec_depth.sh b/tests/training/models/nematus/test_encdec_depth.sh
index c2ce41a..15b8f49 100644
--- a/tests/training/models/nematus/test_encdec_depth.sh
+++ b/tests/training/models/nematus/test_encdec_depth.sh
@@ -1,3 +1,4 @@
+
 #!/bin/bash
 
 # Exit on error
@@ -10,7 +11,7 @@ rm -f encdec_depth/* encdec_depth.log
 $MRT_MARIAN/marian \
     --type nematus --enc-cell gru-nematus --dec-cell gru-nematus \
     --enc-depth 4 --enc-cell-depth 4 --enc-type bidirectional --dec-depth 4 --dec-cell-base-depth 4 --dec-cell-high-depth 1 \
-    --layer-normalization \
+    --layer-normalization --sync-sgd \
     --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 128 \
     -m encdec_depth/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} -v vocab.en.yml vocab.de.yml \
     --log encdec_depth.log --disp-freq 2 --after-batches 10
@@ -19,7 +20,7 @@ test -e encdec_depth/model.npz
 test -e encdec_depth/model.npz.yml
 
 cat encdec_depth.log | $MRT_TOOLS/extract-costs.sh > encdec_depth.out
-$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 0.3 -o encdec_depth.diff
+$MRT_TOOLS/diff-nums.py encdec_depth.out encdec_depth.expected -p 3 -o encdec_depth.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/models/nematus/test_wmt17_model.sh b/tests/training/models/nematus/test_wmt17_model.sh
index 1d88bb8..5c06c83 100644
--- a/tests/training/models/nematus/test_wmt17_model.sh
+++ b/tests/training/models/nematus/test_wmt17_model.sh
@@ -19,7 +19,7 @@ test -e wmt17/model.npz
 test -e wmt17/model.npz.yml
 
 cat wmt17.log | $MRT_TOOLS/extract-costs.sh > wmt17.out
-$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 0.3 -o wmt17.diff
+$MRT_TOOLS/diff-nums.py wmt17.out wmt17.expected -p 2 -o wmt17.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/models/nematus/wmt17.expected b/tests/training/models/nematus/wmt17.expected
index 59e15b5..c7ffbcd 100644
--- a/tests/training/models/nematus/wmt17.expected
+++ b/tests/training/models/nematus/wmt17.expected
@@ -1,5 +1,5 @@
-475.19924927
-452.67333984
-431.39782715
-415.06884766
-401.69699097
+490.30654907
+466.26824951
+442.79544067
+426.92376709
+411.47766113
diff --git a/tests/training/models/transformer/transformer.expected b/tests/training/models/transformer/transformer.expected
index c181f85..50d85ee 100644
--- a/tests/training/models/transformer/transformer.expected
+++ b/tests/training/models/transformer/transformer.expected
@@ -1,10 +1,10 @@
-226.87113953
-250.82170105
-244.17037964
-247.56513977
-242.15576172
-238.88996887
-236.09550476
-231.19555664
-237.84509277
-241.79611206
+236.04219055
+260.96929932
+254.12194824
+257.51263428
+253.00631714
+248.38674927
+245.93569946
+240.78047180
+248.01782227
+252.18527222
diff --git a/tests/training/multi-gpu/async_sgd.expected b/tests/training/multi-gpu/async_sgd.expected
index 5578a20..8b07ca5 100644
--- a/tests/training/multi-gpu/async_sgd.expected
+++ b/tests/training/multi-gpu/async_sgd.expected
@@ -1,20 +1,20 @@
-Ep. 1 : Up. 20 : Sen. 1,280 : Cost 242.11746216
-Ep. 1 : Up. 40 : Sen. 2,560 : Cost 235.72708130
-Ep. 1 : Up. 60 : Sen. 3,840 : Cost 236.84497070
-Ep. 1 : Up. 80 : Sen. 5,120 : Cost 226.40872192
-Ep. 1 : Up. 100 : Sen. 6,400 : Cost 210.93917847
-Ep. 1 : Up. 120 : Sen. 7,680 : Cost 216.88357544
-Ep. 1 : Up. 140 : Sen. 8,960 : Cost 206.62683105
-Ep. 1 : Up. 160 : Sen. 10,240 : Cost 196.11888123
-Ep. 1 : Up. 180 : Sen. 11,520 : Cost 193.01203918
-Ep. 1 : Up. 200 : Sen. 12,800 : Cost 194.11386108
-Ep. 1 : Up. 220 : Sen. 14,080 : Cost 199.61235046
-Ep. 1 : Up. 240 : Sen. 15,360 : Cost 191.05105591
-Ep. 1 : Up. 260 : Sen. 16,640 : Cost 190.15347290
-Ep. 1 : Up. 280 : Sen. 17,920 : Cost 182.99386597
-Ep. 1 : Up. 300 : Sen. 19,200 : Cost 172.30728149
-Ep. 1 : Up. 320 : Sen. 20,480 : Cost 181.16923523
-Ep. 1 : Up. 340 : Sen. 21,760 : Cost 179.05541992
-Ep. 1 : Up. 360 : Sen. 23,040 : Cost 180.18623352
-Ep. 1 : Up. 380 : Sen. 24,320 : Cost 178.12226868
-Ep. 1 : Up. 400 : Sen. 25,600 : Cost 177.11029053
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 241.64421082
+Ep. 1 : Up. 40 : Sen. 2,560 : Cost 234.09941101
+Ep. 1 : Up. 60 : Sen. 3,840 : Cost 234.62942505
+Ep. 1 : Up. 80 : Sen. 5,120 : Cost 222.46903992
+Ep. 1 : Up. 100 : Sen. 6,400 : Cost 205.35173035
+Ep. 1 : Up. 120 : Sen. 7,680 : Cost 209.94436646
+Ep. 1 : Up. 140 : Sen. 8,960 : Cost 198.47026062
+Ep. 1 : Up. 160 : Sen. 10,240 : Cost 187.63980103
+Ep. 1 : Up. 180 : Sen. 11,520 : Cost 183.61985779
+Ep. 1 : Up. 200 : Sen. 12,800 : Cost 185.31411743
+Ep. 1 : Up. 220 : Sen. 14,080 : Cost 191.44917297
+Ep. 1 : Up. 240 : Sen. 15,360 : Cost 183.22087097
+Ep. 1 : Up. 260 : Sen. 16,640 : Cost 183.28189087
+Ep. 1 : Up. 280 : Sen. 17,920 : Cost 176.65219116
+Ep. 1 : Up. 300 : Sen. 19,200 : Cost 167.07302856
+Ep. 1 : Up. 320 : Sen. 20,480 : Cost 176.93701172
+Ep. 1 : Up. 340 : Sen. 21,760 : Cost 175.71195984
+Ep. 1 : Up. 360 : Sen. 23,040 : Cost 177.32435608
+Ep. 1 : Up. 380 : Sen. 24,320 : Cost 175.38401794
+Ep. 1 : Up. 400 : Sen. 25,600 : Cost 174.90682983
diff --git a/tests/training/multi-gpu/sync_sgd.expected b/tests/training/multi-gpu/sync_sgd.expected
index 4167667..0871af8 100644
--- a/tests/training/multi-gpu/sync_sgd.expected
+++ b/tests/training/multi-gpu/sync_sgd.expected
@@ -1,10 +1,10 @@
-216.98922729
-189.01902771
-178.41192627
-192.66134644
-206.70254517
-212.32287598
-201.16146851
-222.01089478
-210.61257935
-220.22055054
+216.80216980
+189.78765869
+181.02754211
+198.78121948
+212.99096680
+220.08464050
+209.56250000
+230.61848450
+218.01846313
+229.08955383
diff --git a/tests/training/multi-gpu/sync_sgd_1gpu.expected b/tests/training/multi-gpu/sync_sgd_1gpu.expected
index 7940fff..73c9503 100644
--- a/tests/training/multi-gpu/sync_sgd_1gpu.expected
+++ b/tests/training/multi-gpu/sync_sgd_1gpu.expected
@@ -1,4 +1,4 @@
-236.82699585
-197.64729309
-199.82775879
-184.65493774
+236.64883423
+197.38874817
+198.74374390
+183.10134888
diff --git a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
index 7940fff..0600ba2 100644
--- a/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
+++ b/tests/training/multi-gpu/sync_sgd_1gpu_expsmooth.expected
@@ -1,4 +1,4 @@
-236.82699585
-197.64729309
-199.82775879
-184.65493774
+236.64883423
+197.38874817
+198.74374390
+183.10137939
diff --git a/tests/training/restarting/sgd_2e.expected b/tests/training/restarting/sgd_2e.expected
index 9dacfd0..2a801f2 100644
--- a/tests/training/restarting/sgd_2e.expected
+++ b/tests/training/restarting/sgd_2e.expected
@@ -1,16 +1,16 @@
-Ep. 1 : Up. 4 : Sen. 128 : Cost 258.60427856
-Ep. 1 : Up. 8 : Sen. 256 : Cost 270.30130005
-Ep. 1 : Up. 12 : Sen. 384 : Cost 247.52519226
-Ep. 1 : Up. 16 : Sen. 512 : Cost 242.22946167
-Ep. 1 : Up. 20 : Sen. 640 : Cost 212.66212463
-Ep. 1 : Up. 24 : Sen. 768 : Cost 252.52357483
-Ep. 1 : Up. 28 : Sen. 896 : Cost 220.55186462
-Ep. 1 : Up. 32 : Sen. 1,024 : Cost 208.12597656
-Ep. 2 : Up. 36 : Sen. 128 : Cost 219.92158508
-Ep. 2 : Up. 40 : Sen. 256 : Cost 226.15286255
-Ep. 2 : Up. 44 : Sen. 384 : Cost 203.31124878
-Ep. 2 : Up. 48 : Sen. 512 : Cost 197.85574341
-Ep. 2 : Up. 52 : Sen. 640 : Cost 173.79647827
-Ep. 2 : Up. 56 : Sen. 768 : Cost 209.79650879
-Ep. 2 : Up. 60 : Sen. 896 : Cost 186.76977539
-Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.61027527
+Ep. 1 : Up. 4 : Sen. 128 : Cost 257.99652100
+Ep. 1 : Up. 8 : Sen. 256 : Cost 267.93783569
+Ep. 1 : Up. 12 : Sen. 384 : Cost 243.39039612
+Ep. 1 : Up. 16 : Sen. 512 : Cost 235.87208557
+Ep. 1 : Up. 20 : Sen. 640 : Cost 204.79017639
+Ep. 1 : Up. 24 : Sen. 768 : Cost 240.11624146
+Ep. 1 : Up. 28 : Sen. 896 : Cost 208.47099304
+Ep. 1 : Up. 32 : Sen. 1,024 : Cost 199.36221313
+Ep. 2 : Up. 36 : Sen. 128 : Cost 213.58728027
+Ep. 2 : Up. 40 : Sen. 256 : Cost 220.51084900
+Ep. 2 : Up. 44 : Sen. 384 : Cost 199.10847473
+Ep. 2 : Up. 48 : Sen. 512 : Cost 194.56945801
+Ep. 2 : Up. 52 : Sen. 640 : Cost 171.77461243
+Ep. 2 : Up. 56 : Sen. 768 : Cost 208.75405884
+Ep. 2 : Up. 60 : Sen. 896 : Cost 186.17434692
+Ep. 2 : Up. 64 : Sen. 1,024 : Cost 179.63316345
diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh
index 98aaf96..c4cd6cb 100644
--- a/tests/training/restoring/multi-gpu/test_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_sync.sh
@@ -47,7 +47,7 @@ test -e sync_2.log
 
 cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out
 
-$MRT_TOOLS/diff-nums.py -p 0.08 sync.out sync.expected -o sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.3 sync.out sync.expected -o sync.diff
 
 # Exit with success code
 exit 0
diff --git a/tests/training/restoring/optimizer/adagrad.costs.expected b/tests/training/restoring/optimizer/adagrad.costs.expected
index 31f312e..7b4f7e1 100644
--- a/tests/training/restoring/optimizer/adagrad.costs.expected
+++ b/tests/training/restoring/optimizer/adagrad.costs.expected
@@ -1,10 +1,10 @@
-238.72998047
-245.70014954
-240.35234070
-233.43530273
-239.59432983
-254.54069519
-256.56338501
-243.96914673
-245.85603333
-236.53524780
+238.52751160
+245.27938843
+239.83557129
+232.83401489
+238.87149048
+253.74154663
+255.69897461
+243.06086731
+244.85818481
+235.55209351
diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected
index 227cd1a..faacfeb 100644
--- a/tests/training/restoring/optimizer/adagrad.gt.expected
+++ b/tests/training/restoring/optimizer/adagrad.gt.expected
@@ -1,2 +1,2 @@
-[[  2.13038606e-06   2.62594858e-05   9.69232133e-06 ...,   0.00000000e+00
+[[  3.54592659e-04   5.64868504e-04   8.30977151e-05 ...,   0.00000000e+00
     0.00000000e+00   0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam.costs.expected b/tests/training/restoring/optimizer/adam.costs.expected
index 7272a33..a6b5f9a 100644
--- a/tests/training/restoring/optimizer/adam.costs.expected
+++ b/tests/training/restoring/optimizer/adam.costs.expected
@@ -1,10 +1,10 @@
-238.70394897
-245.51347351
-239.71450806
-231.26594543
-231.99667358
-237.87966919
-234.03823853
-217.62667847
-215.01562500
-205.33581543
+238.40983582
+244.61091614
+238.22981262
+229.24475098
+230.14970398
+234.50399780
+228.12467957
+210.38107300
+206.17379761
+196.83959961
diff --git a/tests/training/restoring/optimizer/adam.mt.expected b/tests/training/restoring/optimizer/adam.mt.expected
index 64bcee9..bd411e5 100644
--- a/tests/training/restoring/optimizer/adam.mt.expected
+++ b/tests/training/restoring/optimizer/adam.mt.expected
@@ -1,2 +1 @@
-[[ -5.87761315e-05   1.77891372e-04  -1.15002964e-04 ...,   0.00000000e+00
-    0.00000000e+00   0.00000000e+00]]
+[[-0.00032975  0.00083305 -0.00051456 ...,  0.          0.          0.        ]]
diff --git a/tests/training/restoring/optimizer/adam.vt.expected b/tests/training/restoring/optimizer/adam.vt.expected
index e08fe13..8205e2b 100644
--- a/tests/training/restoring/optimizer/adam.vt.expected
+++ b/tests/training/restoring/optimizer/adam.vt.expected
@@ -1,2 +1,2 @@
-[[  3.27316441e-09   9.30779294e-08   4.11611119e-08 ...,   0.00000000e+00
+[[  2.10904631e-07   7.14701400e-07   1.94320407e-07 ...,   0.00000000e+00
     0.00000000e+00   0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam_async.costs.expected b/tests/training/restoring/optimizer/adam_async.costs.expected
index 2c2bcce..5c13c3a 100644
--- a/tests/training/restoring/optimizer/adam_async.costs.expected
+++ b/tests/training/restoring/optimizer/adam_async.costs.expected
@@ -1,10 +1,10 @@
-7255.70458984
-8017.33105469
-7792.87353516
-7871.14453125
-7605.37109375
-7275.98925781
-6925.38037109
-6636.69628906
-6718.36572266
-6711.51171875
+7247.86181641
+7994.63183594
+7750.25000000
+7804.45703125
+7530.29150391
+7192.14550781
+6780.81640625
+6450.87402344
+6511.03564453
+6491.26269531
diff --git a/tests/training/restoring/optimizer/adam_async.mt.expected b/tests/training/restoring/optimizer/adam_async.mt.expected
index 36442fa..ff60b5d 100644
--- a/tests/training/restoring/optimizer/adam_async.mt.expected
+++ b/tests/training/restoring/optimizer/adam_async.mt.expected
@@ -1 +1 @@
-[[ 0.00348229  0.00959761 -0.00544088 ...,  0.          0.          0.        ]]
+[[-0.00195269  0.00663034 -0.00437212 ...,  0.          0.          0.        ]]
diff --git a/tests/training/restoring/optimizer/adam_async.vt.expected b/tests/training/restoring/optimizer/adam_async.vt.expected
index 147b02c..49106ba 100644
--- a/tests/training/restoring/optimizer/adam_async.vt.expected
+++ b/tests/training/restoring/optimizer/adam_async.vt.expected
@@ -1,2 +1,2 @@
-[[  8.52484561e-07   4.59003832e-06   1.47568892e-06 ...,   0.00000000e+00
+[[  1.19943786e-06   6.02165346e-06   2.27189776e-06 ...,   0.00000000e+00
     0.00000000e+00   0.00000000e+00]]
diff --git a/tests/training/restoring/optimizer/adam_load.expected b/tests/training/restoring/optimizer/adam_load.expected
index ee730d5..bf5fef4 100644
--- a/tests/training/restoring/optimizer/adam_load.expected
+++ b/tests/training/restoring/optimizer/adam_load.expected
@@ -1,6 +1,6 @@
-Ep. 1 : Up. 1 : Sen. 2 : Cost 223.47967529
-Ep. 1 : Up. 2 : Sen. 4 : Cost 255.49520874
-Ep. 1 : Up. 3 : Sen. 6 : Cost 213.80761719
-Ep. 1 : Up. 4 : Sen. 8 : Cost 352.16754150
-Ep. 1 : Up. 5 : Sen. 10 : Cost 367.21719360
-Ep. 1 : Up. 6 : Sen. 12 : Cost 196.68669128
+Ep. 1 : Up. 1 : Sen. 2 : Cost 223.64685059
+Ep. 1 : Up. 2 : Sen. 4 : Cost 258.80792236
+Ep. 1 : Up. 3 : Sen. 6 : Cost 255.67260742
+Ep. 1 : Up. 4 : Sen. 8 : Cost 346.67749023
+Ep. 1 : Up. 5 : Sen. 10 : Cost 278.72695923
+Ep. 1 : Up. 6 : Sen. 12 : Cost 178.23016357
diff --git a/tests/training/restoring/optimizer/adam_sync.costs.expected b/tests/training/restoring/optimizer/adam_sync.costs.expected
index 2faf27e..eb87705 100644
--- a/tests/training/restoring/optimizer/adam_sync.costs.expected
+++ b/tests/training/restoring/optimizer/adam_sync.costs.expected
@@ -1,10 +1,10 @@
-7255.24365234
-8016.10302734
-7789.29394531
-7859.13769531
-7564.06005859
-7214.78515625
-6903.62011719
-6621.17675781
-6706.66259766
-6701.24316406
+7245.90527344
+7991.28515625
+7743.94384766
+7789.80712891
+7495.90478516
+7120.76269531
+6739.33447266
+6421.30761719
+6486.60400391
+6468.15869141
diff --git a/tests/training/restoring/optimizer/adam_sync.mt.expected b/tests/training/restoring/optimizer/adam_sync.mt.expected
index 5cdf31d..6cb2aaa 100644
--- a/tests/training/restoring/optimizer/adam_sync.mt.expected
+++ b/tests/training/restoring/optimizer/adam_sync.mt.expected
@@ -1 +1 @@
-[[ 0.00084951  0.00929706 -0.00577381 ...,  0.          0.          0.        ]]
+[[-0.00177634  0.00746338 -0.00486176 ...,  0.          0.          0.        ]]
diff --git a/tests/training/restoring/optimizer/adam_sync.vt.expected b/tests/training/restoring/optimizer/adam_sync.vt.expected
index 7de36bb..b86c122 100644
--- a/tests/training/restoring/optimizer/adam_sync.vt.expected
+++ b/tests/training/restoring/optimizer/adam_sync.vt.expected
@@ -1,2 +1,2 @@
-[[  2.50489421e-07   4.21463801e-06   1.65782251e-06 ...,   0.00000000e+00
+[[  1.04040987e-06   6.49143612e-06   2.48227298e-06 ...,   0.00000000e+00
     0.00000000e+00   0.00000000e+00]]
diff --git a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
index 74fefa5..33ad8d0 100644
--- a/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
+++ b/tests/training/restoring/validation/test_restoring_validation_lower_is_better.sh
@@ -28,7 +28,7 @@ $MRT_MARIAN/marian \
     --no-shuffle --seed 1111 --maxi-batch 1 --maxi-batch-sort none \
     --dim-emb 64 --dim-rnn 128 --mini-batch 32 \
     -m valid_lowisbet/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \
-    --disp-freq 10 --valid-freq 30 --after-batches 320 --early-stopping 2 \
+    --disp-freq 10 --valid-freq 30 --after-batches 320 --early-stopping 4 \
     --valid-metrics cross-entropy --valid-sets $MRT_DATA/europarl.de-en/toy.bpe.{de,en} --valid-mini-batch 64 \
     --valid-log valid_lowisbet_2.log
 
diff --git a/tests/training/restoring/validation/valid_add.expected b/tests/training/restoring/validation/valid_add.expected
index 397895f..fb2d8a5 100644
--- a/tests/training/restoring/validation/valid_add.expected
+++ b/tests/training/restoring/validation/valid_add.expected
@@ -1,15 +1,15 @@
-[valid] Ep. 1 : Up. 20 : cross-entropy : 296.108 : new best
-[valid] Ep. 1 : Up. 40 : cross-entropy : 296.103 : new best
-[valid] Ep. 1 : Up. 60 : cross-entropy : 296.097 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 296.092 : new best
-[valid] Ep. 1 : Up. 100 : cross-entropy : 296.088 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 296.082 : new best
-[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1572 : new best
-[valid] Ep. 1 : Up. 140 : cross-entropy : 296.078 : new best
-[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.157 : new best
-[valid] Ep. 1 : Up. 160 : cross-entropy : 296.072 : new best
-[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1569 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 296.068 : new best
-[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1567 : new best
-[valid] Ep. 1 : Up. 200 : cross-entropy : 296.062 : new best
-[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1565 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 296.282 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 296.269 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 296.255 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 296.242 : new best
+[valid] Ep. 1 : Up. 100 : cross-entropy : 296.229 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 296.216 : new best
+[valid] Ep. 1 : Up. 120 : ce-mean-words : 10.1618 : new best
+[valid] Ep. 1 : Up. 140 : cross-entropy : 296.202 : new best
+[valid] Ep. 1 : Up. 140 : ce-mean-words : 10.1613 : new best
+[valid] Ep. 1 : Up. 160 : cross-entropy : 296.189 : new best
+[valid] Ep. 1 : Up. 160 : ce-mean-words : 10.1609 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 296.176 : new best
+[valid] Ep. 1 : Up. 180 : ce-mean-words : 10.1604 : new best
+[valid] Ep. 1 : Up. 200 : cross-entropy : 296.162 : new best
+[valid] Ep. 1 : Up. 200 : ce-mean-words : 10.1599 : new best
diff --git a/tests/training/restoring/validation/valid_lowisbet.expected b/tests/training/restoring/validation/valid_lowisbet.expected
index 7ddb2d1..f00e988 100644
--- a/tests/training/restoring/validation/valid_lowisbet.expected
+++ b/tests/training/restoring/validation/valid_lowisbet.expected
@@ -1,11 +1,11 @@
-[valid] Ep. 1 : Up. 30 : cross-entropy : 299.666 : new best
-[valid] Ep. 2 : Up. 60 : cross-entropy : 299.155 : new best
-[valid] Ep. 3 : Up. 90 : cross-entropy : 295.698 : new best
-[valid] Ep. 4 : Up. 120 : cross-entropy : 293.975 : new best
-[valid] Ep. 5 : Up. 150 : cross-entropy : 293.203 : new best
-[valid] Ep. 5 : Up. 160 : cross-entropy : 293.042 : new best
-[valid] Ep. 6 : Up. 180 : cross-entropy : 291.761 : new best
-[valid] Ep. 7 : Up. 210 : cross-entropy : 291.089 : new best
-[valid] Ep. 8 : Up. 240 : cross-entropy : 291.087 : new best
-[valid] Ep. 9 : Up. 270 : cross-entropy : 291.422 : stalled 1 times (last best: 291.087)
-[valid] Ep. 10 : Up. 300 : cross-entropy : 291.984 : stalled 2 times (last best: 291.087)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 299.129 : new best
+[valid] Ep. 2 : Up. 60 : cross-entropy : 298.528 : new best
+[valid] Ep. 3 : Up. 90 : cross-entropy : 296.425 : new best
+[valid] Ep. 4 : Up. 120 : cross-entropy : 297.841 : stalled 1 times (last best: 296.425)
+[valid] Ep. 5 : Up. 150 : cross-entropy : 297.727 : stalled 2 times (last best: 296.425)
+[valid] Ep. 6 : Up. 180 : cross-entropy : 296.889 : stalled 3 times (last best: 296.425)
+[valid] Ep. 7 : Up. 210 : cross-entropy : 296.014 : new best
+[valid] Ep. 8 : Up. 240 : cross-entropy : 296.076 : stalled 1 times (last best: 296.014)
+[valid] Ep. 9 : Up. 270 : cross-entropy : 296.746 : stalled 2 times (last best: 296.014)
+[valid] Ep. 10 : Up. 300 : cross-entropy : 297.825 : stalled 3 times (last best: 296.014)
+[valid] Ep. 10 : Up. 320 : cross-entropy : 298.678 : stalled 4 times (last best: 296.014)
diff --git a/tests/training/restoring/validation/valid_newbest.expected b/tests/training/restoring/validation/valid_newbest.expected
index ec21cb2..d03d098 100644
--- a/tests/training/restoring/validation/valid_newbest.expected
+++ b/tests/training/restoring/validation/valid_newbest.expected
@@ -1,20 +1,20 @@
-[valid] Ep. 1 : Up. 10 : cross-entropy : 250.085 : new best
-[valid] Ep. 1 : Up. 10 : translation : 4 : new best
-[valid] Ep. 1 : Up. 20 : cross-entropy : 250.084 : new best
-[valid] Ep. 1 : Up. 20 : translation : 4 : stalled 1 times (last best: 4)
-[valid] Ep. 1 : Up. 30 : cross-entropy : 250.083 : new best
-[valid] Ep. 1 : Up. 30 : translation : 4 : stalled 2 times (last best: 4)
-[valid] Ep. 1 : Up. 40 : cross-entropy : 250.081 : new best
+[valid] Ep. 1 : Up. 10 : cross-entropy : 250.506 : new best
+[valid] Ep. 1 : Up. 10 : translation : 8 : new best
+[valid] Ep. 1 : Up. 20 : cross-entropy : 250.501 : new best
+[valid] Ep. 1 : Up. 20 : translation : 8 : stalled 1 times (last best: 8)
+[valid] Ep. 1 : Up. 30 : cross-entropy : 250.497 : new best
+[valid] Ep. 1 : Up. 30 : translation : 8 : stalled 2 times (last best: 8)
+[valid] Ep. 1 : Up. 40 : cross-entropy : 250.491 : new best
 [valid] Ep. 1 : Up. 40 : translation : 9 : new best
-[valid] Ep. 1 : Up. 50 : cross-entropy : 250.08 : new best
-[valid] Ep. 1 : Up. 50 : translation : 0 : stalled 1 times (last best: 9)
-[valid] Ep. 1 : Up. 60 : cross-entropy : 250.078 : new best
-[valid] Ep. 1 : Up. 60 : translation : 0 : stalled 2 times (last best: 9)
-[valid] Ep. 1 : Up. 70 : cross-entropy : 250.077 : new best
-[valid] Ep. 1 : Up. 70 : translation : 5 : stalled 3 times (last best: 9)
-[valid] Ep. 1 : Up. 80 : cross-entropy : 250.075 : new best
-[valid] Ep. 1 : Up. 80 : translation : 6 : stalled 4 times (last best: 9)
-[valid] Ep. 1 : Up. 90 : cross-entropy : 250.074 : new best
-[valid] Ep. 1 : Up. 90 : translation : 6 : stalled 5 times (last best: 9)
-[valid] Ep. 1 : Up. 100 : cross-entropy : 250.072 : new best
+[valid] Ep. 1 : Up. 50 : cross-entropy : 250.486 : new best
+[valid] Ep. 1 : Up. 50 : translation : 7 : stalled 1 times (last best: 9)
+[valid] Ep. 1 : Up. 60 : cross-entropy : 250.481 : new best
+[valid] Ep. 1 : Up. 60 : translation : 3 : stalled 2 times (last best: 9)
+[valid] Ep. 1 : Up. 70 : cross-entropy : 250.476 : new best
+[valid] Ep. 1 : Up. 70 : translation : 6 : stalled 3 times (last best: 9)
+[valid] Ep. 1 : Up. 80 : cross-entropy : 250.471 : new best
+[valid] Ep. 1 : Up. 80 : translation : 0 : stalled 4 times (last best: 9)
+[valid] Ep. 1 : Up. 90 : cross-entropy : 250.465 : new best
+[valid] Ep. 1 : Up. 90 : translation : 9 : stalled 5 times (last best: 9)
+[valid] Ep. 1 : Up. 100 : cross-entropy : 250.461 : new best
 [valid] Ep. 1 : Up. 100 : translation : 6 : stalled 6 times (last best: 9)
diff --git a/tests/training/validation/final_batch.expected b/tests/training/validation/final_batch.expected
index 893f451..eac5cdc 100644
--- a/tests/training/validation/final_batch.expected
+++ b/tests/training/validation/final_batch.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 240.178 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 240.164 : new best
-[valid] Ep. 1 : Up. 150 : cross-entropy : 240.157 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 240.376 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.348 : new best
+[valid] Ep. 1 : Up. 150 : cross-entropy : 240.332 : new best
diff --git a/tests/training/validation/final_epoch.expected b/tests/training/validation/final_epoch.expected
index decf6d3..ebcb25c 100644
--- a/tests/training/validation/final_epoch.expected
+++ b/tests/training/validation/final_epoch.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 40 : cross-entropy : 240.203 : new best
-[valid] Ep. 1 : Up. 80 : cross-entropy : 240.194 : new best
-[valid] Ep. 2 : Up. 81 : cross-entropy : 240.194 : new best
+[valid] Ep. 1 : Up. 40 : cross-entropy : 240.475 : new best
+[valid] Ep. 1 : Up. 80 : cross-entropy : 240.459 : new best
+[valid] Ep. 2 : Up. 81 : cross-entropy : 240.459 : new best
diff --git a/tests/training/validation/final_match.expected b/tests/training/validation/final_match.expected
index 29ae2c8..87c9cc2 100644
--- a/tests/training/validation/final_match.expected
+++ b/tests/training/validation/final_match.expected
@@ -1,3 +1,3 @@
-[valid] Ep. 1 : Up. 60 : cross-entropy : 240.178 : new best
-[valid] Ep. 1 : Up. 120 : cross-entropy : 240.164 : new best
-[valid] Ep. 1 : Up. 180 : cross-entropy : 240.15 : new best
+[valid] Ep. 1 : Up. 60 : cross-entropy : 240.376 : new best
+[valid] Ep. 1 : Up. 120 : cross-entropy : 240.348 : new best
+[valid] Ep. 1 : Up. 180 : cross-entropy : 240.317 : new best
diff --git a/tools/diff-nums.py b/tools/diff-nums.py
index 18b4415..0e527ad 100755
--- a/tools/diff-nums.py
+++ b/tools/diff-nums.py
@@ -16,7 +16,7 @@ NORMALIZE_NUMPY = [
     ("...) ", "... "),
     ("..., ", "... "),
     ("]", " ]"),
-    ("[", "[ ")
+    ("[", "[ "),
 ]
 
 
@@ -99,6 +99,7 @@ def read_line(iofile, separator=""):
 
 def process_line(line):
     line = REGEX_STRIP_EP.sub("[valid] ", line)                 # normalize "[valid] Ep. 1 : Up. 30" -> "[valid] 30"
+    line = line.replace("(", "( ").replace(")", " )")           # insert space before and after parentheses
     line_toks = line.rstrip().replace("[[-", "[[ -").split()    # tokenize
     nums = [float(s.replace(',', ''))                           # handle comma as thousands separator
             for s in line_toks if is_numeric(s)]                # find all numbers
author	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2019-02-06 23:38:15 +0300
committer	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2019-02-06 23:38:15 +0300
commit	1419e20ddcab91b49c200d892c0149ab9d7003a2 (patch)
tree	7782e6825d36c2a8d448d3b374d070576c10c337
parent	5fafdf1eefd14d08e61ce9917d050c4c119ae48f (diff)
parent	142eadddbe04493c1024b42586030b72e9cb7ea2 (diff)