Merge branch 'master' into mjd/fp16.2

author: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2021-01-26 13:46:32 +0300
committer: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2021-01-26 13:46:32 +0300
commit: 243652dd6b3bc8ce2422b2719489832de4004571 (patch)
tree: a1c23e87b81283a75309037f9a37dbb222d5ccec
parent: 18c4e54806205a3a29b0a8435864d6312dccaacf (diff)
parent: c360473cc33650da8672cc93fc3feb50028822e7 (diff)
6 files changed, 68 insertions, 70 deletions
diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected
index 954a001..57380ae 100644
--- a/tests/training/features/quantized-model/model_centers.expected
+++ b/tests/training/features/quantized-model/model_centers.expected
@@ -1,49 +1,51 @@
-Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819  0.         0.0608819  0.1217638
-  0.1826457]
-Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263  -0.05776315  0.          0.05776315  0.1155263
-  0.17328945]
-Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326  0.         0.7877326  1.5754652
-  2.3631978]
-Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0.         0.1073667  0.2147334
-  0.3221001]
-Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637  0.          0.14607637  0.29215273
-  0.43822908]
-Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347  0.          0.07605347  0.15210694
-  0.22816041]
-Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0.          0.16543898  0.33087796
-  0.49631694]
-Tensor decoder_U_nl unique centers: [-0.3815875  -0.25439167 -0.12719584 -0.          0.12719584  0.25439167
-  0.3815875 ]
-Tensor decoder_Ux_nl unique centers: [-0.5111215  -0.34074768 -0.17037384  0.          0.17037384  0.34074768
-  0.5111215 ]
-Tensor decoder_Wc unique centers: [-0.42579597 -0.283864   -0.141932   -0.          0.141932    0.283864
-  0.42579597]
-Tensor decoder_Wcx unique centers: [-0.8375     -0.55833334 -0.27916667 -0.          0.27916667  0.55833334
-  0.8375    ]
-Tensor ff_logit_prev_W unique centers: [-70.87341  -23.624472   0.        23.624472  47.248943]
-Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292  -82.02646    0.        82.02646  164.05292
-  246.07938]
-Tensor ff_logit_ctx_W unique centers: [-240.9685  -160.64568  -80.32284    0.        80.32284  160.64568
-  240.9685 ]
-Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637   -70.750916  -35.375458   -0.         35.375458   70.750916
-  106.12637 ]
-Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0.         0.0853039  0.1706078
-  0.2559117]
-Tensor Wemb unique centers: [-0.39904252 -0.19952126  0.          0.19952126  0.39904252  0.5985638 ]
-Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0.          0.10125127  0.20250255
-  0.30375382]
-Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0.          0.15289131  0.30578262
-  0.45867392]
-Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657  0.         0.0687657  0.1375314
-  0.2062971]
-Tensor encoder_Wx unique centers: [-0.3073737  -0.20491579 -0.1024579   0.          0.1024579   0.20491579
-  0.3073737 ]
-Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657  0.          0.11439657  0.22879314
-  0.34318972]
-Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0.          0.24097262  0.48194525
-  0.72291785]
-Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0.          0.07204568  0.14409137
-  0.21613705]
-Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477  -0.13297385 -0.          0.13297385  0.2659477
-  0.39892155]
+Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0.          0.05892393  0.11784786
+  0.17677179]
+Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0.          0.05112316  0.10224632
+  0.15336949]
+Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048  0.          0.10682048  0.21364096
+  0.32046145]
+Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0.          0.05895937  0.11791874
+  0.17687811]
+Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849  0.          0.07256849  0.14513698
+  0.21770547]
+Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0.          0.06465847  0.12931694
+  0.19397542]
+Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0.          0.08443208  0.16886416
+  0.25329626]
+Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732  0.          0.05898732  0.11797463
+  0.17696194]
+Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961  0.          0.07298961  0.14597921
+  0.21896881]
+Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064  0.          0.05108064  0.10216128
+  0.15324192]
+Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0.          0.06064001  0.12128001
+  0.18192002]
+Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597  -0.10727985 -0.          0.10727985  0.2145597
+  0.32183957]
+Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121  0.          0.08485121  0.16970241
+  0.25455362]
+Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0.          0.06622399  0.13244799
+  0.19867198]
+Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152   0.          0.1204152   0.24083039
+  0.36124557]
+Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653  0.          0.05901653  0.11803307
+  0.17704961]
+Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925  0.          0.10402925  0.20805849
+  0.31208774]
+Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409  0.          0.05895409  0.11790817
+  0.17686225]
+Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911  0.          0.07274911  0.14549822
+  0.21824732]
+Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812  0.          0.06467812  0.12935624
+  0.19403435]
+Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0.          0.08404578  0.16809157
+  0.25213736]
+Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715  0.          0.05899715  0.11799429
+  0.17699143]
+Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0.          0.07323782  0.14647564
+  0.21971346]
+Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094  0.          0.06470094  0.12940188
+  0.19410282]
+Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0.          0.08408453  0.16816907
+  0.25225359]
 Tensor decoder_c_tt unique centers: []
diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected
index 2d0638e..17620ec 100644
--- a/tests/training/features/quantized-model/quantized.expected
+++ b/tests/training/features/quantized-model/quantized.expected
@@ -1,10 +1,10 @@
-5296.80419922
-14729.64062500
-14570.66210938
-17166.55859375
-16055.21875000
-16277.48437500
-18673.34765625
-16747.37109375
-17298.72070312
-16335.72949219
+225.10929871
+243.58345032
+229.45071411
+224.28813171
+212.65242004
+204.06596375
+197.81690979
+190.08915710
+193.72299194
+195.20808411
diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh
index 8318c24..3ae7b37 100644
--- a/tests/training/features/quantized-model/test_quant_centers.sh
+++ b/tests/training/features/quantized-model/test_quant_centers.sh
@@ -3,6 +3,7 @@
 #####################################################################
 # SUMMARY: Make sure that the resulting model is in quantized form
 # AUTHOR: afaji
+# TAGS: clip-norm
 #####################################################################
 
 # Exit on error
@@ -16,7 +17,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3
 
diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh
index 67019f2..ac44b68 100644
--- a/tests/training/features/quantized-model/test_quantmodel.sh
+++ b/tests/training/features/quantized-model/test_quantmodel.sh
@@ -3,6 +3,7 @@
 #####################################################################
 # SUMMARY: Train a quantized marian model
 # AUTHOR: afaji
+# TAGS: clip-norm
 #####################################################################
 
 # Exit on error
@@ -16,7 +17,7 @@ mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
-    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \
     -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \
     --log $PREFIX.log
diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected
index a6c90a5..30a932c 100644
--- a/tests/training/restoring/optimizer/adagrad.gt.expected
+++ b/tests/training/restoring/optimizer/adagrad.gt.expected
@@ -1,2 +1,2 @@
-[[8.0574207e+00 1.5418689e-01 4.3262744e+00 ... 4.0905408e+03
-  1.1550205e+04 1.3359570e+04]]
+[[  4.38133684e-05   1.40065049e-06   3.63037943e-05 ...,   1.23982169e-02
+    3.66997421e-02   4.11312692e-02]]
diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh
index 1372071..4396a76 100644
--- a/tests/training/restoring/optimizer/test_adagrad_params.sh
+++ b/tests/training/restoring/optimizer/test_adagrad_params.sh
@@ -1,11 +1,5 @@
 #!/bin/bash -x
 
-#####################################################################
-# SUMMARY: Training with Adagrad optimizer
-# AUTHOR: snukky
-# TAGS: optimizer adagrad
-#####################################################################
-
 # Exit on error
 set -e
 
@@ -14,7 +8,7 @@ rm -rf adagrad adagrad*.log
 mkdir -p adagrad
 
 $MRT_MARIAN/marian \
-    --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \
+    --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 --clip-norm 1 \
     -m adagrad/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
     --disp-freq 10 --after-batches 100 --save-freq 60 --optimizer adagrad --cost-type ce-mean \
     --log adagrad.log
@@ -30,7 +24,7 @@ python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimi
 $MRT_TOOLS/diff.sh adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
 
 python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.009 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
 
 # Exit with success code
 exit 0
author	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2021-01-26 13:46:32 +0300
committer	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2021-01-26 13:46:32 +0300
commit	243652dd6b3bc8ce2422b2719489832de4004571 (patch)
tree	a1c23e87b81283a75309037f9a37dbb222d5ccec
parent	18c4e54806205a3a29b0a8435864d6312dccaacf (diff)
parent	c360473cc33650da8672cc93fc3feb50028822e7 (diff)