diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-01-26 13:46:32 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-01-26 13:46:32 +0300 |
commit | 243652dd6b3bc8ce2422b2719489832de4004571 (patch) | |
tree | a1c23e87b81283a75309037f9a37dbb222d5ccec | |
parent | 18c4e54806205a3a29b0a8435864d6312dccaacf (diff) | |
parent | c360473cc33650da8672cc93fc3feb50028822e7 (diff) |
Merge branch 'master' into mjd/fp16.2
6 files changed, 68 insertions, 70 deletions
diff --git a/tests/training/features/quantized-model/model_centers.expected b/tests/training/features/quantized-model/model_centers.expected index 954a001..57380ae 100644 --- a/tests/training/features/quantized-model/model_centers.expected +++ b/tests/training/features/quantized-model/model_centers.expected @@ -1,49 +1,51 @@ -Tensor decoder_W_comb_att unique centers: [-0.1826457 -0.1217638 -0.0608819 0. 0.0608819 0.1217638 - 0.1826457] -Tensor decoder_Wc_att unique centers: [-0.17328945 -0.1155263 -0.05776315 0. 0.05776315 0.1155263 - 0.17328945] -Tensor Wemb_dec unique centers: [-2.3631978 -1.5754652 -0.7877326 0. 0.7877326 1.5754652 - 2.3631978] -Tensor decoder_U unique centers: [-0.3221001 -0.2147334 -0.1073667 -0. 0.1073667 0.2147334 - 0.3221001] -Tensor decoder_Ux unique centers: [-0.43822908 -0.29215273 -0.14607637 0. 0.14607637 0.29215273 - 0.43822908] -Tensor decoder_W unique centers: [-0.22816041 -0.15210694 -0.07605347 0. 0.07605347 0.15210694 - 0.22816041] -Tensor decoder_Wx unique centers: [-0.49631694 -0.33087796 -0.16543898 -0. 0.16543898 0.33087796 - 0.49631694] -Tensor decoder_U_nl unique centers: [-0.3815875 -0.25439167 -0.12719584 -0. 0.12719584 0.25439167 - 0.3815875 ] -Tensor decoder_Ux_nl unique centers: [-0.5111215 -0.34074768 -0.17037384 0. 0.17037384 0.34074768 - 0.5111215 ] -Tensor decoder_Wc unique centers: [-0.42579597 -0.283864 -0.141932 -0. 0.141932 0.283864 - 0.42579597] -Tensor decoder_Wcx unique centers: [-0.8375 -0.55833334 -0.27916667 -0. 0.27916667 0.55833334 - 0.8375 ] -Tensor ff_logit_prev_W unique centers: [-70.87341 -23.624472 0. 23.624472 47.248943] -Tensor ff_logit_lstm_W unique centers: [-246.07938 -164.05292 -82.02646 0. 82.02646 164.05292 - 246.07938] -Tensor ff_logit_ctx_W unique centers: [-240.9685 -160.64568 -80.32284 0. 80.32284 160.64568 - 240.9685 ] -Tensor decoder_ff_logit_l2_Wt unique centers: [-106.12637 -70.750916 -35.375458 -0. 35.375458 70.750916 - 106.12637 ] -Tensor ff_state_W unique centers: [-0.2559117 -0.1706078 -0.0853039 -0. 0.0853039 0.1706078 - 0.2559117] -Tensor Wemb unique centers: [-0.39904252 -0.19952126 0. 0.19952126 0.39904252 0.5985638 ] -Tensor encoder_U unique centers: [-0.30375382 -0.20250255 -0.10125127 -0. 0.10125127 0.20250255 - 0.30375382] -Tensor encoder_Ux unique centers: [-0.45867392 -0.30578262 -0.15289131 -0. 0.15289131 0.30578262 - 0.45867392] -Tensor encoder_W unique centers: [-0.2062971 -0.1375314 -0.0687657 0. 0.0687657 0.1375314 - 0.2062971] -Tensor encoder_Wx unique centers: [-0.3073737 -0.20491579 -0.1024579 0. 0.1024579 0.20491579 - 0.3073737 ] -Tensor encoder_r_U unique centers: [-0.34318972 -0.22879314 -0.11439657 0. 0.11439657 0.22879314 - 0.34318972] -Tensor encoder_r_Ux unique centers: [-0.72291785 -0.48194525 -0.24097262 -0. 0.24097262 0.48194525 - 0.72291785] -Tensor encoder_r_W unique centers: [-0.21613705 -0.14409137 -0.07204568 -0. 0.07204568 0.14409137 - 0.21613705] -Tensor encoder_r_Wx unique centers: [-0.39892155 -0.2659477 -0.13297385 -0. 0.13297385 0.2659477 - 0.39892155] +Tensor decoder_W_comb_att unique centers: [-0.17677179 -0.11784786 -0.05892393 -0. 0.05892393 0.11784786 + 0.17677179] +Tensor decoder_Wc_att unique centers: [-0.15336949 -0.10224632 -0.05112316 -0. 0.05112316 0.10224632 + 0.15336949] +Tensor Wemb_dec unique centers: [-0.32046145 -0.21364096 -0.10682048 0. 0.10682048 0.21364096 + 0.32046145] +Tensor decoder_U unique centers: [-0.17687811 -0.11791874 -0.05895937 -0. 0.05895937 0.11791874 + 0.17687811] +Tensor decoder_Ux unique centers: [-0.21770547 -0.14513698 -0.07256849 0. 0.07256849 0.14513698 + 0.21770547] +Tensor decoder_W unique centers: [-0.19397542 -0.12931694 -0.06465847 -0. 0.06465847 0.12931694 + 0.19397542] +Tensor decoder_Wx unique centers: [-0.25329626 -0.16886416 -0.08443208 -0. 0.08443208 0.16886416 + 0.25329626] +Tensor decoder_U_nl unique centers: [-0.17696194 -0.11797463 -0.05898732 0. 0.05898732 0.11797463 + 0.17696194] +Tensor decoder_Ux_nl unique centers: [-0.21896881 -0.14597921 -0.07298961 0. 0.07298961 0.14597921 + 0.21896881] +Tensor decoder_Wc unique centers: [-0.15324192 -0.10216128 -0.05108064 0. 0.05108064 0.10216128 + 0.15324192] +Tensor decoder_Wcx unique centers: [-0.18192002 -0.12128001 -0.06064001 -0. 0.06064001 0.12128001 + 0.18192002] +Tensor ff_logit_prev_W unique centers: [-0.32183957 -0.2145597 -0.10727985 -0. 0.10727985 0.2145597 + 0.32183957] +Tensor ff_logit_lstm_W unique centers: [-0.25455362 -0.16970241 -0.08485121 0. 0.08485121 0.16970241 + 0.25455362] +Tensor ff_logit_ctx_W unique centers: [-0.19867198 -0.13244799 -0.06622399 -0. 0.06622399 0.13244799 + 0.19867198] +Tensor decoder_ff_logit_l2_Wt unique centers: [-0.36124557 -0.24083039 -0.1204152 0. 0.1204152 0.24083039 + 0.36124557] +Tensor ff_state_W unique centers: [-0.17704961 -0.11803307 -0.05901653 0. 0.05901653 0.11803307 + 0.17704961] +Tensor Wemb unique centers: [-0.31208774 -0.20805849 -0.10402925 0. 0.10402925 0.20805849 + 0.31208774] +Tensor encoder_U unique centers: [-0.17686225 -0.11790817 -0.05895409 0. 0.05895409 0.11790817 + 0.17686225] +Tensor encoder_Ux unique centers: [-0.21824732 -0.14549822 -0.07274911 0. 0.07274911 0.14549822 + 0.21824732] +Tensor encoder_W unique centers: [-0.19403435 -0.12935624 -0.06467812 0. 0.06467812 0.12935624 + 0.19403435] +Tensor encoder_Wx unique centers: [-0.25213736 -0.16809157 -0.08404578 -0. 0.08404578 0.16809157 + 0.25213736] +Tensor encoder_r_U unique centers: [-0.17699143 -0.11799429 -0.05899715 0. 0.05899715 0.11799429 + 0.17699143] +Tensor encoder_r_Ux unique centers: [-0.21971346 -0.14647564 -0.07323782 -0. 0.07323782 0.14647564 + 0.21971346] +Tensor encoder_r_W unique centers: [-0.19410282 -0.12940188 -0.06470094 0. 0.06470094 0.12940188 + 0.19410282] +Tensor encoder_r_Wx unique centers: [-0.25225359 -0.16816907 -0.08408453 -0. 0.08408453 0.16816907 + 0.25225359] Tensor decoder_c_tt unique centers: [] diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected index 2d0638e..17620ec 100644 --- a/tests/training/features/quantized-model/quantized.expected +++ b/tests/training/features/quantized-model/quantized.expected @@ -1,10 +1,10 @@ -5296.80419922 -14729.64062500 -14570.66210938 -17166.55859375 -16055.21875000 -16277.48437500 -18673.34765625 -16747.37109375 -17298.72070312 -16335.72949219 +225.10929871 +243.58345032 +229.45071411 +224.28813171 +212.65242004 +204.06596375 +197.81690979 +190.08915710 +193.72299194 +195.20808411 diff --git a/tests/training/features/quantized-model/test_quant_centers.sh b/tests/training/features/quantized-model/test_quant_centers.sh index 8318c24..3ae7b37 100644 --- a/tests/training/features/quantized-model/test_quant_centers.sh +++ b/tests/training/features/quantized-model/test_quant_centers.sh @@ -3,6 +3,7 @@ ##################################################################### # SUMMARY: Make sure that the resulting model is in quantized form # AUTHOR: afaji +# TAGS: clip-norm ##################################################################### # Exit on error @@ -16,7 +17,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 10 --disp-freq 2 --quantize-bits 3 diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh index 67019f2..ac44b68 100644 --- a/tests/training/features/quantized-model/test_quantmodel.sh +++ b/tests/training/features/quantized-model/test_quantmodel.sh @@ -3,6 +3,7 @@ ##################################################################### # SUMMARY: Train a quantized marian model # AUTHOR: afaji +# TAGS: clip-norm ##################################################################### # Exit on error @@ -16,7 +17,7 @@ mkdir -p train # Train an 8-bits model $MRT_MARIAN/marian \ - --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 0 \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd --clip-norm 1 \ -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \ --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \ --log $PREFIX.log diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected index a6c90a5..30a932c 100644 --- a/tests/training/restoring/optimizer/adagrad.gt.expected +++ b/tests/training/restoring/optimizer/adagrad.gt.expected @@ -1,2 +1,2 @@ -[[8.0574207e+00 1.5418689e-01 4.3262744e+00 ... 4.0905408e+03 - 1.1550205e+04 1.3359570e+04]] +[[ 4.38133684e-05 1.40065049e-06 3.63037943e-05 ..., 1.23982169e-02 + 3.66997421e-02 4.11312692e-02]] diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh index 1372071..4396a76 100644 --- a/tests/training/restoring/optimizer/test_adagrad_params.sh +++ b/tests/training/restoring/optimizer/test_adagrad_params.sh @@ -1,11 +1,5 @@ #!/bin/bash -x -##################################################################### -# SUMMARY: Training with Adagrad optimizer -# AUTHOR: snukky -# TAGS: optimizer adagrad -##################################################################### - # Exit on error set -e @@ -14,7 +8,7 @@ rm -rf adagrad adagrad*.log mkdir -p adagrad $MRT_MARIAN/marian \ - --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \ + --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 --clip-norm 1 \ -m adagrad/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 --save-freq 60 --optimizer adagrad --cost-type ce-mean \ --log adagrad.log @@ -30,7 +24,7 @@ python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimi $MRT_TOOLS/diff.sh adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out -$MRT_TOOLS/diff-nums.py --numpy -p 0.009 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff +$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff # Exit with success code exit 0 |