github.com/marian-nmt/marian-regression-tests.git
commit    d8ce49222d7178b00606a975cee2eac52222d09e
author    Roman Grundkiewicz <rgrundkiewicz@gmail.com>  2020-11-13 20:09:42 +0300
committer Roman Grundkiewicz <rgrundkiewicz@gmail.com>  2020-11-13 20:09:42 +0300
tree      8f7ff9bdddd98f5bfa6c4503fa5b9405c550a927 /tests
parent    ff30754bc5b0e016aee9966af419d03eaaafc8db

    Use clip-norm 0 in training/restoring/
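Background for this change: in Marian, --clip-norm c rescales any gradient whose L2 norm exceeds c back down to norm c, and --clip-norm 0 disables clipping. A minimal NumPy sketch of that rule (names are illustrative, not Marian's internals):

```python
import numpy as np

def clip_by_norm(grad, clip_norm):
    """Rescale `grad` so its L2 norm is at most `clip_norm`.

    clip_norm == 0 is treated as "no clipping", matching the
    semantics of the --clip-norm 0 flag used in these tests.
    """
    if clip_norm == 0:
        return grad
    norm = np.linalg.norm(grad)
    if norm > clip_norm:
        grad = grad * (clip_norm / norm)
    return grad
```

Disabling clipping changes the effective per-update gradients, which is why the expected costs and optimizer statistics below had to be regenerated.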
Diffstat (limited to 'tests')
 tests/training/restoring/corpus/finetune.expected              | 30
 tests/training/restoring/corpus/test_finetune.sh               |  3
 tests/training/restoring/exp-smoothing/test_expsmooth.sh       |  8
 tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh   |  8
 tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh  |  6
 tests/training/restoring/multi-gpu/test_adam_sync.sh           |  6
 tests/training/restoring/multi-gpu/test_async.sh               |  8
 tests/training/restoring/multi-gpu/test_sync.sh                | 10
 tests/training/restoring/optimizer/adagrad.costs.expected      | 20
 tests/training/restoring/optimizer/adagrad.gt.expected         |  4
 tests/training/restoring/optimizer/adam.costs.expected         | 20
 tests/training/restoring/optimizer/adam.mt.expected            |  4
 tests/training/restoring/optimizer/adam.vt.expected            |  4
 tests/training/restoring/optimizer/adam_load.expected          | 10
 tests/training/restoring/optimizer/adam_sync.costs.expected    | 20
 tests/training/restoring/optimizer/test_adagrad_params.sh      | 10
 tests/training/restoring/optimizer/test_adam_params.sh         |  8
 tests/training/restoring/optimizer/test_adam_params_async.sh   | 12
 tests/training/restoring/optimizer/test_adam_params_sync.sh    | 10
 tests/training/restoring/optimizer/test_loading_adam_params.sh |  9
 20 files changed, 137 insertions(+), 73 deletions(-)
diff --git a/tests/training/restoring/corpus/finetune.expected b/tests/training/restoring/corpus/finetune.expected
index 6f4bc95..21f1847 100644
--- a/tests/training/restoring/corpus/finetune.expected
+++ b/tests/training/restoring/corpus/finetune.expected
@@ -1,15 +1,15 @@
-Ep. 1 : Up. 4 : Sen. 256 : Cost 239.27255249
-Ep. 1 : Up. 8 : Sen. 512 : Cost 246.85655212
-Ep. 1 : Up. 12 : Sen. 768 : Cost 230.16513062
-Ep. 1 : Up. 16 : Sen. 1,024 : Cost 251.03186035
-Ep. 1 : Up. 20 : Sen. 1,280 : Cost 249.74163818
-Ep. 1 : Up. 24 : Sen. 1,536 : Cost 239.31179810
-Ep. 1 : Up. 28 : Sen. 1,792 : Cost 231.93222046
-Ep. 1 : Up. 32 : Sen. 128 : Cost 255.42749023
-Ep. 1 : Up. 36 : Sen. 384 : Cost 250.27011108
-Ep. 1 : Up. 40 : Sen. 640 : Cost 249.66784668
-Ep. 1 : Up. 44 : Sen. 896 : Cost 254.14111328
-Ep. 2 : Up. 48 : Sen. 128 : Cost 237.40222168
-Ep. 2 : Up. 52 : Sen. 384 : Cost 255.97949219
-Ep. 2 : Up. 56 : Sen. 640 : Cost 252.84860229
-Ep. 2 : Up. 60 : Sen. 896 : Cost 244.12496948
+Ep. 1 : Up. 4 : Sen. 256 : Cost 238.82701111
+Ep. 1 : Up. 8 : Sen. 512 : Cost 245.15895081
+Ep. 1 : Up. 12 : Sen. 768 : Cost 227.24861145
+Ep. 1 : Up. 16 : Sen. 1,024 : Cost 246.25918579
+Ep. 1 : Up. 20 : Sen. 1,280 : Cost 243.25015259
+Ep. 1 : Up. 24 : Sen. 1,536 : Cost 230.48197937
+Ep. 1 : Up. 28 : Sen. 1,792 : Cost 219.80914307
+Ep. 1 : Up. 32 : Sen. 128 : Cost 236.07504272
+Ep. 1 : Up. 36 : Sen. 384 : Cost 225.42373657
+Ep. 1 : Up. 40 : Sen. 640 : Cost 218.38552856
+Ep. 1 : Up. 44 : Sen. 896 : Cost 217.53744507
+Ep. 2 : Up. 48 : Sen. 128 : Cost 201.09486389
+Ep. 2 : Up. 52 : Sen. 384 : Cost 215.17204285
+Ep. 2 : Up. 56 : Sen. 640 : Cost 211.10237122
+Ep. 2 : Up. 60 : Sen. 896 : Cost 200.33345032
diff --git a/tests/training/restoring/corpus/test_finetune.sh b/tests/training/restoring/corpus/test_finetune.sh
index 1e99645..78099d4 100644
--- a/tests/training/restoring/corpus/test_finetune.sh
+++ b/tests/training/restoring/corpus/test_finetune.sh
@@ -17,8 +17,7 @@ test -e vocab.de.yml
test -e vocab.en.yml
extra_opts="--seed 2222 --maxi-batch 1 --maxi-batch-sort none --mini-batch 64 --optimizer sgd --dim-emb 128 --dim-rnn 256 --disp-freq 4"
-# Added because default options has changes
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
# Train a model on a training corpus
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth.sh b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
index f048018..e7c7b6d 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass
+# AUTHOR: snukky
+# TAGS: exp-smooth clip-norm
+#####################################################################
+
# Exit on error
set -e
@@ -8,7 +14,7 @@ rm -rf expsmooth expsmooth_*.log
mkdir -p expsmooth
-opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none"
+opts="--no-shuffle --clip-norm 1 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none"
opts="$opts --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5"
opts="$opts --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32"
# Added because default options has changes
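The commit also adds SUMMARY/AUTHOR/TAGS comment headers to each test script, which makes them self-describing and lets a harness list or filter tests by tag. A hypothetical selector sketch (the parsing logic and usage are assumptions for illustration, not part of this repo):

```python
import re
from pathlib import Path

def tags(script):
    """Read the TAGS line from a test script's comment header."""
    for line in Path(script).read_text().splitlines():
        m = re.match(r"#\s*TAGS:\s*(.*)", line)
        if m:
            return set(m.group(1).split())
    return set()

def select(root, wanted):
    """Yield test scripts whose TAGS include all wanted tags."""
    for script in Path(root).rglob("test_*.sh"):
        if wanted <= tags(script):
            yield script

# e.g. all multi-GPU clip-norm tests:
# for s in select("tests", {"multigpu", "clip-norm"}): print(s)
```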
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
index 1080546..831ebec 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_s2s.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass
+# AUTHOR: snukky
+# TAGS: exp-smooth clip-norm
+#####################################################################
+
# Exit on error
set -e
@@ -8,7 +14,7 @@ rm -rf expsmooth_s2s expsmooth_s2s_*.log
mkdir -p expsmooth_s2s
-opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none"
+opts="--no-shuffle --clip-norm 1 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --sync-sgd"
opts="$opts --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.5"
opts="$opts --valid-sets valid.bpe.en valid.bpe.de --valid-metrics cross-entropy --valid-mini-batch 32 --type s2s"
# Added because default options has changes
diff --git a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
index 3e26acf..eafc1cc 100644
--- a/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
+++ b/tests/training/restoring/exp-smoothing/test_expsmooth_sync.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Compare costs from a restarted training with exp-smoothing with a single pass on 2 GPUs
+# AUTHOR: snukky
+# TAGS: exp-smooth clip-norm multigpu
+#####################################################################
+
# Exit on error
set -e
diff --git a/tests/training/restoring/multi-gpu/test_adam_sync.sh b/tests/training/restoring/multi-gpu/test_adam_sync.sh
index 84079d3..daf2524 100644
--- a/tests/training/restoring/multi-gpu/test_adam_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_adam_sync.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with Adam on 2 GPUs (sync-sgd)
+# AUTHOR: snukky
+# TAGS: optimizer adam multigpu
+#####################################################################
+
# Exit on error
set -e
diff --git a/tests/training/restoring/multi-gpu/test_async.sh b/tests/training/restoring/multi-gpu/test_async.sh
index 8e22f30..ba13ec6 100644
--- a/tests/training/restoring/multi-gpu/test_async.sh
+++ b/tests/training/restoring/multi-gpu/test_async.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with SGD on 2 GPUs (async)
+# AUTHOR: snukky
+# TAGS: optimizer multigpu clip-norm
+#####################################################################
+
# Exit on error
set -e
@@ -12,7 +18,7 @@ fi
rm -rf async async_*.log async.*out async.*expected
mkdir -p async
-opts="--no-shuffle --seed 777 --mini-batch 1 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.1 --devices 0 1"
+opts="--no-shuffle --clip-norm 0 --seed 777 --mini-batch 1 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.005 --devices 0 1"
# Added because default options has changes
opts="$opts --cost-type ce-mean --disp-label-counts false"
diff --git a/tests/training/restoring/multi-gpu/test_sync.sh b/tests/training/restoring/multi-gpu/test_sync.sh
index ff10d23..57fc76d 100644
--- a/tests/training/restoring/multi-gpu/test_sync.sh
+++ b/tests/training/restoring/multi-gpu/test_sync.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with SGD on 2 GPUs (sync-sgd)
+# AUTHOR: snukky
+# TAGS: optimizer multigpu clip-norm
+#####################################################################
+
# Exit on error
set -e
@@ -12,7 +18,7 @@ fi
rm -rf sync sync_*.log
mkdir -p sync
-opts="--no-shuffle --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.1 --devices 0 1 --sync-sgd"
+opts="--no-shuffle --clip-norm 0 --seed 777 --mini-batch 4 --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --optimizer sgd --learn-rate 0.01 --devices 0 1 --sync-sgd"
# Added because default options has changes
opts="$opts --cost-type ce-mean --disp-label-counts false"
@@ -49,7 +55,7 @@ test -e sync_2.log
cat sync_2.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed 's/ : Time.*//' >> sync.out
-$MRT_TOOLS/diff-nums.py -p 0.3 sync.out sync.expected -o sync.diff
+$MRT_TOOLS/diff-nums.py -p 0.1 sync.out sync.expected -o sync.diff
# Exit with success code
exit 0
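The -p value passed to diff-nums.py bounds the allowed deviation between each number in the run's output and the expected file, so tightening it from 0.3 to 0.1 here makes the check stricter. A rough Python sketch of this kind of numeric diff (a stand-in assuming an absolute tolerance; the actual $MRT_TOOLS/diff-nums.py may compare differently):

```python
import re, sys

FLOAT = r"-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"

def numbers(path):
    """Extract all floats from a file, ignoring surrounding text."""
    with open(path) as f:
        return [float(x) for x in re.findall(FLOAT, f.read())]

def diff_nums(out_file, expected_file, precision):
    """True if both files hold the same count of numbers,
    each pair differing by at most `precision`."""
    got, want = numbers(out_file), numbers(expected_file)
    if len(got) != len(want):
        return False
    return all(abs(a - b) <= precision for a, b in zip(got, want))

if __name__ == "__main__":
    ok = diff_nums(sys.argv[1], sys.argv[2], float(sys.argv[3]))
    sys.exit(0 if ok else 1)
```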
diff --git a/tests/training/restoring/optimizer/adagrad.costs.expected b/tests/training/restoring/optimizer/adagrad.costs.expected
index 7b4f7e1..533d10c 100644
--- a/tests/training/restoring/optimizer/adagrad.costs.expected
+++ b/tests/training/restoring/optimizer/adagrad.costs.expected
@@ -1,10 +1,10 @@
-238.52751160
-245.27938843
-239.83557129
-232.83401489
-238.87149048
-253.74154663
-255.69897461
-243.06086731
-244.85818481
-235.55209351
+238.52250671
+245.26730347
+239.82205200
+232.81472778
+238.84121704
+253.69161987
+255.61422729
+242.94416809
+244.69099426
+235.35519409
diff --git a/tests/training/restoring/optimizer/adagrad.gt.expected b/tests/training/restoring/optimizer/adagrad.gt.expected
index 30a932c..a6c90a5 100644
--- a/tests/training/restoring/optimizer/adagrad.gt.expected
+++ b/tests/training/restoring/optimizer/adagrad.gt.expected
@@ -1,2 +1,2 @@
-[[ 4.38133684e-05 1.40065049e-06 3.63037943e-05 ..., 1.23982169e-02
- 3.66997421e-02 4.11312692e-02]]
+[[8.0574207e+00 1.5418689e-01 4.3262744e+00 ... 4.0905408e+03
+ 1.1550205e+04 1.3359570e+04]]
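For the optimizer tests, adagrad_gt is Adagrad's accumulated sum of squared gradients, the tensor read from model.npz.optimizer.npz above. Running without gradient clipping leaves raw gradients much larger, consistent with the jump of several orders of magnitude in the expected values. A textbook Adagrad step as a sketch (not Marian's implementation):

```python
import numpy as np

def adagrad_step(w, grad, gt, lr=0.01, eps=1e-8):
    """One Adagrad update.

    gt accumulates the element-wise sum of squared gradients --
    the quantity stored as 'adagrad_gt' in the optimizer checkpoint.
    Unclipped gradients make gt grow much faster.
    """
    gt = gt + grad ** 2
    w = w - lr * grad / (np.sqrt(gt) + eps)
    return w, gt
```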
diff --git a/tests/training/restoring/optimizer/adam.costs.expected b/tests/training/restoring/optimizer/adam.costs.expected
index a6b5f9a..565b1ea 100644
--- a/tests/training/restoring/optimizer/adam.costs.expected
+++ b/tests/training/restoring/optimizer/adam.costs.expected
@@ -1,10 +1,10 @@
-238.40983582
-244.61091614
-238.22981262
-229.24475098
-230.14970398
-234.50399780
-228.12467957
-210.38107300
-206.17379761
-196.83959961
+238.40853882
+244.59863281
+238.15905762
+228.80813599
+227.96830750
+231.00505066
+225.24502563
+207.64001465
+203.54002380
+194.72296143
diff --git a/tests/training/restoring/optimizer/adam.mt.expected b/tests/training/restoring/optimizer/adam.mt.expected
index 3a2de9b..67c9756 100644
--- a/tests/training/restoring/optimizer/adam.mt.expected
+++ b/tests/training/restoring/optimizer/adam.mt.expected
@@ -1,2 +1,2 @@
-[[ 8.0254285e-06 -5.1497386e-07 3.8298724e-05 ... 1.5516396e-03
- 1.5692838e-03 2.0285486e-03]]
+[[-0.00667148 0.00525377 0.0564099 ... 1.5877182 1.6200635
+ 2.2804906 ]]
diff --git a/tests/training/restoring/optimizer/adam.vt.expected b/tests/training/restoring/optimizer/adam.vt.expected
index 6fbbfe5..c54930d 100644
--- a/tests/training/restoring/optimizer/adam.vt.expected
+++ b/tests/training/restoring/optimizer/adam.vt.expected
@@ -1,2 +1,2 @@
-[[ 9.29374124e-08 4.41528991e-09 3.45339437e-08 ..., 2.22943163e-05
- 2.69053471e-05 5.34869505e-05]]
+[[8.1617765e-02 3.0912522e-03 1.2053944e-02 ... 4.4662014e+01
+ 3.7031158e+01 7.2262390e+01]]
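Similarly, adam_mt and adam_vt are Adam's exponential moving averages of the gradient and the squared gradient, checked against the expected tensors above. A standard Adam update for reference (a textbook sketch, not Marian's code):

```python
import numpy as np

def adam_step(w, grad, mt, vt, t, lr=0.001,
              beta1=0.9, beta2=0.999, eps=1e-8):
    """One Adam update; mt/vt correspond to the 'adam_mt'/'adam_vt'
    tensors the tests read from model.npz.optimizer.npz."""
    mt = beta1 * mt + (1 - beta1) * grad          # first moment
    vt = beta2 * vt + (1 - beta2) * grad ** 2     # second moment
    mt_hat = mt / (1 - beta1 ** (t + 1))          # bias correction
    vt_hat = vt / (1 - beta2 ** (t + 1))
    w = w - lr * mt_hat / (np.sqrt(vt_hat) + eps)
    return w, mt, vt
```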
diff --git a/tests/training/restoring/optimizer/adam_load.expected b/tests/training/restoring/optimizer/adam_load.expected
index bf5fef4..5dd5164 100644
--- a/tests/training/restoring/optimizer/adam_load.expected
+++ b/tests/training/restoring/optimizer/adam_load.expected
@@ -1,6 +1,6 @@
Ep. 1 : Up. 1 : Sen. 2 : Cost 223.64685059
-Ep. 1 : Up. 2 : Sen. 4 : Cost 258.80792236
-Ep. 1 : Up. 3 : Sen. 6 : Cost 255.67260742
-Ep. 1 : Up. 4 : Sen. 8 : Cost 346.67749023
-Ep. 1 : Up. 5 : Sen. 10 : Cost 278.72695923
-Ep. 1 : Up. 6 : Sen. 12 : Cost 178.23016357
+Ep. 1 : Up. 2 : Sen. 4 : Cost 258.78131104
+Ep. 1 : Up. 3 : Sen. 6 : Cost 256.86120605
+Ep. 1 : Up. 4 : Sen. 8 : Cost 365.52239990
+Ep. 1 : Up. 5 : Sen. 10 : Cost 281.86376953
+Ep. 1 : Up. 6 : Sen. 12 : Cost 203.98873901
diff --git a/tests/training/restoring/optimizer/adam_sync.costs.expected b/tests/training/restoring/optimizer/adam_sync.costs.expected
index d390e92..29a7e61 100644
--- a/tests/training/restoring/optimizer/adam_sync.costs.expected
+++ b/tests/training/restoring/optimizer/adam_sync.costs.expected
@@ -1,10 +1,10 @@
-7245.93652344
-7990.90771484
-7741.82177734
-7778.60302734
-7445.29589844
-7015.16699219
-6661.45312500
-6346.10888672
-6402.09814453
-6369.64550781
+7245.93505859
+7990.90478516
+7741.81542969
+7778.61621094
+7445.38574219
+7015.21337891
+6661.38769531
+6346.22802734
+6402.10009766
+6369.72216797
diff --git a/tests/training/restoring/optimizer/test_adagrad_params.sh b/tests/training/restoring/optimizer/test_adagrad_params.sh
index 8fca356..33096e1 100644
--- a/tests/training/restoring/optimizer/test_adagrad_params.sh
+++ b/tests/training/restoring/optimizer/test_adagrad_params.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with Adagrad optimizer
+# AUTHOR: snukky
+# TAGS: optimizer adagrad
+#####################################################################
+
# Exit on error
set -e
@@ -8,7 +14,7 @@ rm -rf adagrad adagrad*.log
mkdir -p adagrad
$MRT_MARIAN/marian \
- --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \
+ --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \
-m adagrad/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
--disp-freq 10 --after-batches 100 --save-freq 60 --optimizer adagrad --cost-type ce-mean \
--log adagrad.log
@@ -24,7 +30,7 @@ python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimi
$MRT_TOOLS/diff.sh adagrad.keys.out adagrad.keys.expected > adagrad.keys.diff
python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adagrad/model.npz.optimizer.npz -k "adagrad_gt" > adagrad.gt.out
-$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.003 adagrad.gt.out adagrad.gt.expected -o adagrad.gt.diff
# Exit with success code
exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params.sh b/tests/training/restoring/optimizer/test_adam_params.sh
index e0bd76f..4310850 100644
--- a/tests/training/restoring/optimizer/test_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_adam_params.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with Adam
+# AUTHOR: snukky
+# TAGS: optimizer adam
+#####################################################################
+
# Exit on error
set -e
@@ -8,7 +14,7 @@ rm -rf adam adam.log
mkdir -p adam
$MRT_MARIAN/marian \
- --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \
+ --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --dim-emb 128 --dim-rnn 256 \
-m adam/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
--disp-freq 10 --after-batches 100 --save-freq 60 --cost-type ce-mean \
--log adam.log
diff --git a/tests/training/restoring/optimizer/test_adam_params_async.sh b/tests/training/restoring/optimizer/test_adam_params_async.sh
index 2b2c869..6dee216 100644
--- a/tests/training/restoring/optimizer/test_adam_params_async.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_async.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with Adam on 2 GPUs with asynchronous SGD
+# AUTHOR: snukky
+# TAGS: optimizer adam multigpu async clip-norm
+#####################################################################
+
# Exit on error
set -e
@@ -13,7 +19,7 @@ if (( $MRT_NUM_DEVICES < 2 )); then
fi
$MRT_MARIAN/marian \
- --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \
+ --no-shuffle --clip-norm 1 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \
-m adam_async/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
--disp-freq 10 --after-batches 100 --save-freq 60 --cost-type ce-sum --disp-label-counts false \
--log adam_async.log --devices 0 1
@@ -33,8 +39,8 @@ $MRT_TOOLS/diff.sh adam_async.keys.out adam.keys.expected > adam_async.keys.diff
python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_mt" > adam_async.mt.out
python3 $MRT_MARIAN/../scripts/contrib/model_info.py -m adam_async/model.npz.optimizer.npz -k "adam_vt" > adam_async.vt.out
-$MRT_TOOLS/diff-nums.py --numpy -a -p 0.02 adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff
-$MRT_TOOLS/diff-nums.py --numpy -p 0.001 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff
+$MRT_TOOLS/diff-nums.py --numpy -a -p 0.03 adam_async.mt.out adam_async.mt.expected -o adam_async.mt.diff
+$MRT_TOOLS/diff-nums.py --numpy -p 0.03 adam_async.vt.out adam_async.vt.expected -o adam_async.vt.diff
# Exit with success code
exit 0
diff --git a/tests/training/restoring/optimizer/test_adam_params_sync.sh b/tests/training/restoring/optimizer/test_adam_params_sync.sh
index 1e2481d..43dcda1 100644
--- a/tests/training/restoring/optimizer/test_adam_params_sync.sh
+++ b/tests/training/restoring/optimizer/test_adam_params_sync.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Training with Adam on 2 GPUs with sync-sgd
+# AUTHOR: snukky
+# TAGS: optimizer adam multigpu
+#####################################################################
+
# Exit on error
set -e
@@ -13,10 +19,10 @@ if (( $MRT_NUM_DEVICES < 2 )); then
fi
$MRT_MARIAN/marian \
- --no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \
+ --no-shuffle --clip-norm 0 --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 32 --dim-emb 128 --dim-rnn 256 \
-m adam_sync/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \
--disp-freq 10 --after-batches 100 --save-freq 60 \
- --log adam_sync.log --devices 0 1 --sync-sgd --cost-type ce-sum --disp-label-counts false --clip-norm 0
+ --log adam_sync.log --devices 0 1 --sync-sgd --cost-type ce-sum --disp-label-counts false
test -e adam_sync/model.npz
test -e adam_sync/model.npz.optimizer.npz
diff --git a/tests/training/restoring/optimizer/test_loading_adam_params.sh b/tests/training/restoring/optimizer/test_loading_adam_params.sh
index 95a48c8..2ded056 100644
--- a/tests/training/restoring/optimizer/test_loading_adam_params.sh
+++ b/tests/training/restoring/optimizer/test_loading_adam_params.sh
@@ -1,5 +1,11 @@
#!/bin/bash -x
+#####################################################################
+# SUMMARY: Loading Adam parameters after restarting training
+# AUTHOR: snukky
+# TAGS: optimizer adam
+#####################################################################
+
# Exit on error
set -e
@@ -8,8 +14,7 @@ rm -rf adam_load adam_load_?.log
mkdir -p adam_load
extra_opts="--no-shuffle --seed 7777 --maxi-batch 1 --maxi-batch-sort none --mini-batch 2 --dim-rnn 64 --dim-emb 32"
-# Added because default options has changes
-extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false"
+extra_opts="$extra_opts --cost-type ce-mean --disp-label-counts false --clip-norm 0"
$MRT_MARIAN/marian \
-m adam_load/model.npz -t $MRT_DATA/train.max50.{en,de} -v vocab.en.yml vocab.de.yml \