Update tests for data-weighting

author: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-10-21 13:35:15 +0300
committer: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-10-21 13:35:15 +0300
commit: 7c0bbca4ad87c40db9efd46599c8bf3f7794e301 (patch)
tree: 1965acfa8418330cb103568361c0b15c6c3bbd0d
parent: 9e9c0e7620fb95576bd73afa6886d4059b9da0f4 (diff)
9 files changed, 11 insertions, 11 deletions
diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
index 2cb6e29..58e9e82 100644
--- a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
+++ b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
 $MRT_MARIAN/marian \
     --seed 3333 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
     -m maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
-    --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum \
+    --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum --disp-label-counts false \
     --data-weighting train.1k.inc.txt --data-weighting-type sentence
 
 test -e maxibatch/model.npz
diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
index 2ba494f..5cb5592 100644
--- a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
+++ b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
 $MRT_MARIAN/marian \
     --seed 6666 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
     -m word_maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
-    --log word_maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 \
+    --log word_maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-mean \
     --data-weighting train.1k.wordinc.txt --data-weighting-type word
 
 test -e word_maxibatch/model.npz
diff --git a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
index c0a200a..2295d64 100644
--- a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
+++ b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
@@ -13,7 +13,7 @@ rm -rf sqlite sqlite.log
 mkdir -p sqlite
 
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m sqlite/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
     --log sqlite.log --disp-freq 1 --after-batches 100 --mini-batch 1 \
     --data-weighting train.1k.weights.txt --data-weighting-type sentence --sqlite sqlite/corpus.sqlite3
diff --git a/tests/training/features/data-weighting/test_sentence_weights_x3.sh b/tests/training/features/data-weighting/test_sentence_weights_x3.sh
index 3857814..1b28af8 100644
--- a/tests/training/features/data-weighting/test_sentence_weights_x3.sh
+++ b/tests/training/features/data-weighting/test_sentence_weights_x3.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
 $MRT_MARIAN/marian \
     --seed 2222 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
     -m x3copied/model.npz -t train.x3.{de,en} -v vocab.{de,en}.yml \
-    --log x3copied.log --disp-freq 1 --after-batches 100 --mini-batch 4 --cost-type ce-sum
+    --log x3copied.log --disp-freq 1 --after-batches 100 --mini-batch 4 --cost-type ce-sum --disp-label-counts false
 
 test -e x3copied/model.npz
 test -e x3copied.log
@@ -27,7 +27,7 @@ cat x3copied.log | grep 'Cost ' | sed -r 's/.*Cost (.*) : Time.*/\1/' > x3copied
 $MRT_MARIAN/marian \
     --seed 2222 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
     -m x3weights/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
-    --log x3weights.log --disp-freq 1 --after-batches 100 --mini-batch 2 --cost-type ce-sum \
+    --log x3weights.log --disp-freq 1 --after-batches 100 --mini-batch 2 --cost-type ce-sum --disp-label-counts false \
     --data-weighting train.1k.weights.txt --data-weighting-type sentence
 
 test -e x3weights/model.npz
diff --git a/tests/training/features/data-weighting/test_validation.sh b/tests/training/features/data-weighting/test_validation.sh
index e84c976..125ba34 100644
--- a/tests/training/features/data-weighting/test_validation.sh
+++ b/tests/training/features/data-weighting/test_validation.sh
@@ -13,7 +13,7 @@ rm -rf valid valid_script.temp
 mkdir -p valid
 
 $MRT_MARIAN/marian \
-    --seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 \
+    --seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --cost-type ce-mean \
     -m valid/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
     --disp-freq 5 --valid-freq 15 --after-batches 50 \
     --data-weighting train.1k.weights.txt --data-weighting-type sentence \
diff --git a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
index a07c344..1658e1b 100644
--- a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
@@ -15,7 +15,7 @@ mkdir -p sqlite_word
 cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > sqlite_word.weights.txt
 
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m sqlite_word/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
     --log sqlite_word.log --disp-freq 5 -e 2 --mini-batch-fit -w 500 \
     --data-weighting sqlite_word.weights.txt --data-weighting-type word --sqlite sqlite_word/corpus.sqlite3
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
index 1242f40..c3d7b93 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
@@ -17,7 +17,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r -e 's/[^ ]+/2/g' -e 's/$/ 2/' >
 
 # Train
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m word_eos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
     --log word_eos.log --disp-freq 5 -e 2 \
     --data-weighting word_eos.weights.txt --data-weighting-type word
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
index 81bc3e5..0660501 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
@@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos.weigh
 
 # Train with word weighting
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m word_twos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
     --log word_twos.log --disp-freq 5 -e 2 \
     --data-weighting word_twos.weights.txt --data-weighting-type word
@@ -41,7 +41,7 @@ echo "data-weighting-type: word" >> word_twos.config.yml
 
 # Train with word weighting
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m word_twos_cfg/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
     --log word_twos_cfg.log --disp-freq 5 -e 2 \
     -c word_twos.config.yml
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
index 68154af..675ae8b 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
@@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos_sync.
 
 # Train with word weighting
 $MRT_MARIAN/marian \
-    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+    --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
     -m word_twos_sync/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml --sync-sgd \
     --log word_twos_sync.log --disp-freq 5 -e 2 \
     --data-weighting word_twos_sync.weights.txt --data-weighting-type word
author	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-10-21 13:35:15 +0300
committer	Roman Grundkiewicz <rgrundkiewicz@gmail.com>	2020-10-21 13:35:15 +0300
commit	7c0bbca4ad87c40db9efd46599c8bf3f7794e301 (patch)
tree	1965acfa8418330cb103568361c0b15c6c3bbd0d
parent	9e9c0e7620fb95576bd73afa6886d4059b9da0f4 (diff)