Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-10-21 13:35:15 +0300
committer: Roman Grundkiewicz <rgrundkiewicz@gmail.com> 2020-10-21 13:35:15 +0300
commit: 7c0bbca4ad87c40db9efd46599c8bf3f7794e301 (patch)
tree: 1965acfa8418330cb103568361c0b15c6c3bbd0d
parent: 9e9c0e7620fb95576bd73afa6886d4059b9da0f4 (diff)
Update tests for data-weighting
-rw-r--r-- tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_sentence_weights_x3.sh | 4
-rw-r--r-- tests/training/features/data-weighting/test_validation.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_word_weighting_sqlite.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_word_weighting_with_eos.sh | 2
-rw-r--r-- tests/training/features/data-weighting/test_word_weighting_with_twos.sh | 4
-rw-r--r-- tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh | 2
9 files changed, 11 insertions, 11 deletions
diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
index 2cb6e29..58e9e82 100644
--- a/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
+++ b/tests/training/features/data-weighting/test_maxi_batches_with_sentence_weights.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
$MRT_MARIAN/marian \
--seed 3333 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
-m maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
- --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum \
+ --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum --disp-label-counts false \
--data-weighting train.1k.inc.txt --data-weighting-type sentence
test -e maxibatch/model.npz
diff --git a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
index 2ba494f..5cb5592 100644
--- a/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
+++ b/tests/training/features/data-weighting/test_maxi_batches_with_word_weights.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
$MRT_MARIAN/marian \
--seed 6666 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
-m word_maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
- --log word_maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 \
+ --log word_maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-mean \
--data-weighting train.1k.wordinc.txt --data-weighting-type word
test -e word_maxibatch/model.npz
diff --git a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
index c0a200a..2295d64 100644
--- a/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
+++ b/tests/training/features/data-weighting/test_sentence_weighting_sqlite.sh
@@ -13,7 +13,7 @@ rm -rf sqlite sqlite.log
mkdir -p sqlite
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m sqlite/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
--log sqlite.log --disp-freq 1 --after-batches 100 --mini-batch 1 \
--data-weighting train.1k.weights.txt --data-weighting-type sentence --sqlite sqlite/corpus.sqlite3
diff --git a/tests/training/features/data-weighting/test_sentence_weights_x3.sh b/tests/training/features/data-weighting/test_sentence_weights_x3.sh
index 3857814..1b28af8 100644
--- a/tests/training/features/data-weighting/test_sentence_weights_x3.sh
+++ b/tests/training/features/data-weighting/test_sentence_weights_x3.sh
@@ -18,7 +18,7 @@ test -e vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corp
$MRT_MARIAN/marian \
--seed 2222 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
-m x3copied/model.npz -t train.x3.{de,en} -v vocab.{de,en}.yml \
- --log x3copied.log --disp-freq 1 --after-batches 100 --mini-batch 4 --cost-type ce-sum
+ --log x3copied.log --disp-freq 1 --after-batches 100 --mini-batch 4 --cost-type ce-sum --disp-label-counts false
test -e x3copied/model.npz
test -e x3copied.log
@@ -27,7 +27,7 @@ cat x3copied.log | grep 'Cost ' | sed -r 's/.*Cost (.*) : Time.*/\1/' > x3copied
$MRT_MARIAN/marian \
--seed 2222 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 --dim-emb 128 --dim-rnn 256 --optimizer sgd \
-m x3weights/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
- --log x3weights.log --disp-freq 1 --after-batches 100 --mini-batch 2 --cost-type ce-sum \
+ --log x3weights.log --disp-freq 1 --after-batches 100 --mini-batch 2 --cost-type ce-sum --disp-label-counts false \
--data-weighting train.1k.weights.txt --data-weighting-type sentence
test -e x3weights/model.npz
diff --git a/tests/training/features/data-weighting/test_validation.sh b/tests/training/features/data-weighting/test_validation.sh
index e84c976..125ba34 100644
--- a/tests/training/features/data-weighting/test_validation.sh
+++ b/tests/training/features/data-weighting/test_validation.sh
@@ -13,7 +13,7 @@ rm -rf valid valid_script.temp
mkdir -p valid
$MRT_MARIAN/marian \
- --seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 \
+ --seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --dim-rnn 64 --dim-emb 32 --cost-type ce-mean \
-m valid/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
--disp-freq 5 --valid-freq 15 --after-batches 50 \
--data-weighting train.1k.weights.txt --data-weighting-type sentence \
diff --git a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
index a07c344..1658e1b 100644
--- a/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_sqlite.sh
@@ -15,7 +15,7 @@ mkdir -p sqlite_word
cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > sqlite_word.weights.txt
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m sqlite_word/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
--log sqlite_word.log --disp-freq 5 -e 2 --mini-batch-fit -w 500 \
--data-weighting sqlite_word.weights.txt --data-weighting-type word --sqlite sqlite_word/corpus.sqlite3
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
index 1242f40..c3d7b93 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_eos.sh
@@ -17,7 +17,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r -e 's/[^ ]+/2/g' -e 's/$/ 2/' >
# Train
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m word_eos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
--log word_eos.log --disp-freq 5 -e 2 \
--data-weighting word_eos.weights.txt --data-weighting-type word
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
index 81bc3e5..0660501 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_twos.sh
@@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos.weigh
# Train with word weighting
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m word_twos/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
--log word_twos.log --disp-freq 5 -e 2 \
--data-weighting word_twos.weights.txt --data-weighting-type word
@@ -41,7 +41,7 @@ echo "data-weighting-type: word" >> word_twos.config.yml
# Train with word weighting
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m word_twos_cfg/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
--log word_twos_cfg.log --disp-freq 5 -e 2 \
-c word_twos.config.yml
diff --git a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
index 68154af..675ae8b 100644
--- a/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
+++ b/tests/training/features/data-weighting/test_word_weighting_with_twos_sync.sh
@@ -18,7 +18,7 @@ cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/2/g' > word_twos_sync.
# Train with word weighting
$MRT_MARIAN/marian \
- --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd \
+ --seed 1111 --no-shuffle --dim-emb 128 --dim-rnn 256 --optimizer sgd --cost-type ce-mean \
-m word_twos_sync/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml --sync-sgd \
--log word_twos_sync.log --disp-freq 5 -e 2 \
--data-weighting word_twos_sync.weights.txt --data-weighting-type word