Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pedro Coelho <pedrodiascoelho97@gmail.com>, 2021-02-23 14:16:27 +0300
committer: Pedro Coelho <pedrodiascoelho97@gmail.com>, 2021-02-23 14:16:27 +0300
commit: 0a396730bea7ca4ab5e732d39f510fde90c7dabe (patch)
tree: f51c4bf6cd632910b1b8b44880a986ce36bf9203
parent: c505cf1fc66e4d4df039456c9633fb8689bd5155 (diff)
updated and simplified factors training test
- added --clip-norm 0
- added verification that factors are being used
- factored vocabulary is no longer constructed on the fly; the one already in the models folder is used
- made training test marian-10 friendly
-rw-r--r-- tests/training/features/factors/factors.expected 88
-rwxr-xr-x tests/training/features/factors/setup.sh 42
-rw-r--r-- tests/training/features/factors/test_factors.sh 6
3 files changed, 52 insertions, 84 deletions
diff --git a/tests/training/features/factors/factors.expected b/tests/training/features/factors/factors.expected
index 0164290..70ee0cf 100644
--- a/tests/training/features/factors/factors.expected
+++ b/tests/training/features/factors/factors.expected
@@ -1,44 +1,44 @@
-214.03384399
-234.90438843
-227.33822632
-229.79216003
-223.72485352
-220.85546875
-216.96545410
-210.97512817
-217.42474365
-217.29472351
-222.89697266
-225.04882812
-221.97128296
-221.55412292
-210.07235718
-214.99653625
-210.14802551
-202.47036743
-229.11576843
-213.29634094
-223.94625854
-213.39431763
-222.35632324
-208.29580688
-203.60266113
-210.88558960
-194.14140320
-232.09394836
-209.24897766
-215.81555176
-205.87390137
-216.11462402
-209.60700989
-198.33728027
-212.82144165
-186.60208130
-228.83305359
-203.70266724
-216.54733276
-201.15771484
-213.18595886
-202.07525635
-195.43476868
-209.35302734
+10.77032471
+10.66634846
+10.57369232
+10.49110889
+10.42863846
+10.37640285
+10.31346226
+10.26187325
+10.20974541
+10.14718914
+10.10516739
+10.08621216
+10.04499340
+10.03392220
+10.01447582
+9.95392036
+9.91196060
+9.86151695
+9.79879570
+9.77040195
+9.74897766
+9.72190189
+9.72009087
+9.66733265
+9.62868977
+9.60218239
+9.51061630
+9.43571186
+9.44846630
+9.42066574
+9.38913155
+9.40541935
+9.32790852
+9.28542328
+9.26995373
+9.15800571
+9.10080147
+9.11642838
+9.08799362
+9.07044029
+9.08870029
+9.00327969
+8.95821762
+8.93725300
diff --git a/tests/training/features/factors/setup.sh b/tests/training/features/factors/setup.sh
index ac22a0c..faa043c 100755
--- a/tests/training/features/factors/setup.sh
+++ b/tests/training/features/factors/setup.sh
@@ -11,44 +11,10 @@ set -e
test -f $MRT_DATA/europarl.de-en/toy.bpe.en || exit 1
test -f $MRT_DATA/europarl.de-en/toy.bpe.de || exit 1
-#escape carachters #:_\|
-test -s toy.bpe.esc.en || cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed 's/#/\&htg;/g;s/:/\&cln;/g;s/_/\&usc;/g;s/\\/\&esc;/g;s/|/\&ppe;/g' \
- > toy.bpe.esc.en
-test -s toy.bpe.esc.de || cat $MRT_DATA/europarl.de-en/toy.bpe.de | sed 's/#/\&htg;/g;s/:/\&cln;/g;s/_/\&usc;/g;s/\\/\&esc;/g;s/|/\&ppe;/g' \
- > toy.bpe.esc.de
-
-#add factors to replace @@ markers. s1 is used if a word is a subword (if it has the suffix @@), s0 is used otherwise
-test -s toy.bpe.fact.en || cat toy.bpe.esc.en | sed 's/\(\s\|$\)/|s0 /g;s/@@|s0/|s1/g;s/\s*$//' > toy.bpe.fact.en
-test -s toy.bpe.fact.de || cat toy.bpe.esc.de | sed 's/\(\s\|$\)/|s0 /g;s/@@|s0/|s1/g;s/\s*$//' > toy.bpe.fact.de
-
-#creates factored vocabulary
-if [[ ! -s vocab.en.fsv ]]; then
- echo '_lemma
-
-_s
-s0 : _s
-s1 : _s
-
-</s> : _lemma
-<unk> : _lemma' > vocab.en.fsv
-
- sed -i 's/@@//g' toy.bpe.esc.en
- $MRT_MARIAN/marian-vocab < toy.bpe.esc.en | grep -v '<\/s>\|<unk>' | sed 's/"//g' | sed 's/:.*$/ : _lemma _has_s/' >> vocab.en.fsv
-fi
-
-if [[ ! -s vocab.de.fsv ]]; then
- echo '_lemma
-
-_s
-s0 : _s
-s1 : _s
-
-</s> : _lemma
-<unk> : _lemma' > vocab.de.fsv
-
- sed -i 's/@@//g' toy.bpe.esc.de
- $MRT_MARIAN/marian-vocab < toy.bpe.esc.de | grep -v '<\/s>\|<unk>' | sed 's/"//g' | sed 's/:.*$/ : _lemma _has_s/' >> vocab.de.fsv
-fi
+test -s toy.bpe.fact.en || cat $MRT_DATA/europarl.de-en/toy.bpe.en | \
+ sed 's/\(\s\|$\)/|s0 /g;s/@@|s0/|s1/g;s/\s*$//' > toy.bpe.fact.en
+test -s toy.bpe.fact.de || cat $MRT_DATA/europarl.de-en/toy.bpe.en | \
+ sed 's/\(\s\|$\)/|s0 /g;s/@@|s0/|s1/g;s/\s*$//' > toy.bpe.fact.de
# Exit with success code
exit 0
diff --git a/tests/training/features/factors/test_factors.sh b/tests/training/features/factors/test_factors.sh
index 97deb6e..193ff37 100644
--- a/tests/training/features/factors/test_factors.sh
+++ b/tests/training/features/factors/test_factors.sh
@@ -15,14 +15,16 @@ mkdir -p factors
# Run marian command
$MRT_MARIAN/marian \
- --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
- -m factors/model.npz -t toy.bpe.fact.{en,de} -v vocab.en.fsv vocab.de.fsv \
+ --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --clip-norm 0 \
+ -m factors/model.npz -t toy.bpe.fact.{en,de} -v $MRT_MODELS/factors/vocab.{en,de}.fsv \
--disp-freq 5 -e 5 \
--log factors.log
# Check if files exist
test -e factors/model.npz
test -e factors.log
+grep -q "Factored embeddings enabled" factors.log
+grep -q "Factored outputs enabled" factors.log
# Compare the current output with the expected output
cat factors.log | $MRT_TOOLS/extract-costs.sh > factors.out