Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-01-13 16:14:01 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-01-13 16:14:01 +0300
commit: 9505efeefc89f31393b538a22b01652093c4f863 (patch)
tree: dd1dd5be46a1b256cd3a6a62a7132e11dd43ab70 /tests
parent: 148aac8fac6a8a513f85d409cc1d842297be5bd0 (diff)
Add tests for weights initialization from a pretrained LM
Diffstat (limited to 'tests')
-rw-r--r-- tests/interface/version/test_model_has_version.sh | 2
-rw-r--r-- tests/training/pretrain/.gitignore | 2
-rw-r--r-- tests/training/pretrain/setup.sh | 4
-rw-r--r-- tests/training/pretrain/test_weights_from_pretrained_model.sh | 57
4 files changed, 64 insertions, 1 deletion
diff --git a/tests/interface/version/test_model_has_version.sh b/tests/interface/version/test_model_has_version.sh
index e8810a7..5037a8a 100644
--- a/tests/interface/version/test_model_has_version.sh
+++ b/tests/interface/version/test_model_has_version.sh
@@ -16,7 +16,7 @@ $MRT_MARIAN/build/marian \
# Check if the model contains a version
test -e version/model.npz
-python3 $MRT_MARIAN/scripts/contrib/model_info.py -m version/model.npz | grep -qP "version: v[1-9]+\.[0-9]+\.[0-9]+\+.*"
+python3 $MRT_MARIAN/scripts/contrib/model_info.py -s -m version/model.npz | grep -qP "version: v[1-9]+\.[0-9]+\.[0-9]+\+.*"
# Check if the version is printed
echo "test" | $MRT_MARIAN/build/marian-decoder \
diff --git a/tests/training/pretrain/.gitignore b/tests/training/pretrain/.gitignore
new file mode 100644
index 0000000..7229491
--- /dev/null
+++ b/tests/training/pretrain/.gitignore
@@ -0,0 +1,2 @@
+key-*.txt
+model
diff --git a/tests/training/pretrain/setup.sh b/tests/training/pretrain/setup.sh
new file mode 100644
index 0000000..6088de5
--- /dev/null
+++ b/tests/training/pretrain/setup.sh
@@ -0,0 +1,4 @@
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1
+test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1
+test -f $MRT_DATA/europarl.de-en/toy.bpe.en || exit 1
+test -f $MRT_DATA/europarl.de-en/toy.bpe.de || exit 1
diff --git a/tests/training/pretrain/test_weights_from_pretrained_model.sh b/tests/training/pretrain/test_weights_from_pretrained_model.sh
new file mode 100644
index 0000000..634087f
--- /dev/null
+++ b/tests/training/pretrain/test_weights_from_pretrained_model.sh
@@ -0,0 +1,57 @@
+#!/bin/bash -x
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf model lm.log orig.log model.log key-*.txt
+mkdir -p model
+
+# Train LM
+$MRT_MARIAN/build/marian \
+ --seed 1111 --type lm -m model/lm.npz \
+ -t $MRT_DATA/europarl.de-en/corpus.bpe.de --no-shuffle \
+ -v model/vocab.de.yml \
+ --log lm.log --after-batches 10
+
+test -e lm.log
+test -e model/lm.npz
+
+# Train model without pretrained weights
+$MRT_MARIAN/build/marian \
+ --type s2s -m model/orig.npz \
+ -t $MRT_DATA/europarl.de-en/corpus.bpe.en $MRT_DATA/europarl.de-en/corpus.bpe.de --no-shuffle \
+ -v model/vocab.en.yml model/vocab.de.yml \
+ --seed 2222 -l 0.0000000001 \
+ --log orig.log --after-batches 1
+
+test -e orig.log
+test -e model/orig.npz
+
+# Train model with weights initialized from LM
+$MRT_MARIAN/build/marian \
+ --type s2s -m model/model.npz --pretrained-model model/lm.npz \
+ -t $MRT_DATA/europarl.de-en/corpus.bpe.en $MRT_DATA/europarl.de-en/corpus.bpe.de --no-shuffle \
+ -v model/vocab.en.yml model/vocab.de.yml \
+ --seed 2222 -l 0.0000000001 \
+ --log model.log --after-batches 1
+
+test -e model.log
+test -e model/model.npz
+
+# Test if selected encoder weights are not taken from the LM: they must equal those of the model trained without a pretrained model (same seed)
+for key in encoder_Wemb encoder_bi_U encoder_bi_r_Wx; do
+ python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/orig.npz -k $key > key-orig-$key.txt
+ python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
+ diff key-orig-$key.txt key-model-$key.txt > key-diff-$key.txt
+done
+
+# Test if selected weights are identical with LM
+for key in decoder_Wemb decoder_cell1_U decoder_cell2_bx decoder_ff_logit_l1_W0; do
+ python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/lm.npz -k $key > key-lm-$key.txt
+ python3 $MRT_MARIAN/scripts/contrib/model_info.py -m model/model.npz -k $key > key-model-$key.txt
+ diff key-lm-$key.txt key-model-$key.txt > key-diff-$key.txt
+done
+
+# Exit with success code
+exit 0