Add tests for language models

author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-06-12 17:24:46 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-06-12 17:24:46 +0300
commit: 0f59ae9a972d0c3477f1814e901f4dcbe465d990 (patch)
tree: 2b0d3eddb6f19270527eb765c25ddf3b71af0f74 /tests/training/model-types
parent: 154de0d7e2f6ff5dfd2605c9eef54ba8acda083c (diff)
8 files changed, 101 insertions, 0 deletions
diff --git a/tests/training/model-types/.gitignore b/tests/training/model-types/.gitignore
new file mode 100644
index 0000000..38d4404
--- /dev/null
+++ b/tests/training/model-types/.gitignore
@@ -0,0 +1,7 @@
+lm
+lm-transformer
+multi-s2s
+multi-transformer
+
+vocab.*.yml
+test.bpe.en
diff --git a/tests/training/model-types/lm-transformer.expected b/tests/training/model-types/lm-transformer.expected
new file mode 100644
index 0000000..8ceb781
--- /dev/null
+++ b/tests/training/model-types/lm-transformer.expected
@@ -0,0 +1,5 @@
+403.11
+291.55
+215.41
+150.84
+85.74
diff --git a/tests/training/model-types/lm-transformer.scores.expected b/tests/training/model-types/lm-transformer.scores.expected
new file mode 100644
index 0000000..34ead58
--- /dev/null
+++ b/tests/training/model-types/lm-transformer.scores.expected
@@ -0,0 +1,10 @@
+-110.657913
+-202.235657
+-98.792732
+-232.406662
+-654.265076
+-150.503616
+-385.257812
+-60.361347
+-88.694214
+-104.250389
diff --git a/tests/training/model-types/lm.expected b/tests/training/model-types/lm.expected
new file mode 100644
index 0000000..a280169
--- /dev/null
+++ b/tests/training/model-types/lm.expected
@@ -0,0 +1,5 @@
+410.27
+307.43
+233.52
+158.37
+89.60
diff --git a/tests/training/model-types/lm.scores.expected b/tests/training/model-types/lm.scores.expected
new file mode 100644
index 0000000..417048d
--- /dev/null
+++ b/tests/training/model-types/lm.scores.expected
@@ -0,0 +1,10 @@
+-110.558548
+-197.097626
+-99.425133
+-226.850739
+-616.795593
+-148.828308
+-366.162659
+-60.395081
+-89.392021
+-107.213608
diff --git a/tests/training/model-types/setup.sh b/tests/training/model-types/setup.sh
new file mode 100644
index 0000000..8b8cd07
--- /dev/null
+++ b/tests/training/model-types/setup.sh
@@ -0,0 +1,2 @@
+test -f "$MRT_DATA/europarl.de-en/corpus.bpe.en" || exit 1  # abort setup unless the English BPE corpus file exists
+test -f "$MRT_DATA/europarl.de-en/corpus.bpe.de" || exit 1  # abort setup unless the German BPE corpus file exists
diff --git a/tests/training/model-types/test_lm-transformer.sh b/tests/training/model-types/test_lm-transformer.sh
new file mode 100644
index 0000000..14767e6
--- /dev/null
+++ b/tests/training/model-types/test_lm-transformer.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -x
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf lm-transformer lm-transformer.log
+mkdir -p lm-transformer
+
+$MRT_MARIAN/build/marian \
+    --seed 1111 --no-shuffle \
+    --type lm-transformer --dim-emb 128 --dim-rnn 256 \
+    -m lm-transformer/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.en -v vocab.en.yml \
+    --disp-freq 20 --after-batches 100 \
+    --log lm-transformer.log
+
+test -e lm-transformer/model.npz
+test -e lm-transformer/model.npz.yml
+test -e lm-transformer.log
+
+cat lm-transformer.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm-transformer.out
+$MRT_TOOLS/diff-floats.py lm-transformer.out lm-transformer.expected -p 0.02 > lm-transformer.diff
+
+# Scoring with LM
+test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
+
+$MRT_MARIAN/build/marian-scorer -m lm-transformer/model.npz -t test.bpe.en -v vocab.en.yml > lm-transformer.scores.out
+$MRT_TOOLS/diff-floats.py lm-transformer.scores.out lm-transformer.scores.expected -p 0.002 > lm-transformer.scores.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/model-types/test_lm.sh b/tests/training/model-types/test_lm.sh
new file mode 100644
index 0000000..628fdc4
--- /dev/null
+++ b/tests/training/model-types/test_lm.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -x
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf lm lm.log
+mkdir -p lm
+
+$MRT_MARIAN/build/marian \
+    --seed 1111 --no-shuffle \
+    --type lm --dim-emb 128 --dim-rnn 256 \
+    -m lm/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.en -v vocab.en.yml \
+    --disp-freq 20 --after-batches 100 \
+    --log lm.log
+
+test -e lm/model.npz
+test -e lm/model.npz.yml
+test -e lm.log
+
+cat lm.log | grep 'Ep\. 1 :' | $MRT_TOOLS/extract-costs.sh > lm.out
+$MRT_TOOLS/diff-floats.py lm.out lm.expected -p 0.02 > lm.diff
+
+# Scoring with LM
+test -s temp.bpe.en || tail $MRT_DATA/europarl.de-en/corpus.bpe.en > test.bpe.en
+
+$MRT_MARIAN/build/marian-scorer -m lm/model.npz -t test.bpe.en -v vocab.en.yml > lm.scores.out
+$MRT_TOOLS/diff-floats.py lm.scores.out lm.scores.expected -p 0.002 > lm.scores.diff
+
+# Exit with success code
+exit 0
author	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-06-12 17:24:46 +0300
committer	Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>	2018-06-12 17:24:46 +0300
commit	0f59ae9a972d0c3477f1814e901f4dcbe465d990 (patch)
tree	2b0d3eddb6f19270527eb765c25ddf3b71af0f74 /tests/training/model-types
parent	154de0d7e2f6ff5dfd2605c9eef54ba8acda083c (diff)