Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-11 21:41:38 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-11-11 21:41:38 +0300
commit: 8c2718e49e8890d1a11f4f7947e809ff9606a56b (patch)
tree: 888e92c28ea688a15669346c81dd87875a9d0da3 /tests/training/basics
parent: 9aa1532933337cc01fe0b06849ec593e67d2db19 (diff)
Add test with gzipped train sets
Diffstat (limited to 'tests/training/basics')
-rw-r--r-- tests/training/basics/.gitignore | 1
-rw-r--r-- tests/training/basics/gzip.expected | 5
-rw-r--r-- tests/training/basics/test_gzipped_train_sets.sh | 25
3 files changed, 31 insertions, 0 deletions
diff --git a/tests/training/basics/.gitignore b/tests/training/basics/.gitignore
index 61de908..af307e4 100644
--- a/tests/training/basics/.gitignore
+++ b/tests/training/basics/.gitignore
@@ -6,3 +6,4 @@ sqlite_seed
batch_fit
*.temp
vocab.*.yml
+gzip
diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected
new file mode 100644
index 0000000..cc069b6
--- /dev/null
+++ b/tests/training/basics/gzip.expected
@@ -0,0 +1,5 @@
+447.89
+374.88
+324.98
+284.49
+248.72
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
new file mode 100644
index 0000000..5f1d596
--- /dev/null
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf gzip gzip.log
+mkdir -p gzip
+
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz
+
+$MRT_MARIAN/build/marian \
+ --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \
+ -m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \
+ --log gzip.log --disp-freq 10 --after-batches 50
+
+test -e gzip/model.npz
+test -e gzip.log
+
+cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out
+$MRT_TOOLS/diff-floats.py gzip.out gzip.expected -p 0.1 > gzip.diff
+
+# Exit with success code
+exit 0