diff options
author | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2018-11-11 21:41:38 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2018-11-11 21:41:38 +0300 |
commit | 8c2718e49e8890d1a11f4f7947e809ff9606a56b (patch) | |
tree | 888e92c28ea688a15669346c81dd87875a9d0da3 /tests/training/basics | |
parent | 9aa1532933337cc01fe0b06849ec593e67d2db19 (diff) |
Add test with gzipped train sets
Diffstat (limited to 'tests/training/basics')
-rw-r--r-- | tests/training/basics/.gitignore | 1 | ||||
-rw-r--r-- | tests/training/basics/gzip.expected | 5 | ||||
-rw-r--r-- | tests/training/basics/test_gzipped_train_sets.sh | 25 |
3 files changed, 31 insertions, 0 deletions
```diff
diff --git a/tests/training/basics/.gitignore b/tests/training/basics/.gitignore
index 61de908..af307e4 100644
--- a/tests/training/basics/.gitignore
+++ b/tests/training/basics/.gitignore
@@ -6,3 +6,4 @@ sqlite_seed
 batch_fit
 *.temp
 vocab.*.yml
+gzip
diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected
new file mode 100644
index 0000000..cc069b6
--- /dev/null
+++ b/tests/training/basics/gzip.expected
@@ -0,0 +1,5 @@
+447.89
+374.88
+324.98
+284.49
+248.72
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
new file mode 100644
index 0000000..5f1d596
--- /dev/null
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf gzip gzip.log
+mkdir -p gzip
+
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz
+test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz
+
+$MRT_MARIAN/build/marian \
+    --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \
+    -m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \
+    --log gzip.log --disp-freq 10 --after-batches 50
+
+test -e gzip/model.npz
+test -e gzip.log
+
+cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out
+$MRT_TOOLS/diff-floats.py gzip.out gzip.expected -p 0.1 > gzip.diff
+
+# Exit with success code
+exit 0
```