Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author snukky <rgrundki@exseed.ed.ac.uk> 2019-08-06 09:36:01 +0300
committer Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2019-08-07 19:14:43 +0300
commit 28aa9267953a8816f05da9ae8177effce9003be0 (patch)
tree 71cdb634414352b1b20048836e4f69964415f870 /tests/sentencepiece/test_create_spm_vocabs.sh
parent 01feb431db4d3adc887c266e688bc643f84e7af5 (diff)
Add tests for SentencePiece
Diffstat (limited to 'tests/sentencepiece/test_create_spm_vocabs.sh')
-rw-r--r-- tests/sentencepiece/test_create_spm_vocabs.sh 14
1 files changed, 9 insertions, 5 deletions
diff --git a/tests/sentencepiece/test_create_spm_vocabs.sh b/tests/sentencepiece/test_create_spm_vocabs.sh
index 700bdc1..32da62e 100644
--- a/tests/sentencepiece/test_create_spm_vocabs.sh
+++ b/tests/sentencepiece/test_create_spm_vocabs.sh
@@ -3,6 +3,7 @@
#####################################################################
# SUMMARY: Create SentencePiece vocabularies
# AUTHOR: snukky
+# TAGS: sentencepiece
#####################################################################
# Exit on error
@@ -15,7 +16,7 @@ mkdir -p vocabs
# Run marian command
$MRT_MARIAN/marian \
--no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none \
- -m vocabs/model.npz -t $MRT_DATA/europarl.de-en/corpus.{en,de} \
+ -m vocabs/model.npz -t $MRT_DATA/europarl.de-en/corpus.small.{en,de}.gz \
--dim-vocabs 4000 4000 -v vocabs/vocab.en.spm vocabs/vocab.de.spm \
--after-batches 1 \
--log vocabs.log
@@ -27,14 +28,17 @@ test -e vocabs/vocab.de.spm
test -e vocabs.log
# Check logging messages
-grep -q "Creating SentencePiece vocabulary.* vocabs.en.spm" vocabs.log
-grep -q "Creating SentencePiece vocabulary.* vocabs.de.spm" vocabs.log
+grep -q "Training SentencePiece vocabulary .*vocab.en.spm" vocabs.log
+grep -q "Training SentencePiece vocabulary .*vocab.de.spm" vocabs.log
+grep -q "Setting vocabulary size .* to 4000" vocabs.log
+grep -q "Loading SentencePiece vocabulary .*vocab.en.spm" vocabs.log
+grep -q "Loading SentencePiece vocabulary .*vocab.de.spm" vocabs.log
# Extract a textual vocabulary and compare with the expected output
-$MRT_MRT/spm_export_vocab --model vocabs/vocab.en.spm > vocabs.en.out
+$MRT_MARIAN/spm_export_vocab --model vocabs/vocab.en.spm > vocabs.en.out
$MRT_TOOLS/diff-nums.py vocabs.en.out vocabs.en.expected -o vocabs.en.diff
-$MRT_MRT/spm_export_vocab --model vocabs/vocab.de.spm > vocabs.de.out
+$MRT_MARIAN/spm_export_vocab --model vocabs/vocab.de.spm > vocabs.de.out
$MRT_TOOLS/diff-nums.py vocabs.de.out vocabs.de.expected -o vocabs.de.diff
# Exit with success code