diff options
author | Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> | 2020-06-25 20:13:28 +0300 |
---|---|---|
committer | Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> | 2020-06-25 20:13:28 +0300 |
commit | b47967133531ed2a86004485f6be3133594b5830 (patch) | |
tree | a4142bbd5a5c7f84de951e09ca4f05c97df44ffb /tests | |
parent | 7b31b4eb387a6037ee970afddf5050829d0bf649 (diff) |
Add more tests
Diffstat (limited to 'tests')
7 files changed, 120 insertions, 0 deletions
diff --git a/tests/interface/input-tsv/.gitignore b/tests/interface/input-tsv/.gitignore index 7760deb..191de36 100644 --- a/tests/interface/input-tsv/.gitignore +++ b/tests/interface/input-tsv/.gitignore @@ -18,6 +18,7 @@ train_align0 train_weights train_weights0 train_align_weights +train_align_stdin train.de train.en diff --git a/tests/interface/input-tsv/setup.sh b/tests/interface/input-tsv/setup.sh index 2b213e2..2b6fa6d 100644 --- a/tests/interface/input-tsv/setup.sh +++ b/tests/interface/input-tsv/setup.sh @@ -23,3 +23,5 @@ test -s train2.de-en-aln.tsv || paste train2.{de,en,aln} > train2.de-en-aln.tsv test -s train2.aln-de-en.tsv || paste train2.{aln,de,en} > train2.aln-de-en.tsv test -s train2.de-en-w.tsv || paste train2.{de,en,w} > train2.de-en-w.tsv test -s train2.w-de-en.tsv || paste train2.{w,de,en} > train2.w-de-en.tsv + +test -s train2.de-w-aln-en.tsv || paste train2.{de,w,aln,en} > train2.de-w-aln-en.tsv diff --git a/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh b/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh new file mode 100644 index 0000000..170d0ae --- /dev/null +++ b/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh @@ -0,0 +1,26 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Creating a vocabulary from a TSV file with alignment is not supported +# TAGS: sentencepiece tsv train align +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf msg_train_vocab_align msg_train_vocab_align.log +mkdir -p msg_train_vocab_align + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 1111 -m msg_train_vocab_align/model.npz \ + --tsv -t train2.de-en-aln.tsv -v msg_train_vocab_align/vocab.spm msg_train_vocab_align/vocab.spm --dim-vocabs 2000 2000 \ + --after-batches 1 
--guided-alignment 2 \ + > msg_train_vocab_align.log 2>&1 || true + +test -e msg_train_vocab_align.log +grep -qi "creating vocab.* tsv data with alignment.* not supported" msg_train_vocab_align.log + +# Exit with success code +exit 0 diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh new file mode 100644 index 0000000..60c45c3 --- /dev/null +++ b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh @@ -0,0 +1,35 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Train a model on TSV data with guided alignment and data weighting +# TAGS: sentencepiece tsv train align dataweights +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf train_align_weights train_align_weights.{log,out,diff} +mkdir -p train_align_weights + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + -m train_align_weights/model.npz --tsv -t train2.de-w-aln-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ + --after-batches 60 --disp-freq 4 \ + --guided-alignment 2 --guided-alignment-weight 1.0 --data-weighting 1 \ + --log train_align_weights.log + + +# Check if files exist +test -e train_align_weights/model.npz +test -e train_align_weights.log +grep -qi "word alignments from" train_align_weights.log +grep -qi "weights from" train_align_weights.log + +# Compare the current output with the expected output +cat train_align_weights.log | $MRT_TOOLS/extract-costs.sh > train_align_weights.out +$MRT_TOOLS/diff-nums.py train_align_weights.out train_align_weights.expected -p 0.01 -o train_align_weights.diff + +# Exit with success code +exit 0 diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh 
b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh new file mode 100644 index 0000000..92adebc --- /dev/null +++ b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh @@ -0,0 +1,34 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Train a model on TSV data from STDIN with guided alignment +# TAGS: sentencepiece tsv train align stdin +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf train_align_stdin train_align_stdin.{log,out,diff} +mkdir -p train_align_stdin + +# Run marian command +cat train2.aln-de-en.tsv | $MRT_MARIAN/marian \ + --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \ + -m train_align_stdin/model.npz -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \ + --disp-freq 4 \ + --guided-alignment 0 --guided-alignment-weight 1.0 \ + --log train_align_stdin.log + + +# Check if files exist +test -e train_align_stdin/model.npz +test -e train_align_stdin.log +grep -qi "word alignments from" train_align_stdin.log + +# Compare the current output with the expected output +cat train_align_stdin.log | $MRT_TOOLS/extract-costs.sh > train_align_stdin.out +$MRT_TOOLS/diff-nums.py train_align_stdin.out train_align_stdin.expected -p 0.01 -o train_align_stdin.diff + +# Exit with success code +exit 0 diff --git a/tests/interface/input-tsv/train_align_stdin.expected b/tests/interface/input-tsv/train_align_stdin.expected new file mode 100644 index 0000000..a468d22 --- /dev/null +++ b/tests/interface/input-tsv/train_align_stdin.expected @@ -0,0 +1,7 @@ +272.57867432 +267.45211792 +245.10440063 +243.12583923 +254.65167236 +251.95730591 +259.63885498 diff --git a/tests/interface/input-tsv/train_align_weights.expected b/tests/interface/input-tsv/train_align_weights.expected new file mode 100644 index 0000000..4092789 --- 
/dev/null +++ b/tests/interface/input-tsv/train_align_weights.expected @@ -0,0 +1,15 @@ +341.56268311 +328.85687256 +300.68945312 +295.04937744 +322.07330322 +306.52780151 +332.50936890 +305.94641113 +290.83953857 +265.52841187 +256.46743774 +278.98114014 +265.27020264 +292.67654419 +273.39343262 |