Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> 2020-06-25 20:13:28 +0300
committer: Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> 2020-06-25 20:13:28 +0300
commit: b47967133531ed2a86004485f6be3133594b5830 (patch)
tree: a4142bbd5a5c7f84de951e09ca4f05c97df44ffb /tests
parent: 7b31b4eb387a6037ee970afddf5050829d0bf649 (diff)
Add more tests
Diffstat (limited to 'tests')
-rw-r--r-- tests/interface/input-tsv/.gitignore 1
-rw-r--r-- tests/interface/input-tsv/setup.sh 2
-rw-r--r-- tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh 26
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh 35
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh 34
-rw-r--r-- tests/interface/input-tsv/train_align_stdin.expected 7
-rw-r--r-- tests/interface/input-tsv/train_align_weights.expected 15
7 files changed, 120 insertions, 0 deletions
diff --git a/tests/interface/input-tsv/.gitignore b/tests/interface/input-tsv/.gitignore
index 7760deb..191de36 100644
--- a/tests/interface/input-tsv/.gitignore
+++ b/tests/interface/input-tsv/.gitignore
@@ -18,6 +18,7 @@ train_align0
train_weights
train_weights0
train_align_weights
+train_align_stdin
train.de
train.en
diff --git a/tests/interface/input-tsv/setup.sh b/tests/interface/input-tsv/setup.sh
index 2b213e2..2b6fa6d 100644
--- a/tests/interface/input-tsv/setup.sh
+++ b/tests/interface/input-tsv/setup.sh
@@ -23,3 +23,5 @@ test -s train2.de-en-aln.tsv || paste train2.{de,en,aln} > train2.de-en-aln.tsv
test -s train2.aln-de-en.tsv || paste train2.{aln,de,en} > train2.aln-de-en.tsv
test -s train2.de-en-w.tsv || paste train2.{de,en,w} > train2.de-en-w.tsv
test -s train2.w-de-en.tsv || paste train2.{w,de,en} > train2.w-de-en.tsv
+
+test -s train2.de-w-aln-en.tsv || paste train2.{de,w,aln,en} > train2.de-w-aln-en.tsv
diff --git a/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh b/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh
new file mode 100644
index 0000000..170d0ae
--- /dev/null
+++ b/tests/interface/input-tsv/test_error_msg_for_creating_vocab_from_tsv_with_align.sh
@@ -0,0 +1,26 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Creating a vocabulary from a TSV file with alignment is not supported
+# TAGS: sentencepiece tsv train align
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf msg_train_vocab_align msg_train_vocab_align.log
+mkdir -p msg_train_vocab_align
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 1111 -m msg_train_vocab_align/model.npz \
+ --tsv -t train2.de-en-aln.tsv -v msg_train_vocab_align/vocab.spm msg_train_vocab_align/vocab.spm --dim-vocabs 2000 2000 \
+ --after-batches 1 --guided-alignment 2 \
+ > msg_train_vocab_align.log 2>&1 || true
+
+test -e msg_train_vocab_align.log
+grep -qi "creating vocab.* tsv data with alignment.* not supported" msg_train_vocab_align.log
+
+# Exit with success code
+exit 0
diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh
new file mode 100644
index 0000000..60c45c3
--- /dev/null
+++ b/tests/interface/input-tsv/test_tsv_train_with_align_and_weights.sh
@@ -0,0 +1,35 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a model on TSV data with guided alignment and data weighting
+# TAGS: sentencepiece tsv train align dataweights
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf train_align_weights train_align_weights.{log,out,diff}
+mkdir -p train_align_weights
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 7777 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
+ -m train_align_weights/model.npz --tsv -t train2.de-w-aln-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ --after-batches 60 --disp-freq 4 \
+ --guided-alignment 2 --guided-alignment-weight 1.0 --data-weighting 1 \
+ --log train_align_weights.log
+
+
+# Check if files exist
+test -e train_align_weights/model.npz
+test -e train_align_weights.log
+grep -qi "word alignments from" train_align_weights.log
+grep -qi "weights from" train_align_weights.log
+
+# Compare the current output with the expected output
+cat train_align_weights.log | $MRT_TOOLS/extract-costs.sh > train_align_weights.out
+$MRT_TOOLS/diff-nums.py train_align_weights.out train_align_weights.expected -p 0.01 -o train_align_weights.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh
new file mode 100644
index 0000000..92adebc
--- /dev/null
+++ b/tests/interface/input-tsv/test_tsv_train_with_align_stdin.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a model on TSV data from STDIN with guided alignment
+# TAGS: sentencepiece tsv train align stdin
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf train_align_stdin train_align_stdin.{log,out,diff}
+mkdir -p train_align_stdin
+
+# Run marian command
+cat train2.aln-de-en.tsv | $MRT_MARIAN/marian \
+ --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
+ -m train_align_stdin/model.npz -t stdin -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ --disp-freq 4 \
+ --guided-alignment 0 --guided-alignment-weight 1.0 \
+ --log train_align_stdin.log
+
+
+# Check if files exist
+test -e train_align_stdin/model.npz
+test -e train_align_stdin.log
+grep -qi "word alignments from" train_align_stdin.log
+
+# Compare the current output with the expected output
+cat train_align_stdin.log | $MRT_TOOLS/extract-costs.sh > train_align_stdin.out
+$MRT_TOOLS/diff-nums.py train_align_stdin.out train_align_stdin.expected -p 0.01 -o train_align_stdin.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/interface/input-tsv/train_align_stdin.expected b/tests/interface/input-tsv/train_align_stdin.expected
new file mode 100644
index 0000000..a468d22
--- /dev/null
+++ b/tests/interface/input-tsv/train_align_stdin.expected
@@ -0,0 +1,7 @@
+272.57867432
+267.45211792
+245.10440063
+243.12583923
+254.65167236
+251.95730591
+259.63885498
diff --git a/tests/interface/input-tsv/train_align_weights.expected b/tests/interface/input-tsv/train_align_weights.expected
new file mode 100644
index 0000000..4092789
--- /dev/null
+++ b/tests/interface/input-tsv/train_align_weights.expected
@@ -0,0 +1,15 @@
+341.56268311
+328.85687256
+300.68945312
+295.04937744
+322.07330322
+306.52780151
+332.50936890
+305.94641113
+290.83953857
+265.52841187
+256.46743774
+278.98114014
+265.27020264
+292.67654419
+273.39343262