Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> 2020-06-25 18:45:02 +0300
committer Roman Grundkiewicz <roman.grundkiewicz@microsoft.com> 2020-06-25 18:45:02 +0300
commit 7b31b4eb387a6037ee970afddf5050829d0bf649 (patch)
tree f9b2068497ebd3d3aa8426ca24edc6ffe803e0ce /tests
parent e7ee6bcc285c5883265cc83b9a7baca0bfd9d256 (diff)
Basic tests for guided alignment and data weighting with --tsv
Diffstat (limited to 'tests')
-rw-r--r-- tests/interface/input-tsv/.gitignore | 3
-rw-r--r-- tests/interface/input-tsv/setup.sh | 5
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_align.sh | 8
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh | 34
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_weights.sh | 8
-rw-r--r-- tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh | 34
6 files changed, 84 insertions, 8 deletions
diff --git a/tests/interface/input-tsv/.gitignore b/tests/interface/input-tsv/.gitignore
index 972de4d..7760deb 100644
--- a/tests/interface/input-tsv/.gitignore
+++ b/tests/interface/input-tsv/.gitignore
@@ -14,7 +14,9 @@ train_lm
train_empty_lines
train_extra_tabs
train_align
+train_align0
train_weights
+train_weights0
train_align_weights
train.de
@@ -25,6 +27,7 @@ train.bpe.en
train.bpe.tsv
train_empty_lines.tsv
train_extra_tabs.tsv
+train2*.tsv
valid
valid.tsv
diff --git a/tests/interface/input-tsv/setup.sh b/tests/interface/input-tsv/setup.sh
index 9888ee6..2b213e2 100644
--- a/tests/interface/input-tsv/setup.sh
+++ b/tests/interface/input-tsv/setup.sh
@@ -18,3 +18,8 @@ test -s train.tsv || paste train.{de,en} > train.tsv
test -s train.bpe.de || cat $MRT_DATA/train.max50.de > train.bpe.de
test -s train.bpe.en || cat $MRT_DATA/train.max50.en > train.bpe.en
test -s train.bpe.tsv || paste train.bpe.{de,en} > train.bpe.tsv
+
+test -s train2.de-en-aln.tsv || paste train2.{de,en,aln} > train2.de-en-aln.tsv
+test -s train2.aln-de-en.tsv || paste train2.{aln,de,en} > train2.aln-de-en.tsv
+test -s train2.de-en-w.tsv || paste train2.{de,en,w} > train2.de-en-w.tsv
+test -s train2.w-de-en.tsv || paste train2.{w,de,en} > train2.w-de-en.tsv
diff --git a/tests/interface/input-tsv/test_tsv_train_with_align.sh b/tests/interface/input-tsv/test_tsv_train_with_align.sh
index 90079fd..8edf098 100644
--- a/tests/interface/input-tsv/test_tsv_train_with_align.sh
+++ b/tests/interface/input-tsv/test_tsv_train_with_align.sh
@@ -2,7 +2,7 @@
#####################################################################
# SUMMARY: Train a model on TSV data with guided alignment
-# TAGS: sentencepiece tsv train
+# TAGS: sentencepiece tsv train align
#####################################################################
# Exit on error
@@ -15,16 +15,16 @@ mkdir -p train_align
# Run marian command
$MRT_MARIAN/marian \
--no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
- -m train_align/model.npz -t train2.de train2.en -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ -m train_align/model.npz --tsv -t train2.de-en-aln.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
--after-batches 100 --disp-freq 4 \
- --guided-alignment train2.aln --guided-alignment-weight 1.0 \
+ --guided-alignment 2 --guided-alignment-weight 1.0 \
--log train_align.log
# Check if files exist
test -e train_align/model.npz
test -e train_align.log
-grep -qi "word alignments from file" train_align.log
+grep -qi "word alignments from" train_align.log
# Compare the current output with the expected output
cat train_align.log | $MRT_TOOLS/extract-costs.sh > train_align.out
diff --git a/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh b/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh
new file mode 100644
index 0000000..85758e3
--- /dev/null
+++ b/tests/interface/input-tsv/test_tsv_train_with_align_pos0.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a model on TSV data with guided alignment
+# TAGS: sentencepiece tsv train align
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf train_align0 train_align0.{log,out,diff}
+mkdir -p train_align0
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
+ -m train_align0/model.npz --tsv -t train2.aln-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ --after-batches 100 --disp-freq 4 \
+ --guided-alignment 0 --guided-alignment-weight 1.0 \
+ --log train_align0.log
+
+
+# Check if files exist
+test -e train_align0/model.npz
+test -e train_align0.log
+grep -qi "word alignments from" train_align0.log
+
+# Compare the current output with the expected output
+cat train_align0.log | $MRT_TOOLS/extract-costs.sh > train_align0.out
+$MRT_TOOLS/diff-nums.py train_align0.out train_align.expected -p 0.01 -o train_align0.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/interface/input-tsv/test_tsv_train_with_weights.sh b/tests/interface/input-tsv/test_tsv_train_with_weights.sh
index 49abde6..7d6927e 100644
--- a/tests/interface/input-tsv/test_tsv_train_with_weights.sh
+++ b/tests/interface/input-tsv/test_tsv_train_with_weights.sh
@@ -2,7 +2,7 @@
#####################################################################
# SUMMARY: Train a model on TSV data with sentence weighting
-# TAGS: sentencepiece tsv train
+# TAGS: sentencepiece tsv train dataweights
#####################################################################
# Exit on error
@@ -15,16 +15,16 @@ mkdir -p train_weights
# Run marian command
$MRT_MARIAN/marian \
--no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
- -m train_weights/model.npz -t train2.de train2.en -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ -m train_weights/model.npz --tsv -t train2.de-en-w.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
--after-batches 100 --disp-freq 4 \
- --data-weighting train2.w --data-weighting-type sentence \
+ --data-weighting 2 --data-weighting-type sentence \
--log train_weights.log
# Check if files exist
test -e train_weights/model.npz
test -e train_weights.log
-grep -qi "weights from file" train_weights.log
+grep -qi "weights from" train_weights.log
# Compare the current output with the expected output
cat train_weights.log | $MRT_TOOLS/extract-costs.sh > train_weights.out
diff --git a/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh b/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh
new file mode 100644
index 0000000..430b8c9
--- /dev/null
+++ b/tests/interface/input-tsv/test_tsv_train_with_weights_pos0.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a model on TSV data with sentence weighting
+# TAGS: sentencepiece tsv train dataweights
+#####################################################################
+
+# Exit on error
+set -e
+
+# Remove old artifacts and create working directory
+rm -rf train_weights0 train_weights0.{log,out,diff}
+mkdir -p train_weights0
+
+# Run marian command
+$MRT_MARIAN/marian \
+ --no-shuffle --seed 5555 --dim-emb 32 --dim-rnn 64 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --learn-rate 0.1 \
+ -m train_weights0/model.npz --tsv -t train2.w-de-en.tsv -v $MRT_MODELS/rnn-spm/vocab.deen.{spm,spm} \
+ --after-batches 100 --disp-freq 4 \
+ --data-weighting 0 --data-weighting-type sentence \
+ --log train_weights0.log
+
+
+# Check if files exist
+test -e train_weights0/model.npz
+test -e train_weights0.log
+grep -qi "weights from" train_weights0.log
+
+# Compare the current output with the expected output
+cat train_weights0.log | $MRT_TOOLS/extract-costs.sh > train_weights0.out
+$MRT_TOOLS/diff-nums.py train_weights0.out train_weights.expected -p 0.01 -o train_weights0.diff
+
+# Exit with success code
+exit 0