Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-12 13:12:12 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-12 13:12:12 +0300
commit: 86d2de5f8f410e9b001355896ac88e31f55cdd48 (patch)
tree: 98271877060c5758b31bf4261f2f867da22716ff
parent: 62369d4a7a1a8a5829861250842b2e1d618f2ba5 (diff)
Add test for data weighting with validation
-rw-r--r--  tests/training/weights/.gitignore  2
-rw-r--r--  tests/training/weights/test_validation.sh  33
-rw-r--r--  tests/training/weights/train.expected  10
-rw-r--r--  tests/training/weights/valid.expected  8
-rwxr-xr-x  tests/training/weights/valid_script.sh  4
5 files changed, 57 insertions, 0 deletions
diff --git a/tests/training/weights/.gitignore b/tests/training/weights/.gitignore
index 5f5ebbf..6c415b9 100644
--- a/tests/training/weights/.gitignore
+++ b/tests/training/weights/.gitignore
@@ -7,3 +7,5 @@ word_ones.weights.txt
x3copied
x3weights
sqlite
+valid
+valid_script.temp
diff --git a/tests/training/weights/test_validation.sh b/tests/training/weights/test_validation.sh
new file mode 100644
index 0000000..eb16a85
--- /dev/null
+++ b/tests/training/weights/test_validation.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -x
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf valid valid_script.temp  # clean slate: remove the previous output dir and the file valid_script.sh appends to
+mkdir -p valid  # directory that receives model.npz, valid.log and train.log below
+
+test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml  # generate vocab.de.yml only when it does not exist yet
+test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml  # same for vocab.en.yml
+
+$MRT_MARIAN/build/marian \
+ --seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 \
+ -m valid/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
+ --disp-freq 5 --valid-freq 15 --after-batches 50 \
+ --data-weighting train.1k.weights.txt --data-weighting-type sentence \
+ --valid-metrics cross-entropy valid-script --valid-script-path ./valid_script.sh \
+ --valid-sets $MRT_DATA/europarl.de-en/toy.bpe.{en,de} \
+ --valid-log valid/valid.log --log valid/train.log  # NOTE(review): -t and -v expand to {de,en} but --valid-sets to {en,de}; confirm this order is intended (valid.expected presumably depends on it)
+
+test -e valid/model.npz  # with set -e, a missing output file aborts the test at this point
+test -e valid/valid.log
+test -e valid/train.log
+
+$MRT_TOOLS/strip-timestamps.sh < valid/valid.log > valid.out  # filter the validation log into valid.out for comparison
+$MRT_TOOLS/diff-floats.py valid.out valid.expected -p 0.2 > valid.diff  # compare with valid.expected; -p 0.2 is presumably a numeric tolerance (TODO confirm); non-zero exit fails the test via set -e
+
+$MRT_TOOLS/extract-costs.sh < valid/train.log > train.out  # filter the training log into train.out for comparison
+$MRT_TOOLS/diff-floats.py train.out train.expected -p 0.2 > train.diff  # same comparison against train.expected
+
+# Exit with success code
+exit 0  # reached only if no command above failed, because of set -e
diff --git a/tests/training/weights/train.expected b/tests/training/weights/train.expected
new file mode 100644
index 0000000..b7570bc
--- /dev/null
+++ b/tests/training/weights/train.expected
@@ -0,0 +1,10 @@
+535.76
+658.71
+564.28
+506.01
+538.29
+482.65
+410.41
+437.76
+448.83
+372.70
diff --git a/tests/training/weights/valid.expected b/tests/training/weights/valid.expected
new file mode 100644
index 0000000..b29e69d
--- /dev/null
+++ b/tests/training/weights/valid.expected
@@ -0,0 +1,8 @@
+[valid] 15 : cross-entropy : 276.149 : new best
+[valid] 15 : valid-script : 1 : new best
+[valid] 30 : cross-entropy : 270.361 : new best
+[valid] 30 : valid-script : 2 : new best
+[valid] 45 : cross-entropy : 277.11 : stalled 1 times
+[valid] 45 : valid-script : 3 : new best
+[valid] 50 : cross-entropy : 280.511 : stalled 2 times
+[valid] 50 : valid-script : 4 : new best
diff --git a/tests/training/weights/valid_script.sh b/tests/training/weights/valid_script.sh
new file mode 100755
index 0000000..a60c4e5
--- /dev/null
+++ b/tests/training/weights/valid_script.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+echo line written at $(date) >> valid_script.temp 2> /dev/null  # append one line per invocation; stderr is discarded
+wc -l valid_script.temp  # print the line count followed by the file name, i.e. how many times this script has run since the file was last removed