Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-09 17:28:13 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-09 17:28:13 +0300
commit: 8102ce448c813499a75e6211b3e117540a201245 (patch)
tree: f195985b1227ac87d21538d4a6d15b9cbb89d2e3 /tests/training/weights/test_word_weighting_with_ones.sh
parent: b93e2a29f8107f4a9db8cc1a249a51595d9d2b8c (diff)
Add data weighting tests
Diffstat (limited to 'tests/training/weights/test_word_weighting_with_ones.sh')
-rw-r--r-- tests/training/weights/test_word_weighting_with_ones.sh | 37
1 file changed, 37 insertions, 0 deletions
diff --git a/tests/training/weights/test_word_weighting_with_ones.sh b/tests/training/weights/test_word_weighting_with_ones.sh
new file mode 100644
index 0000000..17311de
--- /dev/null
+++ b/tests/training/weights/test_word_weighting_with_ones.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf word_noweights* word_ones*
+mkdir -p word_noweights word_ones
+
+test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.de > vocab.de.yml
+test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.en > vocab.en.yml
+
+$MRT_MARIAN/build/marian \
+ --seed 1111 --no-shuffle \
+ -m word_noweights/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
+ --log word_noweights.log --disp-freq 5 -e 2
+
+test -e word_noweights/model.npz
+test -e word_noweights.log
+cat word_noweights.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_noweights.out
+
+cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/1/g' > word_ones.weights.txt
+
+$MRT_MARIAN/build/marian \
+ --seed 1111 --no-shuffle \
+ -m word_ones/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
+ --log word_ones.log --disp-freq 5 -e 2 \
+ --data-weighting word_ones.weights.txt --data-weighting-type word
+
+test -e word_ones/model.npz
+test -e word_ones.log
+
+cat word_ones.log | $MRT_TOOLS/strip-timestamps.sh | grep "Ep\. " | sed -r 's/ Time.*//' > word_ones.out
+$MRT_TOOLS/diff-floats.py word_noweights.out word_ones.out -p 0.1 > word_ones.diff
+
+# Exit with success code
+exit 0