Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-02-09 18:51:17 +0300
committer Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2018-02-09 18:51:17 +0300
commit 3ed1d9e57200937ee6f3dd07b18a4d0e9f94b1c1 (patch)
tree 422aa31a6473c3d83644e28ec663a28d9b3c36e5
parent ca1fef7880581b7a3e6fb768a44cb7dcc653d108 (diff)
Add data weighting test for SQLite
-rw-r--r-- tests/training/weights/.gitignore 11
-rw-r--r-- tests/training/weights/sqlite.expected 100
-rw-r--r-- tests/training/weights/test_sentence_weighting_sqlite.sh 28
3 files changed, 134 insertions, 5 deletions
diff --git a/tests/training/weights/.gitignore b/tests/training/weights/.gitignore
index 0e6dd05..5f5ebbf 100644
--- a/tests/training/weights/.gitignore
+++ b/tests/training/weights/.gitignore
@@ -1,8 +1,9 @@
-noweights
-ones
+noweights
+ones
ones.weights.txt
-word_noweights
+word_noweights
word_ones
word_ones.weights.txt
-x3copied
-x3weights
+x3copied
+x3weights
+sqlite
diff --git a/tests/training/weights/sqlite.expected b/tests/training/weights/sqlite.expected
new file mode 100644
index 0000000..4986c74
--- /dev/null
+++ b/tests/training/weights/sqlite.expected
@@ -0,0 +1,100 @@
+145.70
+407.94
+1194.70
+233.07
+1427.39
+126.23
+378.66
+97.07
+757.37
+330.12
+1602.07
+213.48
+436.69
+96.88
+465.92
+194.09
+1193.40
+145.47
+930.50
+387.66
+927.88
+619.45
+869.34
+818.98
+434.26
+154.00
+489.57
+228.58
+344.03
+306.33
+575.79
+594.91
+1552.05
+212.38
+1133.21
+117.73
+1334.77
+156.53
+690.57
+107.32
+613.12
+156.52
+364.33
+179.15
+203.65
+114.84
+252.92
+135.13
+449.82
+272.53
+761.90
+188.65
+303.93
+267.39
+354.33
+110.67
+381.25
+435.42
+382.01
+116.72
+325.70
+94.44
+312.32
+58.83
+339.84
+157.14
+49.69
+274.61
+264.53
+38.22
+713.85
+183.54
+380.32
+469.33
+853.95
+359.63
+611.92
+187.32
+1294.60
+84.43
+1289.77
+484.42
+535.36
+60.39
+730.99
+111.69
+429.13
+56.42
+267.68
+92.46
+148.99
+123.08
+143.95
+155.08
+940.73
+64.80
+86.15
+71.10
+783.37
+408.54
diff --git a/tests/training/weights/test_sentence_weighting_sqlite.sh b/tests/training/weights/test_sentence_weighting_sqlite.sh
new file mode 100644
index 0000000..39f9cf3
--- /dev/null
+++ b/tests/training/weights/test_sentence_weighting_sqlite.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Test code goes here
+rm -rf sqlite sqlite.log
+mkdir -p sqlite
+
+test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
+test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
+
+$MRT_MARIAN/build/marian \
+ --seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 \
+ -m sqlite/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
+ --log sqlite.log --disp-freq 1 --after-batches 100 --mini-batch 1 \
+ --data-weighting train.1k.weights.txt --data-weighting-type sentence --sqlite sqlite/corpus.sqlite3
+
+test -e sqlite/model.npz
+test -e sqlite/corpus.sqlite3
+test -e sqlite.log
+
+cat sqlite.log | $MRT_TOOLS/extract-costs.sh > sqlite.out
+
+$MRT_TOOLS/diff-floats.py sqlite.out sqlite.expected -p 0.1 > sqlite.diff
+
+# Exit with success code
+exit 0