Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2017-10-13 17:00:32 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2017-10-13 17:00:32 +0300
commit: 3c236e9c72ee1cd223c506872a072bb0e60d9f81 (patch)
tree: 5659e5d0cdf346893edfe2ae4090caf171c6d018
parent: abb4feb35ff60028d483312678260b86b668f3c8 (diff)
Update WMT16 tests
-rw-r--r-- .gitignore 9
-rw-r--r-- Makefile 1
-rwxr-xr-x data/download-data.sh 7
-rw-r--r-- models/download-wmt16.sh 23
-rw-r--r-- models/wmt16_systems/marian.en-de.yml (renamed from models/wmt16.en-de/marian.yml) 6
-rw-r--r-- models/wmt17_systems/marian.en-de.yml 21
-rw-r--r-- tests/decoder/wmt16/setup.sh 2
-rw-r--r-- tests/decoder/wmt16/test_ende.sh 2
-rw-r--r-- tests/decoder/wmt16/test_nbest.sh 2
9 files changed, 41 insertions, 32 deletions
diff --git a/.gitignore b/.gitignore
index 4e4b42d..8314716 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,11 +14,10 @@ tools/marian-dev
tools/moses-scripts
tools/subword-nmt
-models/*/model.npz
-models/*/model.npz.json
-models/*/vocab.*
-models/*/*.bpe
-models/*/truecase*
+models/wmt16_systems/en-de
+models/wmt17_systems/en-de
+models/wmt17_systems/scripts
+models/wmt17_systems/vars
data/*/corpus.*
data/*/*.bpe
diff --git a/Makefile b/Makefile
index ab4cbb3..ea52a76 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ tools/marian:
models:
mkdir -p $@
cd $@ && bash ./download-wmt16.sh
+ cd $@ && bash ./download-wmt17.sh
data:
mkdir -p $@
diff --git a/data/download-data.sh b/data/download-data.sh
index cf37607..079146b 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -12,7 +12,10 @@ for file in ${MODEL_FILES[@]}; do
mkdir -p $(dirname $file)
if [[ $file = *.gz ]]; then
- wget -qO- $URL/$file | gzip -dc > "${file%.*}"
- fi
+ target="${file%.*}"
+ if [ ! -s $target ]; then
+ wget -qO- $URL/$file | gzip -dc > $target
+ fi
+ fi
done
diff --git a/models/download-wmt16.sh b/models/download-wmt16.sh
index 9522df1..2b7e99d 100644
--- a/models/download-wmt16.sh
+++ b/models/download-wmt16.sh
@@ -1,21 +1,6 @@
#!/bin/bash
-URL=http://data.statmt.org/rsennrich/wmt16_systems
-SRC=en
-TRG=de
-
-MODEL_FILES=(
- $URL/$SRC-$TRG/model.npz
- $URL/$SRC-$TRG/model.npz.json
- $URL/$SRC-$TRG/vocab.$SRC.json
- $URL/$SRC-$TRG/vocab.$TRG.json
- $URL/$SRC-$TRG/$SRC$TRG.bpe
- $URL/$SRC-$TRG/truecase-model.$SRC
-)
-
-mkdir -p wmt16.$SRC-$TRG
-
-for model_file in ${MODEL_FILES[@]}; do
- echo $model_file
- wget -q --no-clobber --directory-prefix wmt16.$SRC-$TRG $model_file
-done
+# Download single en-de wmt16 model
+wget -r -e robots=off -nH -np \
+ -R *ens* -R *r2l* -R index.html* \
+ http://data.statmt.org/wmt16_systems/en-de/
diff --git a/models/wmt16.en-de/marian.yml b/models/wmt16_systems/marian.en-de.yml
index 7bb15af..fbe9f0b 100644
--- a/models/wmt16.en-de/marian.yml
+++ b/models/wmt16_systems/marian.en-de.yml
@@ -1,11 +1,11 @@
relative-paths: true
type: amun
models:
- - model.npz
+ - en-de/model.npz
dim-emb: 500
vocabs:
- - vocab.en.json
- - vocab.de.json
+ - en-de/vocab.en.json
+ - en-de/vocab.de.json
dim-vocabs:
- 85000
- 85000
diff --git a/models/wmt17_systems/marian.en-de.yml b/models/wmt17_systems/marian.en-de.yml
new file mode 100644
index 0000000..d10e4c2
--- /dev/null
+++ b/models/wmt17_systems/marian.en-de.yml
@@ -0,0 +1,21 @@
+type: nematus
+enc-depth: 1
+enc-cell-depth: 4
+enc-type: bidirectional
+dec-depth: 1
+dec-cell-base-depth: 8
+dec-cell-high-depth: 1
+dec-cell: gru-nematus
+enc-cell: gru-nematus
+tied-embeddings: true
+layer-normalization: true
+relative-paths: true
+models:
+ - en-de/model.l2r.ens1.npz
+dim-emb: 500
+vocabs:
+ - en-de/vocab.en.json
+ - en-de/vocab.de.json
+dim-vocabs:
+ - 51100
+ - 74383
diff --git a/tests/decoder/wmt16/setup.sh b/tests/decoder/wmt16/setup.sh
index bd695a8..2b9dffa 100644
--- a/tests/decoder/wmt16/setup.sh
+++ b/tests/decoder/wmt16/setup.sh
@@ -1 +1 @@
-test -f $MRT_MODELS/wmt16.en-de/model.npz || exit 1
+test -f $MRT_MODELS/wmt16_systems/en-de/model.npz || exit 1
diff --git a/tests/decoder/wmt16/test_ende.sh b/tests/decoder/wmt16/test_ende.sh
index a628a28..793a86b 100644
--- a/tests/decoder/wmt16/test_ende.sh
+++ b/tests/decoder/wmt16/test_ende.sh
@@ -4,7 +4,7 @@
set -e
# Test code goes here
-$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16.en-de/marian.yml < text.in > text.out
+$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16_systems/marian.en-de.yml < text.in > text.out
diff text.out text.expected > text.diff
# Exit with success code
diff --git a/tests/decoder/wmt16/test_nbest.sh b/tests/decoder/wmt16/test_nbest.sh
index 5c1c8ca..3cd471e 100644
--- a/tests/decoder/wmt16/test_nbest.sh
+++ b/tests/decoder/wmt16/test_nbest.sh
@@ -4,7 +4,7 @@
set -e
# Test code goes here
-$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16.en-de/marian.yml -b 5 --n-best < text.in > nbest.out
+$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -b 5 --n-best < text.in > nbest.out
$MRT_TOOLS/diff-floats.py nbest.out nbest.expected > nbest.diff
# Exit with success code