diff options
author | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2017-10-13 17:00:32 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2017-10-13 17:00:32 +0300 |
commit | 3c236e9c72ee1cd223c506872a072bb0e60d9f81 (patch) | |
tree | 5659e5d0cdf346893edfe2ae4090caf171c6d018 | |
parent | abb4feb35ff60028d483312678260b86b668f3c8 (diff) |
Update WMT16 tests
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rwxr-xr-x | data/download-data.sh | 7 | ||||
-rw-r--r-- | models/download-wmt16.sh | 23 | ||||
-rw-r--r-- | models/wmt16_systems/marian.en-de.yml (renamed from models/wmt16.en-de/marian.yml) | 6 | ||||
-rw-r--r-- | models/wmt17_systems/marian.en-de.yml | 21 | ||||
-rw-r--r-- | tests/decoder/wmt16/setup.sh | 2 | ||||
-rw-r--r-- | tests/decoder/wmt16/test_ende.sh | 2 | ||||
-rw-r--r-- | tests/decoder/wmt16/test_nbest.sh | 2 |
9 files changed, 41 insertions, 32 deletions
@@ -14,11 +14,10 @@ tools/marian-dev tools/moses-scripts tools/subword-nmt -models/*/model.npz -models/*/model.npz.json -models/*/vocab.* -models/*/*.bpe -models/*/truecase* +models/wmt16_systems/en-de +models/wmt17_systems/en-de +models/wmt17_systems/scripts +models/wmt17_systems/vars data/*/corpus.* data/*/*.bpe @@ -27,6 +27,7 @@ tools/marian: models: mkdir -p $@ cd $@ && bash ./download-wmt16.sh + cd $@ && bash ./download-wmt17.sh data: mkdir -p $@ diff --git a/data/download-data.sh b/data/download-data.sh index cf37607..079146b 100755 --- a/data/download-data.sh +++ b/data/download-data.sh @@ -12,7 +12,10 @@ for file in ${MODEL_FILES[@]}; do mkdir -p $(dirname $file) if [[ $file = *.gz ]]; then - wget -qO- $URL/$file | gzip -dc > "${file%.*}" - fi + target="${file%.*}" + if [ ! -s $target ]; then + wget -qO- $URL/$file | gzip -dc > $target + fi + fi done diff --git a/models/download-wmt16.sh b/models/download-wmt16.sh index 9522df1..2b7e99d 100644 --- a/models/download-wmt16.sh +++ b/models/download-wmt16.sh @@ -1,21 +1,6 @@ #!/bin/bash -URL=http://data.statmt.org/rsennrich/wmt16_systems -SRC=en -TRG=de - -MODEL_FILES=( - $URL/$SRC-$TRG/model.npz - $URL/$SRC-$TRG/model.npz.json - $URL/$SRC-$TRG/vocab.$SRC.json - $URL/$SRC-$TRG/vocab.$TRG.json - $URL/$SRC-$TRG/$SRC$TRG.bpe - $URL/$SRC-$TRG/truecase-model.$SRC -) - -mkdir -p wmt16.$SRC-$TRG - -for model_file in ${MODEL_FILES[@]}; do - echo $model_file - wget -q --no-clobber --directory-prefix wmt16.$SRC-$TRG $model_file -done +# Download single en-de wmt16 model +wget -r -e robots=off -nH -np \ + -R *ens* -R *r2l* -R index.html* \ + http://data.statmt.org/wmt16_systems/en-de/ diff --git a/models/wmt16.en-de/marian.yml b/models/wmt16_systems/marian.en-de.yml index 7bb15af..fbe9f0b 100644 --- a/models/wmt16.en-de/marian.yml +++ b/models/wmt16_systems/marian.en-de.yml @@ -1,11 +1,11 @@ relative-paths: true type: amun models: - - model.npz + - en-de/model.npz dim-emb: 500 vocabs: - - vocab.en.json - - vocab.de.json + - en-de/vocab.en.json + - en-de/vocab.de.json dim-vocabs: - 85000 - 85000 diff --git a/models/wmt17_systems/marian.en-de.yml b/models/wmt17_systems/marian.en-de.yml new file mode 100644 index 0000000..d10e4c2 --- /dev/null +++ b/models/wmt17_systems/marian.en-de.yml @@ -0,0 +1,21 @@ +type: nematus +enc-depth: 1 +enc-cell-depth: 4 +enc-type: bidirectional +dec-depth: 1 +dec-cell-base-depth: 8 +dec-cell-high-depth: 1 +dec-cell: gru-nematus +enc-cell: gru-nematus +tied-embeddings: true +layer-normalization: true +relative-paths: true +models: + - en-de/model.l2r.ens1.npz +dim-emb: 500 +vocabs: + - en-de/vocab.en.json + - en-de/vocab.de.json +dim-vocabs: + - 51100 + - 74383 diff --git a/tests/decoder/wmt16/setup.sh b/tests/decoder/wmt16/setup.sh index bd695a8..2b9dffa 100644 --- a/tests/decoder/wmt16/setup.sh +++ b/tests/decoder/wmt16/setup.sh @@ -1 +1 @@ -test -f $MRT_MODELS/wmt16.en-de/model.npz || exit 1 +test -f $MRT_MODELS/wmt16_systems/en-de/model.npz || exit 1 diff --git a/tests/decoder/wmt16/test_ende.sh b/tests/decoder/wmt16/test_ende.sh index a628a28..793a86b 100644 --- a/tests/decoder/wmt16/test_ende.sh +++ b/tests/decoder/wmt16/test_ende.sh @@ -4,7 +4,7 @@ set -e # Test code goes here -$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16.en-de/marian.yml < text.in > text.out +$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16_systems/marian.en-de.yml < text.in > text.out diff text.out text.expected > text.diff # Exit with success code diff --git a/tests/decoder/wmt16/test_nbest.sh b/tests/decoder/wmt16/test_nbest.sh index 5c1c8ca..3cd471e 100644 --- a/tests/decoder/wmt16/test_nbest.sh +++ b/tests/decoder/wmt16/test_nbest.sh @@ -4,7 +4,7 @@ set -e # Test code goes here -$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16.en-de/marian.yml -b 5 --n-best < text.in > nbest.out +$MRT_MARIAN/build/s2s -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -b 5 --n-best < text.in > nbest.out $MRT_TOOLS/diff-floats.py nbest.out nbest.expected > nbest.diff # Exit with success code |