Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/models
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2017-09-26 23:06:14 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2017-09-26 23:06:14 +0300
commit: 2855410054841c147bf5d7f014fc94891a38a9b3 (patch)
tree: cfbe6a818738cffd1812a4ba9239fd9d4aa1b57f /models
parent: 22b180e172002002bf1abd5bcbb3b77b3b4e59e0 (diff)
Add basic test for translation
Diffstat (limited to 'models')
-rw-r--r-- models/download_wmt16.sh 21
-rw-r--r-- models/preprocess.sh 12
-rw-r--r-- models/wmt16.en-de/marian.yml 11
3 files changed, 44 insertions, 0 deletions
diff --git a/models/download_wmt16.sh b/models/download_wmt16.sh
new file mode 100644
index 0000000..f86d982
--- /dev/null
+++ b/models/download_wmt16.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Fetch the WMT16 $SRC-$TRG model files listed in MODEL_FILES with wget.
+URL=http://data.statmt.org/rsennrich/wmt16_systems
+SRC=en
+TRG=de
+
+MODEL_FILES=(
+ "$URL/$SRC-$TRG/model.npz"
+ "$URL/$SRC-$TRG/model.npz.json"
+ "$URL/$SRC-$TRG/vocab.$SRC.json"
+ "$URL/$SRC-$TRG/vocab.$TRG.json"
+ "$URL/$SRC-$TRG/$SRC$TRG.bpe"
+ "$URL/$SRC-$TRG/truecase-model.$SRC"
+)
+# Files go to wmt16.$SRC-$TRG, relative to the current working directory.
+mkdir -p "wmt16.$SRC-$TRG"
+# Expansions are quoted so each URL and the target directory stay single words.
+for model_file in "${MODEL_FILES[@]}"; do
+ echo "$model_file"
+ wget -q --no-clobber --directory-prefix "wmt16.$SRC-$TRG" --show-progress "$model_file"
+done
diff --git a/models/preprocess.sh b/models/preprocess.sh
new file mode 100644
index 0000000..4b5bc3c
--- /dev/null
+++ b/models/preprocess.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+root="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+moses_scripts=$root/../tools/moses-scripts
+subword_nmt=$root/../tools/subword-nmt
+
+model_dir=$root/wmt16.en-de
+
+$moses_scripts/scripts/tokenizer/normalize-punctuation.perl -l en \
+ | $moses_scripts/scripts/tokenizer/tokenizer.perl -l en -penn \
+ | $moses_scripts/scripts/recaser/truecase.perl -model $model_dir/truecase-model.en \
+ | $subword_nmt/apply_bpe.py -c $model_dir/ende.bpe
diff --git a/models/wmt16.en-de/marian.yml b/models/wmt16.en-de/marian.yml
new file mode 100644
index 0000000..7bb15af
--- /dev/null
+++ b/models/wmt16.en-de/marian.yml
@@ -0,0 +1,11 @@
+relative-paths: true
+type: amun
+models:
+ - model.npz
+dim-emb: 500
+vocabs:
+ - vocab.en.json
+ - vocab.de.json
+dim-vocabs:
+ - 85000
+ - 85000