Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'examples/training/scripts/download-files.sh')
-rwxr-xr-x examples/training/scripts/download-files.sh 22
1 files changed, 22 insertions, 0 deletions
diff --git a/examples/training/scripts/download-files.sh b/examples/training/scripts/download-files.sh
new file mode 100755
index 00000000..069dcbc2
--- /dev/null
+++ b/examples/training/scripts/download-files.sh
@@ -0,0 +1,22 @@
+#!/bin/bash -v
+
+# get En-Ro training data for WMT16
+
+if [ ! -f data/ro-en.tgz ];
+then
+ wget http://www.statmt.org/europarl/v7/ro-en.tgz -O data/ro-en.tgz
+fi
+
+if [ ! -f data/SETIMES2.ro-en.txt.zip ];
+then
+ wget http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip -O data/SETIMES2.ro-en.txt.zip
+fi
+
+cd data/
+tar -xf ro-en.tgz
+unzip SETIMES2.ro-en.txt.zip
+
+cat europarl-v7.ro-en.en SETIMES2.en-ro.en > corpus.en
+cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro > corpus.ro
+
+cd ..