Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2017-03-25 13:44:24 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2017-03-25 13:44:24 +0300
commit: 274a3b4741b8931123052dad42184047c36e2c41 (patch)
tree: 0c55b617f7aba93a542908f64a718cc321383445 /examples/training
parent: 463da37570ce4e815745ee8f09ac80d70dc38531 (diff)
download back-translated files
Diffstat (limited to 'examples/training')
-rwxr-xr-x examples/training/scripts/download-files.sh | 11
1 file changed, 9 insertions, 2 deletions
diff --git a/examples/training/scripts/download-files.sh b/examples/training/scripts/download-files.sh
index 069dcbc2..9a739a60 100755
--- a/examples/training/scripts/download-files.sh
+++ b/examples/training/scripts/download-files.sh
@@ -12,11 +12,18 @@ then
wget http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip -O data/SETIMES2.ro-en.txt.zip
fi
+if [ ! -f data/corpus.bt.ro-en.en.gz ];
+then
+ wget http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.en.gz -O data/corpus.bt.ro-en.en.gz
+ wget http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.ro.gz -O data/corpus.bt.ro-en.ro.gz
+fi
+
cd data/
tar -xf ro-en.tgz
unzip SETIMES2.ro-en.txt.zip
+gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz
-cat europarl-v7.ro-en.en SETIMES2.en-ro.en > corpus.en
-cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro > corpus.ro
+cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en
+cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro
cd ..