Welcome to mirror list, hosted at ThFree Co, Russian Federation.

download-files.sh « scripts « training « examples - github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 069dcbc2f2b52f703936ab87f2c5e87e2af61c7a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/bin/bash

# Get En-Ro training data for WMT16.
#
# Downloads the Europarl v7 and SETIMES2 Romanian-English archives into
# data/ (archives already present are not downloaded again), unpacks them,
# and concatenates the two corpora into data/corpus.en and data/corpus.ro.
# Paths are relative, so run this from the directory that holds data/.

# Stop on the first failing command, unset variable, or failed pipeline
# stage, so a broken download or a failed cd cannot produce a bogus corpus.
set -euo pipefail
# Echo script lines as they are read. This used to be `-v` on the shebang
# line, which is lost when the script is invoked as `bash download-files.sh`.
set -v

# fetch URL DEST
# Download URL to DEST unless DEST already exists as a regular file.
# Returns non-zero (and leaves DEST absent) if the download fails.
fetch() {
  local url=$1
  local dest=$2

  if [[ -f "$dest" ]]; then
    return 0
  fi

  # Download under a temporary name and rename only on success: `wget -O`
  # creates the output file even when the transfer fails, and a partial
  # file at DEST would make the existence check skip the download forever.
  if ! wget "$url" -O "${dest}.part"; then
    rm -f -- "${dest}.part"
    printf 'error: failed to download %s\n' "$url" >&2
    return 1
  fi
  mv -- "${dest}.part" "$dest"
}

# wget -O does not create missing parent directories.
mkdir -p data

fetch 'http://www.statmt.org/europarl/v7/ro-en.tgz' data/ro-en.tgz
# The URL is quoted because it contains `?`, which the shell would
# otherwise treat as a glob character.
fetch 'http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip' \
  data/SETIMES2.ro-en.txt.zip

# Unpack and merge inside a subshell so the working directory is restored
# on every exit path, not only when the final `cd ..` is reached.
(
  cd data/
  tar -xf ro-en.tgz
  # -o: overwrite existing files without prompting, so reruns stay
  # non-interactive.
  unzip -o SETIMES2.ro-en.txt.zip

  cat europarl-v7.ro-en.en SETIMES2.en-ro.en > corpus.en
  cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro > corpus.ro
)