blob: 88432c1ee002547c58a2cc0474c30877d9dc2999 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
#!/bin/bash
# Download model tarballs from Marian storage on Azure.
#
# Usage examples:
# ./download-models.sh # download all tarballs
# ./download-models.sh wngt19 # download only wngt19.tar.gz
#
# If you want to add new model files to our Azure storage, open an issue at
# https://github.com/marian-nmt/marian-regression-tests
# Base location of the model archives and their .md5 checksum files.
URL="https://romang.blob.core.windows.net/mariandev/regression-tests/models"

# Names of the available archives. <name>.tar.gz unpacks into a single
# directory called <name>.
MODEL_TARBALLS=(
  wmt16_systems   # Part of the En-De WMT16 model, http://data.statmt.org/wmt16_systems/en-de/
  wmt17_systems   # Part of the En-De WMT17 model, http://data.statmt.org/wmt17_systems/en-de/
  ape             # Multi-source Transformer trained on WMT16 APE Shared Task data with SentencePiece
  lmgec           # LM taken from http://data.statmt.org/romang/gec-naacl18/models.tgz
  rnn-spm         # Small De-En RNN model trained with SentencePiece
  transformer     # En-De transformer model from marian-examples/transformer
  wnmt18          # Student models from WNMT18
  wngt19          # Student models from WNGT19
  student-eten    # Et-En student model, https://github.com/browsermt/students
  factors         # Small En-De model with factorized vocabs, trained by Unbabel
  #char-s2s       # Character-level RNN model (obsolete, kept for reference)
)
#######################################
# Check whether a tarball was requested on the command line.
# Each requested name is matched separately as a substring of the tarball
# name, so several models can be requested in one invocation.
# Arguments: $1 - tarball file name, e.g. wngt19.tar.gz
#            $2... - names given on the command line
# Returns:   0 if the tarball name contains at least one of the names,
#            1 otherwise
#######################################
is_requested() {
  local file=$1 wanted
  shift
  for wanted in "$@"; do
    if [[ "$file" == *"$wanted"* ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Download and extract <model>.tar.gz into the current directory unless the
# stored checksum <model>.md5 is identical to the one on the server.
# Globals:   URL (read)
# Arguments: $1 - model name, i.e. the tarball name without .tar.gz
# Outputs:   progress messages on stdout, warnings and errors on stderr
# Returns:   0 if the model is up to date afterwards, non-zero otherwise
#######################################
update_model() {
  local model=$1
  local file=${model}.tar.gz
  local md5=${model}.md5
  local md5_new=${model}.md5.newest

  echo "Downloading checksum for $file ..."
  if ! wget -nv -O- "$URL/$file.md5" > "$md5_new" || [[ ! -s "$md5_new" ]]; then
    # Without a remote checksum the archive is still fetched, but nothing is
    # recorded, so the next run checks the server again.
    echo "Warning: cannot download checksum for $file" >&2
    rm -f -- "$md5_new"
  fi

  # Do not download if the checksum files are identical, i.e. the archive has
  # not been updated since it was downloaded last time
  if [[ -s "$md5_new" && -s "$md5" ]] && cmp -s -- "$md5" "$md5_new"; then
    echo "File $file does not need to be updated"
  else
    echo "Downloading $file ..."
    # -O overwrites a stale archive instead of saving the new one as
    # <file>.1, which tar would then never see.
    if ! wget -nv -O "$file" "$URL/$file" || ! tar zxf "$file"; then
      echo "Error: cannot download or extract $file" >&2
      # Keep the old checksum so that the next run retries this model.
      rm -f -- "$file" "$md5_new"
      return 1
    fi
    # Remove archive to save disk space
    rm -f -- "$file"
  fi

  # Record the new checksum only after the model has been updated.
  if [[ -e "$md5_new" ]]; then
    mv -- "$md5_new" "$md5"
  fi
}

if [ $# -gt 0 ]; then
  echo The list of parameters is not empty.
  echo "Skipping models not in the list: $*"
fi

failed=()
for model in "${MODEL_TARBALLS[@]}"; do
  # If an argument list is provided, download only tarballs that are present
  # in the list. Otherwise download all predefined tarballs
  if [ $# -gt 0 ] && ! is_requested "$model.tar.gz" "$@"; then
    echo "Skipping $model.tar.gz"
    continue
  fi
  # A failure of one model must not prevent the remaining ones from updating.
  update_model "$model" || failed+=("$model")
done

if [ "${#failed[@]}" -gt 0 ]; then
  echo "Failed to update: ${failed[*]}" >&2
fi
|