diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2022-10-28 18:15:46 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2022-10-28 18:15:46 +0300 |
commit | d0822b6f3a9db533e33efe4ee8bdbe8061bc2bf7 (patch) | |
tree | dadcf2604200ae7ee439a0f2954d93d4db69b865 | |
parent | 1826dadad3d5a12b7bec912834ecb0fe713deb5c (diff) |
Temporarily use mirror server for downloading models and data
-rwxr-xr-x | data/download-data.sh | 26 | ||||
-rw-r--r-- | models/download-models.sh | 16 |
2 files changed, 36 insertions, 6 deletions
```diff
diff --git a/data/download-data.sh b/data/download-data.sh
index ce6ba2d..4397c50 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -4,6 +4,12 @@
 # https://github.com/marian-nmt/marian-regression-tests
 
 URL=https://romang.blob.core.windows.net/mariandev/regression-tests/data
+TOKEN="${SAS_TOKEN:-}"
+
+# If the SAS token is not provided, switch to to the mirror server
+if [ -z $TOKEN ]; then
+    URL=http://data.statmt.org/romang/marian-regression-tests/data
+fi
 
 # Each tarball is a .tar.gz file that contains a single directory of the same
 # name as the tarball
@@ -12,11 +18,23 @@ DATA_TARBALLS=(
     exdb_mnist
 )
 
+AZCOPY=true
+if ! grep -q "blob\.core\.windows\.net" <<< "$URL"; then
+    AZCOPY=false
+elif ! command -v azcopy &> /dev/null; then
+    echo "Warning: 'azcopy' is not installed in your system. Using wget."
+    AZCOPY=false
+fi
+
 for name in ${DATA_TARBALLS[@]}; do
     file=$name.tar.gz
 
     echo Downloading checksum for $file ...
-    wget -nv -O- $URL/$file.md5 > $name.md5.newest
+    if $AZCOPY; then
+        azcopy copy "$URL/$file.md5?$TOKEN" $name.md5.newest
+    else
+        wget -nv -O- $URL/$file.md5 > $name.md5.newest
+    fi
 
     # Do not download if the checksum files are identical, i.e. the archive has
     # not been updated since it was downloaded last time
@@ -24,7 +42,11 @@ for name in ${DATA_TARBALLS[@]}; do
         echo File $file does not need to be updated
     else
         echo Downloading $file ...
-        wget -nv $URL/$file
+        if $AZCOPY; then
+            azcopy copy "$URL/$file?$TOKEN" .
+        else
+            wget -nv $URL/$file
+        fi
         # Extract the archive
         tar zxf $file
         # Remove archive to save disk space
diff --git a/models/download-models.sh b/models/download-models.sh
index f3a7199..ccd60f5 100644
--- a/models/download-models.sh
+++ b/models/download-models.sh
@@ -10,6 +10,12 @@
 # https://github.com/marian-nmt/marian-regression-tests
 
 URL=https://romang.blob.core.windows.net/mariandev/regression-tests/models
+TOKEN="${SAS_TOKEN:-}"
+
+# If the SAS token is not provided, switch to to the mirror server
+if [ -z $TOKEN ]; then
+    URL=http://data.statmt.org/romang/marian-regression-tests/models
+fi
 
 # Each tarball is a .tar.gz file that contains a single directory of the same
 # name as the tarball without .tar.gz
@@ -28,8 +34,10 @@ MODEL_TARBALLS=(
 )
 
 AZCOPY=true
-if ! command -v azcopy &> /dev/null; then
-    echo "Warning: 'azcopy' is not installed in your system. Downloading with 'wget'."
+if ! grep -q "blob\.core\.windows\.net" <<< "$URL"; then
+    AZCOPY=false
+elif ! command -v azcopy &> /dev/null; then
+    echo "Warning: 'azcopy' is not installed in your system. Using wget."
     AZCOPY=false
 fi
 
@@ -50,7 +58,7 @@ for model in ${MODEL_TARBALLS[@]}; do
 
     echo Downloading checksum for $file ...
     if $AZCOPY; then
-        azcopy copy "$URL/$file.md5" $model.md5.newest
+        azcopy copy "$URL/$file.md5?$TOKEN" $model.md5.newest
     else
         wget -nv -O- $URL/$file.md5 > $model.md5.newest
     fi
@@ -62,7 +70,7 @@ for model in ${MODEL_TARBALLS[@]}; do
     else
         echo Downloading $file ...
         if $AZCOPY; then
-            azcopy copy "$URL/$file" .
+            azcopy copy "$URL/$file?$TOKEN" .
         else
             wget -nv $URL/$file
         fi
```