Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2019-08-10 13:59:05 +0300
committer Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> 2019-08-10 13:59:05 +0300
commit 6d367ba637102072f148c297b471f03ffa924d78 (patch)
tree 23f5e7aaa625c4d1e70d1d9fd22783ab23e6e6d3
parent d316102d35a5ffdde5fde92a4c2dde93e9015160 (diff)
Fix downloading data
-rwxr-xr-x data/download-data.sh 13
-rw-r--r-- tests/training/basics/test_gzipped_train_sets.sh 3
2 files changed, 7 insertions, 9 deletions
diff --git a/data/download-data.sh b/data/download-data.sh
index 6351d77..f142eda 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -11,15 +11,16 @@ for file in ${MODEL_FILES[@]}; do
echo Downloading $file ...
mkdir -p $(dirname $file)
+ # Download the file
+ test -s $file || wget -nv -O- $URL/$file > $file
+
+ # Uncompress if needed
if [[ $file = *.gz ]]; then
target="${file%.*}"
-
- if [ ! -s $target ]; then
- wget -nv -O- $URL/$file | gzip -dc > $target
- fi
+ test -s $target || gzip -dc $file > $target
fi
done
# Get de-BPEed small training data
-test -s europarl.de-en/corpus.small.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.de.gz
-test -s europarl.de-en/corpus.small.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.en.gz
+test -s europarl.de-en/corpus.small.de.gz || head -n 100000 europarl.de-en/corpus.bpe.de | sed 's/@@ //g' | gzip > europarl.de-en/corpus.small.de.gz
+test -s europarl.de-en/corpus.small.en.gz || head -n 100000 europarl.de-en/corpus.bpe.en | sed 's/@@ //g' | gzip > europarl.de-en/corpus.small.en.gz
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
index 528c0eb..576b83a 100644
--- a/tests/training/basics/test_gzipped_train_sets.sh
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -7,9 +7,6 @@ set -e
rm -rf gzip gzip.log
mkdir -p gzip
-test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz
-test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz
-
$MRT_MARIAN/marian \
--no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \
-m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \