Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-08-10 21:20:10 +0300
committer: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-08-10 21:20:10 +0300
commit: 0b46f1a0a3b9dc9d7da86d05af120349a6f0da16 (patch)
tree: 380413ac1b4991e35db4a96f65221ce14f563ff8
parent: 84c17f4a6d14da7b918ab64543713f674c919b75 (diff)
parent: 6d367ba637102072f148c297b471f03ffa924d78 (diff)
Merge branch 'master' into mjd/mergewithpmaster [branch: mjd/mergewithpmaster]
-rwxr-xr-x  data/download-data.sh  13
-rw-r--r--  tests/training/basics/test_gzipped_train_sets.sh  3
2 files changed, 7 insertions, 9 deletions
diff --git a/data/download-data.sh b/data/download-data.sh
index 6351d77..f142eda 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -11,15 +11,16 @@ for file in ${MODEL_FILES[@]}; do
echo Downloading $file ...
mkdir -p $(dirname $file)
+ # Download the file
+ test -s $file || wget -nv -O- $URL/$file > $file
+
+ # Uncompress if needed
if [[ $file = *.gz ]]; then
target="${file%.*}"
-
- if [ ! -s $target ]; then
- wget -nv -O- $URL/$file | gzip -dc > $target
- fi
+ test -s $target || gzip -dc $file > $target
fi
done
# Get de-BPEed small training data
-test -s europarl.de-en/corpus.small.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.de.gz
-test -s europarl.de-en/corpus.small.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.en.gz
+test -s europarl.de-en/corpus.small.de.gz || head -n 100000 europarl.de-en/corpus.bpe.de | sed 's/@@ //g' | gzip > europarl.de-en/corpus.small.de.gz
+test -s europarl.de-en/corpus.small.en.gz || head -n 100000 europarl.de-en/corpus.bpe.en | sed 's/@@ //g' | gzip > europarl.de-en/corpus.small.en.gz
diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh
index 528c0eb..576b83a 100644
--- a/tests/training/basics/test_gzipped_train_sets.sh
+++ b/tests/training/basics/test_gzipped_train_sets.sh
@@ -7,9 +7,6 @@ set -e
rm -rf gzip gzip.log
mkdir -p gzip
-test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz
-test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz
-
$MRT_MARIAN/marian \
--no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \
-m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \