Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2019-08-10 10:24:39 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2019-08-10 10:24:39 +0300
commit: 6ecfbdc64173fa71fc91fed6e9b241219b15d384 (patch)
tree: 63c4a076ae696d5411cf07c3c68ef7c679933619
parent: db743d7fff2e07d9f839fbb3393dfbb3aabee2cb (diff)
Fix preparing small de-BPEed data
-rwxr-xr-x  data/download-data.sh  6
1 files changed, 3 insertions, 3 deletions
diff --git a/data/download-data.sh b/data/download-data.sh
index 15eed62..6351d77 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -20,6 +20,6 @@ for file in ${MODEL_FILES[@]}; do
fi
done
-# Get de-BPEed training data
-test -s europarl.de-en/corpus.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | sed 's/@@ //g' | pigz > europarl.de-en/corpus.de.gz
-test -s europarl.de-en/corpus.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | sed 's/@@ //g' | pigz > europarl.de-en/corpus.en.gz
+# Get de-BPEed small training data
+test -s europarl.de-en/corpus.small.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.de.gz
+test -s europarl.de-en/corpus.small.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.en.gz