diff options
author | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2019-08-10 10:24:39 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2019-08-10 10:24:39 +0300 |
commit | 6ecfbdc64173fa71fc91fed6e9b241219b15d384 (patch) | |
tree | 63c4a076ae696d5411cf07c3c68ef7c679933619 | |
parent | db743d7fff2e07d9f839fbb3393dfbb3aabee2cb (diff) |
Fix preparing small de-BPEed data
-rwxr-xr-x | data/download-data.sh | 6 |
1 files changed, 3 insertions, 3 deletions
```diff
diff --git a/data/download-data.sh b/data/download-data.sh
index 15eed62..6351d77 100755
--- a/data/download-data.sh
+++ b/data/download-data.sh
@@ -20,6 +20,6 @@ for file in ${MODEL_FILES[@]}; do
     fi
 done
 
-# Get de-BPEed training data
-test -s europarl.de-en/corpus.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | sed 's/@@ //g' | pigz > europarl.de-en/corpus.de.gz
-test -s europarl.de-en/corpus.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | sed 's/@@ //g' | pigz > europarl.de-en/corpus.en.gz
+# Get de-BPEed small training data
+test -s europarl.de-en/corpus.small.de.gz || pigz -dc europarl.de-en/corpus.bpe.de.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.de.gz
+test -s europarl.de-en/corpus.small.en.gz || pigz -dc europarl.de-en/corpus.bpe.en.gz | head -n 100000 | sed 's/@@ //g' | pigz > europarl.de-en/corpus.small.en.gz
```