From 73a35c57398e2e3bbf3d23dd7dc6c84e1d725c38 Mon Sep 17 00:00:00 2001 From: "Eleftherios Avramidis (Open Source Lab by DFKI)" Date: Wed, 6 Jan 2021 19:22:51 +0100 Subject: Filenames from downloaded SETIMES Romanian corpus have changed, adapt example scripts. --- training-basics-sentencepiece/README.md | 6 +++--- training-basics-sentencepiece/run-me.sh | 6 +++--- training-basics/scripts/download-files.sh | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/training-basics-sentencepiece/README.md b/training-basics-sentencepiece/README.md index 180445e..c842459 100644 --- a/training-basics-sentencepiece/README.md +++ b/training-basics-sentencepiece/README.md @@ -142,11 +142,11 @@ unzip SETIMES2.ro-en.txt.zip gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz # create corpus files -cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en -cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro +cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en +cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro # clean -rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-* +rm ro-en.tgz SETIMES* corpus.bt.* europarl-* # change back into main directory cd .. diff --git a/training-basics-sentencepiece/run-me.sh b/training-basics-sentencepiece/run-me.sh index 54a59d8..0ef169d 100755 --- a/training-basics-sentencepiece/run-me.sh +++ b/training-basics-sentencepiece/run-me.sh @@ -44,11 +44,11 @@ then gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz # create corpus files - cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en - cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro + cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en + cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro # clean - rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-* + rm ro-en.tgz SETIMES* corpus.bt.* europarl-* # change back into main directory cd .. diff --git a/training-basics/scripts/download-files.sh b/training-basics/scripts/download-files.sh index ffd6f47..fbc01a8 100755 --- a/training-basics/scripts/download-files.sh +++ b/training-basics/scripts/download-files.sh @@ -14,10 +14,10 @@ unzip SETIMES2.ro-en.txt.zip gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz # create corpus files -cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en -cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro +cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en +cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro # clean -rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-* +rm ro-en.tgz SETIMES* corpus.bt.* europarl-* cd .. -- cgit v1.2.3