diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2021-03-26 16:15:23 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-26 16:15:23 +0300 |
commit | 6d5921cc7de91f4e915b59e9c52c9a76c4e99b00 (patch) | |
tree | 4a0e1d185b9b8bcdb261ad98bfcd5a08c8d54e35 | |
parent | c19b7814d71febf1053bd93af6ac314b46204092 (diff) | |
parent | 73a35c57398e2e3bbf3d23dd7dc6c84e1d725c38 (diff) |
Merge pull request #20 from lefterav/master
Filenames from downloaded SETIMES Romanian corpus have changed
-rw-r--r-- | training-basics-sentencepiece/README.md | 6 |
-rwxr-xr-x | training-basics-sentencepiece/run-me.sh | 6 |
-rwxr-xr-x | training-basics/scripts/download-files.sh | 6 |
3 files changed, 9 insertions, 9 deletions
diff --git a/training-basics-sentencepiece/README.md b/training-basics-sentencepiece/README.md
index 180445e..c842459 100644
--- a/training-basics-sentencepiece/README.md
+++ b/training-basics-sentencepiece/README.md
@@ -142,11 +142,11 @@ unzip SETIMES2.ro-en.txt.zip
 gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz

 # create corpus files
-cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en
-cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro
+cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en
+cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro

 # clean
-rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-*
+rm ro-en.tgz SETIMES* corpus.bt.* europarl-*

 # change back into main directory
 cd ..
diff --git a/training-basics-sentencepiece/run-me.sh b/training-basics-sentencepiece/run-me.sh
index 54a59d8..0ef169d 100755
--- a/training-basics-sentencepiece/run-me.sh
+++ b/training-basics-sentencepiece/run-me.sh
@@ -44,11 +44,11 @@ then
  gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz

  # create corpus files
- cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en
- cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro
+ cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en
+ cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro

  # clean
- rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-*
+ rm ro-en.tgz SETIMES* corpus.bt.* europarl-*

  # change back into main directory
  cd ..
diff --git a/training-basics/scripts/download-files.sh b/training-basics/scripts/download-files.sh
index ffd6f47..fbc01a8 100755
--- a/training-basics/scripts/download-files.sh
+++ b/training-basics/scripts/download-files.sh
@@ -14,10 +14,10 @@ unzip SETIMES2.ro-en.txt.zip
 gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz

 # create corpus files
-cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en
-cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro
+cat europarl-v7.ro-en.en SETIMES.en-ro.en corpus.bt.ro-en.en > corpus.en
+cat europarl-v7.ro-en.ro SETIMES.en-ro.ro corpus.bt.ro-en.ro > corpus.ro

 # clean
-rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-*
+rm ro-en.tgz SETIMES* corpus.bt.* europarl-*

 cd ..