Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-examples.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2018-03-24 02:52:14 +0300
committer: GitHub <noreply@github.com>, 2018-03-24 02:52:14 +0300
commit: 8be005eb5d6ff2135cc5844a4e58543df954839a (patch)
tree: 118ebff4d8fc7cce05c5fac60464ccbd072e3d7d
parent: 974bbfd9f984ba3230097f64840d4f2d88ee4589 (diff)
Add missing test2017 to preprocessing
-rwxr-xr-x  wmt2017-transformer/scripts/preprocess-data.sh  6
1 files changed, 3 insertions, 3 deletions
diff --git a/wmt2017-transformer/scripts/preprocess-data.sh b/wmt2017-transformer/scripts/preprocess-data.sh
index 3a968a5..309a646 100755
--- a/wmt2017-transformer/scripts/preprocess-data.sh
+++ b/wmt2017-transformer/scripts/preprocess-data.sh
@@ -16,7 +16,7 @@ mosesdecoder=../tools/moses-scripts
subword_nmt=../tools/subword-nmt
# tokenize
-for prefix in corpus valid test2014 test2015 test2016
+for prefix in corpus valid test2014 test2015 test2016 test2017
do
cat data/$prefix.$SRC \
| $mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l $SRC \
@@ -39,7 +39,7 @@ $mosesdecoder/scripts/recaser/train-truecaser.perl -corpus data/corpus.tok.$SRC
$mosesdecoder/scripts/recaser/train-truecaser.perl -corpus data/corpus.tok.$TRG -model model/tc.$TRG
# apply truecaser (cleaned training corpus)
-for prefix in corpus valid test2014 test2015 test2016
+for prefix in corpus valid test2014 test2015 test2016 test2017
do
$mosesdecoder/scripts/recaser/truecase.perl -model model/tc.$SRC < data/$prefix.tok.$SRC > data/$prefix.tc.$SRC
test -f data/$prefix.tok.$TRG || continue
@@ -50,7 +50,7 @@ done
cat data/corpus.tc.$SRC data/corpus.tc.$TRG | $subword_nmt/learn_bpe.py -s $bpe_operations > model/$SRC$TRG.bpe
# apply BPE
-for prefix in corpus valid test2014 test2015 test2016
+for prefix in corpus valid test2014 test2015 test2016 test2017
do
$subword_nmt/apply_bpe.py -c model/$SRC$TRG.bpe < data/$prefix.tc.$SRC > data/$prefix.bpe.$SRC
test -f data/$prefix.tc.$TRG || continue