Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-examples.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <marcinjd@microsoft.com>, 2018-11-25 19:50:38 +0300
committer: Marcin Junczys-Dowmunt <marcinjd@microsoft.com>, 2018-11-25 19:50:38 +0300
commit: dbd7edb58eaa2d9836da078f28226291f639bf1b (patch)
tree: 89e037fc88f329b447ba8eefbd12d98be4b6e2e7
parent: 8bf8e5d89e84a6ece037a7b7b7929d75470bf218 (diff)
generate valid files with sacrebleu
-rwxr-xr-x  training-basics-spm/run-me.sh  17
1 files changed, 15 insertions, 2 deletions
diff --git a/training-basics-spm/run-me.sh b/training-basics-spm/run-me.sh
index 90ff701..62da8dd 100755
--- a/training-basics-spm/run-me.sh
+++ b/training-basics-spm/run-me.sh
@@ -16,9 +16,21 @@ then
exit 1
fi
+# get our fork of sacrebleu
+git clone https://github.com/marian-nmt/sacreBLEU
+
if [ ! -e "data/corpus.ro" ]
then
cd data
+
+ # create dev set
+ ../sacreBLEU/sacrebleu.py -t wmt16/dev -l ro-en --echo src > newsdev2016.ro
+ ../sacreBLEU/sacrebleu.py -t wmt16/dev -l ro-en --echo ref > newsdev2016.en
+
+ # create test set
+ ../sacreBLEU/sacrebleu.py -t wmt16 -l ro-en --echo src > newstest2016.ro
+ ../sacreBLEU/sacrebleu.py -t wmt16 -l ro-en --echo ref > newstest2016.en
+
# get En-Ro training data for WMT16
wget -nc http://www.statmt.org/europarl/v7/ro-en.tgz
wget -nc http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip -O SETIMES2.ro-en.txt.zip
@@ -39,6 +51,7 @@ then
cd ..
fi
+# create the model folder
mkdir -p model
# train model
@@ -76,5 +89,5 @@ cat data/newstest2016.ro \
--mini-batch 64 --maxi-batch 10 --maxi-batch-sort src > data/newstest2016.ro.output
# calculate bleu scores on dev and test set
-../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/newsdev2016.en < data/newsdev2016.ro.output
-../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/newstest2016.en < data/newstest2016.ro.output
+sacreBLEU/sacrebleu.py -t wmt16/dev -l ro-en < data/newsdev2016.ro.output
+sacreBLEU/sacrebleu.py -t wmt16 -l ro-en < data/newstest2016.ro.output