diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2017-11-25 18:24:02 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2017-11-25 18:24:02 +0300 |
commit | b9ea7fae8912265b4de070dd0058dd312ddbd2b5 (patch) | |
tree | 977a4afcb711cd2f3cb694b8f85e3389283880f5 | |
parent | 69abab9f74d9e7a461cd72156afbf25b128cedd8 (diff) |
fix wrong detok
-rw-r--r-- | training-basics/README.md | 7 | ||||
-rwxr-xr-x | training-basics/run-me.sh | 11 |
2 files changed, 10 insertions, 8 deletions
diff --git a/training-basics/README.md b/training-basics/README.md index e61f42e..cb96331 100644 --- a/training-basics/README.md +++ b/training-basics/README.md @@ -42,7 +42,8 @@ Next it executes a training run with `marian`: --valid-sets data/newsdev2016.bpe.ro data/newsdev2016.bpe.en \ --valid-script-path ./scripts/validate.sh \ --log model/train.log --valid-log model/valid.log \ - --seed 1111 --exponential-smoothing + --seed 1111 --exponential-smoothing \ + --normalize=0.6 --beam-size 6 ``` After training (the training should stop if cross-entropy on the validation set @@ -55,7 +56,7 @@ cat data/newsdev2016.bpe.ro \ | ../../build/marian-decoder -c model/model.npz.decoder.yml -m model/model.avg.npz -d $GPUS -b 12 -n \ | sed 's/\@\@ //g' \ | ../tools/moses-scripts/scripts/recaser/detruecase.perl \ - | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l ro \ + | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en \ > data/newsdev2016.ro.output ``` after which BLEU scores for the dev and test set are reported. Results should @@ -82,7 +83,7 @@ apart from the final single score (last line): cat $1 \ | sed 's/\@\@ //g' \ | ../tools/moses-scripts/scripts/recaser/detruecase.perl \ - | ../tools/moses-scripts/scripts/tokenizer/detokenize.perl -l ro \ + | ../tools/moses-scripts/scripts/tokenizer/detokenize.perl -l en \ | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/newsdev2016.en \ | sed -r 's/BLEU = ([0-9.]+),.*/\1/' ``` diff --git a/training-basics/run-me.sh b/training-basics/run-me.sh index 142ceff..421e74a 100755 --- a/training-basics/run-me.sh +++ b/training-basics/run-me.sh @@ -53,7 +53,8 @@ then --valid-sets data/newsdev2016.bpe.ro data/newsdev2016.bpe.en \ --valid-script-path ./scripts/validate.sh \ --log model/train.log --valid-log model/valid.log \ - --seed 1111 --exponential-smoothing + --seed 1111 --exponential-smoothing \ + --normalize=0.6 --beam-size=6 fi # collect 4 best models on dev set @@ -64,18 +65,18 @@ MODELS=`cat model/valid.log | grep translation | sort -rg -k8,8 -t' ' | cut -f4 # translate dev set cat data/newsdev2016.bpe.ro \ - | $MARIAN/build/marian-decoder -c model/model.npz.decoder.yml -m model/model.avg.npz -d $GPUS -b 12 -n \ + | $MARIAN/build/marian-decoder -c model/model.npz.decoder.yml -m model/model.avg.npz -d $GPUS -b 6 -n0.6 \ | sed 's/\@\@ //g' \ | ../tools/moses-scripts/scripts/recaser/detruecase.perl \ - | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l ro \ + | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en \ > data/newsdev2016.ro.output # translate test set cat data/newstest2016.bpe.ro \ - | $MARIAN/build/marian-decoder -c model/model.npz.decoder.yml -m model/model.avg.npz -d $GPUS -b 12 -n \ + | $MARIAN/build/marian-decoder -c model/model.npz.decoder.yml -m model/model.avg.npz -d $GPUS -b 6 -n0.6 \ | sed 's/\@\@ //g' \ | ../tools/moses-scripts/scripts/recaser/detruecase.perl \ - | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l ro \ + | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en \ > data/newstest2016.ro.output # calculate bleu scores on dev and test set |