diff options
author | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2018-11-12 18:11:45 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk> | 2018-11-12 18:11:45 +0300 |
commit | f9c210b163d470d191e7383962488c81230a8818 (patch) | |
tree | 83105636349a411912768ea9f41e40d88bf628b0 | |
parent | 610885d0e1beb1c7fd132119256094a6fc04854d (diff) | |
parent | c86c7e379f9e28fd314a6b58054e016792dd5ab9 (diff) |
Merge branch 'master' into updates-from-testsanity
27 files changed, 122 insertions, 30 deletions
@@ -32,3 +32,4 @@ models/transformer/*.bpe data/*/corpus.* data/*/*.bpe data/*/truecase* +data/*/*.gz diff --git a/models/wmt16_systems/marian.en-de.scorer.yml b/models/wmt16_systems/marian.en-de.scorer.yml new file mode 100644 index 0000000..645eb07 --- /dev/null +++ b/models/wmt16_systems/marian.en-de.scorer.yml @@ -0,0 +1,10 @@ +relative-paths: true +type: amun +model: en-de/model.npz +dim-emb: 500 +vocabs: + - en-de/vocab.en.json + - en-de/vocab.de.json +dim-vocabs: + - 85000 + - 85000 diff --git a/tests/interface/config/test_load_config.sh b/tests/interface/config/test_load_config.sh index 742e892..ff97b07 100644 --- a/tests/interface/config/test_load_config.sh +++ b/tests/interface/config/test_load_config.sh @@ -30,10 +30,10 @@ grep -q "mini-batch: 8" load_config.log grep -q "dim-rnn: 32" load_config.log grep -q "dim-emb: 16" load_config.log -cat no_config.log | grep -v "\[memory\]" | $MRT_TOOLS/strip-timestamps.sh > no_config.out -cat load_config.log | grep -v "\[memory\]" | $MRT_TOOLS/strip-timestamps.sh > load_config.out +cat no_config.log | grep -vP "\[(memory|marian)\]" | $MRT_TOOLS/strip-timestamps.sh > no_config.out +cat load_config.log | grep -vP "\[(memory|marian)\]" | $MRT_TOOLS/strip-timestamps.sh > load_config.out -diff no_config.out load_config.out > load_config.diff +diff load_config.out no_config.out > load_config.diff # Exit with success code exit 0 diff --git a/tests/interface/version/test_model_has_version.sh b/tests/interface/version/test_model_has_version.sh index 718bc42..62e0578 100644 --- a/tests/interface/version/test_model_has_version.sh +++ b/tests/interface/version/test_model_has_version.sh @@ -16,12 +16,12 @@ $MRT_MARIAN/build/marian \ # Check if the version is logged for newly started training test -e version.log -grep -qP "created with Marian v[1-9]+\.[0-9]+\.[0-9]+\+.*" version.log +grep -qP "creat.* Marian v[1-9]+\.[0-9]+\.[0-9]+.*" version.log rm -f version.log # Check if the model contains a version test -e version/model.npz -python3 $MRT_MARIAN/scripts/contrib/model_info.py -s -m version/model.npz | grep -qP "version: v[1-9]+\.[0-9]+\.[0-9]+\+.*" +python3 $MRT_MARIAN/scripts/contrib/model_info.py -s -m version/model.npz | grep -qP "version: v[1-9]+\.[0-9]+\.[0-9]+.*" # Check if the version is printed during decoding echo "test" | $MRT_MARIAN/build/marian-decoder \ @@ -29,7 +29,7 @@ echo "test" | $MRT_MARIAN/build/marian-decoder \ --log version.log test -e version.log -grep -qP "created with Marian v[1-9]+\.[0-9]+\.[0-9]+\+.*" version.log +grep -qP "creat.* Marian v[1-9]+\.[0-9]+\.[0-9]+.*" version.log # Exit with success code exit 0 diff --git a/tests/scorer/align/test_scorer_align.sh b/tests/scorer/align/test_scorer_align.sh index 38ccff9..6a3a788 100644 --- a/tests/scorer/align/test_scorer_align.sh +++ b/tests/scorer/align/test_scorer_align.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/text.src.in $(pwd)/text.trg.in --alignment --mini-batch 16 \ | sed 's/^.* ||| //' > align.out diff --git a/tests/scorer/align/test_scorer_align_batch_1.sh b/tests/scorer/align/test_scorer_align_batch_1.sh index 848589b..4d0069f 100644 --- a/tests/scorer/align/test_scorer_align_batch_1.sh +++ b/tests/scorer/align/test_scorer_align_batch_1.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/text.src.in $(pwd)/text.trg.in --alignment --mini-batch 1 \ | sed 's/^.* ||| //' > align.b1.out diff --git a/tests/scorer/align/test_scorer_align_nbest.sh b/tests/scorer/align/test_scorer_align_nbest.sh index 69b419c..91a293d 100644 --- a/tests/scorer/align/test_scorer_align_nbest.sh +++ b/tests/scorer/align/test_scorer_align_nbest.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/text.src.in $(pwd)/nbest.trg.in --alignment --mini-batch 16 --n-best > nbest.out # Compare n-best lists diff --git a/tests/scorer/align/test_scorer_soft_align.sh b/tests/scorer/align/test_scorer_soft_align.sh index faf90a5..f0671ec 100644 --- a/tests/scorer/align/test_scorer_soft_align.sh +++ b/tests/scorer/align/test_scorer_soft_align.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/text.src.in $(pwd)/text.trg.in --alignment soft --mini-batch 16 \ | sed 's/^.* ||| //' > soft.out diff --git a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh index f7602df..63deee1 100644 --- a/tests/scorer/nbest/test_compare_parallel_and_nbest.sh +++ b/tests/scorer/nbest/test_compare_parallel_and_nbest.sh @@ -7,11 +7,11 @@ test -e text.srcall.in || cat text.src.in | sed 'p;p;p;p' > text.srcall.in test -e text.trg.in || cat text.nbest.in | sed 's/ ||| /\t/g' | cut -f2 > text.trg.in # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t text.srcall.in text.trg.in \ > parallel.scores.out -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ --n-best -t text.src.in text.nbest.in \ > parallel.nbest.out diff --git a/tests/scorer/nbest/test_custom_feature_name.sh b/tests/scorer/nbest/test_custom_feature_name.sh index 5ab0e37..51e890e 100644 --- a/tests/scorer/nbest/test_custom_feature_name.sh +++ b/tests/scorer/nbest/test_custom_feature_name.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ --n-best --n-best-feature FeatureName -t text.src.in text.nbest.in \ > custom.out diff --git a/tests/scorer/nbest/test_score_nbest_list.sh b/tests/scorer/nbest/test_score_nbest_list.sh index 3ada7cd..3bf7a41 100644 --- a/tests/scorer/nbest/test_score_nbest_list.sh +++ b/tests/scorer/nbest/test_score_nbest_list.sh @@ -4,7 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ --n-best -t text.src.in text.nbest.in \ > nbest.out diff --git a/tests/scorer/scores/test_compare_with_decoder_scores.sh b/tests/scorer/scores/test_compare_with_decoder_scores.sh index df6283c..799ae81 100644 --- a/tests/scorer/scores/test_compare_with_decoder_scores.sh +++ b/tests/scorer/scores/test_compare_with_decoder_scores.sh @@ -16,8 +16,7 @@ cat text.in | perl -ne 'for$i(1..12){print}' > compare.src cat nbest.out | sed 's/ ||| /\t/g' | cut -f2 > compare.trg # Run rescorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml \ - -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/compare.src $(pwd)/compare.trg > compare.scorer.out # Compare scores diff --git a/tests/scorer/scores/test_scores.sh b/tests/scorer/scores/test_scores.sh index b026509..b1a87bc 100644 --- a/tests/scorer/scores/test_scores.sh +++ b/tests/scorer/scores/test_scores.sh @@ -4,8 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml \ - -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/scores.src.in $(pwd)/scores.trg.in > scores.out # Compare scores diff --git a/tests/scorer/scores/test_summary.sh b/tests/scorer/scores/test_summary.sh index 87caa32..e158fdd 100644 --- a/tests/scorer/scores/test_summary.sh +++ b/tests/scorer/scores/test_summary.sh @@ -4,8 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml \ - -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/scores.src.in $(pwd)/scores.trg.in --summary > summary.out # Compare scores diff --git a/tests/scorer/scores/test_summary_perplexity.sh b/tests/scorer/scores/test_summary_perplexity.sh index 5e7f246..2493379 100644 --- a/tests/scorer/scores/test_summary_perplexity.sh +++ b/tests/scorer/scores/test_summary_perplexity.sh @@ -4,8 +4,7 @@ set -e # Run scorer -$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.yml \ - -m $MRT_MODELS/wmt16_systems/en-de/model.npz \ +$MRT_MARIAN/build/marian-scorer -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml \ -t $(pwd)/scores.src.in $(pwd)/scores.trg.in --summary perplexity > summary_perplexity.out # Compare scores diff --git a/tests/training/basics/.gitignore b/tests/training/basics/.gitignore index 61de908..af307e4 100644 --- a/tests/training/basics/.gitignore +++ b/tests/training/basics/.gitignore @@ -6,3 +6,4 @@ sqlite_seed batch_fit *.temp vocab.*.yml +gzip diff --git a/tests/training/basics/gzip.expected b/tests/training/basics/gzip.expected new file mode 100644 index 0000000..cc069b6 --- /dev/null +++ b/tests/training/basics/gzip.expected @@ -0,0 +1,5 @@ +447.89 +374.88 +324.98 +284.49 +248.72 diff --git a/tests/training/basics/setup.sh b/tests/training/basics/setup.sh index 6088de5..1bcb2f9 100644 --- a/tests/training/basics/setup.sh +++ b/tests/training/basics/setup.sh @@ -2,3 +2,8 @@ test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1 test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1 test -f $MRT_DATA/europarl.de-en/toy.bpe.en || exit 1 test -f $MRT_DATA/europarl.de-en/toy.bpe.de || exit 1 + +test -s vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml +test -s vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml +test -s vocab.de.yml +test -s vocab.en.yml diff --git a/tests/training/basics/test_gzipped_train_sets.sh b/tests/training/basics/test_gzipped_train_sets.sh new file mode 100644 index 0000000..5f1d596 --- /dev/null +++ b/tests/training/basics/test_gzipped_train_sets.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Exit on error +set -e + +# Test code goes here +rm -rf gzip gzip.log +mkdir -p gzip + +test -e $MRT_DATA/europarl.de-en/corpus.bpe.de.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.de | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.de.gz +test -e $MRT_DATA/europarl.de-en/corpus.bpe.en.gz || cat $MRT_DATA/europarl.de-en/corpus.bpe.en | gzip > $MRT_DATA/europarl.de-en/corpus.bpe.en.gz + +$MRT_MARIAN/build/marian \ + --no-shuffle --seed 1111 --dim-emb 64 --dim-rnn 64 \ + -m gzip/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de}.gz -v vocab.en.yml vocab.de.yml \ + --log gzip.log --disp-freq 10 --after-batches 50 + +test -e gzip/model.npz +test -e gzip.log + +cat gzip.log | $MRT_TOOLS/extract-costs.sh > gzip.out +$MRT_TOOLS/diff-floats.py gzip.out gzip.expected -p 0.1 > gzip.diff + +# Exit with success code +exit 0 diff --git a/tests/training/basics/test_sqlite.sh b/tests/training/basics/test_sqlite.sh index c2e0302..68821f7 100644 --- a/tests/training/basics/test_sqlite.sh +++ b/tests/training/basics/test_sqlite.sh @@ -10,8 +10,7 @@ mkdir -p sqlite $MRT_MARIAN/build/marian \ --seed 1111 --no-shuffle --dim-emb 64 --dim-rnn 128 --optimizer sgd \ -m sqlite/model.nosqlite.npz \ - -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} \ - -v sqlite/vocab.en.yml sqlite/vocab.de.yml \ + -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --disp-freq 10 --after-batches 100 \ --log nosqlite.log diff --git a/tests/training/basics/test_sqlite_random_seed.sh b/tests/training/basics/test_sqlite_random_seed.sh index 6c80ec9..3da1769 100644 --- a/tests/training/basics/test_sqlite_random_seed.sh +++ b/tests/training/basics/test_sqlite_random_seed.sh @@ -10,8 +10,7 @@ mkdir -p sqlite_seed $MRT_MARIAN/build/marian \ --seed 3333 --dim-emb 64 --dim-rnn 128 --optimizer sgd \ -m sqlite_seed/model1.npz \ - -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} --sqlite \ - -v sqlite_seed/vocab.en.yml sqlite_seed/vocab.de.yml \ + -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} --sqlite -v vocab.en.yml vocab.de.yml \ --disp-freq 2 --after-batches 50 \ --log sqlite_seed_1.log diff --git a/tests/training/basics/test_toy_vocab.sh b/tests/training/basics/test_toy_vocab.sh index c8401a6..6048e1b 100644 --- a/tests/training/basics/test_toy_vocab.sh +++ b/tests/training/basics/test_toy_vocab.sh @@ -9,7 +9,7 @@ rm -f toy/* toy.log $MRT_MARIAN/build/marian \ --seed 1111 --dim-emb 256 --dim-rnn 512 \ - -m toy/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v toy/vocab.en.yml toy/vocab.de.yml \ + -m toy/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v toy/vocab.de.yml toy/vocab.en.yml \ --log toy.log --disp-freq 5 -e 5 test -e toy/vocab.en.yml diff --git a/tests/training/basics/test_translation_script.sh b/tests/training/basics/test_translation_script.sh index d415f53..84a7cd0 100644 --- a/tests/training/basics/test_translation_script.sh +++ b/tests/training/basics/test_translation_script.sh @@ -10,8 +10,7 @@ mkdir -p trans $MRT_MARIAN/build/marian \ --seed 2222 --no-shuffle --dim-emb 128 --dim-rnn 256 --maxi-batch 1 --mini-batch 16 \ -m trans/model.npz \ - -t $MRT_DATA/europarl.de-en/corpus.bpe.en $MRT_DATA/europarl.de-en/corpus.bpe.de \ - -v vocab.en.yml vocab.de.yml \ + -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ --dim-vocabs 50000 50000 \ --disp-freq 30 --valid-freq 60 --after-batches 150 \ --valid-metrics cross-entropy translation --valid-script-path ./trans_script.sh \ diff --git a/tests/training/data-weighting/.gitignore b/tests/training/data-weighting/.gitignore index 7bf6c5d..ae9f462 100644 --- a/tests/training/data-weighting/.gitignore +++ b/tests/training/data-weighting/.gitignore @@ -21,4 +21,6 @@ valid_script.temp maxibatch word_maxibatch compare -compare.*.weights.txt +compare*.weights.txt +warn +warn*.weights.txt diff --git a/tests/training/data-weighting/test_length_mismatch_warnings.sh b/tests/training/data-weighting/test_length_mismatch_warnings.sh new file mode 100644 index 0000000..9a52fcc --- /dev/null +++ b/tests/training/data-weighting/test_length_mismatch_warnings.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Exit on error +set -e + +# Test code goes here +rm -rf warn warn.log warn.weights.txt +mkdir -p warn + + +cat $MRT_DATA/europarl.de-en/toy.bpe.en | sed -r 's/[^ ]+/1/g' > warn.weights.txt +sed -i '2s/1 1 /1 /g' warn.weights.txt +sed -i '3s/1 /1 1 /g' warn.weights.txt + +$MRT_MARIAN/build/marian \ + --seed 1111 --dim-emb 64 --dim-rnn 128 --optimizer sgd \ + -m warn/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \ + --log warn.log -e 1 \ + --data-weighting warn.weights.txt --data-weighting-type word + +test -e warn.log +grep -qi "[warn].*number of weights.* does not match.* words.* line #1" warn.log +grep -qi "[warn].*number of weights.* does not match.* words.* line #2" warn.log + + +# Exit with success code +exit 0 diff --git a/tests/training/model-types/.gitignore b/tests/training/model-types/.gitignore index 2742094..9a5cf30 100644 --- a/tests/training/model-types/.gitignore +++ b/tests/training/model-types/.gitignore @@ -1 +1,2 @@ transformer +decoder_c_tt diff --git a/tests/training/model-types/test_amun_has_decoder_c_tt.sh b/tests/training/model-types/test_amun_has_decoder_c_tt.sh new file mode 100644 index 0000000..fb44930 --- /dev/null +++ b/tests/training/model-types/test_amun_has_decoder_c_tt.sh @@ -0,0 +1,22 @@ +#!/bin/bash -x + +# Exit on error +set -e + +# Test code goes here +rm -rf decoder_c_tt +mkdir -p decoder_c_tt + +opts="--no-shuffle --seed 1111 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd --dim-emb 64 --dim-rnn 128" + +$MRT_MARIAN/build/marian \ + -m decoder_c_tt/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ + $opts --after-batches 1 + +test -e decoder_c_tt/model.npz + +python $MRT_MARIAN/scripts/contrib/model_info.py -m decoder_c_tt/model.npz > decoder_c_tt.out +grep -q "decoder_c_tt" decoder_c_tt.out + +# Exit with success code +exit 0 |