diff options
author | Frank Seide <fseide@microsoft.com> | 2019-05-15 21:39:48 +0300 |
---|---|---|
committer | Frank Seide <fseide@microsoft.com> | 2019-05-15 21:39:48 +0300 |
commit | 71b473f29017933e68a513f7262044c46e39cccc (patch) | |
tree | e447fbd240043c521cdae88a680c9ea5365a3ba8 | |
parent | 41f3c2df46e946081a078a6bd90091b9c2515511 (diff) | |
parent | f863979bd69b076bd568b9592a1ff60116736fb7 (diff) |
Merge branch 'master' of https://github.com/marian-nmt/marian-regression-tests into fseide/fixes
41 files changed, 597 insertions, 19 deletions
@@ -24,10 +24,11 @@ models/wmt17_systems/scripts models/wmt17_systems/vars models/char-s2s models/wnmt18 -models/transformer/model.* -models/transformer/tc.* -models/transformer/vocab.* -models/transformer/*.bpe +models/*/model.npz*yml +models/*/*.npz +models/*/*.bpe +models/*/tc.* +models/*/vocab.* data/*/corpus.* data/*/*.bpe @@ -29,6 +29,7 @@ models: cd $@ && bash ./download-char-s2s.sh cd $@ && bash ./download-wnmt18.sh cd $@ && bash ./download-transformer.sh + cd $@ && bash ./download-lm.sh data: mkdir -p $@ @@ -5,7 +5,9 @@ Marian regression tests pure C++ with minimal dependencies. This repository contains the regression test framework for the main development -repository: `https://github.com/marian-nmt/marian-dev`. +repository: https://github.com/marian-nmt/marian-dev. +The tests are run automatically on Jenkins after each push to the master branch +and a successful compilation: http://vali.inf.ed.ac.uk/jenkins/view/marian/ ## Structure @@ -96,6 +98,7 @@ _Horizon 2020 Research and Innovation Programme_ under grant agreements 645487 ([Modern MT](http://www.modernmt.eu); 2015-2017), 644333 ([TraMOOC](http://tramooc.eu/); 2015-2017), 644402 ([HiML](http://www.himl.eu/); 2015-2017), +825303 ([Bergamot](https://browser.mt/); 2019-2021), the Amazon Academic Research Awards program, the World Intellectual Property Organization, and is based upon work supported in part by the Office of the Director of diff --git a/models/download-lm.sh b/models/download-lm.sh new file mode 100644 index 0000000..1ec8f78 --- /dev/null +++ b/models/download-lm.sh @@ -0,0 +1,10 @@ +#!/bin/bash -x + +test -e lmgec/lm.npz && exit + +mkdir -p lmgec +cd lmgec +wget -nv -nc http://data.statmt.org/romang/gec-naacl18/models.tgz +tar zxvf models.tgz lm.npz tc.model gec.bpe vocab.yml +rm models.tgz +cd .. diff --git a/models/lmgec/config.yml b/models/lmgec/config.yml new file mode 100644 index 0000000..a5c5146 --- /dev/null +++ b/models/lmgec/config.yml @@ -0,0 +1,6 @@ +relative-paths: true +model: lm.npz +vocabs: + - vocab.yml +mini-batch: 1 +maxi-batch: 1 diff --git a/models/lmgec/preprocess.sh b/models/lmgec/preprocess.sh new file mode 100644 index 0000000..8d2de7c --- /dev/null +++ b/models/lmgec/preprocess.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +ROOTDIR=$(realpath ../..) + +cat \ + | perl $ROOTDIR/tools/moses-scripts/scripts/recaser/detruecase.perl \ + | perl $ROOTDIR/tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en \ + | python ./nltk_tok.py \ + | perl $ROOTDIR/tools/moses-scripts/scripts/tokenizer/escape-special-chars.perl \ + | perl $ROOTDIR/tools/moses-scripts/scripts/recaser/truecase.perl --model tc.model \ + | perl $ROOTDIR/tools/subword-nmt/subword_nmt/apply_bpe.py -c gec.bpe @@ -37,7 +37,7 @@ if [[ ! -e $MRT_MARIAN/marian-decoder ]]; then fi # Check if required tools are present in marian directory -for cmd in marian marian-decoder marian-scorer marian-server marian-vocab; do +for cmd in marian marian-decoder marian-scorer marian-vocab; do if [ ! -e $MRT_MARIAN/$cmd ]; then echo "Error: '$MRT_MARIAN/$cmd' not found. Do you need to compile the toolkit first?" exit 1 diff --git a/tests/interface/config/.gitignore b/tests/interface/config/.gitignore index 9e59180..0734b6f 100644 --- a/tests/interface/config/.gitignore +++ b/tests/interface/config/.gitignore @@ -1,8 +1,8 @@ load_config +load_castup +load_alias -dump_config.yml -dump_minimal.yml -dump_relpaths.yml +dump_*.yml overwrite diff --git a/tests/interface/config/_test_dump_config_expand.sh b/tests/interface/config/_test_dump_config_expand.sh new file mode 100644 index 0000000..930d8b0 --- /dev/null +++ b/tests/interface/config/_test_dump_config_expand.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +##################################################################### +# SUMMARY: Test expanding alias options when dumping to a config file +# AUTHOR: snukky +# TAGS: future +##################################################################### + +# Exit on error +set -e + +rm -f dump_expand.{yml,out} + +# Run with no config file +$MRT_MARIAN/marian --best-deep --type s2s --mini-batch 8 --dim-rnn 32 --dim-emb 16 --after-batches 2 --dump-config expand > dump_expand.yml + +# Remove first line and paths to train sets and vocabs +cat dump_expand.yml | tail -n +2 | grep -v ' - ' > dump_expand.out + +# Compare +$MRT_TOOLS/diff.sh dump_expand.out dump_expand.expected > dump_expand.diff + +# Exit with success code +exit 0 diff --git a/tests/interface/config/_test_dump_config_minimal_alias.sh b/tests/interface/config/_test_dump_config_minimal_alias.sh new file mode 100644 index 0000000..16f7b32 --- /dev/null +++ b/tests/interface/config/_test_dump_config_minimal_alias.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +##################################################################### +# SUMMARY: Test dumping minimum needed options to a config file when using an alias +# AUTHOR: snukky +# TAGS: future +##################################################################### + +# Exit on error +set -e + +rm -f dump_alias.{yml,out} + +# Run with no config file +$MRT_MARIAN/marian --best-deep --type s2s --mini-batch 8 --dim-rnn 32 --dim-emb 16 --after-batches 2 --dump-config minimal > dump_alias.yml + +# Remove first line and paths to train sets and vocabs +cat dump_alias.yml | tail -n +2 | grep -v ' - ' > dump_alias.out + +# Compare +$MRT_TOOLS/diff.sh dump_alias.out dump_alias.expected > dump_alias.diff + +# Exit with success code +exit 0 diff --git a/tests/interface/config/_test_load_config_with_alias.sh b/tests/interface/config/_test_load_config_with_alias.sh new file mode 100644 index 0000000..dafaa67 --- /dev/null +++ b/tests/interface/config/_test_load_config_with_alias.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +##################################################################### +# SUMMARY: Test if an alias in a config file does not override other options +# AUTHOR: romang +# TAGS: future +##################################################################### + +# Exit on error +set -e + +rm -rf load_alias load_alias.log +mkdir -p load_alias + +# Run Marian +$MRT_MARIAN/marian --train-sets $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} --model load_alias/model.npz --vocabs vocab.de.yml vocab.en.yml --no-shuffle \ + --config load_alias.yml --log load_alias.log + +test -e load_alias/model.npz +test -e load_alias.log + +grep -q "type: transformer" load_alias.log +grep -q "learn-rate: 0.5" load_alias.log +grep -q "dim-emb: 16" load_alias.log + +# Exit with success code +exit 0 diff --git a/tests/interface/config/dump_alias.expected b/tests/interface/config/dump_alias.expected new file mode 100644 index 0000000..7800d22 --- /dev/null +++ b/tests/interface/config/dump_alias.expected @@ -0,0 +1,8 @@ +# Model options +type: s2s +dim-emb: 16 +dim-rnn: 32 +best-deep: true +# Training options +after-batches: 2 +mini-batch: 8 diff --git a/tests/interface/config/dump_expand.expected b/tests/interface/config/dump_expand.expected new file mode 100644 index 0000000..7437365 --- /dev/null +++ b/tests/interface/config/dump_expand.expected @@ -0,0 +1,16 @@ +# Model options +type: s2s +dim-emb: 16 +dim-rnn: 32 +enc-type: alternating +enc-cell-depth: 2 +enc-depth: 4 +dec-cell-base-depth: 4 +dec-cell-high-depth: 2 +dec-depth: 4 +skip: true +layer-normalization: true +tied-embeddings: true +# Training options +after-batches: 2 +mini-batch: 8 diff --git a/tests/interface/config/load_alias.yml b/tests/interface/config/load_alias.yml new file mode 100644 index 0000000..fb2b316 --- /dev/null +++ b/tests/interface/config/load_alias.yml @@ -0,0 +1,5 @@ +task: transformer +learn-rate: 0.5 +dim-emb: 16 +after-batches: 2 +mini-batch: 8 diff --git a/tests/interface/config/load_castup.yml b/tests/interface/config/load_castup.yml new file mode 100644 index 0000000..0aa7ffd --- /dev/null +++ b/tests/interface/config/load_castup.yml @@ -0,0 +1,7 @@ +type: s2s +mini-batch: 8 +dim-rnn: 32 +dim-emb: 16 +after-batches: 2 +# lr-decay-inv-sqrt is now defined as a vector, so a single number should be casted up to a vector +lr-decay-inv-sqrt: 142536475869 diff --git a/tests/interface/config/nonex_config.yml b/tests/interface/config/nonex_config.yml new file mode 100644 index 0000000..1021086 --- /dev/null +++ b/tests/interface/config/nonex_config.yml @@ -0,0 +1,6 @@ +type: s2s +mini-batch: 8 +dim-rnn: 32 +dim-emb: 16 +after-batches: 2 +blahblah: 12345 diff --git a/tests/interface/config/test_alias_best_deep.sh b/tests/interface/config/test_alias_best_deep.sh index 24ba88e..bc6848a 100644 --- a/tests/interface/config/test_alias_best_deep.sh +++ b/tests/interface/config/test_alias_best_deep.sh @@ -6,7 +6,6 @@ set -e rm -rf bestdeep bestdeep.log mkdir -p bestdeep - # Test $MRT_MARIAN/marian -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -m bestdeep/model.npz -v vocab.de.yml vocab.en.yml \ --type s2s --dim-emb 32 --dim-rnn 16 --mini-batch 1 --after-batches 1 --no-shuffle \ @@ -14,7 +13,7 @@ $MRT_MARIAN/marian -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -m bestdeep/model test -e bestdeep.log -grep -q "best-deep: true" bestdeep.log +#grep -q "best-deep: true" bestdeep.log grep -q "layer-normalization: true" bestdeep.log grep -q "tied-embeddings: true" bestdeep.log grep -q "enc-depth: 4" bestdeep.log diff --git a/tests/interface/config/test_load_config_with_nonexistent_options.sh b/tests/interface/config/test_load_config_with_nonexistent_options.sh new file mode 100644 index 0000000..bab0968 --- /dev/null +++ b/tests/interface/config/test_load_config_with_nonexistent_options.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Exit on error +set -e + +rm -rf nonex_config nonex_config.log +mkdir -p nonex_config + +$MRT_MARIAN/marian --train-sets $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} --model nonex_config/model.npz --vocabs vocab.de.yml vocab.en.yml \ + --config nonex_config.yml > nonex_config.log 2>&1 || true + +test -e nonex_config.log +grep -q "option.* not expected.* blahblah" nonex_config.log + +# Exit with success code +exit 0 diff --git a/tests/interface/config/test_load_config_with_type_conversion.sh b/tests/interface/config/test_load_config_with_type_conversion.sh new file mode 100644 index 0000000..0409b50 --- /dev/null +++ b/tests/interface/config/test_load_config_with_type_conversion.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# SUMMARY: Test if a single value option from a config file can be converted to a vector +# AUTHOR: romang + +# Exit on error +set -e + +rm -rf load_castup load_castup.log +mkdir -p load_castup + +# Run marian +$MRT_MARIAN/marian --train-sets $MRT_DATA/europarl.de-en/corpus.bpe.{de,en} --model load_castup/model.npz --vocabs vocab.de.yml vocab.en.yml --no-shuffle \ + --config load_castup.yml --log load_castup.log + +test -e load_castup/model.npz +test -e load_castup.log + +grep -q "type: s2s" load_castup.log +grep -q "lr-decay-inv-sqrt:$" load_castup.log +grep -q " - 142536475869" load_castup.log + +# Exit with success code +exit 0 diff --git a/tests/scorer/lm/lm_scores.expected b/tests/scorer/lm/lm_scores.expected new file mode 100644 index 0000000..fcd66d9 --- /dev/null +++ b/tests/scorer/lm/lm_scores.expected @@ -0,0 +1,100 @@ +-102.146797 +-56.570122 +-179.359634 +-126.033112 +-151.495895 +-179.698151 +-194.555847 +-218.278519 +-183.315292 +-290.833252 +-102.915894 +-170.672165 +-52.901627 +-166.206558 +-49.160263 +-139.223511 +-152.224136 +-30.153316 +-53.275631 +-190.775604 +-58.611706 +-68.133881 +-24.469469 +-35.131294 +-168.187225 +-49.200638 +-136.020294 +-327.748901 +-177.979187 +-48.333916 +-192.816528 +-82.605606 +-135.975204 +-54.720943 +-164.225433 +-191.112335 +-124.949036 +-204.887207 +-157.517319 +-93.470726 +-192.979294 +-95.325439 +-92.605972 +-141.128265 +-50.027866 +-52.459736 +-139.888809 +-112.474449 +-107.640236 +-110.293877 +-132.735626 +-68.751846 +-64.823151 +-126.765007 +-32.195976 +-47.674992 +-64.521729 +-166.688812 +-75.829742 +-47.022652 +-83.426292 +-154.526764 +-97.985588 +-95.690933 +-170.144775 +-174.160675 +-179.407593 +-91.198380 +-198.941437 +-202.614502 +-100.660248 +-253.774704 +-50.770256 +-195.531281 +-64.387291 +-77.049728 +-86.907028 +-63.171913 +-73.030006 +-94.385803 +-104.468475 +-90.391045 +-97.847717 +-147.599380 +-130.965668 +-31.314968 +-73.762161 +-202.152832 +-54.794056 +-60.364326 +-114.973816 +-107.025894 +-116.153397 +-37.122612 +-67.105400 +-144.332703 +-125.365646 +-64.766396 +-70.662804 +-115.290909 diff --git a/tests/scorer/lm/setup.sh b/tests/scorer/lm/setup.sh new file mode 100644 index 0000000..37dc69f --- /dev/null +++ b/tests/scorer/lm/setup.sh @@ -0,0 +1 @@ +test -f $MRT_MODELS/lmgec/lm.npz || exit 1 diff --git a/tests/scorer/lm/test_lm_scores.sh b/tests/scorer/lm/test_lm_scores.sh new file mode 100644 index 0000000..5a55419 --- /dev/null +++ b/tests/scorer/lm/test_lm_scores.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +##################################################################### +# SUMMARY: Test scoring sentences with a pretrained language model +# AUTHOR: snukky +##################################################################### + +# Exit on error +set -e + +# Run scorer +$MRT_MARIAN/marian-scorer -c $MRT_MODELS/lmgec/config.yml -t $(pwd)/text.prep.en > lm_scores.out + +# Compare scores +$MRT_TOOLS/diff-nums.py lm_scores.out lm_scores.expected -p 0.0003 -o lm_scores.diff + +# Exit with success code +exit 0 diff --git a/tests/scorer/lm/text.prep.en b/tests/scorer/lm/text.prep.en new file mode 100644 index 0000000..2ea61e5 --- /dev/null +++ b/tests/scorer/lm/text.prep.en @@ -0,0 +1,100 @@ +hop@@ s are not a typical enough example for us to understand the importance of the common agricultural policy for farmers in the European Union . +they do , however , show the extent to which it actually helps our farmers . +as Mr May@@ er said , hop@@ s are a traditional product which is particularly important to the quality of beer produced , although production is very limited ; some 4 000 hec@@ tar@@ es of land throughout the whole of Europe . +yet a sufficient number of farming families in the countries where hop@@ s are produced , particularly in Ba@@ vari@@ a , make their living from that product alone . +these families should not be left to the mercy of continual price falls , neither should they be forced to desert specific rural areas because of difficulties arising from ir@@ regul@@ arities within the market . +there have been a number of changes to the main regulation relating to this particular product as a result of the market fluctu@@ ations and the changing needs of farmers , the most recent being the Council decision to set a uniform level of aid to producers for a period of five years . +this latest decision al@@ ters the obligations of the Commission arising from the previous regime , that is of having to grant annual aid , and Members States no longer need to grant aid for setting up production teams . +this development means that certain articles of the old regulation need to be re@@ vo@@ ked which , rightly so , is carried out in the new regulation for which we will be voting , together with Mr May@@ er 's amend@@ ments , noting that the proposed regulation will not in any way affect the budget . +Mr President , I should like to congratulate the rapp@@ or@@ te@@ ur on the report . I can inform you that the Group of the European Liberal , Demo@@ crat and Reform Party will support the report when it is put to the vote . +Mr President , ladies and gentlemen , I first of all want to thank the rapp@@ or@@ te@@ ur , Mr Xa@@ ver May@@ er , for a valuable report - and perhaps especially for his enthusiastic presentation of the hop paradise of Ba@@ vari@@ a - together with the Committee on Agriculture and R@@ ural Development for its constructive attitude . +I am very pleased that our proposal for changing the way in which the market for hop@@ s is organised has met with a positive reception . +the Commission 's proposal is , of course , aimed at removing those sti@@ pu@@ lations which are no longer valid , either because deadlines have run out or because of previous changes to the common regime under which hop@@ s are organised . +these changes must be implemented before the basic regulation is consoli@@ dated . +owing to the fact that the Council has resolved that the level of support is to remain constant for a period of five years , the Commission does not consider that it is necessary to submit a report every year on the situation concerning the production and marketing of hop@@ s . +the Commission therefore considers that Article 11 can be removed . +according to Article 18 of the proposal , we shall , however , be presenting , by 1 September 2000 , a thorough assessment of the situation regarding the production and marketing of hop@@ s . +I am therefore afraid that the European Parliament 's two amend@@ ments complicate the text unnecessarily and that the requirement to receive information each year is already covered by the new proposal . +this information will also be made available on the Internet . +that is why the Commission can not adopt these amend@@ ments in this situation . +Mr President , firstly , I would be pleased to invite the Com@@ mission@@ er to K@@ los@@ ter An@@ de@@ chs in Ba@@ vari@@ a , a place where seven different types of beer are brewed ... +secondly , I would like to make it known that next ... +Mr Pos@@ sel@@ t , this is not a proce@@ du@@ ral motion . +the debate is closed . +we shall now proceed to the vote . +Mr President , before leaving for Stra@@ s@@ bourg , the pen@@ sion@@ ers who took me to the airport asked me " Is there going to be a debate about beer on Friday morning ? " +I replied " Yes , certainly . " +" well , you have to give an explanation of vote and say that we pen@@ sion@@ ers are in favour of the production and development of beer . " +we are in favour not just because ten years ago , the Pen@@ sion@@ ers ' Party put forward as candidate for Rome 's mayor the model Sol@@ vei@@ g tu@@ bing , who was born in Berlin and was a great con@@ no@@ isse@@ ur and lover of beer , but also because my own personal studies on beer show that drinking it makes you younger . +I know that welfare institutions and governments are against developing beer , because this means that they have to pay out pensions for longer , but as representative of the Pen@@ sion@@ ers ' Party , I am in favour . +extension of exceptional financial assistance to Ta@@ ji@@ kistan +Mr President , Com@@ mission@@ er , Ta@@ ji@@ kistan is not only the poorest of all the countries formed from the Soviet Union , it has also been the one to suffer the most on account of the turmoil caused by tribal fe@@ uding , which ultimately escal@@ ated into civil war . +the country failed to grasp how to employ the financial aid provided so far in a targeted manner . +the situation has only calmed down to some extent over the last few months , once the warring parties had ceased hos@@ ti@@ lities and resolved that their next step would be to form a coalition government . +general free elections are set for March 2000 . +the international donor community , which includes Swiss organisations for the most part , is now prepared to carry on where it left off delivering financial aid , but with certain provi@@ sos . +now that the situation has ab@@ ated and there are more favourable prospects for future progress overall , the Sa@@ vary report now attempts to provide renewed support for the macro@@ economic financial aid for this country in the form of loans . +we hope that this will make it sufficiently clear to Ta@@ ji@@ kistan that it needs to improve its state machinery by emb@@ racing democratic development and undertaking the necessary reforms . +however , the financial aid in the form of loans should only be granted if there is a real possibility of the European Union being able to properly monitor the situation , if the process of national reconci@@ liation continues and the elections , in particular the parliament@@ ary elections set for March , are free and democratic . +as Mr Sa@@ vary rightly said , this is also what we aim to achieve with proposed amend@@ ments no@@ s 8 and 9 , to which we give our un@@ equi@@ vocal support . +if Ta@@ ji@@ kistan 's credi@@ t@@ wor@@ thiness is to be restored , then the proposal in Budget 2000 is also to be welcomed . +the rapp@@ or@@ te@@ ur , Mr Bour@@ lang@@ es , has just confirmed to me that as far as this is concerned , a commentary is to provide for a particular form of financial aid to be made available again under the T@@ AC@@ IS programme . +on a final note , I would like to say that the P@@ PE group supports this report not@@ withstanding all the associated risks . +it represents a renewed , hopefully successful attempt to resume and promote economic and technical cooperation with Ta@@ ji@@ kistan . +Mr President , the loan which Ta@@ ji@@ kistan will receive equals this small and poor country 's share in an outstanding debt to the former Soviet Union . +as such , this will not solve any problems within Ta@@ ji@@ kistan . +the loan only prevents the outstanding debts from continuing to exist . +central Asia , the majority of whose population is Tur@@ ki@@ sh-@@ speaking and a small part of which is Ir@@ ani@@ an-@@ speaking , was conquered in the previous century by the Russian ts@@ ari@@ st empire . +this empire did not look for colonies far from home or overseas , like most Western European States , but close by . +although they were decol@@ on@@ ised in 19@@ 22 , they have remained linked to Russia in the form of Federal States of the Soviet Union . +the boundaries drawn by Stal@@ in between the various linguistic and cultural regions in the '@@ 20s and ' 30s are now state borders . +this prolonged European influence means that we in the European Union should feel especially responsible for the vic@@ is@@ sit@@ udes of the five States which appeared after the collapse of the Soviet Union . +the economy and environment are in a sad state of affairs in all fifteen States . +authorit@@ arian regi@@ mes have come to power and leave little or no scope for political opponents . +by means of refer@@ en@@ du@@ ms and intimid@@ ation , some presidents have their periods of office extended by ten years , without there being rival candidates . +in this respect , Ta@@ ji@@ kistan is no exception . +should European money be spent on a country like this ? +in general , my group is not in favour of funding un@@ democratic regi@@ mes . +all too often , we have noticed that they receive funding in the expectation that they will regard this money as a reward for taking small steps towards greater democracy and human rights and as an encouragement to take further such steps . +in practice , however , this method does not work , as we have since found out in Turkey and Russia . +the funding is received , but the situation does not improve . +with the collapse of the Soviet Union , Ta@@ ji@@ kistan has rever@@ ted to the situation in the nineteenth and at the beginning of the twentieth century . +there are several , regi@@ onally powerful families and groups which are fighting each other in a situation where war@@ lords seize upon political and religious differences as an excuse to justify armed action . +the fate of Ta@@ ji@@ kistan largely depends upon what is happening in its immediate surroundings , such as the hopeless , violent conflict in Afghanistan . +a large proportion of the Ta@@ ji@@ ki@@ stani population lives in north-east Afghanistan , the area which is not in the hands of the Taliban . +the North of Ta@@ ji@@ kistan stretches out as far as the den@@ sely populated Fer@@ g@@ ana Valley which is partly located in Uzbekistan and is completely integrated into the economy and road network of this neighbouring country . +as a front@@ line area fl@@ an@@ ked , on the one hand , by the Russian sphere of influence and , on the other hand , by Islamic fundament@@ alism in Afghanistan , the present Ta@@ ji@@ ki@@ stani State has little chance of survival . +the only reason to inject European funding into Ta@@ ji@@ kistan despite all this is that funding increases the chance of survival of the Ta@@ ji@@ ki@@ stani population and offers more chance of peace than there would be without such aid . +this is the reason why my group can nevertheless agree with the proposals made in the Sa@@ vary report . +Mr President , for our part , we will not be voting for the Sa@@ vary report . this is both for reasons concerning the choice of this country and out of more general considerations involving financial aid . +although , of course , we have nothing against the sovereign State of Ta@@ ji@@ kistan , we nevertheless do not think that European States should drop their priorities , or to be more precise , the priority that they set a long time ago on the subject of cooperation . +this priority has now been in force for more than a quarter of a century through the L@@ om@@ é A@@ gree@@ ments . +now , at the same time , so-called exceptional financial aid to the most diverse countries in the world is multi@@ plying , without any overall plan emerging , which means that our cooperation policy is nothing but a vague , huge scratching of the surface or , to sum it up , it is no longer a policy at all . +to this particular consideration we can add a second . +Ta@@ ji@@ kistan may have been spared the economic problems described in the report , moreover like so many other countries in the world , but it is nevertheless the victim of an ill-@@ considered opening up of its borders and of the huge game w@@ aged by emp@@ ires . +Mr President , the political tide in Ta@@ ji@@ kistan seems to be turning . +only last week , President R@@ ag@@ man@@ ov called for parliament@@ ary elections to be held next spring . +after months of tu@@ g-@@ of-@@ war between the government and the opposition , agreement has finally been reached regarding the new elector@@ al law . +I should point out , however , that these developments mark only the beginning of the democra@@ ti@@ sation process . +Ta@@ ji@@ kistan still shows features which are incompatible with a democratic constitutional state . +indeed , the downside of the present positive developments is that during the next elections , a number of parties will remain on the sidel@@ ines . +they are excluded from participating . this is hardly surprising as permission to participate in elections is still in the hands of former communi@@ sts . +this remark regarding Ta@@ ji@@ kistan 's democratic status does not de@@ tract from the fact that quite a few changes have already taken place . +as such , international organisations and bilateral don@@ ors no longer see good reason for su@@ spending aid to Ta@@ ji@@ kistan . +even the European Commission , with the proposal it is making , seems to think it should put its o@@ ar in . however , the Commission is losing sight of one important factor . +earlier this year , the three institutions of the European Union concluded the inter@@ institu@@ tional agreement for a period of seven years , stipul@@ ating the financial ceil@@ ings for the various policy areas . +I would like to remind the Commission of this . +in the proposal to grant aid to Ta@@ ji@@ kistan , this agreement is not given much consideration . +neither the urgent appeal by the IMF and World Bank to the European Union to increase aid to Ta@@ ji@@ kistan nor the argument of moral duty in the light of Ta@@ ji@@ kistan 's debts to the Union are in themselves good enough reasons to grant aid . +we are first of all faced with the European Union 's financial limitations . +the above agreement does not allow for making gifts to Ta@@ ji@@ kistan . +moreover , we have recent experiences of entering into financial commitments which we can not honour , as illustrated in the reconstruction of Kos@@ ovo . +the Commission has pledged a sum of EUR 500 million while the Member States do not want to make the necessary increase in the European budget at this stage . +a vague declaration of intent has since been drafted by the Council to prevent similar problems from occurring in future , but it remains to be seen what will come of this . +Kos@@ ovo is no better off at the moment . +aid has been reduced to EUR 360 million and also spread over several years . +this incident has given me grave concerns regarding the Member States ' willingness to make conce@@ ssions once again within the context of aid to Ta@@ ji@@ kistan , even if only relatively small amounts are involved . +member States find it hard to sell the idea within their own countries if the outcome of the negotiations at the Berlin Summit are under@@ mined by reality . +apart from a limited budget , the European Union has little political interest in Ta@@ ji@@ kistan . +the geographical remot@@ eness makes it impossible to have any real influence on the democra@@ ti@@ sation process . +although the European Union has an interest in being surrounded by large , stable regions , the tools it has available in order to achieve this are still very limited . diff --git a/tests/server/test_ende_with_empty_lines.sh b/tests/server/test_ende_with_empty_lines.sh new file mode 100644 index 0000000..7ac5b59 --- /dev/null +++ b/tests/server/test_ende_with_empty_lines.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Exit on error +set -e + +clean_up() { + kill $SERVER_PID +} +trap clean_up EXIT + +# Test code goes here +$MRT_MARIAN/marian-server -c $MRT_MODELS/wmt16_systems/marian.en-de.yml -p 8765 > server.log 2>&1 & +SERVER_PID=$! + +sleep 20 + +python3 $MRT_MARIAN/../scripts/server/client_example.py -p 8765 < text.someempty.in > text.someempty.out +kill $SERVER_PID + +$MRT_TOOLS/diff.sh text.someempty.out text.someempty.expected > text.someempty.diff +test -e server.log +grep -q "listening on port 8765" server.log + +# Exit with success code +exit 0 diff --git a/tests/server/text.someempty.expected b/tests/server/text.someempty.expected new file mode 100644 index 0000000..35a3239 --- /dev/null +++ b/tests/server/text.someempty.expected @@ -0,0 +1,20 @@ +Indien und Japan Ministerpräsidenten treffen sich in Tokio +Indiens neuer Premierminister Nar@@ endra Modi trifft sein jap@@ anisches Pendant , Shin@@ zo Abe , in Tokio , um über wirtschaftliche und Sicherheits@@ beziehungen zu diskutieren , auf seinem ersten wichtigen Auslands@@ besuch seit der Wahl im Mai . +Herr Modi ist auf einer fünft@@ ä@@ gigen Reise nach Japan , um die wirtschaftlichen Beziehungen zur dritt@@ größten Volkswirtschaft der Welt zu stärken . ++ + + +auch Indien hofft angeblich auf eine Einigung zur Verteidigungs@@ kooperation beider Nationen . +Kar@@ ra@@ tha Polizei nimmt 20-@@ Jährige nach Hochgeschwindigkeits@@ motor@@ en@@ jagd fest ++ + + +die Verkehrs@@ polizei auf Pat@@ rou@@ ille in Kar@@ ra@@ tha an diesem Vormittag versuchte , über ein blau@@ es Motorrad zu ziehen , als sie sie entdeckten , sie erreichte 12@@ ,5 / h , wie sie aus einer Tankstelle an der Bath@@ tor@@ straße aus@@ zog . +die Polizei meldet den Reiter dann nicht an und fuhr weiter bis zur Bur@@ ges@@ e-@@ Straße , bevor er in Busch@@ land fuhr , wodurch die Beamten aus den Augen verloren haben . +das Motorrad und ein Mensch Treffer der Beschreibung des Rei@@ ters wurde dann in einem Haus auf Wal@@ cott Way in Bulgar@@ ra ges@@ ichtet . +die Polizei von Kar@@ ra@@ tha hat einen 20-@@ jährigen Mann beauftragt , der es versäumt zu stoppen und leicht@@ sinnig fahren zu können . ++ + + ++ + + +George Web@@ ster warf N@@ air@@ n und Pit@@ loch@@ ry Hotel@@ vergewal@@ tigung vor ++ + + +George Web@@ ster , 28 , stellte sich bei einer Anhörung vor dem Ober@@ landes@@ gericht in Glasgow die Vorwürfe . +er soll am 7. Juni 2013 eine Frau im schottischen Hotel in Pit@@ loch@@ ry in Per@@ th@@ shire vergewaltigt haben . +es wird von Web@@ ster angegriffen , während sie " bewusst@@ los , schlä@@ ft und unfähig ist , ihre Zustimmung zu geben " . +Web@@ ster ist dann angeklagt , eine zweite Frau im Golf View Hotel in N@@ air@@ n im Hochland am 4. Mai 2014 vergewaltigt zu haben . ++ + + diff --git a/tests/server/text.someempty.in b/tests/server/text.someempty.in new file mode 100644 index 0000000..f969985 --- /dev/null +++ b/tests/server/text.someempty.in @@ -0,0 +1,20 @@ +India and Japan prime ministers meet in Tokyo +India 's new prime minister , Nar@@ endra Modi , is meeting his Japanese counterpart , Shin@@ zo Abe , in Tokyo to discuss economic and security ties , on his first major foreign visit since winning May 's election . +Mr Modi is on a five-@@ day trip to Japan to strengthen economic ties with the third largest economy in the world . + +India is also reportedly hoping for a deal on defence collaboration between the two nations . +Kar@@ ra@@ tha police arrest 20-@@ year-old after high speed motorcycle chase + +traffic police on patrol in Kar@@ ra@@ tha this morning tried to pull over a blue motorcycle when they spotted it reaching 12@@ 5km @/@ h as it pulled out of a service station on Bath@@ gate Road . +police say the rider then failed to stop and continued on to Bur@@ ges@@ s Road before turning into bush@@ land , causing the officers to lose sight of it . +the motorcycle and a person matching the description of the rider was then spotted at a house on Wal@@ cott Way in Bulgar@@ ra . +Kar@@ ra@@ tha Police have charged a 20-@@ year-old man with failing to stop and reckless driving . + + +George Web@@ ster accused of N@@ air@@ n and Pit@@ loch@@ ry hotel rap@@ es + +George Web@@ ster , 28 , faced the charges during a hearing at the High Court in Glasgow . +he is alleged to have raped a woman at the Scotland 's Hotel in Pit@@ loch@@ ry in Per@@ th@@ shire on June 7 , 2013 . +it is claimed Web@@ ster attacked her while she was `` unconscious , asleep and incapable of giving consent . '' +Web@@ ster is then charged with rap@@ ing a second woman at the Golf View Hotel in N@@ air@@ n in the Highlands on May 4 , 2014 . + diff --git a/tests/training/cost-functions/perplexity.expected b/tests/training/cost-functions/perplexity.expected index 04cbf84..cb624fc 100644 --- a/tests/training/cost-functions/perplexity.expected +++ b/tests/training/cost-functions/perplexity.expected @@ -1,8 +1,8 @@ 4821.80517578 -4719.09570312 -4560.10205078 -4477.03125000 -4256.11279297 -4028.61523438 -3604.00585938 -3105.27124023 +4719.09130859 +4560.05859375 +4476.96679688 +4255.98291016 +4028.35009766 +3603.63452148 +3104.74414062 diff --git a/tests/training/cost-functions/test_perplexity.sh b/tests/training/cost-functions/test_perplexity.sh index 36d224a..5de3391 100644 --- a/tests/training/cost-functions/test_perplexity.sh +++ b/tests/training/cost-functions/test_perplexity.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Train a model using perplexity as cost function +# AUTHOR: snukky +# TAGS: unstable +##################################################################### + # Exit on error set -e diff --git a/tests/training/embeddings/.gitignore b/tests/training/features/custom-embeddings/.gitignore index 1f7bba7..1f7bba7 100644 --- a/tests/training/embeddings/.gitignore +++ b/tests/training/features/custom-embeddings/.gitignore diff --git a/tests/training/embeddings/setup.sh b/tests/training/features/custom-embeddings/setup.sh index f285aad..f285aad 100644 --- a/tests/training/embeddings/setup.sh +++ b/tests/training/features/custom-embeddings/setup.sh diff --git a/tests/training/embeddings/test_custom_embeddings.sh b/tests/training/features/custom-embeddings/test_custom_embeddings.sh index c80ab2b..c80ab2b 100644 --- a/tests/training/embeddings/test_custom_embeddings.sh +++ b/tests/training/features/custom-embeddings/test_custom_embeddings.sh diff --git a/tests/training/embeddings/word2vec.de b/tests/training/features/custom-embeddings/word2vec.de index cdbb67d..cdbb67d 100644 --- a/tests/training/embeddings/word2vec.de +++ b/tests/training/features/custom-embeddings/word2vec.de diff --git a/tests/training/embeddings/word2vec.en b/tests/training/features/custom-embeddings/word2vec.en index 6bc925d..6bc925d 100644 --- a/tests/training/embeddings/word2vec.en +++ b/tests/training/features/custom-embeddings/word2vec.en diff --git a/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh b/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh index 508dcf9..ce520d4 100644 --- a/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh +++ b/tests/training/features/guided-alignment/test_guided_alignment_rnn.sh @@ -1,7 +1,7 @@ #!/bin/bash -x ##################################################################### -# SUMMARY: Training rnn model with guided alignment +# SUMMARY: Training S2S model with guided alignment # AUTHOR: snukky ##################################################################### diff --git a/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh b/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh index 3f125ee..cd465c7 100644 --- a/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh +++ b/tests/training/features/mixed-ensembles/test_ensemble_of_different_s2s.sh @@ -1,5 +1,11 @@ #!/bin/bash -x +##################################################################### +# SUMMARY: Train and decode with RNN models of different architectures +# AUTHOR: snukky +# TAGS: unstable +##################################################################### + # Exit on error set -e diff --git a/tests/training/features/right-left/.gitignore b/tests/training/features/right-left/.gitignore new file mode 100644 index 0000000..731ba41 --- /dev/null +++ b/tests/training/features/right-left/.gitignore @@ -0,0 +1,2 @@ +rnn +transformer diff --git a/tests/training/features/right-left/rnn.expected b/tests/training/features/right-left/rnn.expected new file mode 100644 index 0000000..c683efb --- /dev/null +++ b/tests/training/features/right-left/rnn.expected @@ -0,0 +1,10 @@ +227.26374817 +251.25552368 +244.43490601 +247.96240234 +242.51679993 +239.25460815 +236.51896667 +231.50540161 +238.35562134 +242.17578125 diff --git a/tests/training/features/right-left/setup.sh b/tests/training/features/right-left/setup.sh new file mode 100644 index 0000000..eda35bc --- /dev/null +++ b/tests/training/features/right-left/setup.sh @@ -0,0 +1,8 @@ +test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1 +test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1 + +test -s vocab.de.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml +test -s vocab.en.yml || $MRT_MARIAN/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml +test -s vocab.de.yml +test -s vocab.en.yml + diff --git a/tests/training/features/right-left/test_right_left_rnn.sh b/tests/training/features/right-left/test_right_left_rnn.sh new file mode 100644 index 0000000..31c65dc --- /dev/null +++ b/tests/training/features/right-left/test_right_left_rnn.sh @@ -0,0 +1,32 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Training right-left S2S model +# AUTHOR: snukky +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf rnn rnn.{log,out,diff} +mkdir -p rnn + +# Run marian command +$MRT_MARIAN/marian \ + --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + -m rnn/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ + --after-batches 100 --disp-freq 10 \ + --right-left --log rnn.log + +# Check if files exist +test -e rnn/model.npz +test -e rnn.log +grep -qi "right-left: true" rnn.log + +# Compare costs with expected costs +cat rnn.log | $MRT_TOOLS/extract-costs.sh > rnn.out +$MRT_TOOLS/diff-nums.py rnn.out rnn.expected -o rnn.diff + +# Exit with success code +exit 0 diff --git a/tests/training/features/right-left/test_right_left_transformer.sh b/tests/training/features/right-left/test_right_left_transformer.sh new file mode 100644 index 0000000..429ef69 --- /dev/null +++ b/tests/training/features/right-left/test_right_left_transformer.sh @@ -0,0 +1,32 @@ +#!/bin/bash -x + +##################################################################### +# SUMMARY: Training right-left transformer model +# AUTHOR: snukky +##################################################################### + +# Exit on error +set -e + +# Remove old artifacts and create working directory +rm -rf transformer transformer.{log,out,diff} +mkdir -p transformer + +# Run marian command +$MRT_MARIAN/marian --type transformer \ + --no-shuffle --seed 2222 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --optimizer sgd \ + -m transformer/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v vocab.en.yml vocab.de.yml \ + --after-batches 100 --disp-freq 10 \ + --right-left --log transformer.log + +# Check if files exist +test -e transformer/model.npz +test -e transformer.log +grep -qi "right-left: true" transformer.log + +# Compare costs with expected costs +cat transformer.log | $MRT_TOOLS/extract-costs.sh > transformer.out +$MRT_TOOLS/diff-nums.py transformer.out transformer.expected -o transformer.diff + +# Exit with success code +exit 0 diff --git a/tests/training/features/right-left/transformer.expected b/tests/training/features/right-left/transformer.expected new file mode 100644 index 0000000..a63420f --- /dev/null +++ b/tests/training/features/right-left/transformer.expected @@ -0,0 +1,10 @@ +237.99105835 +263.23455811 +255.45816040 +259.72146606 +254.48379517 +250.45918274 +248.04586792 +242.33943176 +249.78984070 +253.30130005 |