Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-12 16:27:24 +0300
committer: Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>, 2018-02-12 16:28:11 +0300
commit: 19817b7b8eba0911c3c427fd0119150027830d37 (patch)
tree: 3d45a5de4b9e1f589462a6cec21279616e32aad4
parent: 86d2de5f8f410e9b001355896ac88e31f55cdd48 (diff)
Add test for data weighting with the default maxi-batch
-rw-r--r-- tests/training/weights/.gitignore | 1
-rw-r--r-- tests/training/weights/maxibatch.expected | 10
-rw-r--r-- tests/training/weights/setup.sh | 5
-rw-r--r-- tests/training/weights/test_maxi_batches.sh | 23
-rw-r--r-- tests/training/weights/test_sentence_weighting_sqlite.sh | 3
-rw-r--r-- tests/training/weights/test_sentence_weighting_with_ones.sh | 3
-rw-r--r-- tests/training/weights/test_sentence_weights_x3.sh | 3
-rw-r--r-- tests/training/weights/test_validation.sh | 3
-rw-r--r-- tests/training/weights/test_word_weighting_with_ones.sh | 3
-rw-r--r-- tests/training/weights/train.1k.inc.txt | 1000
10 files changed, 1039 insertions, 15 deletions
diff --git a/tests/training/weights/.gitignore b/tests/training/weights/.gitignore
index 6c415b9..86ab335 100644
--- a/tests/training/weights/.gitignore
+++ b/tests/training/weights/.gitignore
@@ -9,3 +9,4 @@ x3weights
sqlite
valid
valid_script.temp
+maxibatch
diff --git a/tests/training/weights/maxibatch.expected b/tests/training/weights/maxibatch.expected
new file mode 100644
index 0000000..d195827
--- /dev/null
+++ b/tests/training/weights/maxibatch.expected
@@ -0,0 +1,10 @@
+6915.06
+5113.08
+3434.51
+2349.80
+1471.84
+3602.55
+3667.30
+2832.99
+2116.09
+1487.49
diff --git a/tests/training/weights/setup.sh b/tests/training/weights/setup.sh
index 8b8cd07..4739558 100644
--- a/tests/training/weights/setup.sh
+++ b/tests/training/weights/setup.sh
@@ -1,2 +1,7 @@
test -f $MRT_DATA/europarl.de-en/corpus.bpe.en || exit 1
test -f $MRT_DATA/europarl.de-en/corpus.bpe.de || exit 1
+
+test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
+test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
+test -e vocab.de.yml
+test -e vocab.en.yml
diff --git a/tests/training/weights/test_maxi_batches.sh b/tests/training/weights/test_maxi_batches.sh
new file mode 100644
index 0000000..148ff15
--- /dev/null
+++ b/tests/training/weights/test_maxi_batches.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Test code goes here
+mkdir -p maxibatch
+rm -rf maxibatch/* maxibatch.log
+
+$MRT_MARIAN/build/marian \
+ --seed 3333 --no-shuffle \
+ -m maxibatch/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
+ --log maxibatch.log --disp-freq 10 --after-batches 100 --mini-batch 16 --cost-type ce-sum \
+ --data-weighting train.1k.inc.txt --data-weighting-type sentence
+
+test -e maxibatch/model.npz
+test -e maxibatch.log
+
+$MRT_TOOLS/extract-costs.sh < maxibatch.log > maxibatch.out
+$MRT_TOOLS/diff-floats.py maxibatch.out maxibatch.expected -p 0.2 > maxibatch.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/training/weights/test_sentence_weighting_sqlite.sh b/tests/training/weights/test_sentence_weighting_sqlite.sh
index 6c9852f..b6b7a16 100644
--- a/tests/training/weights/test_sentence_weighting_sqlite.sh
+++ b/tests/training/weights/test_sentence_weighting_sqlite.sh
@@ -7,9 +7,6 @@ set -e
rm -rf sqlite sqlite.log
mkdir -p sqlite
-test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
-test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
-
$MRT_MARIAN/build/marian \
--seed 1111 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 \
-m sqlite/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
diff --git a/tests/training/weights/test_sentence_weighting_with_ones.sh b/tests/training/weights/test_sentence_weighting_with_ones.sh
index e9e707d..6f70894 100644
--- a/tests/training/weights/test_sentence_weighting_with_ones.sh
+++ b/tests/training/weights/test_sentence_weighting_with_ones.sh
@@ -7,9 +7,6 @@ set -e
rm -rf noweights* ones*
mkdir -p noweights ones
-test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.de > vocab.de.yml
-test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.en > vocab.en.yml
-
$MRT_MARIAN/build/marian \
--seed 2222 --no-shuffle \
-m noweights/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
diff --git a/tests/training/weights/test_sentence_weights_x3.sh b/tests/training/weights/test_sentence_weights_x3.sh
index eaa7d3a..a37d821 100644
--- a/tests/training/weights/test_sentence_weights_x3.sh
+++ b/tests/training/weights/test_sentence_weights_x3.sh
@@ -7,9 +7,6 @@ set -e
rm -rf x3copied* x3weights*
mkdir -p x3copied x3weights
-test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
-test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
-
$MRT_MARIAN/build/marian \
--seed 2222 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 \
-m x3copied/model.npz -t train.x3.{de,en} -v vocab.{de,en}.yml \
diff --git a/tests/training/weights/test_validation.sh b/tests/training/weights/test_validation.sh
index eb16a85..7d9fe50 100644
--- a/tests/training/weights/test_validation.sh
+++ b/tests/training/weights/test_validation.sh
@@ -7,9 +7,6 @@ set -e
rm -rf valid valid_script.temp
mkdir -p valid
-test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.de > vocab.de.yml
-test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/corpus.bpe.en > vocab.en.yml
-
$MRT_MARIAN/build/marian \
--seed 4444 --no-shuffle --maxi-batch 1 --maxi-batch-sort none --max-length 100 \
-m valid/model.npz -t train.1k.{de,en} -v vocab.{de,en}.yml \
diff --git a/tests/training/weights/test_word_weighting_with_ones.sh b/tests/training/weights/test_word_weighting_with_ones.sh
index 17311de..bf6b918 100644
--- a/tests/training/weights/test_word_weighting_with_ones.sh
+++ b/tests/training/weights/test_word_weighting_with_ones.sh
@@ -7,9 +7,6 @@ set -e
rm -rf word_noweights* word_ones*
mkdir -p word_noweights word_ones
-test -e vocab.de.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.de > vocab.de.yml
-test -e vocab.en.yml || $MRT_MARIAN/build/marian-vocab < $MRT_DATA/europarl.de-en/toy.bpe.en > vocab.en.yml
-
$MRT_MARIAN/build/marian \
--seed 1111 --no-shuffle \
-m word_noweights/model.npz -t $MRT_DATA/europarl.de-en/toy.bpe.{de,en} -v vocab.{de,en}.yml \
diff --git a/tests/training/weights/train.1k.inc.txt b/tests/training/weights/train.1k.inc.txt
new file mode 100644
index 0000000..af59fb2
--- /dev/null
+++ b/tests/training/weights/train.1k.inc.txt
@@ -0,0 +1,1000 @@
+1.0000
+1.0001
+1.0002
+1.0003
+1.0004
+1.0005
+1.0006
+1.0007
+1.0008
+1.0009
+1.0010
+1.0011
+1.0012
+1.0013
+1.0014
+1.0015
+1.0016
+1.0017
+1.0018
+1.0019
+1.0020
+1.0021
+1.0022
+1.0023
+1.0024
+1.0025
+1.0026
+1.0027
+1.0028
+1.0029
+1.0030
+1.0031
+1.0032
+1.0033
+1.0034
+1.0035
+1.0036
+1.0037
+1.0038
+1.0039
+1.0040
+1.0041
+1.0042
+1.0043
+1.0044
+1.0045
+1.0046
+1.0047
+1.0048
+1.0049
+1.0050
+1.0051
+1.0052
+1.0053
+1.0054
+1.0055
+1.0056
+1.0057
+1.0058
+1.0059
+1.0060
+1.0061
+1.0062
+1.0063
+1.0064
+1.0065
+1.0066
+1.0067
+1.0068
+1.0069
+1.0070
+1.0071
+1.0072
+1.0073
+1.0074
+1.0075
+1.0076
+1.0077
+1.0078
+1.0079
+1.0080
+1.0081
+1.0082
+1.0083
+1.0084
+1.0085
+1.0086
+1.0087
+1.0088
+1.0089
+1.0090
+1.0091
+1.0092
+1.0093
+1.0094
+1.0095
+1.0096
+1.0097
+1.0098
+1.0099
+1.0100
+1.0101
+1.0102
+1.0103
+1.0104
+1.0105
+1.0106
+1.0107
+1.0108
+1.0109
+1.0110
+1.0111
+1.0112
+1.0113
+1.0114
+1.0115
+1.0116
+1.0117
+1.0118
+1.0119
+1.0120
+1.0121
+1.0122
+1.0123
+1.0124
+1.0125
+1.0126
+1.0127
+1.0128
+1.0129
+1.0130
+1.0131
+1.0132
+1.0133
+1.0134
+1.0135
+1.0136
+1.0137
+1.0138
+1.0139
+1.0140
+1.0141
+1.0142
+1.0143
+1.0144
+1.0145
+1.0146
+1.0147
+1.0148
+1.0149
+1.0150
+1.0151
+1.0152
+1.0153
+1.0154
+1.0155
+1.0156
+1.0157
+1.0158
+1.0159
+1.0160
+1.0161
+1.0162
+1.0163
+1.0164
+1.0165
+1.0166
+1.0167
+1.0168
+1.0169
+1.0170
+1.0171
+1.0172
+1.0173
+1.0174
+1.0175
+1.0176
+1.0177
+1.0178
+1.0179
+1.0180
+1.0181
+1.0182
+1.0183
+1.0184
+1.0185
+1.0186
+1.0187
+1.0188
+1.0189
+1.0190
+1.0191
+1.0192
+1.0193
+1.0194
+1.0195
+1.0196
+1.0197
+1.0198
+1.0199
+1.0200
+1.0201
+1.0202
+1.0203
+1.0204
+1.0205
+1.0206
+1.0207
+1.0208
+1.0209
+1.0210
+1.0211
+1.0212
+1.0213
+1.0214
+1.0215
+1.0216
+1.0217
+1.0218
+1.0219
+1.0220
+1.0221
+1.0222
+1.0223
+1.0224
+1.0225
+1.0226
+1.0227
+1.0228
+1.0229
+1.0230
+1.0231
+1.0232
+1.0233
+1.0234
+1.0235
+1.0236
+1.0237
+1.0238
+1.0239
+1.0240
+1.0241
+1.0242
+1.0243
+1.0244
+1.0245
+1.0246
+1.0247
+1.0248
+1.0249
+1.0250
+1.0251
+1.0252
+1.0253
+1.0254
+1.0255
+1.0256
+1.0257
+1.0258
+1.0259
+1.0260
+1.0261
+1.0262
+1.0263
+1.0264
+1.0265
+1.0266
+1.0267
+1.0268
+1.0269
+1.0270
+1.0271
+1.0272
+1.0273
+1.0274
+1.0275
+1.0276
+1.0277
+1.0278
+1.0279
+1.0280
+1.0281
+1.0282
+1.0283
+1.0284
+1.0285
+1.0286
+1.0287
+1.0288
+1.0289
+1.0290
+1.0291
+1.0292
+1.0293
+1.0294
+1.0295
+1.0296
+1.0297
+1.0298
+1.0299
+1.0300
+1.0301
+1.0302
+1.0303
+1.0304
+1.0305
+1.0306
+1.0307
+1.0308
+1.0309
+1.0310
+1.0311
+1.0312
+1.0313
+1.0314
+1.0315
+1.0316
+1.0317
+1.0318
+1.0319
+1.0320
+1.0321
+1.0322
+1.0323
+1.0324
+1.0325
+1.0326
+1.0327
+1.0328
+1.0329
+1.0330
+1.0331
+1.0332
+1.0333
+1.0334
+1.0335
+1.0336
+1.0337
+1.0338
+1.0339
+1.0340
+1.0341
+1.0342
+1.0343
+1.0344
+1.0345
+1.0346
+1.0347
+1.0348
+1.0349
+1.0350
+1.0351
+1.0352
+1.0353
+1.0354
+1.0355
+1.0356
+1.0357
+1.0358
+1.0359
+1.0360
+1.0361
+1.0362
+1.0363
+1.0364
+1.0365
+1.0366
+1.0367
+1.0368
+1.0369
+1.0370
+1.0371
+1.0372
+1.0373
+1.0374
+1.0375
+1.0376
+1.0377
+1.0378
+1.0379
+1.0380
+1.0381
+1.0382
+1.0383
+1.0384
+1.0385
+1.0386
+1.0387
+1.0388
+1.0389
+1.0390
+1.0391
+1.0392
+1.0393
+1.0394
+1.0395
+1.0396
+1.0397
+1.0398
+1.0399
+1.0400
+1.0401
+1.0402
+1.0403
+1.0404
+1.0405
+1.0406
+1.0407
+1.0408
+1.0409
+1.0410
+1.0411
+1.0412
+1.0413
+1.0414
+1.0415
+1.0416
+1.0417
+1.0418
+1.0419
+1.0420
+1.0421
+1.0422
+1.0423
+1.0424
+1.0425
+1.0426
+1.0427
+1.0428
+1.0429
+1.0430
+1.0431
+1.0432
+1.0433
+1.0434
+1.0435
+1.0436
+1.0437
+1.0438
+1.0439
+1.0440
+1.0441
+1.0442
+1.0443
+1.0444
+1.0445
+1.0446
+1.0447
+1.0448
+1.0449
+1.0450
+1.0451
+1.0452
+1.0453
+1.0454
+1.0455
+1.0456
+1.0457
+1.0458
+1.0459
+1.0460
+1.0461
+1.0462
+1.0463
+1.0464
+1.0465
+1.0466
+1.0467
+1.0468
+1.0469
+1.0470
+1.0471
+1.0472
+1.0473
+1.0474
+1.0475
+1.0476
+1.0477
+1.0478
+1.0479
+1.0480
+1.0481
+1.0482
+1.0483
+1.0484
+1.0485
+1.0486
+1.0487
+1.0488
+1.0489
+1.0490
+1.0491
+1.0492
+1.0493
+1.0494
+1.0495
+1.0496
+1.0497
+1.0498
+1.0499
+1.0500
+1.0501
+1.0502
+1.0503
+1.0504
+1.0505
+1.0506
+1.0507
+1.0508
+1.0509
+1.0510
+1.0511
+1.0512
+1.0513
+1.0514
+1.0515
+1.0516
+1.0517
+1.0518
+1.0519
+1.0520
+1.0521
+1.0522
+1.0523
+1.0524
+1.0525
+1.0526
+1.0527
+1.0528
+1.0529
+1.0530
+1.0531
+1.0532
+1.0533
+1.0534
+1.0535
+1.0536
+1.0537
+1.0538
+1.0539
+1.0540
+1.0541
+1.0542
+1.0543
+1.0544
+1.0545
+1.0546
+1.0547
+1.0548
+1.0549
+1.0550
+1.0551
+1.0552
+1.0553
+1.0554
+1.0555
+1.0556
+1.0557
+1.0558
+1.0559
+1.0560
+1.0561
+1.0562
+1.0563
+1.0564
+1.0565
+1.0566
+1.0567
+1.0568
+1.0569
+1.0570
+1.0571
+1.0572
+1.0573
+1.0574
+1.0575
+1.0576
+1.0577
+1.0578
+1.0579
+1.0580
+1.0581
+1.0582
+1.0583
+1.0584
+1.0585
+1.0586
+1.0587
+1.0588
+1.0589
+1.0590
+1.0591
+1.0592
+1.0593
+1.0594
+1.0595
+1.0596
+1.0597
+1.0598
+1.0599
+1.0600
+1.0601
+1.0602
+1.0603
+1.0604
+1.0605
+1.0606
+1.0607
+1.0608
+1.0609
+1.0610
+1.0611
+1.0612
+1.0613
+1.0614
+1.0615
+1.0616
+1.0617
+1.0618
+1.0619
+1.0620
+1.0621
+1.0622
+1.0623
+1.0624
+1.0625
+1.0626
+1.0627
+1.0628
+1.0629
+1.0630
+1.0631
+1.0632
+1.0633
+1.0634
+1.0635
+1.0636
+1.0637
+1.0638
+1.0639
+1.0640
+1.0641
+1.0642
+1.0643
+1.0644
+1.0645
+1.0646
+1.0647
+1.0648
+1.0649
+1.0650
+1.0651
+1.0652
+1.0653
+1.0654
+1.0655
+1.0656
+1.0657
+1.0658
+1.0659
+1.0660
+1.0661
+1.0662
+1.0663
+1.0664
+1.0665
+1.0666
+1.0667
+1.0668
+1.0669
+1.0670
+1.0671
+1.0672
+1.0673
+1.0674
+1.0675
+1.0676
+1.0677
+1.0678
+1.0679
+1.0680
+1.0681
+1.0682
+1.0683
+1.0684
+1.0685
+1.0686
+1.0687
+1.0688
+1.0689
+1.0690
+1.0691
+1.0692
+1.0693
+1.0694
+1.0695
+1.0696
+1.0697
+1.0698
+1.0699
+1.0700
+1.0701
+1.0702
+1.0703
+1.0704
+1.0705
+1.0706
+1.0707
+1.0708
+1.0709
+1.0710
+1.0711
+1.0712
+1.0713
+1.0714
+1.0715
+1.0716
+1.0717
+1.0718
+1.0719
+1.0720
+1.0721
+1.0722
+1.0723
+1.0724
+1.0725
+1.0726
+1.0727
+1.0728
+1.0729
+1.0730
+1.0731
+1.0732
+1.0733
+1.0734
+1.0735
+1.0736
+1.0737
+1.0738
+1.0739
+1.0740
+1.0741
+1.0742
+1.0743
+1.0744
+1.0745
+1.0746
+1.0747
+1.0748
+1.0749
+1.0750
+1.0751
+1.0752
+1.0753
+1.0754
+1.0755
+1.0756
+1.0757
+1.0758
+1.0759
+1.0760
+1.0761
+1.0762
+1.0763
+1.0764
+1.0765
+1.0766
+1.0767
+1.0768
+1.0769
+1.0770
+1.0771
+1.0772
+1.0773
+1.0774
+1.0775
+1.0776
+1.0777
+1.0778
+1.0779
+1.0780
+1.0781
+1.0782
+1.0783
+1.0784
+1.0785
+1.0786
+1.0787
+1.0788
+1.0789
+1.0790
+1.0791
+1.0792
+1.0793
+1.0794
+1.0795
+1.0796
+1.0797
+1.0798
+1.0799
+1.0800
+1.0801
+1.0802
+1.0803
+1.0804
+1.0805
+1.0806
+1.0807
+1.0808
+1.0809
+1.0810
+1.0811
+1.0812
+1.0813
+1.0814
+1.0815
+1.0816
+1.0817
+1.0818
+1.0819
+1.0820
+1.0821
+1.0822
+1.0823
+1.0824
+1.0825
+1.0826
+1.0827
+1.0828
+1.0829
+1.0830
+1.0831
+1.0832
+1.0833
+1.0834
+1.0835
+1.0836
+1.0837
+1.0838
+1.0839
+1.0840
+1.0841
+1.0842
+1.0843
+1.0844
+1.0845
+1.0846
+1.0847
+1.0848
+1.0849
+1.0850
+1.0851
+1.0852
+1.0853
+1.0854
+1.0855
+1.0856
+1.0857
+1.0858
+1.0859
+1.0860
+1.0861
+1.0862
+1.0863
+1.0864
+1.0865
+1.0866
+1.0867
+1.0868
+1.0869
+1.0870
+1.0871
+1.0872
+1.0873
+1.0874
+1.0875
+1.0876
+1.0877
+1.0878
+1.0879
+1.0880
+1.0881
+1.0882
+1.0883
+1.0884
+1.0885
+1.0886
+1.0887
+1.0888
+1.0889
+1.0890
+1.0891
+1.0892
+1.0893
+1.0894
+1.0895
+1.0896
+1.0897
+1.0898
+1.0899
+1.0900
+1.0901
+1.0902
+1.0903
+1.0904
+1.0905
+1.0906
+1.0907
+1.0908
+1.0909
+1.0910
+1.0911
+1.0912
+1.0913
+1.0914
+1.0915
+1.0916
+1.0917
+1.0918
+1.0919
+1.0920
+1.0921
+1.0922
+1.0923
+1.0924
+1.0925
+1.0926
+1.0927
+1.0928
+1.0929
+1.0930
+1.0931
+1.0932
+1.0933
+1.0934
+1.0935
+1.0936
+1.0937
+1.0938
+1.0939
+1.0940
+1.0941
+1.0942
+1.0943
+1.0944
+1.0945
+1.0946
+1.0947
+1.0948
+1.0949
+1.0950
+1.0951
+1.0952
+1.0953
+1.0954
+1.0955
+1.0956
+1.0957
+1.0958
+1.0959
+1.0960
+1.0961
+1.0962
+1.0963
+1.0964
+1.0965
+1.0966
+1.0967
+1.0968
+1.0969
+1.0970
+1.0971
+1.0972
+1.0973
+1.0974
+1.0975
+1.0976
+1.0977
+1.0978
+1.0979
+1.0980
+1.0981
+1.0982
+1.0983
+1.0984
+1.0985
+1.0986
+1.0987
+1.0988
+1.0989
+1.0990
+1.0991
+1.0992
+1.0993
+1.0994
+1.0995
+1.0996
+1.0997
+1.0998
+1.0999