From 4dac388a39adc581ecba6ae88b5a889988d15729 Mon Sep 17 00:00:00 2001
From: Alham Fikri Aji <afaji321@gmail.com>
Date: Wed, 11 Nov 2020 04:27:38 +0000
Subject: simplify quantized-model testing by using a shared train folder

---
 tests/training/features/quantized-model/.gitignore   |  3 ---
 .../features/quantized-model/quantized-opt.expected  | 20 ++++++++++----------
 .../features/quantized-model/test_quantmodel.sh      | 10 +++++-----
 .../quantized-model/test_quantmodel_with_bias.sh     | 12 ++++++------
 .../test_quantmodel_with_optimization.sh             | 12 ++++++------
 5 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'tests')

diff --git a/tests/training/features/quantized-model/.gitignore b/tests/training/features/quantized-model/.gitignore
index 2fa5b37..08afa18 100644
--- a/tests/training/features/quantized-model/.gitignore
+++ b/tests/training/features/quantized-model/.gitignore
@@ -1,4 +1 @@
-quantized
-quantized-with-bias
-quantized-opt
 train
diff --git a/tests/training/features/quantized-model/quantized-opt.expected b/tests/training/features/quantized-model/quantized-opt.expected
index 6ccc2e0..12f21d3 100644
--- a/tests/training/features/quantized-model/quantized-opt.expected
+++ b/tests/training/features/quantized-model/quantized-opt.expected
@@ -1,10 +1,10 @@
-225.53999329
-244.43618774
-230.57369995
-225.52883911
-213.84687805
-204.98857117
-198.73059082
-191.04969788
-194.96365356
-196.72579956
+225.11299133
+243.58525085
+229.45321655
+224.28414917
+212.65376282
+204.06687927
+197.81901550
+190.08296204
+193.72265625
+195.21139526
diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh
index b31a1d5..8b55697 100644
--- a/tests/training/features/quantized-model/test_quantmodel.sh
+++ b/tests/training/features/quantized-model/test_quantmodel.sh
@@ -11,18 +11,18 @@ set -e
 PREFIX=quantized
 
 # Remove old artifacts and create working directory
-rm -rf $PREFIX $PREFIX.{log,out,diff}
-mkdir -p $PREFIX train
+rm -rf train $PREFIX.{log,out,diff}
+mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
     --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
-    -m $PREFIX/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \
     --log $PREFIX.log
 
 # Check if files exist
-test -e $PREFIX/model.npz
+test -e train/model.npz
 test -e $PREFIX.log
 
 # Compare the current output with the expected output
@@ -30,7 +30,7 @@ cat $PREFIX.log | $MRT_TOOLS/extract-costs.sh > $PREFIX.out
 $MRT_TOOLS/diff-nums.py $PREFIX.out $PREFIX.expected -o $PREFIX.diff
 
 # make sure that the resulting model has no more than 256 different values (i.e. quantized)
-$MRT_TOOLS/check-model-unique-vals.py $PREFIX/model.npz -b 8
+$MRT_TOOLS/check-model-unique-vals.py train/model.npz -b 8
 
 # Exit with success code
 exit 0
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
index 96380c3..de14ffb 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
@@ -11,25 +11,25 @@ set -e
 PREFIX=quantized-with-bias
 
 # Remove old artifacts and create working directory
-rm -rf $PREFIX $PREFIX.{log,out,diff}
-mkdir -p $PREFIX train
+rm -rf train $PREFIX.{log,out,diff}
+mkdir -p train
 
 # training with quantized bias is tricky, so we start by training a normal model first before finetuning it to the quantized space.
 $MRT_MARIAN/marian \
     --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
-    -m $PREFIX/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \
     --log $PREFIX.log
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
     --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
-    -m $PREFIX/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \
     --log $PREFIX.log
 
 # Check if files exist
-test -e $PREFIX/model.npz
+test -e train/model.npz
 test -e $PREFIX.log
 
 # Compare the current output with the expected output
@@ -37,7 +37,7 @@ cat $PREFIX.log | $MRT_TOOLS/extract-costs.sh > $PREFIX.out
 $MRT_TOOLS/diff-nums.py $PREFIX.out $PREFIX.expected -o $PREFIX.diff
 
 # make sure that the resulting model has no more than 256 different values (i.e. quantized)
-$MRT_TOOLS/check-model-unique-vals.py $PREFIX/model.npz -b 8 --with_bias
+$MRT_TOOLS/check-model-unique-vals.py train/model.npz -b 8 --with_bias
 
 # Exit with success code
 exit 0
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
index 4a88059..fe7993e 100644
--- a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
+++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
@@ -11,26 +11,26 @@ set -e
 PREFIX=quantized-opt
 
 # Remove old artifacts and create working directory
-rm -rf $PREFIX $PREFIX.{log,out,diff}
-mkdir -p $PREFIX train
+rm -rf train $PREFIX.{log,out,diff}
+mkdir -p train
 
 # Train an 8-bits model
 $MRT_MARIAN/marian \
     --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
-    -m $PREFIX/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    -m train/model.npz -t $MRT_DATA/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
     --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \
     --log $PREFIX.log
 
 # Check if files exist
-test -e $PREFIX/model.npz
+test -e train/model.npz
 test -e $PREFIX.log
 
 # Compare the current output with the expected output
 cat $PREFIX.log | $MRT_TOOLS/extract-costs.sh > $PREFIX.out
-$MRT_TOOLS/diff-nums.py $PREFIX.out $PREFIX.expected -o $PREFIX.diff -p 0.01
+$MRT_TOOLS/diff-nums.py $PREFIX.out $PREFIX.expected -o $PREFIX.diff
 
 # make sure that the resulting model has no more than 256 different values (i.e. quantized)
-$MRT_TOOLS/check-model-unique-vals.py $PREFIX/model.npz -b 8
+$MRT_TOOLS/check-model-unique-vals.py train/model.npz -b 8
 
 # Exit with success code
 exit 0
-- 
cgit v1.2.3