Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian-regression-tests.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- tests/training/features/quantized-model/quantized-no-opt.expected | 10
-rw-r--r-- tests/training/features/quantized-model/quantized-with-bias.expected | 10
-rw-r--r-- tests/training/features/quantized-model/quantized.expected | 10
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel.sh | 37
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel_with_bias.sh | 44
-rw-r--r-- tests/training/features/quantized-model/test_quantmodel_with_optimization.sh | 37
-rwxr-xr-x tools/check-model-unique-vals.py | 57
7 files changed, 205 insertions, 0 deletions
diff --git a/tests/training/features/quantized-model/quantized-no-opt.expected b/tests/training/features/quantized-model/quantized-no-opt.expected
new file mode 100644
index 0000000..c4ac602
--- /dev/null
+++ b/tests/training/features/quantized-model/quantized-no-opt.expected
@@ -0,0 +1,10 @@
+225.10929871
+243.58345032
+229.45071411
+224.28813171
+212.65242004
+204.06596375
+197.81690979
+190.08908081
+193.72296143
+195.20832825
diff --git a/tests/training/features/quantized-model/quantized-with-bias.expected b/tests/training/features/quantized-model/quantized-with-bias.expected
new file mode 100644
index 0000000..4f56c41
--- /dev/null
+++ b/tests/training/features/quantized-model/quantized-with-bias.expected
@@ -0,0 +1,10 @@
+225.10006714
+243.58285522
+229.47399902
+224.31018066
+212.68742371
+204.09915161
+197.85253906
+190.12380981
+193.74653625
+195.23658752
diff --git a/tests/training/features/quantized-model/quantized.expected b/tests/training/features/quantized-model/quantized.expected
new file mode 100644
index 0000000..5f42a2c
--- /dev/null
+++ b/tests/training/features/quantized-model/quantized.expected
@@ -0,0 +1,10 @@
+225.25198364
+244.07009888
+230.06900024
+224.89492798
+213.12133789
+204.42272949
+198.22579956
+190.56004333
+194.42613220
+196.06826782
diff --git a/tests/training/features/quantized-model/test_quantmodel.sh b/tests/training/features/quantized-model/test_quantmodel.sh
new file mode 100644
index 0000000..75d3a1a
--- /dev/null
+++ b/tests/training/features/quantized-model/test_quantmodel.sh
@@ -0,0 +1,37 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a quantized marian model
+# AUTHOR: afaji
+#####################################################################
+
+# Exit on error
+set -e
+
+PREFIX=quantized
+
+# Remove old artifacts and create working directory
+rm -rf "$PREFIX" "$PREFIX".{log,out,diff}
+mkdir -p "$PREFIX"
+
+
+# Train an 8-bit model
+"$MRT_MARIAN/marian" \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    -m "$PREFIX/model.npz" -t "$MRT_DATA"/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 \
+    --log "$PREFIX.log"
+
+# Check if files exist
+test -e "$PREFIX/model.npz"
+test -e "$PREFIX.log"
+
+# Compare the current output with the expected output (redirect instead of `cat |` so a read failure trips `set -e`)
+"$MRT_TOOLS/extract-costs.sh" < "$PREFIX.log" > "$PREFIX.out"
+"$MRT_TOOLS/diff-nums.py" "$PREFIX.out" "$PREFIX.expected" -o "$PREFIX.diff"
+
+# Make sure that each checked tensor of the resulting model has no more than 256 different values (i.e. quantized)
+"$MRT_TOOLS/check-model-unique-vals.py" "$PREFIX/model.npz" -b 8
+
+# Exit with success code
+exit 0
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_bias.sh b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
new file mode 100644
index 0000000..1878ecb
--- /dev/null
+++ b/tests/training/features/quantized-model/test_quantmodel_with_bias.sh
@@ -0,0 +1,44 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a quantized marian model with quantized biases
+# AUTHOR: afaji
+#####################################################################
+
+# Exit on error
+set -e
+
+PREFIX=quantized-with-bias
+
+# Remove old artifacts and create working directory
+rm -rf "$PREFIX" "$PREFIX".{log,out,diff}
+mkdir -p "$PREFIX"
+
+
+# Training with quantized biases is tricky, so we first train a normal model before fine-tuning it in the quantized space.
+"$MRT_MARIAN/marian" \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    -m "$PREFIX/model.npz" -t "$MRT_DATA"/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    --cost-type cross-entropy --sync-sgd --after-batches 20 --disp-freq 10 \
+    --log "$PREFIX.log"
+
+# Train an 8-bit model with quantized biases, using the same model path and log file as the run above
+"$MRT_MARIAN/marian" \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    -m "$PREFIX/model.npz" -t "$MRT_DATA"/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-biases \
+    --log "$PREFIX.log"
+
+# Check if files exist
+test -e "$PREFIX/model.npz"
+test -e "$PREFIX.log"
+
+# Compare the current output with the expected output (redirect instead of `cat |` so a read failure trips `set -e`)
+"$MRT_TOOLS/extract-costs.sh" < "$PREFIX.log" > "$PREFIX.out"
+"$MRT_TOOLS/diff-nums.py" "$PREFIX.out" "$PREFIX.expected" -o "$PREFIX.diff"
+
+# Make sure that each tensor of the resulting model, biases included, has no more than 256 different values (i.e. quantized)
+"$MRT_TOOLS/check-model-unique-vals.py" "$PREFIX/model.npz" -b 8 --with_bias
+
+# Exit with success code
+exit 0
diff --git a/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
new file mode 100644
index 0000000..368d920
--- /dev/null
+++ b/tests/training/features/quantized-model/test_quantmodel_with_optimization.sh
@@ -0,0 +1,37 @@
+#!/bin/bash -x
+
+#####################################################################
+# SUMMARY: Train a quantized marian model with a scale optimization
+# AUTHOR: afaji
+#####################################################################
+
+# Exit on error
+set -e
+
+# NOTE(review): this prefix makes the script compare against quantized-opt.expected,
+# but the change that adds this test only adds quantized-no-opt.expected -- confirm
+# which name is intended and rename the expected file or the prefix accordingly.
+PREFIX=quantized-opt
+
+# Remove old artifacts and create working directory
+rm -rf "$PREFIX" "$PREFIX".{log,out,diff}
+mkdir -p "$PREFIX"
+
+
+# Train an 8-bit model with 3 quantization optimization steps
+"$MRT_MARIAN/marian" \
+    --no-shuffle --seed 1111 --dim-emb 32 --dim-rnn 64 --mini-batch 32 --maxi-batch 1 --maxi-batch-sort none --learn-rate 0.1 --optimizer sgd \
+    -m "$PREFIX/model.npz" -t "$MRT_DATA"/europarl.de-en/corpus.bpe.{en,de} -v train/vocab.en.yml train/vocab.de.yml \
+    --cost-type cross-entropy --sync-sgd --after-batches 100 --disp-freq 10 --quantize-bits 8 --quantize-optimization-steps 3 \
+    --log "$PREFIX.log"
+
+# Check if files exist
+test -e "$PREFIX/model.npz"
+test -e "$PREFIX.log"
+
+# Compare the current output with the expected output (redirect instead of `cat |` so a read failure trips `set -e`)
+"$MRT_TOOLS/extract-costs.sh" < "$PREFIX.log" > "$PREFIX.out"
+"$MRT_TOOLS/diff-nums.py" "$PREFIX.out" "$PREFIX.expected" -o "$PREFIX.diff"
+
+# Make sure that each checked tensor of the resulting model has no more than 256 different values (i.e. quantized)
+"$MRT_TOOLS/check-model-unique-vals.py" "$PREFIX/model.npz" -b 8
+
+# Exit with success code
+exit 0
diff --git a/tools/check-model-unique-vals.py b/tools/check-model-unique-vals.py
new file mode 100755
index 0000000..97c1e91
--- /dev/null
+++ b/tools/check-model-unique-vals.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import argparse
+import re
+
+import numpy as np
+
+def main():
+ exit_code = 0
+ args = parse_user_args()
+
+ with np.load(args.file) as data:
+ for key in data:
+ # skip special:model.yml
+ if "special" in key:
+ continue
+
+ # if one of the dimension is 1, then it is a bias
+ # skip if it is bias and bias is not included
+ smallest_dim = sorted(data[key].shape)[0]
+ if(smallest_dim == 1 and not args.with_bias):
+ continue
+
+ if (np.unique(data[key]).size > 2**args.bits):
+ message("Tensor {} has more than {} unique values".format( \
+ key, \
+ 2**args.bits), args)
+ exit_code = 1
+
+ return exit_code
+
+
+def message(text, args):
+ if not text.endswith("\n"):
+ text += "\n"
+ args.output.write(text)
+ if not args.quiet \
+ and args.output is not sys.stdout \
+ and args.output is not sys.stderr:
+ sys.stderr.write(text)
+
+
+def parse_user_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("file", type=str)
+ parser.add_argument("-o", "--output", type=argparse.FileType('w'), metavar="FILE", default=sys.stdout)
+ parser.add_argument("-b", "--bits", type=int)
+ parser.add_argument("--with_bias", action="store_true")
+ parser.add_argument("-q", "--quiet", action="store_true")
+ return parser.parse_args()
+
+if __name__ == '__main__':
+ code = main()
+ exit(code)