diff options
author | Guillaume Klein <guillaume.klein@systrangroup.com> | 2018-06-13 18:33:21 +0300 |
---|---|---|
committer | Guillaume Klein <guillaume.klein@systrangroup.com> | 2018-06-13 18:33:21 +0300 |
commit | 2c1f674f7adeaa7d3b623b1a12f44971213f16c6 (patch) | |
tree | 2eaa2e8015a872c2616c532a6c8efa2afdbcacc7 | |
parent | b4430a5d1f175bc69c7e9fe02f7e06c09fd46074 (diff) |
parametrize conversion script [benchmark-6]
-rw-r--r-- | inspect_model.py | 14 | ||||
-rw-r--r-- | src/transformer.cc | 9 | ||||
-rw-r--r-- | tools/convert_saved_model.py (renamed from convert.py) | 22 |
3 files changed, 23 insertions, 22 deletions
diff --git a/inspect_model.py b/inspect_model.py deleted file mode 100644 index 602ec79c..00000000 --- a/inspect_model.py +++ /dev/null @@ -1,14 +0,0 @@ -import struct - -import tensorflow as tf - -export_dir = "/home/klein/dev/OpenNMT-tf/models/averaged-ende-export500k/export/manual/1519808686" - -with tf.Session() as sess: - meta_graph = tf.saved_model.loader.load(sess, ["serve"], export_dir) - variables = tf.trainable_variables() - variables_value = sess.run(variables) - - for tensor, value in zip(variables, variables_value): - if tensor.name.startswith("transformer/encoder/w_embs"): - print(value[1][0:2]) diff --git a/src/transformer.cc b/src/transformer.cc index 0d448efd..3cb13846 100644 --- a/src/transformer.cc +++ b/src/transformer.cc @@ -92,7 +92,14 @@ namespace opennmt { void ScaledEmbeddings::operator()(const StorageView& ids, StorageView& output) { - _gather_op(_embeddings, ids, output); + if (_embeddings.dtype() == DataType::DT_INT16) { + static const ops::Unquantize unquantize_op(1000); + static thread_local StorageView gathered(_embeddings.dtype()); + _gather_op(_embeddings, ids, gathered); + unquantize_op(gathered, output); + } else { + _gather_op(_embeddings, ids, output); + } const size_t embedding_size = _embeddings.dim(-1); primitives::mul(static_cast<float>(sqrt(embedding_size)), output.data<float>(), diff --git a/convert.py b/tools/convert_saved_model.py index 5618a3d4..23317f4b 100644 --- a/convert.py +++ b/tools/convert_saved_model.py @@ -1,30 +1,38 @@ +import argparse import struct import tensorflow as tf import numpy as np -export_dir = "/home/klein/dev/OpenNMT-tf/models/averaged-ende-export500k/export/manual/1519808686" -quantize = True +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("--export_dir", required=True, + help="Saved model directory.") +parser.add_argument("--output_model", required=True, + help="Output model file.") +parser.add_argument("--quantization", 
default="none", choices=["none", "int16"], + help="Weight quantization type.") +args = parser.parse_args() with tf.Session() as sess: - meta_graph = tf.saved_model.loader.load(sess, ["serve"], export_dir) + meta_graph = tf.saved_model.loader.load(sess, ["serve"], args.export_dir) variables = tf.trainable_variables() variables_value = sess.run(variables) - with open("model.bin", "wb") as model: + with open(args.output_model, "wb") as model: model.write(struct.pack("I", len(variables))) for tensor, value in zip(variables, variables_value): if "kernel" in tensor.name: value = np.transpose(np.squeeze(value)) + if "kernel" in tensor.name or "w_embs" in tensor.name: + if args.quantization == "int16": + value *= 1000 + value = value.astype(np.int16) model.write(struct.pack("H", len(tensor.name) + 1)) model.write(tf.compat.as_bytes(tensor.name)) model.write(struct.pack('B', 0)) model.write(struct.pack("B", len(value.shape))) for dim in value.shape: model.write(struct.pack("I", dim)) - if quantize and "kernel" in tensor.name: - value *= 1000 - value = value.astype(np.int16) model.write(struct.pack("B", value.dtype.itemsize)) model.write(struct.pack("I", value.size)) model.write(value.tobytes()) |