parametrize conversion script [benchmark-6]

author: Guillaume Klein <guillaume.klein@systrangroup.com> 2018-06-13 18:33:21 +0300
committer: Guillaume Klein <guillaume.klein@systrangroup.com> 2018-06-13 18:33:21 +0300
commit: 2c1f674f7adeaa7d3b623b1a12f44971213f16c6 (patch)
tree: 2eaa2e8015a872c2616c532a6c8efa2afdbcacc7
parent: b4430a5d1f175bc69c7e9fe02f7e06c09fd46074 (diff)
3 files changed, 23 insertions, 22 deletions
diff --git a/inspect_model.py b/inspect_model.py
deleted file mode 100644
index 602ec79c..00000000
--- a/inspect_model.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import struct
-
-import tensorflow as tf
-
-export_dir = "/home/klein/dev/OpenNMT-tf/models/averaged-ende-export500k/export/manual/1519808686"
-
-with tf.Session() as sess:
-    meta_graph = tf.saved_model.loader.load(sess, ["serve"], export_dir)
-    variables = tf.trainable_variables()
-    variables_value = sess.run(variables)
-
-    for tensor, value in zip(variables, variables_value):
-        if tensor.name.startswith("transformer/encoder/w_embs"):
-            print(value[1][0:2])
diff --git a/src/transformer.cc b/src/transformer.cc
index 0d448efd..3cb13846 100644
--- a/src/transformer.cc
+++ b/src/transformer.cc
@@ -92,7 +92,14 @@ namespace opennmt {
 
   void ScaledEmbeddings::operator()(const StorageView& ids,
                                     StorageView& output) {
-    _gather_op(_embeddings, ids, output);
+    if (_embeddings.dtype() == DataType::DT_INT16) {
+      static const ops::Unquantize unquantize_op(1000);
+      static thread_local StorageView gathered(_embeddings.dtype());
+      _gather_op(_embeddings, ids, gathered);
+      unquantize_op(gathered, output);
+    } else {
+      _gather_op(_embeddings, ids, output);
+    }
     const size_t embedding_size = _embeddings.dim(-1);
     primitives::mul(static_cast<float>(sqrt(embedding_size)),
                     output.data<float>(),
diff --git a/convert.py b/tools/convert_saved_model.py
index 5618a3d4..23317f4b 100644
--- a/convert.py
+++ b/tools/convert_saved_model.py
@@ -1,30 +1,38 @@
+import argparse
 import struct
 
 import tensorflow as tf
 import numpy as np
 
-export_dir = "/home/klein/dev/OpenNMT-tf/models/averaged-ende-export500k/export/manual/1519808686"
-quantize = True
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("--export_dir", required=True,
+                    help="Saved model directory.")
+parser.add_argument("--output_model", required=True,
+                    help="Output model file.")
+parser.add_argument("--quantization", default="none", choices=["none", "int16"],
+                    help="Weight quantization type.")
+args = parser.parse_args()
 
 with tf.Session() as sess:
-    meta_graph = tf.saved_model.loader.load(sess, ["serve"], export_dir)
+    meta_graph = tf.saved_model.loader.load(sess, ["serve"], args.export_dir)
     variables = tf.trainable_variables()
     variables_value = sess.run(variables)
 
-    with open("model.bin", "wb") as model:
+    with open(args.output_model, "wb") as model:
         model.write(struct.pack("I", len(variables)))
         for tensor, value in zip(variables, variables_value):
             if "kernel" in tensor.name:
                 value = np.transpose(np.squeeze(value))
+            if "kernel" in tensor.name or "w_embs" in tensor.name:
+                if args.quantization == "int16":
+                    value *= 1000
+                    value = value.astype(np.int16)
             model.write(struct.pack("H", len(tensor.name) + 1))
             model.write(tf.compat.as_bytes(tensor.name))
             model.write(struct.pack('B', 0))
             model.write(struct.pack("B", len(value.shape)))
             for dim in value.shape:
                 model.write(struct.pack("I", dim))
-            if quantize and "kernel" in tensor.name:
-                value *= 1000
-                value = value.astype(np.int16)
             model.write(struct.pack("B", value.dtype.itemsize))
             model.write(struct.pack("I", value.size))
             model.write(value.tobytes())
author	Guillaume Klein <guillaume.klein@systrangroup.com>	2018-06-13 18:33:21 +0300
committer	Guillaume Klein <guillaume.klein@systrangroup.com>	2018-06-13 18:33:21 +0300
commit	2c1f674f7adeaa7d3b623b1a12f44971213f16c6 (patch)
tree	2eaa2e8015a872c2616c532a6c8efa2afdbcacc7
parent	b4430a5d1f175bc69c7e9fe02f7e06c09fd46074 (diff)