diff options
author | Jan Buethe <jbuethe@amazon.de> | 2023-07-22 23:00:21 +0300 |
---|---|---|
committer | Jan Buethe <jbuethe@amazon.de> | 2023-07-22 23:01:06 +0300 |
commit | 0e5c103d1aad1dfee3fe11ac090f59a9d64a8f7b (patch) | |
tree | c89805e4bca13961a77b48f287581e9e73a69c07 | |
parent | 8f7c72a6624259037b948c38fc6c890f0f605612 (diff) |
added weight-exchange library
-rw-r--r-- | dnn/torch/weight-exchange/README.md | 21 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/requirements.txt | 1 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/setup.py | 19 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/__init__.py | 1 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/c_export/__init__.py | 2 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/c_export/c_writer.py | 143 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/c_export/common.py | 315 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/tf/__init__.py | 5 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/tf/tf.py | 169 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/torch/__init__.py | 5 | ||||
-rw-r--r-- | dnn/torch/weight-exchange/wexchange/torch/torch.py | 146 |
11 files changed, 827 insertions, 0 deletions
diff --git a/dnn/torch/weight-exchange/README.md b/dnn/torch/weight-exchange/README.md new file mode 100644 index 00000000..f4818b5b --- /dev/null +++ b/dnn/torch/weight-exchange/README.md @@ -0,0 +1,21 @@ +# weight-exchange + + + +## Weight Exchange +Repo for exchanging weights between torch and tensorflow.keras modules, using an intermediate numpy format. + +Routines for loading/dumping torch weights are located in wexchange/torch and can be loaded with +``` +import wexchange.torch +``` +and routines for loading/dumping tensorflow weights are located in wexchange/tf and can be loaded with +``` +import wexchange.tf +``` + +Note that `wexchange.torch` requires torch to be installed and `wexchange.tf` requires tensorflow. To avoid the necessity of installing both torch and tensorflow in the working environment, neither of these submodules is imported when calling `import wexchange`. Similarly, the requirements listed in `requirements.txt` include neither TensorFlow nor PyTorch. + + +## C export +The module `wexchange.c_export` contains routines to export weights to C files. In the long run it will be possible to call all `dump_...` functions with either a path string or a `CWriter` instance based on which the export format is chosen. This is currently only implemented for `torch.nn.GRU`, `torch.nn.Linear` and `torch.nn.Conv1d`.
\ No newline at end of file diff --git a/dnn/torch/weight-exchange/requirements.txt b/dnn/torch/weight-exchange/requirements.txt new file mode 100644 index 00000000..296d6545 --- /dev/null +++ b/dnn/torch/weight-exchange/requirements.txt @@ -0,0 +1 @@ +numpy
\ No newline at end of file diff --git a/dnn/torch/weight-exchange/setup.py b/dnn/torch/weight-exchange/setup.py new file mode 100644 index 00000000..bf298627 --- /dev/null +++ b/dnn/torch/weight-exchange/setup.py @@ -0,0 +1,19 @@ +#!/usr/bin/env/python +import os +from setuptools import setup + +lib_folder = os.path.dirname(os.path.realpath(__file__)) + +with open(os.path.join(lib_folder, 'requirements.txt'), 'r') as f: + install_requires = list(f.read().splitlines()) + +print(install_requires) + +setup(name='wexchange', + version='1.4', + author='Jan Buethe', + author_email='jbuethe@amazon.de', + description='Weight-exchange library between Pytorch and Tensorflow', + packages=['wexchange', 'wexchange.tf', 'wexchange.torch', 'wexchange.c_export'], + install_requires=install_requires + ) diff --git a/dnn/torch/weight-exchange/wexchange/__init__.py b/dnn/torch/weight-exchange/wexchange/__init__.py new file mode 100644 index 00000000..330d17e6 --- /dev/null +++ b/dnn/torch/weight-exchange/wexchange/__init__.py @@ -0,0 +1 @@ +from . import c_export
\ No newline at end of file diff --git a/dnn/torch/weight-exchange/wexchange/c_export/__init__.py b/dnn/torch/weight-exchange/wexchange/c_export/__init__.py new file mode 100644 index 00000000..6fb5ea79 --- /dev/null +++ b/dnn/torch/weight-exchange/wexchange/c_export/__init__.py @@ -0,0 +1,2 @@ +from .c_writer import CWriter +from .common import print_gru_layer, print_dense_layer, print_conv1d_layer, print_vector
import os
from collections import OrderedDict


class CWriter:
    """Writer that creates the .h (and optionally .c) file for a weight export to C.

    The ``print_*`` export routines write array and layer definitions to
    ``self.source`` / ``self.header`` and register layers and weight arrays in
    ``self.layer_dict`` / ``self.weight_arrays``. ``close()`` then appends the
    model struct, weight-array table, init function and header-guard footer.

    The writer can be used as a context manager; ``close()`` is idempotent.
    """

    # Class-level default: an instance whose __init__ failed before the files
    # were opened must be a harmless no-op for close() / __del__.
    _closed = True

    def __init__(self,
                 filename_without_extension,
                 message=None,
                 header_only=False,
                 enable_binary_blob=False,
                 create_state_struct=False,
                 model_struct_name="Model",
                 nnet_header="nnet.h"):
        """
        Writer class for creating source and header files for weight exports to C

        Parameters:
        -----------

        filename_without_extension: str
            filename from which .c and .h files are created

        message: str, optional
            if given and not None, this message will be printed as comment in the header file

        header_only: bool, optional
            if True, only a header file is created; defaults to False

        enable_binary_blob: bool, optional
            if True, export is done in binary blob format and a model type is created; defaults to False

        create_state_struct: bool, optional
            if True, a state struct type is created in the header file; if False, state sizes are defined as macros; defaults to False

        model_struct_name: str, optional
            name used for the model struct type; only relevant when enable_binary_blob is True; defaults to "Model"

        nnet_header: str, optional
            name of the nnet header file; defaults to nnet.h

        """

        self.header_only = header_only
        self.enable_binary_blob = enable_binary_blob
        self.create_state_struct = create_state_struct
        self.model_struct_name = model_struct_name

        # for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)
        self.layer_dict = OrderedDict()

        # for binary blob format: names of all exported weight arrays
        self.weight_arrays = set()

        # for model struct, format is key=<layer name>, value=<number of elements>
        self.state_dict = OrderedDict()

        base_name = os.path.basename(filename_without_extension)
        header_name = base_name + '.h'
        self.header_guard = base_name.upper() + "_H"

        self.header = open(filename_without_extension + ".h", "w")

        if message is not None:
            self.header.write(f"/* {message} */\n\n")

        self.header.write(
            f"\n#ifndef {self.header_guard}\n"
            f"#define {self.header_guard}\n"
            f"\n"
            f'#include "{nnet_header}"\n'
            f"\n"
        )

        if not self.header_only:
            self.source = open(filename_without_extension + ".c", "w")
            if message is not None:
                self.source.write(f"/* {message} */\n\n")

            self.source.write(
                '\n#ifdef HAVE_CONFIG_H\n'
                '#include "config.h"\n'
                '#endif\n'
                '\n'
            )
            self.source.write(f'#include "{header_name}"\n\n')

        # files are open: from now on close() has work to do
        self._closed = False

    def _init_prototype(self):
        """Return the C prototype of the model init function (no trailing ';')."""
        struct_name = self.model_struct_name
        return f"int init_{struct_name.lower()}({struct_name} *model, const WeightArray *arrays)"

    def _finalize_header(self):
        """Write the model struct/init prototype (binary blob only) and close the header guard."""

        # create model type
        if self.enable_binary_blob:
            self.header.write(f"\nstruct {self.model_struct_name} {{")
            for name, data in self.layer_dict.items():
                layer_type = data[0]
                self.header.write(f"\n    {layer_type} {name};")
            self.header.write("\n};\n")

            self.header.write(f"\n{self._init_prototype()};\n")

        self.header.write(f"\n#endif /* {self.header_guard} */\n")

    def _finalize_source(self):
        """Write the weight-array table and the init function (binary blob only)."""

        if not self.enable_binary_blob:
            return

        # create weight array
        self.source.write("\n#ifndef USE_WEIGHTS_FILE\n")
        self.source.write(f"const WeightArray {self.model_struct_name.lower()}_arrays[] = {{\n")
        # weight_arrays is a set of str: iterate in sorted order so that the
        # generated file does not change between runs (hash randomization)
        for name in sorted(self.weight_arrays):
            self.source.write(f"#ifdef WEIGHTS_{name}_DEFINED\n")
            self.source.write(f'    {{"{name}", WEIGHTS_{name}_TYPE, sizeof({name}), {name}}},\n')
            self.source.write("#endif\n")
        self.source.write("    {NULL, 0, 0, NULL}\n")
        self.source.write("};\n")

        self.source.write("#endif /* USE_WEIGHTS_FILE */\n")

        # create init function definition
        self.source.write("\n#ifndef DUMP_BINARY_WEIGHTS\n")
        self.source.write(f"{self._init_prototype()} {{\n")
        for data in self.layer_dict.values():
            self.source.write(f"    if ({data[1]}) return 1;\n")
        self.source.write("    return 0;\n")
        self.source.write("}\n")
        self.source.write("#endif /* DUMP_BINARY_WEIGHTS */\n")

    def close(self):
        """Finalize and close the generated files; further calls are no-ops."""

        if self._closed:
            return
        # set first so a failure while finalizing cannot trigger a second attempt
        self._closed = True

        try:
            if not self.header_only:
                try:
                    self._finalize_source()
                finally:
                    self.source.close()
            self._finalize_header()
        finally:
            self.header.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # best-effort finalization for writers that were never closed explicitly;
        # a finalizer must not propagate errors
        try:
            self.close()
        except Exception:
            pass
'''Copyright (c) 2017-2018 Mozilla
   Copyright (c) 2022 Amazon

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    # CWriter is only needed for annotations
    from .c_writer import CWriter

_ACTIVATIONS = frozenset({"TANH", "SIGMOID", "LINEAR", "SWISH", "RELU", "SOFTMAX"})

# indentation used for the array initializers in the generated C code
_ARRAY_INDENT = "   "


def print_vector(writer, vector, name, dtype='float', dotp=False, static=True):
    """Write `vector` as a flattened C array definition to `writer.source`.

    Parameters:
    -----------
    writer: object providing `source`, `enable_binary_blob` and `weight_arrays`
    vector: np.ndarray, array to export (must be 2-d if dotp is True)
    name: str, name of the C array
    dtype: str, C element type used in the definition
    dotp: bool, if True the matrix is rearranged into blocks of 4 x 8 entries
        before flattening; both dimensions are assumed to be multiples of
        4 and 8, respectively
    static: bool, if True the array is declared static

    Returns:
    --------
    the (possibly rearranged) array that was written
    """

    f = writer.source
    binary_blob = writer.enable_binary_blob

    if binary_blob:
        f.write(
            "\n#ifndef USE_WEIGHTS_FILE\n"
            f"#define WEIGHTS_{name}_DEFINED\n"
            f"#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{'qweight' if dotp else 'float'}\n"
        )
        writer.weight_arrays.add(name)

    if dotp:
        vector = vector.reshape((vector.shape[0] // 4, 4, vector.shape[1] // 8, 8))
        vector = vector.transpose((2, 0, 3, 1))

    v = np.reshape(vector, (-1))

    if static:
        f.write('static ')

    f.write(f'const {dtype} {name}[{len(v)}] = {{\n{_ARRAY_INDENT}')

    # eight values per row, comma separated, no trailing comma after the last value
    rows = [", ".join(f'{x}' for x in v[i:i + 8]) for i in range(0, len(v), 8)]
    f.write(f",\n{_ARRAY_INDENT}".join(rows))

    f.write('\n};\n\n')

    if binary_blob:
        f.write("\n#endif /* USE_WEIGHTS_FILE */\n")

    return vector


def print_sparse_vector(writer, A, name, have_diag=True):
    """Write matrix `A` in block-sparse form (blocks of 4 x 8 entries).

    For every group of 8 columns, the index array receives the number of
    non-zero blocks followed by the starting row of each of them. The block
    values are written twice, quantized (DOT_PROD) and unquantized.

    NOTE: if have_diag is True, the diagonals of the three N x N sub-matrices
    are exported separately as `<name>_diag` and removed from `A` IN PLACE.

    Returns:
    --------
    the quantized matrix (A * 128, rounded and clipped to [-128, 127])
    """

    f = writer.source
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))

    if have_diag:
        diag = np.concatenate([np.diag(A[:, :N]), np.diag(A[:, N:2*N]), np.diag(A[:, 2*N:])])
        A[:, :N] = A[:, :N] - np.diag(np.diag(A[:, :N]))
        A[:, N:2*N] = A[:, N:2*N] - np.diag(np.diag(A[:, N:2*N]))
        A[:, 2*N:] = A[:, 2*N:] - np.diag(np.diag(A[:, 2*N:]))
        print_vector(writer, diag, name + '_diag')

    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')

    idx = np.zeros((0,), dtype='int')
    for i in range(M // 8):
        # placeholder for the number of non-zero blocks in this column group
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N // 4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1, 0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero

    f.write('#ifdef DOT_PROD\n')
    print_vector(writer, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    print_vector(writer, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')

    print_vector(writer, idx, name + '_idx', dtype='int')
    return AQ


def _check_activation(activation):
    """Raise ValueError if `activation` is not a supported activation name."""
    if activation not in _ACTIVATIONS:
        raise ValueError(f"error: unknown activation {activation}")


def _write_layer_struct(writer, layer_type, name, fields):
    """Write a `const <layer_type> <name> = {...};` definition to the source file."""
    members = ",\n".join(f"    {field}" for field in fields)
    writer.source.write(f"\n\nconst {layer_type} {name} = {{\n{members}\n}};\n\n")


def _swap_leading_gates(x, n):
    """Return a copy of `x` with the first two blocks of `n` rows exchanged."""
    return np.concatenate((x[n:2 * n], x[:n], x[2 * n:]), axis=0)


def print_dense_layer(writer: "CWriter",
                      name: str,
                      weight: np.ndarray,
                      bias: np.ndarray,
                      activation: str,
                      format: str = 'torch'):
    """Export a dense layer.

    `weight` is transposed for format 'torch', i.e. torch weights are given as
    (outputs, inputs) and any other format as (inputs, outputs).
    """

    _check_activation(activation)

    if format == 'torch':
        weight = weight.transpose()

    print_vector(writer, weight, name + "_weights")
    print_vector(writer, bias, name + "_bias")

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")

    if writer.enable_binary_blob:
        init_call = (f'dense_init(&model->{name}, arrays, "{name}_bias", "{name}_weights", '
                     f'{weight.shape[0]}, {weight.shape[1]}, ACTIVATION_{activation})')
        writer.layer_dict[name] = ('DenseLayer', init_call)
    else:
        _write_layer_struct(writer, "DenseLayer", name, (
            f"{name}_bias",
            f"{name}_weights",
            weight.shape[0],
            weight.shape[1],
            f"ACTIVATION_{activation}",
        ))

        writer.header.write(f"\nextern const DenseLayer {name};\n\n")


def print_conv1d_layer(writer: "CWriter",
                       name: str,
                       weight: np.ndarray,
                       bias: np.ndarray,
                       activation: str,
                       format: str = 'torch'):
    """Export a 1-d convolution layer.

    For format 'torch' the weight axes are reversed so that the kernel axis
    comes first and the output-channel axis last; any other format must
    already be given in that order.

    Returns:
    --------
    kernel size times number of input channels
    """

    _check_activation(activation)

    if format == "torch":
        # convert to channels last
        weight = np.transpose(weight, (2, 1, 0))

    kernel_size, in_channels, out_channels = weight.shape[0], weight.shape[1], weight.shape[2]

    print_vector(writer, weight, name + "_weights")
    print_vector(writer, bias, name + "_bias")

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {out_channels}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE ({in_channels} * ({kernel_size - 1}))\n")
    # CAVE: delay is not a property of the conv layer
    writer.header.write(f"\n#define {name.upper()}_DELAY {(kernel_size - 1) // 2}\n")

    if writer.enable_binary_blob:
        init_call = (f'conv1d_init(&model->{name}, arrays, "{name}_bias", "{name}_weights", '
                     f'{in_channels}, {kernel_size}, {out_channels}, ACTIVATION_{activation})')
        writer.layer_dict[name] = ('Conv1DLayer', init_call)
    else:
        _write_layer_struct(writer, "Conv1DLayer", name, (
            f"{name}_bias",
            f"{name}_weights",
            in_channels,
            kernel_size,
            out_channels,
            f"ACTIVATION_{activation}",
        ))

        writer.header.write(f"\nextern const Conv1DLayer {name};\n\n")

    return kernel_size * in_channels


def print_gru_layer(writer: "CWriter",
                    name: str,
                    weight: np.ndarray,
                    recurrent_weight: np.ndarray,
                    bias: np.ndarray,
                    recurrent_bias: np.ndarray,
                    activation: str,
                    format: str = 'torch',
                    dotp: bool = False,
                    input_sparse: bool = False,
                    reset_after: int = 0
                    ):
    """Export a GRU layer.

    For format 'torch' the arrays are expected with the three gates stacked
    along the first axis in order r, z, n; the gate order is changed to
    z, r, n and the weight matrices are transposed. For any other format the
    arrays are used as given, i.e. weight matrices are expected with the
    stacked gates along the second axis.

    The input arrays are never modified.

    Returns:
    --------
    number of units of the layer
    """

    _check_activation(activation)

    if format == "torch":
        # transpose weight matrices and change gate order from rzn to zrn;
        # done on copies since the arrays passed in may share memory with the
        # parameters of a live model
        num_units = weight.shape[0] // 3
        weight, recurrent_weight, bias, recurrent_bias = (
            _swap_leading_gates(x, num_units)
            for x in (weight, recurrent_weight, bias, recurrent_bias)
        )

        weight = weight.transpose()
        recurrent_weight = recurrent_weight.transpose()

    # at this point gates are stacked along axis 1 for every format
    N = weight.shape[1] // 3

    # input weights
    if input_sparse:
        # print_sparse_vector already emits both variants of <name>_weights
        qweight = print_sparse_vector(writer, weight, name + '_weights', have_diag=False)
    else:
        qweight = np.clip(np.round(128. * weight).astype('int'), -128, 127)

        if dotp:
            writer.source.write("#ifdef DOT_PROD\n")
            print_vector(writer, qweight, name + '_weights', dtype='qweight', dotp=True)
            writer.source.write("#else /*DOT_PROD*/\n")

        print_vector(writer, weight, name + '_weights')

        if dotp:
            writer.source.write("#endif /*DOT_PROD*/\n")

    # recurrent weights
    recurrent_qweight = np.clip(np.round(128. * recurrent_weight).astype('int'), -128, 127)

    if dotp:
        writer.source.write("#ifdef DOT_PROD\n")
        print_vector(writer, recurrent_qweight, name + '_recurrent_weights', dtype='qweight', dotp=True)
        writer.source.write("#else /*DOT_PROD*/\n")

    print_vector(writer, recurrent_weight, name + '_recurrent_weights')

    if dotp:
        writer.source.write("#endif /*DOT_PROD*/\n")

    # corrected bias for unsigned int matrix multiplication
    subias = bias - np.sum(qweight / 128., axis=0)
    recurrent_subias = recurrent_bias - np.sum(recurrent_qweight / 128., axis=0)

    print_vector(writer, np.concatenate((bias, recurrent_bias)), name + "_bias")
    print_vector(writer, np.concatenate((subias, recurrent_subias)), name + "_subias")

    # wrapping it up
    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")

    if writer.enable_binary_blob:
        idx_arg = f'"{name}_weights_idx"' if input_sparse else 'NULL'
        init_call = (f'gru_init(&model->{name}, arrays, "{name}_bias", "{name}_subias", '
                     f'"{name}_weights", {idx_arg}, "{name}_recurrent_weights", '
                     f'{weight.shape[0]}, {weight.shape[1] // 3}, ACTIVATION_{activation}, {reset_after})')

        writer.layer_dict[name] = ('GRULayer', init_call)
    else:
        _write_layer_struct(writer, "GRULayer", name, (
            f"{name}_bias",
            f"{name}_subias",
            f"{name}_weights",
            f"{name}_weights_idx" if input_sparse else "NULL",
            f"{name}_recurrent_weights",
            weight.shape[0],
            weight.shape[1] // 3,
            f"ACTIVATION_{activation}",
            reset_after,
        ))

        writer.header.write(f"\nextern const GRULayer {name};\n")

    return N
00000000..02678048 --- /dev/null +++ b/dnn/torch/weight-exchange/wexchange/tf/__init__.py @@ -0,0 +1,5 @@ +from .tf import dump_tf_conv1d_weights, load_tf_conv1d_weights +from .tf import dump_tf_dense_weights, load_tf_dense_weights +from .tf import dump_tf_embedding_weights, load_tf_embedding_weights +from .tf import dump_tf_gru_weights, load_tf_gru_weights +from .tf import dump_tf_weights, load_tf_weights
import os

import tensorflow as tf
import numpy as np

from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer


def _swap_leading_gates(x, n):
    """Exchange the first two blocks of `n` rows of `x` in place (zrn <-> rzn)."""
    tmp = x[0:n].copy()
    x[0:n] = x[n:2*n]
    x[n:2*n] = tmp


def _activation_name(layer):
    """Return the upper-case name of the layer's activation, "LINEAR" if it has none."""
    try:
        return layer.activation.__name__.upper()
    except AttributeError:
        return "LINEAR"


def dump_tf_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
    """Dump the weights of a Keras GRU layer.

    If `where` is a CWriter, the layer is exported to C, otherwise `where` is
    used as a folder to which the weights are saved as .npy files with the
    gates stacked along the first axis in order r, z, n.
    """

    assert gru.activation == tf.keras.activations.tanh
    assert gru.recurrent_activation == tf.keras.activations.sigmoid
    assert gru.reset_after == True

    # gru.weights[2] holds input and recurrent bias as rows 0 and 1
    biases = gru.weights[2].numpy()
    b_ih = biases[0].copy()
    b_hh = biases[1].copy()

    if isinstance(where, CWriter):
        # With format='tf' print_gru_layer applies no transposition and reads
        # weight.shape[0] as the input size, so the kernels are passed as
        # stored by the layer (like the dense and conv1d exports do).
        kernel = gru.weights[0].numpy().copy()
        recurrent_kernel = gru.weights[1].numpy().copy()
        return print_gru_layer(where, name, kernel, recurrent_kernel, b_ih, b_hh, 'TANH',
                               format='tf', reset_after=1, input_sparse=input_sparse, dotp=dotp)

    os.makedirs(where, exist_ok=True)

    w_ih = gru.weights[0].numpy().transpose().copy()
    w_hh = gru.weights[1].numpy().transpose().copy()

    # zrn => rzn
    N = w_ih.shape[0] // 3
    for x in (w_ih, w_hh, b_ih, b_hh):
        _swap_leading_gates(x, N)

    np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
    np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
    np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
    np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)


def load_tf_gru_weights(path, gru):
    """Load GRU weights saved in rzn gate order from folder `path` into a Keras GRU layer."""

    assert gru.activation == tf.keras.activations.tanh
    assert gru.recurrent_activation == tf.keras.activations.sigmoid
    assert gru.reset_after == True

    w_ih = np.load(os.path.join(path, 'weight_ih_rzn.npy'))
    w_hh = np.load(os.path.join(path, 'weight_hh_rzn.npy'))
    b_ih = np.load(os.path.join(path, 'bias_ih_rzn.npy'))
    b_hh = np.load(os.path.join(path, 'bias_hh_rzn.npy'))

    # rzn => zrn
    N = w_ih.shape[0] // 3
    for x in (w_ih, w_hh, b_ih, b_hh):
        _swap_leading_gates(x, N)

    gru.weights[0].assign(tf.convert_to_tensor(w_ih.transpose()))
    gru.weights[1].assign(tf.convert_to_tensor(w_hh.transpose()))
    gru.weights[2].assign(tf.convert_to_tensor(np.vstack((b_ih, b_hh))))


def dump_tf_dense_weights(where, dense, name=None):
    """Dump a Keras Dense layer to a CWriter or to .npy files in folder `where`.

    A layer without bias is dumped with an all-zero bias. The saved weight
    matrix is the transposed kernel.
    """

    w = dense.weights[0].numpy()
    if dense.bias is None:
        b = np.zeros(dense.units, dtype=w.dtype)
    else:
        b = dense.bias.numpy()

    if isinstance(where, CWriter):
        return print_dense_layer(where, name, w, b, _activation_name(dense), format='tf')

    os.makedirs(where, exist_ok=True)

    np.save(os.path.join(where, 'weight.npy'), w.transpose())
    np.save(os.path.join(where, 'bias.npy'), b)


def load_tf_dense_weights(path, dense):
    """Load weight.npy / bias.npy from folder `path` into a Keras Dense layer."""

    w = np.load(os.path.join(path, 'weight.npy')).transpose()
    b = np.load(os.path.join(path, 'bias.npy'))

    dense.weights[0].assign(tf.convert_to_tensor(w))
    if dense.bias is not None:
        dense.weights[1].assign(tf.convert_to_tensor(b))


def dump_tf_conv1d_weights(where, conv, name=None):
    """Dump a channels-last Keras Conv1D layer to a CWriter or to .npy files in folder `where`.

    A layer without bias is dumped with an all-zero bias. The saved weight
    has its axes reversed relative to the Keras kernel.
    """

    assert conv.data_format == 'channels_last'

    w = conv.weights[0].numpy().copy()
    if conv.bias is None:
        b = np.zeros(conv.filters, dtype=w.dtype)
    else:
        b = conv.bias.numpy()

    if isinstance(where, CWriter):
        return print_conv1d_layer(where, name, w, b, _activation_name(conv), format='tf')

    os.makedirs(where, exist_ok=True)

    w = np.transpose(w, (2, 1, 0))
    np.save(os.path.join(where, 'weight_oik.npy'), w)
    np.save(os.path.join(where, 'bias.npy'), b)


def load_tf_conv1d_weights(path, conv):
    """Load weight_oik.npy / bias.npy from folder `path` into a Keras Conv1D layer."""

    w = np.load(os.path.join(path, 'weight_oik.npy'))
    b = np.load(os.path.join(path, 'bias.npy'))

    w = np.transpose(w, (2, 1, 0))

    conv.weights[0].assign(tf.convert_to_tensor(w))
    if conv.bias is not None:
        conv.weights[1].assign(tf.convert_to_tensor(b))


def dump_tf_embedding_weights(path, emb):
    """Save the weight of a Keras Embedding layer as weight.npy in folder `path`."""
    os.makedirs(path, exist_ok=True)

    w = emb.weights[0].numpy()
    np.save(os.path.join(path, 'weight.npy'), w)


def load_tf_embedding_weights(path, emb):
    """Load weight.npy from folder `path` into a Keras Embedding layer."""

    w = np.load(os.path.join(path, 'weight.npy'))
    emb.weights[0].assign(tf.convert_to_tensor(w))


def dump_tf_weights(path, module):
    """Generic function for dumping the weights of a supported Keras layer to folder `path`."""
    if isinstance(module, tf.keras.layers.Dense):
        dump_tf_dense_weights(path, module)
    elif isinstance(module, tf.keras.layers.GRU):
        dump_tf_gru_weights(path, module)
    elif isinstance(module, tf.keras.layers.Conv1D):
        dump_tf_conv1d_weights(path, module)
    elif isinstance(module, tf.keras.layers.Embedding):
        dump_tf_embedding_weights(path, module)
    else:
        raise ValueError(f'dump_tf_weights: layer of type {type(module)} not supported')


def load_tf_weights(path, module):
    """Generic function for loading the weights of a supported Keras layer from folder `path`."""
    if isinstance(module, tf.keras.layers.Dense):
        load_tf_dense_weights(path, module)
    elif isinstance(module, tf.keras.layers.GRU):
        load_tf_gru_weights(path, module)
    elif isinstance(module, tf.keras.layers.Conv1D):
        load_tf_conv1d_weights(path, module)
    elif isinstance(module, tf.keras.layers.Embedding):
        load_tf_embedding_weights(path, module)
    else:
        raise ValueError(f'load_tf_weights: layer of type {type(module)} not supported')
import os

import torch
import numpy as np

from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer


def _to_numpy_copy(tensor):
    """Return the tensor's data as a numpy array that shares no memory with the tensor."""
    # Tensor.numpy() of a CPU tensor aliases the tensor's storage; the copy
    # keeps export routines from ever touching the module's parameters
    return tensor.detach().cpu().numpy().copy()


def dump_torch_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
    """Dump the weights of a single-layer, unidirectional torch.nn.GRU.

    If `where` is a CWriter, the layer is exported to C, otherwise `where` is
    used as a folder to which the weights are saved as .npy files.
    """

    assert gru.num_layers == 1
    assert gru.bidirectional == False

    w_ih = _to_numpy_copy(gru.weight_ih_l0)
    w_hh = _to_numpy_copy(gru.weight_hh_l0)
    b_ih = _to_numpy_copy(gru.bias_ih_l0)
    b_hh = _to_numpy_copy(gru.bias_hh_l0)

    if isinstance(where, CWriter):
        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, 'TANH', format='torch',
                               reset_after=1, input_sparse=input_sparse, dotp=dotp)

    os.makedirs(where, exist_ok=True)

    np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
    np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
    np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
    np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)


def load_torch_gru_weights(where, gru):
    """Load GRU weights from folder `where` into a single-layer, unidirectional torch.nn.GRU."""

    assert gru.num_layers == 1
    assert gru.bidirectional == False

    w_ih = np.load(os.path.join(where, 'weight_ih_rzn.npy'))
    w_hh = np.load(os.path.join(where, 'weight_hh_rzn.npy'))
    b_ih = np.load(os.path.join(where, 'bias_ih_rzn.npy'))
    b_hh = np.load(os.path.join(where, 'bias_hh_rzn.npy'))

    with torch.no_grad():
        gru.weight_ih_l0.set_(torch.from_numpy(w_ih))
        gru.weight_hh_l0.set_(torch.from_numpy(w_hh))
        gru.bias_ih_l0.set_(torch.from_numpy(b_ih))
        gru.bias_hh_l0.set_(torch.from_numpy(b_hh))


def dump_torch_dense_weights(where, dense, name=None, activation="LINEAR"):
    """Dump a torch.nn.Linear layer to a CWriter or to .npy files in folder `where`.

    A layer without bias is dumped with an all-zero bias.
    """

    w = dense.weight.detach().cpu().numpy()
    if dense.bias is None:
        b = np.zeros(dense.out_features, dtype=w.dtype)
    else:
        b = dense.bias.detach().cpu().numpy()

    if isinstance(where, CWriter):
        return print_dense_layer(where, name, w, b, activation, format='torch')

    os.makedirs(where, exist_ok=True)

    np.save(os.path.join(where, 'weight.npy'), w)
    np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_dense_weights(where, dense):
    """Load weight.npy / bias.npy from folder `where` into a torch.nn.Linear layer."""

    w = np.load(os.path.join(where, 'weight.npy'))
    b = np.load(os.path.join(where, 'bias.npy'))

    with torch.no_grad():
        dense.weight.set_(torch.from_numpy(w))
        if dense.bias is not None:
            dense.bias.set_(torch.from_numpy(b))


def dump_torch_conv1d_weights(where, conv, name=None, activation="LINEAR"):
    """Dump a torch.nn.Conv1d layer to a CWriter or to .npy files in folder `where`.

    A layer without bias is dumped with an all-zero bias.
    """

    w = conv.weight.detach().cpu().numpy()
    if conv.bias is None:
        b = np.zeros(conv.out_channels, dtype=w.dtype)
    else:
        b = conv.bias.detach().cpu().numpy()

    if isinstance(where, CWriter):
        return print_conv1d_layer(where, name, w, b, activation, format='torch')

    os.makedirs(where, exist_ok=True)

    np.save(os.path.join(where, 'weight_oik.npy'), w)
    np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_conv1d_weights(where, conv):
    """Load weight_oik.npy (and bias.npy if the layer has a bias) from folder `where`."""

    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oik.npy'))
        conv.weight.set_(torch.from_numpy(w))
        # the bias file is only read for layers that actually have a bias
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))


def dump_torch_embedding_weights(where, emb):
    """Save the weight of a torch.nn.Embedding as weight.npy in folder `where`."""
    os.makedirs(where, exist_ok=True)

    w = emb.weight.detach().cpu().numpy()
    np.save(os.path.join(where, 'weight.npy'), w)


def load_torch_embedding_weights(where, emb):
    """Load weight.npy from folder `where` into a torch.nn.Embedding."""

    w = np.load(os.path.join(where, 'weight.npy'))

    with torch.no_grad():
        emb.weight.set_(torch.from_numpy(w))


def dump_torch_weights(where, module, name=None, activation="LINEAR", verbose=False, **kwargs):
    """ generic function for dumping weights of some torch.nn.Module

    `activation` is forwarded to the dense and conv1d exports; additional
    keyword arguments are forwarded to the dense, GRU and conv1d dump
    functions.
    """
    if verbose and name is not None:
        print(f"printing layer {name} of type {type(module)}...")
    if isinstance(module, torch.nn.Linear):
        return dump_torch_dense_weights(where, module, name, activation, **kwargs)
    elif isinstance(module, torch.nn.GRU):
        return dump_torch_gru_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.Conv1d):
        return dump_torch_conv1d_weights(where, module, name, activation, **kwargs)
    elif isinstance(module, torch.nn.Embedding):
        return dump_torch_embedding_weights(where, module)
    else:
        raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')


def load_torch_weights(where, module):
    """ generic function for loading weights of some torch.nn.Module """
    if isinstance(module, torch.nn.Linear):
        load_torch_dense_weights(where, module)
    elif isinstance(module, torch.nn.GRU):
        load_torch_gru_weights(where, module)
    elif isinstance(module, torch.nn.Conv1d):
        load_torch_conv1d_weights(where, module)
    elif isinstance(module, torch.nn.Embedding):
        load_torch_embedding_weights(where, module)
    else:
        raise ValueError(f'load_torch_weights: layer of type {type(module)} not supported')
\ No newline at end of file |