| field     | value                                            | date                      |
|-----------|--------------------------------------------------|---------------------------|
| author    | Jan Buethe <jbuethe@amazon.de>                   | 2023-07-25 03:13:49 +0300 |
| committer | Jean-Marc Valin <jmvalin@amazon.com>             | 2023-07-28 02:55:17 +0300 |
| commit    | eb72d29a15a24cc1b68f66161293299cf06f0cc3 (patch) |                           |
| tree      | c4ff223ef2520297323bcab2b456c3a93f956d8e         |                           |
| parent    | b075eb535adef656060e19183bf52ddd2e134e29 (diff)  |                           |
Support for dumping LinearLayer in weight-exchange
| mode       | file                                                     | changes             |
|------------|----------------------------------------------------------|---------------------|
| -rw-r--r-- | dnn/torch/rdovae/export_rdovae_weights.py                | 43                  |
| -rw-r--r-- | dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl     | bin 7153 -> 0 bytes |
| -rw-r--r-- | dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl     | bin 7794 -> 0 bytes |
| -rw-r--r-- | dnn/torch/rdovae/requirements.txt                        | 3                   |
| -rw-r--r-- | dnn/torch/weight-exchange/setup.py                       | 2                   |
| -rw-r--r-- | dnn/torch/weight-exchange/wexchange/c_export/c_writer.py | 6                   |
| -rw-r--r-- | dnn/torch/weight-exchange/wexchange/c_export/common.py   | 323                 |
| -rw-r--r-- | dnn/torch/weight-exchange/wexchange/tf/tf.py             | 22                  |
| -rw-r--r-- | dnn/torch/weight-exchange/wexchange/torch/torch.py       | 16                  |
9 files changed, 214 insertions, 201 deletions
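Before the file-by-file diff, here is a minimal sketch of how the reworked export API is driven after this change, modeled on the calls in export_rdovae_weights.py below. The toy modules, sizes, and output path are hypothetical, the import paths assume the wexchange package layout used by the export script, and only keyword arguments that appear in this commit are used.

```python
import os
import sys
import torch

# make the in-tree wexchange package importable, as the export script now does
sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))

from wexchange.c_export import CWriter
from wexchange.torch import dump_torch_weights

# hypothetical stand-ins for the RDOVAE submodules
dense = torch.nn.Linear(64, 96)
gru = torch.nn.GRU(96, 96)

writer = CWriter("example_data",
                 message="example export",
                 model_struct_name='ExampleModel')

# dense layers are now dumped as (optionally quantized) LinearLayers;
# there is no activation argument any more
dump_torch_weights(writer, dense, name='example_dense', verbose=True)

# GRUs get sparse, quantized input weights, mirroring the encoder/decoder GRU export
max_rnn_units = dump_torch_weights(writer, gru, name='example_gru', verbose=True,
                                   input_sparse=True, quantize=True)

del writer  # the export script disposes of each writer the same way
```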
```diff
diff --git a/dnn/torch/rdovae/export_rdovae_weights.py b/dnn/torch/rdovae/export_rdovae_weights.py
index b6fbaa4b..f9c1db81 100644
--- a/dnn/torch/rdovae/export_rdovae_weights.py
+++ b/dnn/torch/rdovae/export_rdovae_weights.py
@@ -29,6 +29,9 @@
 import os
 import argparse
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))
 
 parser = argparse.ArgumentParser()
 
@@ -83,20 +86,30 @@ def c_export(args, model):
 
     message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
 
-    enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message)
-    dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message)
-    stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message)
-    constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True)
+    enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message, model_struct_name='RDOVAEEnc')
+    dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message, model_struct_name='RDOVAEDec')
+    stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message, enable_binary_blob=False)
+    constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True, enable_binary_blob=False)
 
     # some custom includes
-    for writer in [enc_writer, dec_writer, stats_writer]:
+    for writer in [enc_writer, dec_writer]:
         writer.header.write(
 f"""
 #include "opus_types.h"
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_constants.h"
+
+"""
+        )
+
+    stats_writer.header.write(
+f"""
+#include "opus_types.h"
+
 #include "dred_rdovae_constants.h"
-#include "nnet.h"
 
 """
         )
@@ -111,9 +124,9 @@ f"""
         ('core_encoder.module.state_dense_2' , 'gdense2' , 'TANH')
     ]
 
-    for name, export_name, activation in encoder_dense_layers:
+    for name, export_name, _ in encoder_dense_layers:
         layer = model.get_submodule(name)
-        dump_torch_weights(enc_writer, layer, name=export_name, activation=activation, verbose=True)
+        dump_torch_weights(enc_writer, layer, name=export_name, verbose=True)
 
 
     encoder_gru_layers = [
@@ -122,15 +135,15 @@ f"""
         ('core_encoder.module.gru_3' , 'enc_dense6', 'TANH')
     ]
 
-    enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
-                             for name, export_name, activation in encoder_gru_layers])
+    enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, verbose=True, input_sparse=True, quantize=True)
+                             for name, export_name, _ in encoder_gru_layers])
 
 
     encoder_conv_layers = [
         ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR')
     ]
 
-    enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers])
+    enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, verbose=True, quantize=False) for name, export_name, _ in encoder_conv_layers])
 
 
     del enc_writer
@@ -148,9 +161,9 @@ f"""
         ('core_decoder.module.output' , 'dec_final', 'LINEAR')
     ]
 
-    for name, export_name, activation in decoder_dense_layers:
+    for name, export_name, _ in decoder_dense_layers:
         layer = model.get_submodule(name)
-        dump_torch_weights(dec_writer, layer, name=export_name, activation=activation, verbose=True)
+        dump_torch_weights(dec_writer, layer, name=export_name, verbose=True)
 
 
     decoder_gru_layers = [
@@ -159,8 +172,8 @@ f"""
         ('core_decoder.module.gru_3' , 'dec_dense6', 'TANH')
     ]
 
-    dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
-                             for name, export_name, activation in decoder_gru_layers])
+    dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, verbose=True, input_sparse=True, quantize=True)
+                             for name, export_name, _ in decoder_gru_layers])
 
 
     del dec_writer
diff --git a/dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl b/dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl
deleted file mode 100644
index cfeebae5..00000000
--- a/dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl
+++ /dev/null
Binary files differ
diff --git a/dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl b/dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl
deleted file mode 100644
index ac34d7d9..00000000
--- a/dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl
+++ /dev/null
Binary files differ
diff --git a/dnn/torch/rdovae/requirements.txt b/dnn/torch/rdovae/requirements.txt
index 8afdcda3..9225ea84 100644
--- a/dnn/torch/rdovae/requirements.txt
+++ b/dnn/torch/rdovae/requirements.txt
@@ -1,5 +1,4 @@
 numpy
 scipy
 torch
-tqdm
-libs/wexchange-1.2-py3-none-any.whl
\ No newline at end of file
+tqdm
\ No newline at end of file
```
```diff
diff --git a/dnn/torch/weight-exchange/setup.py b/dnn/torch/weight-exchange/setup.py
index bf08db19..e590aad6 100644
--- a/dnn/torch/weight-exchange/setup.py
+++ b/dnn/torch/weight-exchange/setup.py
@@ -39,7 +39,7 @@ with open(os.path.join(lib_folder, 'requirements.txt'), 'r') as f:
 print(install_requires)
 
 setup(name='wexchange',
-      version='1.4',
+      version='1.5',
       author='Jan Buethe',
       author_email='jbuethe@amazon.de',
       description='Weight-exchange library between Pytorch and Tensorflow',
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
index 8601d7df..36050881 100644
--- a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
+++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
@@ -35,8 +35,8 @@ class CWriter:
                  filename_without_extension,
                  message=None,
                  header_only=False,
-                 enable_binary_blob=False,
                  create_state_struct=False,
+                 enable_binary_blob=True,
                  model_struct_name="Model",
                  nnet_header="nnet.h"):
         """
@@ -78,7 +78,7 @@ class CWriter:
         self.layer_dict = OrderedDict()
 
         # for binary blob format, format is key=<layer name>, value=<layer type>
-        self.weight_arrays = set()
+        self.weight_arrays = []
 
         # form model struct, format is key=<layer name>, value=<number of elements>
         self.state_dict = OrderedDict()
@@ -134,6 +134,8 @@ f"""
 
         if self.enable_binary_blob:
             # create weight array
+            if len(set(self.weight_arrays)) != len(self.weight_arrays):
+                raise ValueError("error: detected duplicates in weight arrays")
             self.source.write("\n#ifndef USE_WEIGHTS_FILE\n")
             self.source.write(f"const WeightArray {self.model_struct_name.lower()}_arrays[] = {{\n")
             for name in self.weight_arrays:
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/common.py b/dnn/torch/weight-exchange/wexchange/c_export/common.py
index ae2c39a1..d8b3f7e7 100644
--- a/dnn/torch/weight-exchange/wexchange/c_export/common.py
+++ b/dnn/torch/weight-exchange/wexchange/c_export/common.py
@@ -29,27 +29,49 @@ import numpy as np
 
 from .c_writer import CWriter
 
-def print_vector(writer, vector, name, dtype='float', dotp=False, static=True):
+def print_vector(writer, vector, name, dtype='float', reshape_8x4=False, static=True, debug_float=False):
+
+    if isinstance(writer, CWriter):
+        f = writer.source
+        binary_blob = writer.enable_binary_blob
+    else:
+        f = writer
+        binary_blob = False
+
+    dtype_suffix = {
+        'float' : 'float',
+        'opus_int8' : 'int8',
+        'opus_uint16' : 'uint16',
+        'opus_int16' : 'int16',
+        'int' : 'int',
+        'qweight': 'qweight'
+    }
 
-    f = writer.source
-    binary_blob = writer.enable_binary_blob
 
     if binary_blob:
         f.write(
 f'''
 #ifndef USE_WEIGHTS_FILE
-#define WEIGHTS_{name}_DEFINED
-#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{"qweight" if dotp else "float"}
 '''
         )
-        writer.weight_arrays.add(name)
+        writer.weight_arrays.append(name)
 
-    if dotp:
+    if reshape_8x4:
         vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
         vector = vector.transpose((2, 0, 3, 1))
 
     v = np.reshape(vector, (-1))
 
+    if debug_float: f.write('#ifndef DISABLE_DEBUG_FLOAT\n')
+    if binary_blob:
+        f.write(
+f'''
+#define WEIGHTS_{name}_DEFINED
+#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{dtype_suffix[dtype]}
+'''
+        )
+
     if static:
         f.write('static ')
@@ -70,6 +92,8 @@ f'''
             f.write(" ")
     f.write('\n};\n\n')
 
+    if debug_float: f.write('#endif /*DISABLE_DEBUG_FLOAT*/\n')
+
     if binary_blob:
         f.write(
 f'''
@@ -81,19 +105,48 @@ f'''
 
 
 
-def print_sparse_vector(writer, A, name, have_diag=True):
-    f = writer.source
+def extract_diagonal(A):
+    """ input shape is (N, k*N) """
+
+    N, M = A.shape
+    B = A.copy()
+    assert M % N == 0
+    k = M // N
+
+    diags = []
+    for l in range(k):
+        diag = np.diag(B[:, l * N : (l+1) * N]).copy()
+        B[:, l * N : (l+1) * N] -= np.diag(diag)
+        diags.append(diag)
+
+    diag = np.concatenate(diags)
+
+    return diag, B
+
+def quantize_weight(weight, scale):
+    Aq = np.round(weight / scale).astype('int')
+    if Aq.max() > 127 or Aq.min() <= -128:
+        raise ValueError("value out of bounds in quantize_weight")
+    Aq = np.clip(np.round(weight / scale).astype('int'), -128, 127)
+    return Aq
+
+
+def print_sparse_weight(writer, A, name, scale=1/128, have_diag=True, quantize=False):
     N = A.shape[0]
     M = A.shape[1]
     W = np.zeros((0,), dtype='int')
     W0 = np.zeros((0,))
+
     if have_diag:
-        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
-        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
-        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
-        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+        diag, A = extract_diagonal(A)
         print_vector(writer, diag, name + '_diag')
-    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
+
+    if quantize:
+        Aq = quantize_weight(A, scale)
+    else:
+        Aq = A
+
+    # extract blocks
     idx = np.zeros((0,), dtype='int')
     for i in range(M//8):
         pos = idx.shape[0]
@@ -101,7 +154,7 @@ def print_sparse_vector(writer, A, name, have_diag=True):
         nb_nonzero = 0
         for j in range(N//4):
             block = A[j*4:(j+1)*4, i*8:(i+1)*8]
-            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+            qblock = Aq[j*4:(j+1)*4, i*8:(i+1)*8]
             if np.sum(np.abs(block)) > 1e-10:
                 nb_nonzero = nb_nonzero + 1
                 idx = np.append(idx, j*4)
@@ -109,102 +162,125 @@ def print_sparse_vector(writer, A, name, have_diag=True):
                 W0 = np.concatenate([W0, block.reshape((-1,))])
                 W = np.concatenate([W, vblock])
         idx[pos] = nb_nonzero
-    f.write('#ifdef DOT_PROD\n')
-    print_vector(writer, W, name, dtype='qweight')
-    f.write('#else /*DOT_PROD*/\n')
-    print_vector(writer, W0, name, dtype='qweight')
-    f.write('#endif /*DOT_PROD*/\n')
-    print_vector(writer, idx, name + '_idx', dtype='int')
-    return AQ
+    if quantize: print_vector(writer, W, name + '_int8', reshape_8x4=False, dtype='opus_int8')
+    print_vector(writer, W0, name + '_float', reshape_8x4=False, dtype='float', debug_float=quantize)
+    print_vector(writer, idx, name + '_idx', reshape_8x4=False, dtype='int')
 
-def _check_activation(activation):
-    if not activation in {"TANH", "SIGMOID", "LINEAR", "SWISH", "RELU", "SOFTMAX"}:
-        raise ValueError(f"error: unknown activation {activation}")
+    return Aq
 
-def print_dense_layer(writer : CWriter,
-                      name : str,
-                      weight : np.ndarray,
-                      bias : np.ndarray,
-                      activation: str,
-                      format : str = 'torch'):
-
-    _check_activation(activation)
+def qn(string):
+    if string == "NULL": return string
+    else: return '"' + string + '"'
 
-    if format == 'torch':
-        weight = weight.transpose()
+def print_linear_layer(writer : CWriter,
+                       name : str,
+                       weight : np.ndarray,
+                       bias : np.ndarray,
+                       scale : np.ndarray = None,
+                       sparse : bool = False,
+                       diagonal : bool = False,
+                       quantize : bool = True):
 
-    print_vector(writer, weight, name + "_weights")
-    print_vector(writer, bias, name + "_bias")
+    """ prints linear layer
 
-    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")
+    Parameters:
+    -----------
+    name : str
+        layer name
+    weight: np.ndarray
+        ...
+    scale: np.ndarray or None
+        If None auto scaling will be applied. Otherwise, output channels will be multiplied by scale (the usual broadcasting rules apply).
 
-    if writer.enable_binary_blob:
-        init_call = f'dense_init(&model->{name}, arrays, "{name}_bias", "{name}_weights", {weight.shape[0]}, {weight.shape[1]}, ACTIVATION_{activation})'
-        writer.layer_dict[name] = ('DenseLayer', init_call)
+
+    """
+
+    if len(weight.shape) != 2:
+        raise ValueError('expecting 2-dim weight array in print_linear_layer')
+
+
+    bias_name = "NULL" if bias is None else name + "_bias"
+    subias_name = name + "_subias" if quantize else "NULL"
+    scale_name = name + "_scale" if quantize else "NULL"
+    idx_name = name + "_weights_idx" if sparse else "NULL"
+    float_weight_name = name + "_weights_float"
+    int_weight_name = name + "_weights_int8" if quantize else "NULL"
+    diag_name = name + "_weights_diag" if sparse and diagonal else "NULL"
+
+    nb_inputs, nb_outputs = weight.shape
+
+    if scale is None:
+        raise ValueError("None scale case not implemented yet.")
+
+
+
+    if sparse:
+        weight_q = print_sparse_weight(writer, weight, name + "_weights", scale=scale, have_diag=diagonal, quantize=quantize)
     else:
-        writer.source.write(
-f"""
-
-const DenseLayer {name} = {{
-    {name}_bias,
-    {name}_weights,
-    {weight.shape[0]},
-    {weight.shape[1]},
-    ACTIVATION_{activation}
-}};
-
-"""
-        )
+        if quantize:
+            weight_q = quantize_weight(weight, scale)
+            print_vector(writer, weight_q, name + "_weights_int8", dtype='opus_int8', reshape_8x4=True)
+
+        print_vector(writer, weight, name + "_weights_float", dtype='float', reshape_8x4=False, debug_float=quantize)
 
-    writer.header.write(f"\nextern const DenseLayer {name};\n\n")
+    if quantize:
+        subias = (np.zeros(nb_outputs) if bias is None else bias) - np.sum(weight_q * scale, axis=0)
+        print_vector(writer, subias, name + "_subias")
+
+        final_scale = scale / 127 * np.ones(nb_outputs)
+        print_vector(writer, final_scale, name + "_scale")
+
+    if bias is not None:
+        print_vector(writer, bias, name + "_bias")
+
+    init_call = f'linear_init(&model->{name}, arrays, {qn(bias_name)}, {qn(subias_name)}, {qn(int_weight_name)},' \
+        f'{qn(float_weight_name)}, {qn(idx_name)}, {qn(diag_name)}, {qn(scale_name)}, {nb_inputs}, {nb_outputs})'
+
+    writer.layer_dict[name] = ('LinearLayer', init_call)
+
+
+def print_dense_layer(writer : CWriter,
+                      name : str,
+                      weight : np.ndarray,
+                      bias : np.ndarray,
+                      scale=1/128,
+                      format : str = 'torch',
+                      sparse=False,
+                      diagonal=False,
+                      quantize=False):
+
+    if format == 'torch':
+        weight = weight.transpose()
+
+    print_linear_layer(writer, name, weight, bias, scale=scale, sparse=sparse, diagonal=diagonal, quantize=quantize)
+
+    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")
+
 
 def print_conv1d_layer(writer : CWriter,
                        name : str,
                        weight : np.ndarray,
                        bias : np.ndarray,
-                       activation: str,
-                       format : str = 'torch'):
+                       scale=1/128,
+                       format : str = 'torch',
+                       quantize=False):
 
-    _check_activation(activation)
 
     if format == "torch":
         # convert to channels last
         weight = np.transpose(weight, (2, 1, 0))
 
-    print_vector(writer, weight, name + "_weights")
-    print_vector(writer, bias, name + "_bias")
+    lin_weight = np.reshape(weight, (-1, weight.shape[-1]))
+    print_linear_layer(writer, name, lin_weight, bias, scale=scale, sparse=False, diagonal=False, quantize=quantize)
 
     writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[2]}\n")
+    writer.header.write(f"\n#define {name.upper()}_IN_SIZE {weight.shape[1]}\n")
     writer.header.write(f"\n#define {name.upper()}_STATE_SIZE ({weight.shape[1]} * ({weight.shape[0] - 1}))\n")
     writer.header.write(f"\n#define {name.upper()}_DELAY {(weight.shape[0] - 1) // 2}\n") # CAVE: delay is not a property of the conv layer
 
-    if writer.enable_binary_blob:
-        init_call = f'conv1d_init(&model->{name}, arrays, "{name}_bias", "{name}_weights", {weight.shape[1]}, {weight.shape[0]}, {weight.shape[2]}, ACTIVATION_{activation})'
-        writer.layer_dict[name] = ('Conv1DLayer', init_call)
-    else:
-
-        writer.source.write(
-f"""
-
-const Conv1DLayer {name} = {{
-    {name}_bias,
-    {name}_weights,
-    {weight.shape[1]},
-    {weight.shape[0]},
-    {weight.shape[2]},
-    ACTIVATION_{activation}
-}};
-
-"""
-        )
-
-        writer.header.write(f"\nextern const Conv1DLayer {name};\n\n")
-
     return weight.shape[0] * weight.shape[1]
 
 
@@ -214,17 +290,16 @@ def print_gru_layer(writer : CWriter,
                     recurrent_weight : np.ndarray,
                     bias : np.ndarray,
                     recurrent_bias : np.ndarray,
-                    activation: str,
                     format : str = 'torch',
-                    dotp : bool = False,
+                    quantize : bool = False,
                     input_sparse : bool = False,
-                    reset_after : int = 0
+                    recurrent_sparse : bool = False,
+                    scale=1/128,
+                    recurrent_scale=1/128
                     ):
 
-    _check_activation(activation)
-
     if format == "torch":
-        # transpose weight matrices and change gate order from rzn to zrn
+        # change gate ordering from rzn to zrn
         N = weight.shape[0] // 3
         for x in [weight, recurrent_weight, bias, recurrent_bias]:
@@ -234,80 +309,14 @@ def print_gru_layer(writer : CWriter,
         weight = weight.transpose()
         recurrent_weight = recurrent_weight.transpose()
-
-
-    # input weights
-    if input_sparse:
-        qweight = print_sparse_vector(writer, weight, name + '_weights', have_diag=False)
     else:
-        qweight = np.clip(np.round(128. * weight).astype('int'), -128, 127)
-
-    if dotp:
-        writer.source.write("#ifdef DOT_PROD\n")
-        print_vector(writer, qweight, name + '_weights', dtype='qweight', dotp=True)
-        writer.source.write("#else /*DOT_PROD*/\n")
-
-    print_vector(writer, weight, name + '_weights')
-
-    if dotp:
-        writer.source.write("#endif /*DOT_PROD*/\n")
-
-
-    # recurrent weights
-    recurrent_qweight = np.clip(np.round(128. * recurrent_weight).astype('int'), -128, 127)
-
-    if dotp:
-        writer.source.write("#ifdef DOT_PROD\n")
-        print_vector(writer, recurrent_qweight, name + '_recurrent_weights', dtype='qweight', dotp=True)
-        writer.source.write("#else /*DOT_PROD*/\n")
-
-    print_vector(writer, recurrent_weight, name + '_recurrent_weights')
-
-    if dotp:
-        writer.source.write("#endif /*DOT_PROD*/\n")
-
-
-    # corrected bias for unsigned int matrix multiplication
-    subias = bias - np.sum(qweight / 128., axis=0)
-    recurrent_subias = recurrent_bias - np.sum(recurrent_qweight / 128., axis=0)
-
-    print_vector(writer, np.concatenate((bias, recurrent_bias)), name + "_bias")
-    print_vector(writer, np.concatenate((subias, recurrent_subias)), name + "_subias")
+        N = weight.shape[1] // 3
 
+    print_linear_layer(writer, name + "_input", weight, bias, scale=scale, sparse=input_sparse, quantize=quantize)
+    print_linear_layer(writer, name + "_recurrent", recurrent_weight, recurrent_bias, scale=recurrent_scale, sparse=recurrent_sparse, diagonal=recurrent_sparse, quantize=quantize)
 
     # wrapping it up
     writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
     writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")
 
-    if writer.enable_binary_blob:
-        if input_sparse:
-            init_call = f'gru_init(&model->{name}, arrays, "{name}_bias", "{name}_subias", "{name}_weights", "{name + "_weights_idx"}", "{name}_recurrent_weights", {weight.shape[0]}, {weight.shape[1] // 3}, ACTIVATION_{activation}, {reset_after})'
-        else:
-            init_call = f'gru_init(&model->{name}, arrays, "{name}_bias", "{name}_subias", "{name}_weights", NULL, "{name}_recurrent_weights", {weight.shape[0]}, {weight.shape[1] // 3}, ACTIVATION_{activation}, {reset_after})'
-
-        writer.layer_dict[name] = ('GRULayer', init_call)
-
-    else:
-
-        writer.source.write(
-f"""
-
-const GRULayer {name} = {{
-    {name}_bias,
-    {name}_subias,
-    {name}_weights,
-    {name + "_weights_idx" if input_sparse else "NULL"},
-    {name}_recurrent_weights,
-    {weight.shape[0]},
-    {weight.shape[1] // 3},
-    ACTIVATION_{activation},
-    {reset_after}
-}};
-
-"""
-        )
-
-        writer.header.write(f"\nextern const GRULayer {name};\n")
-
-
-    return N
+    return N
\ No newline at end of file
```
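The new helpers in common.py are small enough to exercise standalone. The sketch below copies extract_diagonal and a lightly condensed quantize_weight from the hunk above and runs them on a hypothetical 4x12 matrix (N = 4 rows, k = 3 blocks, as in a 3-gate recurrent matrix): the diagonal of each NxN block is split off so the sparse block format only stores off-diagonal weights, and quantization rounds to int8 steps of `scale`, raising instead of silently clipping out-of-range values.

```python
import numpy as np

def extract_diagonal(A):
    """ input shape is (N, k*N); returns (concatenated block diagonals, A with those diagonals zeroed) """
    N, M = A.shape
    B = A.copy()
    assert M % N == 0
    k = M // N
    diags = []
    for l in range(k):
        diag = np.diag(B[:, l * N : (l + 1) * N]).copy()
        B[:, l * N : (l + 1) * N] -= np.diag(diag)
        diags.append(diag)
    return np.concatenate(diags), B

def quantize_weight(weight, scale):
    """ round to steps of `scale`; refuse to clip values that fall outside int8 range """
    Aq = np.round(weight / scale).astype('int')
    if Aq.max() > 127 or Aq.min() <= -128:
        raise ValueError("value out of bounds in quantize_weight")
    return np.clip(Aq, -128, 127)

# hypothetical toy matrix standing in for a GRU recurrent weight of shape (N, 3*N)
rng = np.random.default_rng(0)
A = rng.uniform(-0.5, 0.5, size=(4, 12))

diag, off_diag = extract_diagonal(A)
assert diag.shape == (12,)                      # one length-N diagonal per N x N block
for l in range(3):                              # diagonals zeroed, everything else untouched
    block = off_diag[:, 4 * l : 4 * (l + 1)]
    assert np.allclose(np.diag(block), 0.0)

Aq = quantize_weight(off_diag, scale=1 / 128)   # the default scale used throughout the diff
assert Aq.dtype.kind == 'i' and Aq.max() <= 127 and Aq.min() >= -128
```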
```diff
diff --git a/dnn/torch/weight-exchange/wexchange/tf/tf.py b/dnn/torch/weight-exchange/wexchange/tf/tf.py
index c8f9ed2f..bebbb55a 100644
--- a/dnn/torch/weight-exchange/wexchange/tf/tf.py
+++ b/dnn/torch/weight-exchange/wexchange/tf/tf.py
@@ -34,7 +34,7 @@ import numpy as np
 
 from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer
 
-def dump_tf_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
+def dump_tf_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
 
     assert gru.activation == tf.keras.activations.tanh
 
@@ -47,7 +47,7 @@ def dump_tf_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
     b_hh = gru.weights[2].numpy()[1].copy()
 
     if isinstance(where, CWriter):
-        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, 'TANH', format='tf', reset_after=1, input_sparse=input_sparse, dotp=dotp)
+        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='tf', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
     else:
         os.makedirs(where, exist_ok=True)
 
@@ -87,7 +87,7 @@ def load_tf_gru_weights(path, gru):
 
     gru.weights[2].assign(tf.convert_to_tensor(np.vstack((b_ih, b_hh))))
 
-def dump_tf_dense_weights(where, dense, name=None):
+def dump_tf_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):
 
     w = dense.weights[0].numpy()
     if dense.bias is None:
@@ -98,12 +98,7 @@ def dump_tf_dense_weights(where, dense, name=None):
 
 
     if isinstance(where, CWriter):
-        try:
-            activation = dense.activation.__name__.upper()
-        except:
-            activation = "LINEAR"
-
-        return print_dense_layer(where, name, w, b, activation, format='tf')
+        return print_dense_layer(where, name, w, b, scale=scale, format='tf', sparse=sparse, diagonal=diagonal, quantize=quantize)
     else:
         os.makedirs(where, exist_ok=True)
 
@@ -122,7 +117,7 @@ def load_tf_dense_weights(path, dense):
 
     dense.weights[1].assign(tf.convert_to_tensor(b))
 
-def dump_tf_conv1d_weights(where, conv, name=None):
+def dump_tf_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False):
 
     assert conv.data_format == 'channels_last'
 
@@ -133,12 +128,7 @@ def dump_tf_conv1d_weights(where, conv, name=None):
     b = conv.bias.numpy()
 
     if isinstance(where, CWriter):
-        try:
-            activation = conv.activation.__name__.upper()
-        except:
-            activation = "LINEAR"
-
-        return print_conv1d_layer(where, name, w, b, activation, format='tf')
+        return print_conv1d_layer(where, name, w, b, scale=scale, format='tf', quantize=quantize)
     else:
         os.makedirs(where, exist_ok=True)
diff --git a/dnn/torch/weight-exchange/wexchange/torch/torch.py b/dnn/torch/weight-exchange/wexchange/torch/torch.py
index 729c1bc9..4f6d7dfd 100644
--- a/dnn/torch/weight-exchange/wexchange/torch/torch.py
+++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py
@@ -34,7 +34,7 @@ import numpy as np
 
 from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer
 
-def dump_torch_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
+def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
 
     assert gru.num_layers == 1
     assert gru.bidirectional == False
@@ -45,7 +45,7 @@ def dump_torch_gru_weights(where, gru, name=None, input_sparse=False, dotp=False):
     b_hh = gru.bias_hh_l0.detach().cpu().numpy()
 
     if isinstance(where, CWriter):
-        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, 'TANH', format='torch', reset_after=1, input_sparse=input_sparse, dotp=dotp)
+        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='torch', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
     else:
         os.makedirs(where, exist_ok=True)
 
@@ -73,7 +73,7 @@ def load_torch_gru_weights(where, gru):
 
         gru.bias_hh_l0.set_(torch.from_numpy(b_hh))
 
-def dump_torch_dense_weights(where, dense, name=None, activation="LINEAR"):
+def dump_torch_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):
 
     w = dense.weight.detach().cpu().numpy()
     if dense.bias is None:
@@ -82,7 +82,7 @@ def dump_torch_dense_weights(where, dense, name=None, activation="LINEAR"):
         b = dense.bias.detach().cpu().numpy()
 
     if isinstance(where, CWriter):
-        return print_dense_layer(where, name, w, b, activation, format='torch')
+        return print_dense_layer(where, name, w, b, scale=scale, format='torch', sparse=sparse, diagonal=diagonal, quantize=quantize)
     else:
         os.makedirs(where, exist_ok=True)
 
@@ -102,7 +102,7 @@ def load_torch_dense_weights(where, dense):
 
         dense.bias.set_(torch.from_numpy(b))
 
-def dump_torch_conv1d_weights(where, conv, name=None, activation="LINEAR"):
+def dump_torch_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False):
 
     w = conv.weight.detach().cpu().numpy()
     if conv.bias is None:
@@ -112,7 +112,7 @@
 
 
     if isinstance(where, CWriter):
-        return print_conv1d_layer(where, name, w, b, activation, format='torch')
+        return print_conv1d_layer(where, name, w, b, scale=scale, format='torch', quantize=quantize)
     else:
         os.makedirs(where, exist_ok=True)
 
@@ -146,12 +146,12 @@ def load_torch_embedding_weights(where, emb):
     with torch.no_grad():
         emb.weight.set_(torch.from_numpy(w))
 
-def dump_torch_weights(where, module, name=None, activation="LINEAR", verbose=False, **kwargs):
+def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
     """ generic function for dumping weights of some torch.nn.Module """
 
     if verbose and name is not None:
         print(f"printing layer {name} of type {type(module)}...")
     if isinstance(module, torch.nn.Linear):
-        return dump_torch_dense_weights(where, module, name, activation, **kwargs)
+        return dump_torch_dense_weights(where, module, name, **kwargs)
     elif isinstance(module, torch.nn.GRU):
         return dump_torch_gru_weights(where, module, name, **kwargs)
     elif isinstance(module, torch.nn.Conv1d):
```
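One consequence of this commit worth spelling out: a GRU is no longer exported as a monolithic GRULayer with an activation and reset_after flag; print_gru_layer now emits one LinearLayer for the input weights and one for the recurrent weights. The following inspection sketch is hypothetical (names, sizes, and output path are made up, and the package-level import path is assumed); it only looks at what the writer records.

```python
import torch
from wexchange.c_export import CWriter
from wexchange.torch import dump_torch_gru_weights

# hypothetical writer and GRU, used purely to inspect what the new code registers
writer = CWriter("example_gru_data", message="inspection only", model_struct_name='ExampleModel')
gru = torch.nn.GRU(64, 64)

dump_torch_gru_weights(writer, gru, name='example_gru', input_sparse=True, quantize=True)

# one GRU now shows up as two LinearLayer entries instead of one GRULayer
print(writer.layer_dict.keys())
# expected: odict_keys(['example_gru_input', 'example_gru_recurrent'])

# the arrays collected for the binary blob include float and int8 copies,
# the sparse block indices, and the subias/scale vectors for the quantized path
print([n for n in writer.weight_arrays if n.startswith('example_gru_input')])
```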