Change order of arguments to PrepareBQuantizedTransposed to match other functions

author: Kenneth Heafield <github@kheafield.com> 2020-02-03 15:23:25 +0300
committer: Kenneth Heafield <github@kheafield.com> 2020-02-03 15:23:25 +0300
commit: 76a6d9f643c06880549725379b7207a259eb57b5 (patch)
tree: a469dc7c6621c720809e9d211bdc8397f40decb6
parent: 0fe00661bddd4923b16a3c444dc237b77bed93f3 (diff)
4 files changed, 6 insertions, 6 deletions
diff --git a/interleave.h b/interleave.h
index 9b596e5..43cbab0 100644
--- a/interleave.h
+++ b/interleave.h
@@ -235,7 +235,7 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f
 }
 
 #define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \
-target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index rows, Index cols) { \
+target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \
   using Register = vector_t<cpu_type, Integer>; \
   const Index RegisterElems = sizeof(Register) / sizeof(Integer); \
   const Index kColStride = 8; \
diff --git a/intgemm.cc b/intgemm.cc
index f1d328d..21b7ddd 100644
--- a/intgemm.cc
+++ b/intgemm.cc
@@ -20,7 +20,7 @@ void (*Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, I
 
 void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI_8bit::PrepareB, AVX512_8bit::PrepareB, AVX2_8bit::PrepareB, SSSE3_8bit::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB);
 
-void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols) = ChooseCPU(AVX512_8bit::PrepareBQuantizedTransposed, AVX512_8bit::PrepareBQuantizedTransposed, AVX2_8bit::PrepareBQuantizedTransposed, SSSE3_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed);
+void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512_8bit::PrepareBQuantizedTransposed, AVX512_8bit::PrepareBQuantizedTransposed, AVX2_8bit::PrepareBQuantizedTransposed, SSSE3_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed);
 
 void (*Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI_8bit::SelectColumnsB, AVX512_8bit::SelectColumnsB, AVX2_8bit::SelectColumnsB, SSSE3_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB);
 
diff --git a/intgemm.h b/intgemm.h
index 581c6f4..58d2dcc 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -277,7 +277,7 @@ struct Int8 {
   // Multiply floats by quant_mult then convert to 8-bit integers with saturation.
   // A version that adds 127 to each number, making sure that all numbers are positive
   static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size);
-  
+
   // Warning: the output of PrepareB depends on the CPU.
   // It will match the Multiply function on the same CPU though.
   static void (*PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols);
@@ -286,9 +286,9 @@ struct Int8 {
   // quantized (e.g. with Quantize) to the CPU-dependent format used for
   // Multiply.  This is useful for storing a quantized model on disk in a
   // CPU-independent fashion.
-  static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols);
+  static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols);
 
-  // Select columns from a prepared B matrix.  The number of selected columns must be a multiple of 8. 
+  // Select columns from a prepared B matrix.  The number of selected columns must be a multiple of 8.
   static void (*SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end);
 
   // Multiply C = A * B, presuming A and B have been prepared.
diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc
index 11b518c..b7b2257 100644
--- a/test/prepare_b_quantized_transposed.cc
+++ b/test/prepare_b_quantized_transposed.cc
@@ -30,7 +30,7 @@ bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Ind
   bool success = true;
 
   AlignedVector<typename Backend::Integer> output(input.size());
-  Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), rows, cols);
+  Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows);
 
   AlignedVector<typename Backend::Integer> reference(input.size());
   PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols);
author	Kenneth Heafield <github@kheafield.com>	2020-02-03 15:23:25 +0300
committer	Kenneth Heafield <github@kheafield.com>	2020-02-03 15:23:25 +0300
commit	76a6d9f643c06880549725379b7207a259eb57b5 (patch)
tree	a469dc7c6621c720809e9d211bdc8397f40decb6
parent	0fe00661bddd4923b16a3c444dc237b77bed93f3 (diff)