diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-02-03 15:23:25 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-02-03 15:23:25 +0300 |
commit | 76a6d9f643c06880549725379b7207a259eb57b5 (patch) | |
tree | a469dc7c6621c720809e9d211bdc8397f40decb6 | |
parent | 0fe00661bddd4923b16a3c444dc237b77bed93f3 (diff) |
Change order of arguments to PrepareBQuantizedTransposed to match other functions
-rw-r--r-- | interleave.h | 2 | ||||
-rw-r--r-- | intgemm.cc | 2 | ||||
-rw-r--r-- | intgemm.h | 6 | ||||
-rw-r--r-- | test/prepare_b_quantized_transposed.cc | 2 |
4 files changed, 6 insertions, 6 deletions
diff --git a/interleave.h b/interleave.h index 9b596e5..43cbab0 100644 --- a/interleave.h +++ b/interleave.h @@ -235,7 +235,7 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f } #define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \ -target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index rows, Index cols) { \ +target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \ using Register = vector_t<cpu_type, Integer>; \ const Index RegisterElems = sizeof(Register) / sizeof(Integer); \ const Index kColStride = 8; \ @@ -20,7 +20,7 @@ void (*Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, I void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI_8bit::PrepareB, AVX512_8bit::PrepareB, AVX2_8bit::PrepareB, SSSE3_8bit::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB); -void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols) = ChooseCPU(AVX512_8bit::PrepareBQuantizedTransposed, AVX512_8bit::PrepareBQuantizedTransposed, AVX2_8bit::PrepareBQuantizedTransposed, SSSE3_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); +void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512_8bit::PrepareBQuantizedTransposed, AVX512_8bit::PrepareBQuantizedTransposed, AVX2_8bit::PrepareBQuantizedTransposed, SSSE3_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); void (*Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI_8bit::SelectColumnsB, AVX512_8bit::SelectColumnsB, AVX2_8bit::SelectColumnsB, SSSE3_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB); @@ -277,7 +277,7 @@ struct Int8 { // Multiply floats by quant_mult then convert to 8-bit integers with saturation. // A version that adds 127 to each number, making sure that all numbers are positive static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); - + // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. static void (*PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols); @@ -286,9 +286,9 @@ struct Int8 { // quantized (e.g. with Quantize) to the CPU-dependent format used for // Multiply. This is useful for storing a quantized model on disk then in a // CPU-independent fashion. - static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols); + static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols); - // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. + // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. static void (*SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end); // Multiply C = A * B, presuming A and B have been prepared. diff --git a/test/prepare_b_quantized_transposed.cc b/test/prepare_b_quantized_transposed.cc index 11b518c..b7b2257 100644 --- a/test/prepare_b_quantized_transposed.cc +++ b/test/prepare_b_quantized_transposed.cc @@ -30,7 +30,7 @@ bool Test(const AlignedVector<typename Backend::Integer>& input, Index rows, Ind bool success = true; AlignedVector<typename Backend::Integer> output(input.size()); - Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), rows, cols); + Backend::PrepareBQuantizedTransposed(input.begin(), output.begin(), cols, rows); AlignedVector<typename Backend::Integer> reference(input.size()); PrepareBQuantizedTransposedRef<Backend>(input.begin(), reference.begin(), rows, cols); |