From 6d2882d93c9d671a6f156d260561665058c1bd0d Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 Feb 2022 16:35:15 +0000 Subject: Make function pointers const (#99) --- intgemm/intgemm.cc | 28 ++++++++++++++-------------- intgemm/intgemm.h | 44 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc index d6c26b9..58e4bc5 100644 --- a/intgemm/intgemm.cc +++ b/intgemm/intgemm.cc @@ -141,33 +141,33 @@ MeanStd Unsupported_VectorMeanStd(const float * /*begin*/, const float * /*end*/ return MeanStd(); } -void (*Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize); +void (*const Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize); -void (*Int16::PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512BW::Kernels16::PrepareB, AVX512BW::Kernels16::PrepareB, AVX2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, Unsupported_16bit::PrepareB); +void (*const Int16::PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512BW::Kernels16::PrepareB, AVX512BW::Kernels16::PrepareB, AVX2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, Unsupported_16bit::PrepareB); -void (*Int16::PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, Unsupported_16bit::PrepareBQuantizedTransposed); +void (*const Int16::PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, Unsupported_16bit::PrepareBQuantizedTransposed); -void (*Int16::PrepareBTransposed)(const float *input, int16_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBTransposed, AVX512BW::Kernels16::PrepareBTransposed, AVX2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, Unsupported_16bit::PrepareBTransposed); +void (*const Int16::PrepareBTransposed)(const float *input, int16_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBTransposed, AVX512BW::Kernels16::PrepareBTransposed, AVX2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, Unsupported_16bit::PrepareBTransposed); -void (*Int16::SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512BW::Kernels16::SelectColumnsB, AVX512BW::Kernels16::SelectColumnsB, AVX2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, Unsupported_16bit::SelectColumnsB); +void (*const Int16::SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512BW::Kernels16::SelectColumnsB, AVX512BW::Kernels16::SelectColumnsB, AVX2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, Unsupported_16bit::SelectColumnsB); const char *const Int16::kName = ChooseCPU(AVX512BW::Kernels16::kName, AVX512BW::Kernels16::kName, AVX2::Kernels16::kName, SSE2::Kernels16::kName, SSE2::Kernels16::kName, Unsupported_16bit::kName); -void (*Int8::Quantize)(const float *input, int8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::Quantize, AVX512BW::Kernels8::Quantize, AVX2::Kernels8::Quantize, SSSE3::Kernels8::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize); +void (*const Int8::Quantize)(const float *input, int8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::Quantize, AVX512BW::Kernels8::Quantize, AVX2::Kernels8::Quantize, SSSE3::Kernels8::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize); -void (*Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); +void (*const Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); -void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI::Kernels8::PrepareB, AVX512BW::Kernels8::PrepareB, AVX2::Kernels8::PrepareB, SSSE3::Kernels8::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB); +void (*const Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI::Kernels8::PrepareB, AVX512BW::Kernels8::PrepareB, AVX2::Kernels8::PrepareB, SSSE3::Kernels8::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB); -void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX2::Kernels8::PrepareBQuantizedTransposed, SSSE3::Kernels8::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); +void (*const Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX2::Kernels8::PrepareBQuantizedTransposed, SSSE3::Kernels8::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); -void (*Int8::PrepareBTransposed)(const float *input, int8_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBTransposed, AVX512BW::Kernels8::PrepareBTransposed, AVX2::Kernels8::PrepareBTransposed, SSSE3::Kernels8::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed); +void (*const Int8::PrepareBTransposed)(const float *input, int8_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBTransposed, AVX512BW::Kernels8::PrepareBTransposed, AVX2::Kernels8::PrepareBTransposed, SSSE3::Kernels8::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed); -void (*Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI::Kernels8::SelectColumnsB, AVX512BW::Kernels8::SelectColumnsB, AVX2::Kernels8::SelectColumnsB, SSSE3::Kernels8::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB); +void (*const Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI::Kernels8::SelectColumnsB, AVX512BW::Kernels8::SelectColumnsB, AVX2::Kernels8::SelectColumnsB, SSSE3::Kernels8::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB); const char *const Int8::kName = ChooseCPU(AVX512VNNI::Kernels8::kName, AVX512BW::Kernels8::kName, AVX2::Kernels8::kName, SSSE3::Kernels8::kName, Unsupported_8bit::kName, Unsupported_8bit::kName); -void (*Int8Shift::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); +void (*const Int8Shift::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); const char *const Int8Shift::kName = ChooseCPU(AVX512VNNI::Kernels8::kName, AVX512BW::Kernels8::kName, AVX2::Kernels8::kName, SSSE3::Kernels8::kName, Unsupported_8bit::kName, Unsupported_8bit::kName); @@ -184,9 +184,9 @@ using AVX2::VectorMeanStd; } // namespace AVX512BW #endif -float (*MaxAbsolute)(const float *begin, const float *end) = ChooseCPU(AVX512BW::MaxAbsolute, AVX512BW::MaxAbsolute, AVX2::MaxAbsolute, SSE2::MaxAbsolute, SSE2::MaxAbsolute, Unsupported_MaxAbsolute); +float (*const MaxAbsolute)(const float *begin, const float *end) = ChooseCPU(AVX512BW::MaxAbsolute, AVX512BW::MaxAbsolute, AVX2::MaxAbsolute, SSE2::MaxAbsolute, SSE2::MaxAbsolute, Unsupported_MaxAbsolute); -MeanStd (*VectorMeanStd)(const float *begin, const float *end, bool absolute) = ChooseCPU(AVX512BW::VectorMeanStd, AVX512BW::VectorMeanStd, AVX2::VectorMeanStd, SSE2::VectorMeanStd, SSE2::VectorMeanStd, Unsupported_VectorMeanStd); +MeanStd (*const VectorMeanStd)(const float *begin, const float *end, bool absolute) = ChooseCPU(AVX512BW::VectorMeanStd, AVX512BW::VectorMeanStd, AVX2::VectorMeanStd, SSE2::VectorMeanStd, SSE2::VectorMeanStd, Unsupported_VectorMeanStd); constexpr const char *const Unsupported_16bit::kName; constexpr const char *const Unsupported_8bit::kName; diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h index 2528fdb..26febb5 100644 --- a/intgemm/intgemm.h +++ b/intgemm/intgemm.h @@ -178,29 +178,29 @@ struct Int8 { } // Multiply floats by quant_mult then convert to 8-bit integers with saturation. - static void (*Quantize)(const float *input, int8_t *output, float quant_mult, Index size); + static void (*const Quantize)(const float *input, int8_t *output, float quant_mult, Index size); // Multiply floats by quant_mult then convert to 8-bit integers with saturation. // A version that adds 127 to each number, making sure that all numbers are positive - static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); + static void (*const QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. - static void (*PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols); + static void (*const PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols); // Convert from a B that was already transposed (routine not provided) and // quantized (e.g. with Quantize) to the CPU-dependent format used for // Multiply. This is useful for storing a quantized model on disk then in a // CPU-independent fashion. - static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols); + static void (*const PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols); // Convert from a B that was already transposed (routine not provided) to // the CPU-dependent format used for Multiply. This is useful for storing // a quantized model on disk then in a CPU-independent fashion. - static void (*PrepareBTransposed)(const float *input, int8_t *output, float quant_mul, Index inner, Index B_untransposed_cols); + static void (*const PrepareBTransposed)(const float *input, int8_t *output, float quant_mul, Index inner, Index B_untransposed_cols); // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. - static void (*SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end); + static void (*const SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end); // Multiply C = A * B, presuming A and B have been prepared. template @@ -213,12 +213,12 @@ struct Int8 { private: template struct MultiplyImpl { - static void (*run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; }; template -void (*Int8::MultiplyImpl::run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply); +void (*const Int8::MultiplyImpl::run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply); /* * 8-bit matrix multiplication with shifting A by 127 @@ -236,7 +236,7 @@ struct Int8Shift { // Multiply floats by quant_mult then convert to 8-bit integers with saturation. // A version that adds 127 to each number, making sure that all numbers are positive - static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); + static void (*const QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. @@ -271,17 +271,17 @@ struct Int8Shift { private: template struct MultiplyImpl { - static void (*run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; template struct PrepareBiasImpl { - static void (*run)(const int8_t *B, Index width, Index B_cols, Callback callback); + static void (*const run)(const int8_t *B, Index width, Index B_cols, Callback callback); }; }; template -void (*Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU( +void (*const Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU( OMPParallelWrap8Shift, OMPParallelWrap8Shift, OMPParallelWrap8Shift, @@ -289,7 +289,7 @@ void (*Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B Unsupported_8bit::Multiply8Shift, Unsupported_8bit::Multiply8Shift); template -void (*Int8Shift::PrepareBiasImpl::run)(const int8_t *B, Index width, Index B_cols, Callback callback) = ChooseCPU(AVX512VNNI::Kernels8::PrepareBias, AVX512BW::Kernels8::PrepareBias, AVX2::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, Unsupported_8bit::PrepareBias); +void (*const Int8Shift::PrepareBiasImpl::run)(const int8_t *B, Index width, Index B_cols, Callback callback) = ChooseCPU(AVX512VNNI::Kernels8::PrepareBias, AVX512BW::Kernels8::PrepareBias, AVX2::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, Unsupported_8bit::PrepareBias); /* * 16-bit matrix multiplication @@ -309,25 +309,25 @@ struct Int16 { // Multiply floats by quant_mult then convert to 16-bit integers with saturation. // input - static void (*Quantize)(const float *input, int16_t *output, float quant_mult, Index size); + static void (*const Quantize)(const float *input, int16_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. - static void (*PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols); + static void (*const PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols); // Convert from a B that was already transposed (routine not provided) and // quantized (e.g. with Quantize) to the CPU-dependent format used for // Multiply. This is useful for storing a quantized model on disk then in a // CPU-independent fashion. - static void (*PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols); + static void (*const PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols); // Convert from a B that was already transposed (routine not provided) to // the CPU-dependent format used for Multiply. This is useful for storing // a quantized model on disk then in a CPU-independent fashion. - static void (*PrepareBTransposed)(const float *input, int16_t *output, float quant_mul, Index inner, Index B_untransposed_cols); + static void (*const PrepareBTransposed)(const float *input, int16_t *output, float quant_mul, Index inner, Index B_untransposed_cols); // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. - static void (*SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end); + static void (*const SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end); // Multiply C = A * B, presuming A and B have been prepared. template @@ -340,20 +340,20 @@ struct Int16 { private: template struct MultiplyImpl { - static void (*run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; }; template -void (*Int16::MultiplyImpl::run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap /*TODO VNNI 16-bit. */, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_16bit::Multiply); +void (*const Int16::MultiplyImpl::run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap /*TODO VNNI 16-bit. */, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_16bit::Multiply); extern const CPUType kCPU; // Get the maximum absolute value of an array of floats. The number of floats must be a multiple of 16 and 64-byte aligned. -extern float (*MaxAbsolute)(const float *begin, const float *end); +extern float (*const MaxAbsolute)(const float *begin, const float *end); // Get a Quantization value that is equant to the mean of the data +N standard deviations. Use 2 by default -extern MeanStd (*VectorMeanStd)(const float *begin, const float *end, bool); +extern MeanStd (*const VectorMeanStd)(const float *begin, const float *end, bool); /* Returns the Mean and the Standard deviation of a vector. * If "absolute" is set to true, it computes the mean and the standard deviation of the absolute values of the vector */ -- cgit v1.2.3