From 6d2882d93c9d671a6f156d260561665058c1bd0d Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 Feb 2022 16:35:15 +0000 Subject: Make function pointers const (#99) --- intgemm/intgemm.cc | 28 ++++++++++++++-------------- intgemm/intgemm.h | 44 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc index d6c26b9..58e4bc5 100644 --- a/intgemm/intgemm.cc +++ b/intgemm/intgemm.cc @@ -141,33 +141,33 @@ MeanStd Unsupported_VectorMeanStd(const float * /*begin*/, const float * /*end*/ return MeanStd(); } -void (*Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize); +void (*const Int16::Quantize)(const float *input, int16_t *output, float quant_mult, Index size) = ChooseCPU(AVX512BW::Kernels16::Quantize, AVX512BW::Kernels16::Quantize, AVX2::Kernels16::Quantize, SSE2::Kernels16::Quantize, SSE2::Kernels16::Quantize, Unsupported_16bit::Quantize); -void (*Int16::PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512BW::Kernels16::PrepareB, AVX512BW::Kernels16::PrepareB, AVX2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, Unsupported_16bit::PrepareB); +void (*const Int16::PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512BW::Kernels16::PrepareB, AVX512BW::Kernels16::PrepareB, AVX2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, SSE2::Kernels16::PrepareB, Unsupported_16bit::PrepareB); -void (*Int16::PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, Unsupported_16bit::PrepareBQuantizedTransposed); +void (*const Int16::PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX512BW::Kernels16::PrepareBQuantizedTransposed, AVX2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, SSE2::Kernels16::PrepareBQuantizedTransposed, Unsupported_16bit::PrepareBQuantizedTransposed); -void (*Int16::PrepareBTransposed)(const float *input, int16_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBTransposed, AVX512BW::Kernels16::PrepareBTransposed, AVX2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, Unsupported_16bit::PrepareBTransposed); +void (*const Int16::PrepareBTransposed)(const float *input, int16_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels16::PrepareBTransposed, AVX512BW::Kernels16::PrepareBTransposed, AVX2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, SSE2::Kernels16::PrepareBTransposed, Unsupported_16bit::PrepareBTransposed); -void (*Int16::SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512BW::Kernels16::SelectColumnsB, AVX512BW::Kernels16::SelectColumnsB, AVX2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, Unsupported_16bit::SelectColumnsB); +void (*const Int16::SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512BW::Kernels16::SelectColumnsB, AVX512BW::Kernels16::SelectColumnsB, AVX2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, SSE2::Kernels16::SelectColumnsB, Unsupported_16bit::SelectColumnsB); const char *const Int16::kName = ChooseCPU(AVX512BW::Kernels16::kName, AVX512BW::Kernels16::kName, AVX2::Kernels16::kName, SSE2::Kernels16::kName, SSE2::Kernels16::kName, Unsupported_16bit::kName); -void (*Int8::Quantize)(const float *input, int8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::Quantize, AVX512BW::Kernels8::Quantize, AVX2::Kernels8::Quantize, SSSE3::Kernels8::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize); +void (*const Int8::Quantize)(const float *input, int8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::Quantize, AVX512BW::Kernels8::Quantize, AVX2::Kernels8::Quantize, SSSE3::Kernels8::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize); -void (*Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); +void (*const Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); -void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI::Kernels8::PrepareB, AVX512BW::Kernels8::PrepareB, AVX2::Kernels8::PrepareB, SSSE3::Kernels8::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB); +void (*const Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI::Kernels8::PrepareB, AVX512BW::Kernels8::PrepareB, AVX2::Kernels8::PrepareB, SSSE3::Kernels8::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB); -void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX2::Kernels8::PrepareBQuantizedTransposed, SSSE3::Kernels8::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); +void (*const Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX512BW::Kernels8::PrepareBQuantizedTransposed, AVX2::Kernels8::PrepareBQuantizedTransposed, SSSE3::Kernels8::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed); -void (*Int8::PrepareBTransposed)(const float *input, int8_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBTransposed, AVX512BW::Kernels8::PrepareBTransposed, AVX2::Kernels8::PrepareBTransposed, SSSE3::Kernels8::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed); +void (*const Int8::PrepareBTransposed)(const float *input, int8_t *output, float quant_mult, Index inner, Index B_untransposed_cols) = ChooseCPU(AVX512BW::Kernels8::PrepareBTransposed, AVX512BW::Kernels8::PrepareBTransposed, AVX2::Kernels8::PrepareBTransposed, SSSE3::Kernels8::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed, Unsupported_8bit::PrepareBTransposed); -void (*Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI::Kernels8::SelectColumnsB, AVX512BW::Kernels8::SelectColumnsB, AVX2::Kernels8::SelectColumnsB, SSSE3::Kernels8::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB); +void (*const Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI::Kernels8::SelectColumnsB, AVX512BW::Kernels8::SelectColumnsB, AVX2::Kernels8::SelectColumnsB, SSSE3::Kernels8::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB); const char *const Int8::kName = ChooseCPU(AVX512VNNI::Kernels8::kName, AVX512BW::Kernels8::kName, AVX2::Kernels8::kName, SSSE3::Kernels8::kName, Unsupported_8bit::kName, Unsupported_8bit::kName); -void (*Int8Shift::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); +void (*const Int8Shift::QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size) = ChooseCPU(AVX512VNNI::Kernels8::QuantizeU, AVX512BW::Kernels8::QuantizeU, AVX2::Kernels8::QuantizeU, SSSE3::Kernels8::QuantizeU, Unsupported_8bit::QuantizeU, Unsupported_8bit::QuantizeU); const char *const Int8Shift::kName = ChooseCPU(AVX512VNNI::Kernels8::kName, AVX512BW::Kernels8::kName, AVX2::Kernels8::kName, SSSE3::Kernels8::kName, Unsupported_8bit::kName, Unsupported_8bit::kName); @@ -184,9 +184,9 @@ using AVX2::VectorMeanStd; } // namespace AVX512BW #endif -float (*MaxAbsolute)(const float *begin, const float *end) = ChooseCPU(AVX512BW::MaxAbsolute, AVX512BW::MaxAbsolute, AVX2::MaxAbsolute, SSE2::MaxAbsolute, SSE2::MaxAbsolute, Unsupported_MaxAbsolute); +float (*const MaxAbsolute)(const float *begin, const float *end) = ChooseCPU(AVX512BW::MaxAbsolute, AVX512BW::MaxAbsolute, AVX2::MaxAbsolute, SSE2::MaxAbsolute, SSE2::MaxAbsolute, Unsupported_MaxAbsolute); -MeanStd (*VectorMeanStd)(const float *begin, const float *end, bool absolute) = ChooseCPU(AVX512BW::VectorMeanStd, AVX512BW::VectorMeanStd, AVX2::VectorMeanStd, SSE2::VectorMeanStd, SSE2::VectorMeanStd, Unsupported_VectorMeanStd); +MeanStd (*const VectorMeanStd)(const float *begin, const float *end, bool absolute) = ChooseCPU(AVX512BW::VectorMeanStd, AVX512BW::VectorMeanStd, AVX2::VectorMeanStd, SSE2::VectorMeanStd, SSE2::VectorMeanStd, Unsupported_VectorMeanStd); constexpr const char *const Unsupported_16bit::kName; constexpr const char *const Unsupported_8bit::kName; diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h index 2528fdb..26febb5 100644 --- a/intgemm/intgemm.h +++ b/intgemm/intgemm.h @@ -178,29 +178,29 @@ struct Int8 { } // Multiply floats by quant_mult then convert to 8-bit integers with saturation. - static void (*Quantize)(const float *input, int8_t *output, float quant_mult, Index size); + static void (*const Quantize)(const float *input, int8_t *output, float quant_mult, Index size); // Multiply floats by quant_mult then convert to 8-bit integers with saturation. // A version that adds 127 to each number, making sure that all numbers are positive - static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); + static void (*const QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. - static void (*PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols); + static void (*const PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols); // Convert from a B that was already transposed (routine not provided) and // quantized (e.g. with Quantize) to the CPU-dependent format used for // Multiply. This is useful for storing a quantized model on disk then in a // CPU-independent fashion. - static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols); + static void (*const PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index inner, Index B_untransposed_cols); // Convert from a B that was already transposed (routine not provided) to // the CPU-dependent format used for Multiply. This is useful for storing // a quantized model on disk then in a CPU-independent fashion. - static void (*PrepareBTransposed)(const float *input, int8_t *output, float quant_mul, Index inner, Index B_untransposed_cols); + static void (*const PrepareBTransposed)(const float *input, int8_t *output, float quant_mul, Index inner, Index B_untransposed_cols); // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. - static void (*SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end); + static void (*const SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end); // Multiply C = A * B, presuming A and B have been prepared. template @@ -213,12 +213,12 @@ struct Int8 { private: template struct MultiplyImpl { - static void (*run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; }; template -void (*Int8::MultiplyImpl::run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply); +void (*const Int8::MultiplyImpl::run)(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply); /* * 8-bit matrix multiplication with shifting A by 127 @@ -236,7 +236,7 @@ struct Int8Shift { // Multiply floats by quant_mult then convert to 8-bit integers with saturation. // A version that adds 127 to each number, making sure that all numbers are positive - static void (*QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); + static void (*const QuantizeU)(const float *input, uint8_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. @@ -271,17 +271,17 @@ struct Int8Shift { private: template struct MultiplyImpl { - static void (*run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; template struct PrepareBiasImpl { - static void (*run)(const int8_t *B, Index width, Index B_cols, Callback callback); + static void (*const run)(const int8_t *B, Index width, Index B_cols, Callback callback); }; }; template -void (*Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU( +void (*const Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU( OMPParallelWrap8Shift, OMPParallelWrap8Shift, OMPParallelWrap8Shift, @@ -289,7 +289,7 @@ void (*Int8Shift::MultiplyImpl::run)(const uint8_t *A, const int8_t *B Unsupported_8bit::Multiply8Shift, Unsupported_8bit::Multiply8Shift); template -void (*Int8Shift::PrepareBiasImpl::run)(const int8_t *B, Index width, Index B_cols, Callback callback) = ChooseCPU(AVX512VNNI::Kernels8::PrepareBias, AVX512BW::Kernels8::PrepareBias, AVX2::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, Unsupported_8bit::PrepareBias); +void (*const Int8Shift::PrepareBiasImpl::run)(const int8_t *B, Index width, Index B_cols, Callback callback) = ChooseCPU(AVX512VNNI::Kernels8::PrepareBias, AVX512BW::Kernels8::PrepareBias, AVX2::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, SSSE3::Kernels8::PrepareBias, Unsupported_8bit::PrepareBias); /* * 16-bit matrix multiplication @@ -309,25 +309,25 @@ struct Int16 { // Multiply floats by quant_mult then convert to 16-bit integers with saturation. // input - static void (*Quantize)(const float *input, int16_t *output, float quant_mult, Index size); + static void (*const Quantize)(const float *input, int16_t *output, float quant_mult, Index size); // Warning: the output of PrepareB depends on the CPU. // It will match the Multiply function on the same CPU though. - static void (*PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols); + static void (*const PrepareB)(const float *input, int16_t *output, float quant_mult, Index rows, Index cols); // Convert from a B that was already transposed (routine not provided) and // quantized (e.g. with Quantize) to the CPU-dependent format used for // Multiply. This is useful for storing a quantized model on disk then in a // CPU-independent fashion. - static void (*PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols); + static void (*const PrepareBQuantizedTransposed)(const int16_t *input, int16_t *output, Index inner, Index B_untransposed_cols); // Convert from a B that was already transposed (routine not provided) to // the CPU-dependent format used for Multiply. This is useful for storing // a quantized model on disk then in a CPU-independent fashion. - static void (*PrepareBTransposed)(const float *input, int16_t *output, float quant_mul, Index inner, Index B_untransposed_cols); + static void (*const PrepareBTransposed)(const float *input, int16_t *output, float quant_mul, Index inner, Index B_untransposed_cols); // Select columns from a prepared B matrix. The number of selected columns must be a multiple of 8. - static void (*SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end); + static void (*const SelectColumnsB)(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end); // Multiply C = A * B, presuming A and B have been prepared. template @@ -340,20 +340,20 @@ struct Int16 { private: template struct MultiplyImpl { - static void (*run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback); + static void (*const run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback); }; }; template -void (*Int16::MultiplyImpl::run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap /*TODO VNNI 16-bit. */, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_16bit::Multiply); +void (*const Int16::MultiplyImpl::run)(const int16_t *A, const int16_t *B, Index A_rows, Index width, Index B_cols, Callback callback) = ChooseCPU(OMPParallelWrap /*TODO VNNI 16-bit. */, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, OMPParallelWrap, Unsupported_16bit::Multiply); extern const CPUType kCPU; // Get the maximum absolute value of an array of floats. The number of floats must be a multiple of 16 and 64-byte aligned. -extern float (*MaxAbsolute)(const float *begin, const float *end); +extern float (*const MaxAbsolute)(const float *begin, const float *end); // Get a Quantization value that is equant to the mean of the data +N standard deviations. Use 2 by default -extern MeanStd (*VectorMeanStd)(const float *begin, const float *end, bool); +extern MeanStd (*const VectorMeanStd)(const float *begin, const float *end, bool); /* Returns the Mean and the Standard deviation of a vector. * If "absolute" is set to true, it computes the mean and the standard deviation of the absolute values of the vector */ -- cgit v1.2.3 From baa6bdf18ff0ee649977c40dd2e9fe8e10c550ef Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 Feb 2022 16:36:05 +0000 Subject: Readme Typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8388dc..05ac8c3 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ The last argument of `Multiply` is a callback which is usually used to performs - in [callbacks/implementations.inl](callbacks/implementations.inl) if you want to implement it for all architecturs at the same time. - in `callbacks/ARCHITECTURE.h` (e.g. [callbacks/sse2.h](callbacks/sse2.h)) if you want to implement it only for the specific architecture. -For 8-bit, you can make use a of a slightly faster implementation, assuming you can determine tha quantization multipliers and prepare the biases offline: +For 8-bit, you can make use a of a slightly faster implementation, assuming you can determine the quantization multipliers and prepare the biases offline: ```C++ #include "intgemm/intgemm.h" -- cgit v1.2.3 From 2d76dbb7268cb7be42c751de45c1dd916fe9a0f0 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 30 Mar 2022 17:48:16 +0100 Subject: Remove wormhole wrapper around PMADDWD. Fixes #100 --- intgemm/intrinsics.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/intgemm/intrinsics.h b/intgemm/intrinsics.h index 9f370cd..8db234e 100644 --- a/intgemm/intrinsics.h +++ b/intgemm/intrinsics.h @@ -95,12 +95,7 @@ template <> INTGEMM_SSE2 inline __m128 loadu_ps(const float* mem_addr) { return _mm_loadu_ps(mem_addr); } INTGEMM_SSE2 static inline __m128i madd_epi16(__m128i first, __m128i second) { -// https://bugzilla.mozilla.org/show_bug.cgi?id=1672160 -#ifdef INTGEMM_WORMHOLE - return wasm_v8x16_shuffle(first, second, 31, 0, 30, 2, 29, 4, 28, 6, 27, 8, 26, 10, 25, 12, 24, 2 /* PMADDWD */); -#else return _mm_madd_epi16(first, second); -#endif } INTGEMM_SSSE3 static inline __m128i maddubs_epi16(__m128i first, __m128i second) { // https://bugzilla.mozilla.org/show_bug.cgi?id=1672160 -- cgit v1.2.3 From 5ff3a99f28b7c3bee3c597219690aa587b0768f1 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 17 May 2022 09:41:17 +0100 Subject: A temporary workaround for gcc 12.1 (#104) Bit of a hack but gcc 12.1 seems to warn on its own headers. --- CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c9f78fa..11d613e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,13 @@ else() endif() endif() +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + # GCC 12.1 erroneously detects a bunch unitialised values + if (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.1) + add_compile_options(-Wno-uninitialized -Wno-maybe-uninitialized) + endif() +endif() + # Check if compiler supports AVX2 (this should only catch emscripten) try_compile(INTGEMM_COMPILER_SUPPORTS_AVX2 ${CMAKE_CURRENT_BINARY_DIR}/compile_tests -- cgit v1.2.3 From be3053515a8a04d19c6959a370eaf8b5a6eab686 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Wed, 18 May 2022 10:49:15 +0100 Subject: Update 3 year old catch and hide assert variable behind ifdef (#103) * Update 3 year old catch and hide assert variable behind ifdef * A temporary workaround for gcc 12.1 (#104) Bit of a hack but gcc 12.1 seems to warn on its own headers. * Fix gcc 9.3 with newer check when using -fno-exceptions It seems that std::iota generates weird sequences with -fno-exceptions under that gcc and when inside check (maybe some undefined behaviour as check relies does some sort of exception handling?). Comparing against the actual vector as opposed to generating the sequence again via for loop seems to fix the issue * Add 9.4 to the exceptions --- example.cc | 3 + test/3rd_party/catch.hpp | 6082 +++++++++++++++++++++++++++++---------- test/kernels/add_bias_test.cc | 4 + test/kernels/unquantize_test.cc | 4 + test/kernels/write_test.cc | 4 + 5 files changed, 4574 insertions(+), 1523 deletions(-) diff --git a/example.cc b/example.cc index 5f558d0..b1ba745 100644 --- a/example.cc +++ b/example.cc @@ -33,10 +33,13 @@ int main() { } // Compute the top left corner of C as a sanity check. + // Since we are only checking this with debug builds, disable it by default as it causes compilation failure with recent compilers +#ifndef NDEBUG float top_left_reference = 0.0f; for (Index w = 0; w < width; ++w) { top_left_reference += A[w] * B[w * B_cols]; } +#endif // 16-bit multiplication. { diff --git a/test/3rd_party/catch.hpp b/test/3rd_party/catch.hpp index 1850fff..d2a1242 100644 --- a/test/3rd_party/catch.hpp +++ b/test/3rd_party/catch.hpp @@ -1,9 +1,9 @@ /* - * Catch v2.7.0 - * Generated: 2019-03-07 21:34:30.252164 + * Catch v2.13.9 + * Generated: 2022-04-12 22:37:23.260201 * ---------------------------------------------------------- * This file has been merged from multiple headers. Please don't edit it directly - * Copyright (c) 2019 Two Blue Cubes Ltd. All rights reserved. + * Copyright (c) 2022 Two Blue Cubes Ltd. All rights reserved. * * Distributed under the Boost Software License, Version 1.0. (See accompanying * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -14,8 +14,8 @@ #define CATCH_VERSION_MAJOR 2 -#define CATCH_VERSION_MINOR 7 -#define CATCH_VERSION_PATCH 0 +#define CATCH_VERSION_MINOR 13 +#define CATCH_VERSION_PATCH 9 #ifdef __clang__ # pragma clang system_header @@ -66,13 +66,16 @@ #if !defined(CATCH_CONFIG_IMPL_ONLY) // start catch_platform.h +// See e.g.: +// https://opensource.apple.com/source/CarbonHeaders/CarbonHeaders-18.1/TargetConditionals.h.auto.html #ifdef __APPLE__ -# include -# if TARGET_OS_OSX == 1 -# define CATCH_PLATFORM_MAC -# elif TARGET_OS_IPHONE == 1 -# define CATCH_PLATFORM_IPHONE -# endif +# include +# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) || \ + (defined(TARGET_OS_MAC) && TARGET_OS_MAC == 1) +# define CATCH_PLATFORM_MAC +# elif (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE == 1) +# define CATCH_PLATFORM_IPHONE +# endif #elif defined(linux) || defined(__linux) || defined(__linux__) # define CATCH_PLATFORM_LINUX @@ -132,30 +135,51 @@ namespace Catch { #endif -#if defined(CATCH_CPP17_OR_GREATER) -# define CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS +// Only GCC compiler should be used in this block, so other compilers trying to +// mask themselves as GCC should be ignored. +#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && !defined(__CUDACC__) && !defined(__LCC__) +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic push" ) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic pop" ) + +# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) (void)__builtin_constant_p(__VA_ARGS__) + #endif -#ifdef __clang__ +#if defined(__clang__) + +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic push" ) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic pop" ) + +// As of this writing, IBM XL's implementation of __builtin_constant_p has a bug +// which results in calls to destructors being emitted for each temporary, +// without a matching initialization. In practice, this can result in something +// like `std::string::~string` being called on an uninitialized value. +// +// For example, this code will likely segfault under IBM XL: +// ``` +// REQUIRE(std::string("12") + "34" == "1234") +// ``` +// +// Therefore, `CATCH_INTERNAL_IGNORE_BUT_WARN` is not implemented. +# if !defined(__ibmxl__) && !defined(__CUDACC__) +# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) (void)__builtin_constant_p(__VA_ARGS__) /* NOLINT(cppcoreguidelines-pro-type-vararg, hicpp-vararg) */ +# endif + +# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wglobal-constructors\"") + +# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) + +# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wunused-variable\"" ) + +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wgnu-zero-variadic-macro-arguments\"" ) -# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ - _Pragma( "clang diagnostic push" ) \ - _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) \ - _Pragma( "clang diagnostic ignored \"-Wglobal-constructors\"") -# define CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS \ - _Pragma( "clang diagnostic pop" ) - -# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ - _Pragma( "clang diagnostic push" ) \ - _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) -# define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS \ - _Pragma( "clang diagnostic pop" ) - -# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS \ - _Pragma( "clang diagnostic push" ) \ - _Pragma( "clang diagnostic ignored \"-Wunused-variable\"" ) -# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_WARNINGS \ - _Pragma( "clang diagnostic pop" ) +# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wunused-template\"" ) #endif // __clang__ @@ -180,6 +204,7 @@ namespace Catch { // Android somehow still does not support std::to_string #if defined(__ANDROID__) # define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING +# define CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE #endif //////////////////////////////////////////////////////////////////////////////// @@ -204,20 +229,16 @@ namespace Catch { // some versions of cygwin (most) do not support std::to_string. Use the libstd check. // https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/api/a01053_source.html line 2812-2813 # if !((__cplusplus >= 201103L) && defined(_GLIBCXX_USE_C99) \ - && !defined(_GLIBCXX_HAVE_BROKEN_VSWPRINTF)) + && !defined(_GLIBCXX_HAVE_BROKEN_VSWPRINTF)) -# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING +# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING # endif #endif // __CYGWIN__ //////////////////////////////////////////////////////////////////////////////// // Visual C++ -#ifdef _MSC_VER - -# if _MSC_VER >= 1900 // Visual Studio 2015 or newer -# define CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS -# endif +#if defined(_MSC_VER) // Universal Windows platform does not support SEH // Or console colours (or console at all...) @@ -227,13 +248,25 @@ namespace Catch { # define CATCH_INTERNAL_CONFIG_WINDOWS_SEH # endif +# if !defined(__clang__) // Handle Clang masquerading for msvc + // MSVC traditional preprocessor needs some workaround for __VA_ARGS__ // _MSVC_TRADITIONAL == 0 means new conformant preprocessor // _MSVC_TRADITIONAL == 1 means old traditional non-conformant preprocessor -# if !defined(_MSVC_TRADITIONAL) || (defined(_MSVC_TRADITIONAL) && _MSVC_TRADITIONAL) -# define CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -# endif +# if !defined(_MSVC_TRADITIONAL) || (defined(_MSVC_TRADITIONAL) && _MSVC_TRADITIONAL) +# define CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +# endif // MSVC_TRADITIONAL + +// Only do this if we're not using clang on Windows, which uses `diagnostic push` & `diagnostic pop` +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION __pragma( warning(push) ) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION __pragma( warning(pop) ) +# endif // __clang__ + +#endif // _MSC_VER +#if defined(_REENTRANT) || defined(_MSC_VER) +// Enable async processing, as -pthread is specified or no additional linking is required +# define CATCH_INTERNAL_CONFIG_USE_ASYNC #endif // _MSC_VER //////////////////////////////////////////////////////////////////////////////// @@ -266,40 +299,56 @@ namespace Catch { #endif //////////////////////////////////////////////////////////////////////////////// -// Check if string_view is available and usable -// The check is split apart to work around v140 (VS2015) preprocessor issue... -#if defined(__has_include) -#if __has_include() && defined(CATCH_CPP17_OR_GREATER) -# define CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW -#endif + +// RTX is a special version of Windows that is real time. +// This means that it is detected as Windows, but does not provide +// the same set of capabilities as real Windows does. +#if defined(UNDER_RTSS) || defined(RTX64_BUILD) + #define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH + #define CATCH_INTERNAL_CONFIG_NO_ASYNC + #define CATCH_CONFIG_COLOUR_NONE #endif -//////////////////////////////////////////////////////////////////////////////// -// Check if optional is available and usable -#if defined(__has_include) -# if __has_include() && defined(CATCH_CPP17_OR_GREATER) -# define CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL -# endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) -#endif // __has_include +#if !defined(_GLIBCXX_USE_C99_MATH_TR1) +#define CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER +#endif -//////////////////////////////////////////////////////////////////////////////// -// Check if variant is available and usable +// Various stdlib support checks that require __has_include #if defined(__has_include) -# if __has_include() && defined(CATCH_CPP17_OR_GREATER) -# if defined(__clang__) && (__clang_major__ < 8) - // work around clang bug with libstdc++ https://bugs.llvm.org/show_bug.cgi?id=31852 - // fix should be in clang 8, workaround in libstdc++ 8.2 -# include -# if defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) -# define CATCH_CONFIG_NO_CPP17_VARIANT -# else -# define CATCH_INTERNAL_CONFIG_CPP17_VARIANT -# endif // defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) -# else -# define CATCH_INTERNAL_CONFIG_CPP17_VARIANT -# endif // defined(__clang__) && (__clang_major__ < 8) -# endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) -#endif // __has_include + // Check if string_view is available and usable + #if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # define CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW + #endif + + // Check if optional is available and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # define CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) + + // Check if byte is available and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # include + # if defined(__cpp_lib_byte) && (__cpp_lib_byte > 0) + # define CATCH_INTERNAL_CONFIG_CPP17_BYTE + # endif + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) + + // Check if variant is available and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # if defined(__clang__) && (__clang_major__ < 8) + // work around clang bug with libstdc++ https://bugs.llvm.org/show_bug.cgi?id=31852 + // fix should be in clang 8, workaround in libstdc++ 8.2 + # include + # if defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) + # define CATCH_CONFIG_NO_CPP17_VARIANT + # else + # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT + # endif // defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) + # else + # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT + # endif // defined(__clang__) && (__clang_major__ < 8) + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) +#endif // defined(__has_include) #if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) # define CATCH_CONFIG_COUNTER @@ -324,10 +373,6 @@ namespace Catch { # define CATCH_CONFIG_CPP17_OPTIONAL #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS) && !defined(CATCH_CONFIG_NO_CPP17_UNCAUGHT_EXCEPTIONS) && !defined(CATCH_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS) -# define CATCH_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS -#endif - #if defined(CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_NO_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_CPP17_STRING_VIEW) # define CATCH_CONFIG_CPP17_STRING_VIEW #endif @@ -336,6 +381,10 @@ namespace Catch { # define CATCH_CONFIG_CPP17_VARIANT #endif +#if defined(CATCH_INTERNAL_CONFIG_CPP17_BYTE) && !defined(CATCH_CONFIG_NO_CPP17_BYTE) && !defined(CATCH_CONFIG_CPP17_BYTE) +# define CATCH_CONFIG_CPP17_BYTE +#endif + #if defined(CATCH_CONFIG_EXPERIMENTAL_REDIRECT) # define CATCH_INTERNAL_CONFIG_NEW_CAPTURE #endif @@ -352,17 +401,53 @@ namespace Catch { # define CATCH_CONFIG_POLYFILL_ISNAN #endif +#if defined(CATCH_INTERNAL_CONFIG_USE_ASYNC) && !defined(CATCH_INTERNAL_CONFIG_NO_ASYNC) && !defined(CATCH_CONFIG_NO_USE_ASYNC) && !defined(CATCH_CONFIG_USE_ASYNC) +# define CATCH_CONFIG_USE_ASYNC +#endif + +#if defined(CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_NO_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_ANDROID_LOGWRITE) +# define CATCH_CONFIG_ANDROID_LOGWRITE +#endif + +#if defined(CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_NO_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_GLOBAL_NEXTAFTER) +# define CATCH_CONFIG_GLOBAL_NEXTAFTER +#endif + +// Even if we do not think the compiler has that warning, we still have +// to provide a macro that can be used by the code. +#if !defined(CATCH_INTERNAL_START_WARNINGS_SUPPRESSION) +# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION +#endif +#if !defined(CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION) +# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION +#endif #if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS -# define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS #endif #if !defined(CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS) # define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS -# define CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS #endif #if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS) # define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS -# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS +#endif + +// The goal of this macro is to avoid evaluation of the arguments, but +// still have the compiler warn on problems inside... +#if !defined(CATCH_INTERNAL_IGNORE_BUT_WARN) +# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) +#endif + +#if defined(__APPLE__) && defined(__apple_build_version__) && (__clang_major__ < 10) +# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS +#elif defined(__clang__) && (__clang_major__ < 5) +# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS +#endif + +#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS #endif #if defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) @@ -427,7 +512,7 @@ namespace Catch { SourceLineInfo( SourceLineInfo&& ) noexcept = default; SourceLineInfo& operator = ( SourceLineInfo&& ) noexcept = default; - bool empty() const noexcept; + bool empty() const noexcept { return file[0] == '\0'; } bool operator == ( SourceLineInfo const& other ) const noexcept; bool operator < ( SourceLineInfo const& other ) const noexcept; @@ -468,9 +553,10 @@ namespace Catch { } // end namespace Catch #define CATCH_REGISTER_TAG_ALIAS( alias, spec ) \ + CATCH_INTERNAL_START_WARNINGS_SUPPRESSION \ CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } \ - CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS + CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION // end catch_tag_alias_autoregistrar.h // start catch_test_registry.h @@ -497,6 +583,7 @@ namespace Catch { virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; }; + bool isThrowSafe( TestCase const& testCase, IConfig const& config ); bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); std::vector const& getAllTestCasesSorted( IConfig const& config ); @@ -509,53 +596,30 @@ namespace Catch { #include #include #include +#include namespace Catch { /// A non-owning string class (similar to the forthcoming std::string_view) /// Note that, because a StringRef may be a substring of another string, - /// it may not be null terminated. c_str() must return a null terminated - /// string, however, and so the StringRef will internally take ownership - /// (taking a copy), if necessary. In theory this ownership is not externally - /// visible - but it does mean (substring) StringRefs should not be shared between - /// threads. + /// it may not be null terminated. class StringRef { public: using size_type = std::size_t; + using const_iterator = const char*; private: - friend struct StringRefTestAccess; - - char const* m_start; - size_type m_size; - - char* m_data = nullptr; - - void takeOwnership(); - static constexpr char const* const s_empty = ""; - public: // construction/ assignment - StringRef() noexcept - : StringRef( s_empty, 0 ) - {} - - StringRef( StringRef const& other ) noexcept - : m_start( other.m_start ), - m_size( other.m_size ) - {} + char const* m_start = s_empty; + size_type m_size = 0; - StringRef( StringRef&& other ) noexcept - : m_start( other.m_start ), - m_size( other.m_size ), - m_data( other.m_data ) - { - other.m_data = nullptr; - } + public: // construction + constexpr StringRef() noexcept = default; StringRef( char const* rawChars ) noexcept; - StringRef( char const* rawChars, size_type size ) noexcept + constexpr StringRef( char const* rawChars, size_type size ) noexcept : m_start( rawChars ), m_size( size ) {} @@ -565,101 +629,64 @@ namespace Catch { m_size( stdString.size() ) {} - ~StringRef() noexcept { - delete[] m_data; - } - - auto operator = ( StringRef const &other ) noexcept -> StringRef& { - delete[] m_data; - m_data = nullptr; - m_start = other.m_start; - m_size = other.m_size; - return *this; + explicit operator std::string() const { + return std::string(m_start, m_size); } - operator std::string() const; - - void swap( StringRef& other ) noexcept; - public: // operators auto operator == ( StringRef const& other ) const noexcept -> bool; - auto operator != ( StringRef const& other ) const noexcept -> bool; + auto operator != (StringRef const& other) const noexcept -> bool { + return !(*this == other); + } - auto operator[] ( size_type index ) const noexcept -> char; + auto operator[] ( size_type index ) const noexcept -> char { + assert(index < m_size); + return m_start[index]; + } public: // named queries - auto empty() const noexcept -> bool { + constexpr auto empty() const noexcept -> bool { return m_size == 0; } - auto size() const noexcept -> size_type { + constexpr auto size() const noexcept -> size_type { return m_size; } - auto numberOfCharacters() const noexcept -> size_type; + // Returns the current start pointer. If the StringRef is not + // null-terminated, throws std::domain_exception auto c_str() const -> char const*; public: // substrings and searches - auto substr( size_type start, size_type size ) const noexcept -> StringRef; + // Returns a substring of [start, start + length). + // If start + length > size(), then the substring is [start, size()). + // If start > size(), then the substring is empty. + auto substr( size_type start, size_type length ) const noexcept -> StringRef; - // Returns the current start pointer. - // Note that the pointer can change when if the StringRef is a substring - auto currentData() const noexcept -> char const*; + // Returns the current start pointer. May not be null-terminated. + auto data() const noexcept -> char const*; - private: // ownership queries - may not be consistent between calls - auto isOwned() const noexcept -> bool; - auto isSubstring() const noexcept -> bool; - }; + constexpr auto isNullTerminated() const noexcept -> bool { + return m_start[m_size] == '\0'; + } - auto operator + ( StringRef const& lhs, StringRef const& rhs ) -> std::string; - auto operator + ( StringRef const& lhs, char const* rhs ) -> std::string; - auto operator + ( char const* lhs, StringRef const& rhs ) -> std::string; + public: // iterators + constexpr const_iterator begin() const { return m_start; } + constexpr const_iterator end() const { return m_start + m_size; } + }; auto operator += ( std::string& lhs, StringRef const& sr ) -> std::string&; auto operator << ( std::ostream& os, StringRef const& sr ) -> std::ostream&; - inline auto operator "" _sr( char const* rawChars, std::size_t size ) noexcept -> StringRef { + constexpr auto operator "" _sr( char const* rawChars, std::size_t size ) noexcept -> StringRef { return StringRef( rawChars, size ); } - } // namespace Catch -inline auto operator "" _catch_sr( char const* rawChars, std::size_t size ) noexcept -> Catch::StringRef { +constexpr auto operator "" _catch_sr( char const* rawChars, std::size_t size ) noexcept -> Catch::StringRef { return Catch::StringRef( rawChars, size ); } // end catch_stringref.h -// start catch_type_traits.hpp - - -#include - -namespace Catch{ - -#ifdef CATCH_CPP17_OR_GREATER - template - inline constexpr auto is_unique = std::true_type{}; - - template - inline constexpr auto is_unique = std::bool_constant< - (!std::is_same_v && ...) && is_unique - >{}; -#else - -template -struct is_unique : std::true_type{}; - -template -struct is_unique : std::integral_constant -::value - && is_unique::value - && is_unique::value ->{}; - -#endif -} - -// end catch_type_traits.hpp // start catch_preprocessor.hpp @@ -722,23 +749,170 @@ struct is_unique : std::integral_constant #define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) (INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) + 1) #endif +#define INTERNAL_CATCH_MAKE_NAMESPACE2(...) ns_##__VA_ARGS__ +#define INTERNAL_CATCH_MAKE_NAMESPACE(name) INTERNAL_CATCH_MAKE_NAMESPACE2(name) + #define INTERNAL_CATCH_REMOVE_PARENS(...) INTERNAL_CATCH_EXPAND1(INTERNAL_CATCH_DEF __VA_ARGS__) -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME2(Name, ...) INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME3(Name, __VA_ARGS__) #ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME3(Name,...) Name " - " #__VA_ARGS__ -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME(Name,...) INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME2(Name, INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__)) +#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) decltype(get_wrapper()) +#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__)) #else -// MSVC is adding extra space and needs more calls to properly remove () -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME3(Name,...) Name " -" #__VA_ARGS__ -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME1(Name, ...) INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME2(Name, __VA_ARGS__) -#define INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME(Name, ...) INTERNAL_CATCH_TEMPLATE_UNIQUE_NAME1(Name, INTERNAL_CATCH_EXPAND_VARGS(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__))) +#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) INTERNAL_CATCH_EXPAND_VARGS(decltype(get_wrapper())) +#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_EXPAND_VARGS(INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__))) #endif -#define INTERNAL_CATCH_MAKE_TYPE_LIST(types) Catch::TypeList +#define INTERNAL_CATCH_MAKE_TYPE_LISTS_FROM_TYPES(...)\ + CATCH_REC_LIST(INTERNAL_CATCH_MAKE_TYPE_LIST,__VA_ARGS__) + +#define INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_0) INTERNAL_CATCH_REMOVE_PARENS(_0) +#define INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_0, _1) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_1) +#define INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_0, _1, _2) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_1, _2) +#define INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_0, _1, _2, _3) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_1, _2, _3) +#define INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_0, _1, _2, _3, _4) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_1, _2, _3, _4) +#define INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_0, _1, _2, _3, _4, _5) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_1, _2, _3, _4, _5) +#define INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_0, _1, _2, _3, _4, _5, _6) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_1, _2, _3, _4, _5, _6) +#define INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_0, _1, _2, _3, _4, _5, _6, _7) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_1, _2, _3, _4, _5, _6, _7) +#define INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_1, _2, _3, _4, _5, _6, _7, _8) +#define INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9) +#define INTERNAL_CATCH_REMOVE_PARENS_11_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10) + +#define INTERNAL_CATCH_VA_NARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N + +#define INTERNAL_CATCH_TYPE_GEN\ + template struct TypeList {};\ + template\ + constexpr auto get_wrapper() noexcept -> TypeList { return {}; }\ + template class...> struct TemplateTypeList{};\ + template class...Cs>\ + constexpr auto get_wrapper() noexcept -> TemplateTypeList { return {}; }\ + template\ + struct append;\ + template\ + struct rewrap;\ + template class, typename...>\ + struct create;\ + template class, typename>\ + struct convert;\ + \ + template \ + struct append { using type = T; };\ + template< template class L1, typename...E1, template class L2, typename...E2, typename...Rest>\ + struct append, L2, Rest...> { using type = typename append, Rest...>::type; };\ + template< template class L1, typename...E1, typename...Rest>\ + struct append, TypeList, Rest...> { using type = L1; };\ + \ + template< template class Container, template class List, typename...elems>\ + struct rewrap, List> { using type = TypeList>; };\ + template< template class Container, template class List, class...Elems, typename...Elements>\ + struct rewrap, List, Elements...> { using type = typename append>, typename rewrap, Elements...>::type>::type; };\ + \ + template