Merge pull request #58 from kpuatamazon/master

Expose PrepareB from quantized and transposed format
author: Kenneth Heafield <kpu@users.noreply.github.com> 2020-01-27 18:26:55 +0300
committer: GitHub <noreply@github.com> 2020-01-27 18:26:55 +0300
commit: 695dbb28dbf080ccfd5ac8cbb5d3a58c460cf193 (patch)
tree: f4b429127b05500c7bd67d57183f0153dc57609b
parent: b3311f5f6342a6406379b2e8fbae5bebd279ad7c (diff)
parent: 583a624df19db347adad7265b131ff52a930cc33 (diff)
2 files changed, 11 insertions, 0 deletions
diff --git a/intgemm.cc b/intgemm.cc
index 88c571d..f1d328d 100644
--- a/intgemm.cc
+++ b/intgemm.cc
@@ -20,6 +20,8 @@ void (*Int8::QuantizeU)(const float *input, uint8_t *output, float quant_mult, I
 
 void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) = ChooseCPU(AVX512VNNI_8bit::PrepareB, AVX512_8bit::PrepareB, AVX2_8bit::PrepareB, SSSE3_8bit::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB);
 
+void (*Int8::PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols) = ChooseCPU(AVX512_8bit::PrepareBQuantizedTransposed, AVX512_8bit::PrepareBQuantizedTransposed, AVX2_8bit::PrepareBQuantizedTransposed, SSSE3_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed, Unsupported_8bit::PrepareBQuantizedTransposed);
+
 void (*Int8::SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) = ChooseCPU(AVX512VNNI_8bit::SelectColumnsB, AVX512_8bit::SelectColumnsB, AVX2_8bit::SelectColumnsB, SSSE3_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB, Unsupported_8bit::SelectColumnsB);
 
 const char *const Int8::kName = ChooseCPU(AVX512VNNI_8bit::kName, AVX512_8bit::kName, AVX2_8bit::kName, SSSE3_8bit::kName, Unsupported_8bit::kName, Unsupported_8bit::kName);
diff --git a/intgemm.h b/intgemm.h
index a08c1cc..581c6f4 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -86,6 +86,9 @@ struct Unsupported_8bit {
   static void PrepareA(const float *, int8_t *, float, Index, Index) {
     throw UnsupportedCPU();
   }
+  static void PrepareBQuantizedTransposed(const int8_t *, int8_t *, Index, Index) {
+    throw UnsupportedCPU();
+  }
   static void PrepareB(const float *, int8_t *, float, Index, Index) {
     throw UnsupportedCPU();
   }
@@ -279,6 +282,12 @@ struct Int8 {
   // It will match the Multiply function on the same CPU though.
   static void (*PrepareB)(const float *input, int8_t *output, float quant_mult, Index rows, Index cols);
 
+  // Convert from a B that was already transposed (routine not provided) and
+  // quantized (e.g. with Quantize) to the CPU-dependent format used for
+  // Multiply.  This is useful for storing a quantized model on disk in a
+  // CPU-independent fashion.
+  static void (*PrepareBQuantizedTransposed)(const int8_t *input, int8_t *output, Index rows, Index cols);
+
   // Select columns from a prepared B matrix.  The number of selected columns must be a multiple of 8. 
   static void (*SelectColumnsB)(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end);
author	Kenneth Heafield <kpu@users.noreply.github.com>	2020-01-27 18:26:55 +0300
committer	GitHub <noreply@github.com>	2020-01-27 18:26:55 +0300
commit	695dbb28dbf080ccfd5ac8cbb5d3a58c460cf193 (patch)
tree	f4b429127b05500c7bd67d57183f0153dc57609b
parent	b3311f5f6342a6406379b2e8fbae5bebd279ad7c (diff)
parent	583a624df19db347adad7265b131ff52a930cc33 (diff)