diff options
author | kpu <github@kheafield.com> | 2018-06-23 20:48:45 +0300 |
---|---|---|
committer | kpu <github@kheafield.com> | 2018-06-23 20:48:45 +0300 |
commit | 37f49df4aa08734ba61b9462ca5f75fe84432bde (patch) | |
tree | e6bccd3d50f93d5bdb62462beb96c553f9e16138 /example.cc | |
parent | 801bd5185314d8db4b2ee77327d4d2dec6cb27fa (diff) |
Add example
Diffstat (limited to 'example.cc')
-rw-r--r-- | example.cc | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/example.cc b/example.cc new file mode 100644 index 0000000..c67e48b --- /dev/null +++ b/example.cc @@ -0,0 +1,74 @@ +#include "intgemm.h" +// This is just for AlignedVector, which helps managed 64-byte aligned memory. +// Feel free to manage memory yourself. +#include "aligned.h" + +#include <cassert> +#include <stdlib.h> +#include <math.h> + +int main() { + const int A_rows = 1; + // The shared dimension: A's columns and B's rows. + const int width = 64; + const int B_cols = 8; + + // This is a simple vector class that allocates memory aligned to 64 bytes. + // You don't have to use it; just use aligned_alloc and friends directly. + using intgemm::AlignedVector; + AlignedVector<float> A(A_rows * width); + AlignedVector<float> B(width * B_cols); + + // Fill with random values in range [-2, 2]. + srand(1); + for (int i = 0; i < A_rows * width; ++i) { + A[i] = ((float)rand()/(float)RAND_MAX)*4.0f - 2.0f; + } + for (int i = 0; i < width * B_cols; ++i) { + B[i] = ((float)rand()/(float)RAND_MAX)*4.0f - 2.0f; + } + + // Compute the top left corner of C as a sanity check. + float top_left_reference = 0.0; + for (int w = 0; w < width; ++w) { + top_left_reference += A[w] * B[w * B_cols]; + } + + // 16-bit multiplication. + { + // For 16-bit, Jacob Devlin recommends 1024 so as to not overflow in 32-bit accumulation. + float quant_mult = 1024.0; + AlignedVector<int16_t> A_prepared(A_rows * width); + AlignedVector<int16_t> B_prepared(width * B_cols); + // Quantize A. + intgemm::Generic_16bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width); + // Quantize and reshape B. + // Typically you will do this once when parameters are loaded, not every time. + intgemm::Generic_16bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols); + + AlignedVector<float> C(A_rows * B_cols); + // Do the actual multiply. + intgemm::Generic_16bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols); + // Sanity check. C will be row major. + assert(fabs(C[0] - top_left_reference) < 0.05); + } + + // 8-bit multiplication. + { + // For 8-bit a good quantization multiplier is 127 / largest absolute value.. + float quant_mult = 127.0 / 2.0; + AlignedVector<int8_t> A_prepared(A_rows * width); + AlignedVector<int8_t> B_prepared(width * B_cols); + // Quantize A. + intgemm::Generic_8bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width); + // Quantize and reshape B. + // Typically you will do this once when parameters are loaded, not every time. + intgemm::Generic_8bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols); + + AlignedVector<float> C(A_rows * B_cols); + // Do the actual multiply. + intgemm::Generic_8bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols); + // Sanity check. C will be row major. + assert(fabs(C[0] - top_left_reference) < 0.05); + } +} |