github.com/marian-nmt/intgemm.git
author     kpu <github@kheafield.com>  2018-06-23 20:48:45 +0300
committer  kpu <github@kheafield.com>  2018-06-23 20:48:45 +0300
commit     37f49df4aa08734ba61b9462ca5f75fe84432bde (patch)
tree       e6bccd3d50f93d5bdb62462beb96c553f9e16138 /example.cc
parent     801bd5185314d8db4b2ee77327d4d2dec6cb27fa (diff)
Add example
Diffstat (limited to 'example.cc')
-rw-r--r--  example.cc  74
1 file changed, 74 insertions, 0 deletions
diff --git a/example.cc b/example.cc
new file mode 100644
index 0000000..c67e48b
--- /dev/null
+++ b/example.cc
@@ -0,0 +1,74 @@
+#include "intgemm.h"
+// This is just for AlignedVector, which helps manage 64-byte aligned memory.
+// Feel free to manage memory yourself.
+#include "aligned.h"
+
+#include <cassert>
+#include <stdlib.h>
+#include <math.h>
+
+int main() {
+ const int A_rows = 1;
+ // The shared dimension: A's columns and B's rows.
+ const int width = 64;
+ const int B_cols = 8;
+
+ // This is a simple vector class that allocates memory aligned to 64 bytes.
+ // You don't have to use it; just use aligned_alloc and friends directly.
+ using intgemm::AlignedVector;
+ AlignedVector<float> A(A_rows * width);
+ AlignedVector<float> B(width * B_cols);
+
+ // Fill with random values in range [-2, 2].
+ srand(1);
+ for (int i = 0; i < A_rows * width; ++i) {
+ A[i] = ((float)rand()/(float)RAND_MAX)*4.0f - 2.0f;
+ }
+ for (int i = 0; i < width * B_cols; ++i) {
+ B[i] = ((float)rand()/(float)RAND_MAX)*4.0f - 2.0f;
+ }
+
+ // Compute the top left corner of C as a sanity check.
+ float top_left_reference = 0.0;
+ for (int w = 0; w < width; ++w) {
+ top_left_reference += A[w] * B[w * B_cols];
+ }
+
+ // 16-bit multiplication.
+ {
+ // For 16-bit, Jacob Devlin recommends a quantization multiplier of 1024 so that the 32-bit accumulators do not overflow.
+ float quant_mult = 1024.0;
+ AlignedVector<int16_t> A_prepared(A_rows * width);
+ AlignedVector<int16_t> B_prepared(width * B_cols);
+ // Quantize A.
+ intgemm::Generic_16bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
+ // Quantize and reshape B.
+ // Typically you will do this once when parameters are loaded, not every time.
+ intgemm::Generic_16bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
+
+ AlignedVector<float> C(A_rows * B_cols);
+ // Do the actual multiply.
+ intgemm::Generic_16bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
+ // Sanity check. C will be row major.
+ assert(fabs(C[0] - top_left_reference) < 0.05);
+ }
+
+ // 8-bit multiplication.
+ {
+ // For 8-bit, a good quantization multiplier is 127 divided by the largest absolute value; the values here lie in [-2, 2].
+ float quant_mult = 127.0 / 2.0;
+ AlignedVector<int8_t> A_prepared(A_rows * width);
+ AlignedVector<int8_t> B_prepared(width * B_cols);
+ // Quantize A.
+ intgemm::Generic_8bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
+ // Quantize and reshape B.
+ // Typically you will do this once when parameters are loaded, not every time.
+ intgemm::Generic_8bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
+
+ AlignedVector<float> C(A_rows * B_cols);
+ // Do the actual multiply.
+ intgemm::Generic_8bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
+ // Sanity check. C will be row major.
+ assert(fabs(C[0] - top_left_reference) < 0.05);
+ }
+}
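
Note on the unquantization factor passed to Multiply above: the example scales C by 1.0 / (quant_mult * quant_mult) because both A and B were multiplied by quant_mult before rounding to integers. The following standalone sketch (not part of intgemm; the two input values are made up for illustration) shows the same arithmetic on a single pair of numbers.

#include <cmath>
#include <cstdio>

int main() {
  const float a = 1.5f, b = -0.75f;        // example floats in [-2, 2]
  const float quant_mult = 127.0f / 2.0f;  // 127 / largest absolute value, as in the 8-bit path above
  // Round-to-nearest quantization of each operand.
  const int a_q = static_cast<int>(std::round(a * quant_mult));
  const int b_q = static_cast<int>(std::round(b * quant_mult));
  // Integer product, then divide out both quantization multipliers.
  const float recovered = a_q * b_q / (quant_mult * quant_mult);
  std::printf("exact %f approx %f\n", a * b, recovered);  // approx is within rounding error of -1.125
  return 0;
}

The same reasoning extends to a dot product over the shared dimension: every term carries a factor of quant_mult * quant_mult, so one division at the end recovers an approximation of the float result, which is what the assert against top_left_reference checks.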