github.com/marian-nmt/intgemm/intgemm.git
author    Kenneth Heafield <github@kheafield.com>  2018-06-23 22:27:52 +0300
committer Kenneth Heafield <github@kheafield.com>  2018-06-23 22:27:52 +0300
commit    0ae900a954ee19ebb8d33ba57493e4a14249c719
tree      19192163a762ac28ec454439e0a7e40575479338
parent    4da058ae1c33a859882e60eb0bf89512762422d8
Name Generic_16bit to Int16, Generic_8bit to Int8
 README.md    |  8
 benchmark.cc |  2
 example.cc   | 12
 intgemm.cc   | 12
 intgemm.h    |  4
 test.cc      |  4
 6 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/README.md b/README.md
index 46a9183..33728a5 100644
--- a/README.md
+++ b/README.md
@@ -28,13 +28,13 @@ Both A and B should be prepared before multiplication.
* B is width x B_cols.
*/
/* Prepare A for multiplication. This might be offline or on the fly. */
-intgemm::Generic_16bit::PrepareA(A, A_prepared, quant_mult, A_rows, width);
+intgemm::Int16::PrepareA(A, A_prepared, quant_mult, A_rows, width);
/* Prepare B for multiplication. This is typically done offline. */
-intgemm::Generic_16bit::PrepareB(B, B_prepared, quant_mult, width, B_cols);
+intgemm::Int16::PrepareB(B, B_prepared, quant_mult, width, B_cols);
/* Multiply and produce results in C */
-intgemm::Generic_16bit::Multiply(A_prepared, B_prepared, C, 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
+intgemm::Int16::Multiply(A_prepared, B_prepared, C, 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
```
-For 8-bit, use `Generic_8bit` instead of `Generic_16bit`.
+For 8-bit, use `Int8` instead of `Int16`.
When represented as floats, all of A, B, and C are in row-major format.
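
The unquantization factor in the final `Multiply` call follows directly from the quantization: `PrepareA` and `PrepareB` scale each element by `quant_mult` before rounding, so every product term carries a factor of `quant_mult` squared, which `1.0 / (quant_mult * quant_mult)` removes. A minimal scalar sketch of that round trip (hypothetical values, no SIMD, not the library's actual kernel):

```
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  const float quant_mult = 1024.0f;
  float a = 0.3f, b = -0.7f;
  // Quantize: scale into integer range and round, as PrepareA/PrepareB do.
  int16_t qa = static_cast<int16_t>(std::roundf(a * quant_mult));
  int16_t qb = static_cast<int16_t>(std::roundf(b * quant_mult));
  // Multiply in integers, then undo both quantization scales at once.
  float c = static_cast<int32_t>(qa) * qb / (quant_mult * quant_mult);
  assert(std::fabs(c - a * b) < 1e-4f);  // close to the float product
  return 0;
}
```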
diff --git a/benchmark.cc b/benchmark.cc
index 014cb4d..be07e9a 100644
--- a/benchmark.cc
+++ b/benchmark.cc
@@ -66,13 +66,11 @@ void Time(int A_rows, int width, int B_cols, int repeat = 20) {
#ifdef __AVX512BW__
Run<AVX512_8bit>(m, repeat);
#endif
- Run<Generic_8bit>(m, repeat);
Run<SSE2_16bit>(m, repeat);
Run<AVX2_16bit>(m, repeat);
#ifdef __AVX512BW__
Run<AVX512_16bit>(m, repeat);
#endif
- Run<Generic_16bit>(m, repeat);
}
} // namespace intgemm
diff --git a/example.cc b/example.cc
index c67e48b..0072596 100644
--- a/example.cc
+++ b/example.cc
@@ -41,14 +41,14 @@ int main() {
AlignedVector<int16_t> A_prepared(A_rows * width);
AlignedVector<int16_t> B_prepared(width * B_cols);
// Quantize A.
- intgemm::Generic_16bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
+ intgemm::Int16::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
// Quantize and reshape B.
// Typically you will do this once when parameters are loaded, not every time.
- intgemm::Generic_16bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
+ intgemm::Int16::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
AlignedVector<float> C(A_rows * B_cols);
// Do the actual multiply.
- intgemm::Generic_16bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
+ intgemm::Int16::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
// Sanity check. C will be row major.
assert(fabs(C[0] - top_left_reference) < 0.05);
}
@@ -60,14 +60,14 @@ int main() {
AlignedVector<int8_t> A_prepared(A_rows * width);
AlignedVector<int8_t> B_prepared(width * B_cols);
// Quantize A.
- intgemm::Generic_8bit::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
+ intgemm::Int8::PrepareA(A.get(), A_prepared.get(), quant_mult, A_rows, width);
// Quantize and reshape B.
// Typically you will do this once when parameters are loaded, not every time.
- intgemm::Generic_8bit::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
+ intgemm::Int8::PrepareB(B.get(), B_prepared.get(), quant_mult, width, B_cols);
AlignedVector<float> C(A_rows * B_cols);
// Do the actual multiply.
- intgemm::Generic_8bit::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
+ intgemm::Int8::Multiply(A_prepared.get(), B_prepared.get(), C.get(), 1.0 / (quant_mult * quant_mult), A_rows, width, B_cols);
// Sanity check. C will be row major.
assert(fabs(C[0] - top_left_reference) < 0.05);
}
diff --git a/intgemm.cc b/intgemm.cc
index bdf2f04..7653a85 100644
--- a/intgemm.cc
+++ b/intgemm.cc
@@ -69,12 +69,12 @@ template <class T> T ChooseCPU(T avx512, T avx2, T ssse3, T sse2, T unsupported)
} // namespace
-void (*Generic_16bit::Quantize)(const float *input, int16_t *output, float quant_mult, int size) = ChooseCPU(AVX512_16bit::Quantize, AVX2_16bit::Quantize, SSE2_16bit::Quantize, SSE2_16bit::Quantize, Unsupported_16bit::Quantize);
-void (*Generic_16bit::PrepareB)(const float *input, int16_t *output, float quant_mult, int rows, int cols) = ChooseCPU(AVX512_16bit::PrepareB, AVX2_16bit::PrepareB, SSE2_16bit::PrepareB, SSE2_16bit::PrepareB, Unsupported_16bit::PrepareB);
-void (*Generic_16bit::Multiply)(const int16_t *A, const int16_t *B, float *C, float unquant_mult, int A_rows, int width, int B_cols) = ChooseCPU(AVX512_16bit::Multiply, AVX2_16bit::Multiply, SSE2_16bit::Multiply, SSE2_16bit::Multiply, Unsupported_16bit::Multiply);
+void (*Int16::Quantize)(const float *input, int16_t *output, float quant_mult, int size) = ChooseCPU(AVX512_16bit::Quantize, AVX2_16bit::Quantize, SSE2_16bit::Quantize, SSE2_16bit::Quantize, Unsupported_16bit::Quantize);
+void (*Int16::PrepareB)(const float *input, int16_t *output, float quant_mult, int rows, int cols) = ChooseCPU(AVX512_16bit::PrepareB, AVX2_16bit::PrepareB, SSE2_16bit::PrepareB, SSE2_16bit::PrepareB, Unsupported_16bit::PrepareB);
+void (*Int16::Multiply)(const int16_t *A, const int16_t *B, float *C, float unquant_mult, int A_rows, int width, int B_cols) = ChooseCPU(AVX512_16bit::Multiply, AVX2_16bit::Multiply, SSE2_16bit::Multiply, SSE2_16bit::Multiply, Unsupported_16bit::Multiply);
-void (*Generic_8bit::Quantize)(const float *input, int8_t *output, float quant_mult, int size) = ChooseCPU(AVX512_8bit::Quantize, AVX2_8bit::Quantize, SSSE3_8bit::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize);
-void (*Generic_8bit::PrepareB)(const float *input, int8_t *output, float quant_mult, int rows, int cols) = ChooseCPU(AVX512_8bit::PrepareB, AVX2_8bit::PrepareB, SSSE3_8bit::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB);
-void (*Generic_8bit::Multiply)(const int8_t *A, const int8_t *B, float *C, float unquant_mult, int A_rows, int width, int B_cols) = ChooseCPU(AVX512_8bit::Multiply, AVX2_8bit::Multiply, SSSE3_8bit::Multiply, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply);
+void (*Int8::Quantize)(const float *input, int8_t *output, float quant_mult, int size) = ChooseCPU(AVX512_8bit::Quantize, AVX2_8bit::Quantize, SSSE3_8bit::Quantize, Unsupported_8bit::Quantize, Unsupported_8bit::Quantize);
+void (*Int8::PrepareB)(const float *input, int8_t *output, float quant_mult, int rows, int cols) = ChooseCPU(AVX512_8bit::PrepareB, AVX2_8bit::PrepareB, SSSE3_8bit::PrepareB, Unsupported_8bit::PrepareB, Unsupported_8bit::PrepareB);
+void (*Int8::Multiply)(const int8_t *A, const int8_t *B, float *C, float unquant_mult, int A_rows, int width, int B_cols) = ChooseCPU(AVX512_8bit::Multiply, AVX2_8bit::Multiply, SSSE3_8bit::Multiply, Unsupported_8bit::Multiply, Unsupported_8bit::Multiply);
} // namespace intgemm
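
The renamed `Int16` and `Int8` structs are thin dispatch tables: each static function pointer is bound once to the best implementation the running CPU supports, falling back through the list (note that the 8-bit path needs SSSE3 while 16-bit only needs SSE2). A minimal sketch of the same pattern, assuming GCC/Clang's `__builtin_cpu_supports`; the library's actual `ChooseCPU` uses its own CPU detection:

```
#include <cstdio>

// Hypothetical per-ISA kernels standing in for AVX2_16bit::Multiply etc.
void MultiplyAVX2() { std::puts("avx2"); }
void MultiplySSE2() { std::puts("sse2"); }
void MultiplyUnsupported() { std::puts("unsupported"); }

// Return the first implementation the CPU supports, best first.
template <class T> T Choose(T avx2, T sse2, T unsupported) {
  if (__builtin_cpu_supports("avx2")) return avx2;
  if (__builtin_cpu_supports("sse2")) return sse2;
  return unsupported;
}

int main() {
  // The library binds such pointers once at static-initialization time.
  void (*Multiply)() = Choose(MultiplyAVX2, MultiplySSE2, MultiplyUnsupported);
  Multiply();
  return 0;
}
```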
diff --git a/intgemm.h b/intgemm.h
index 016acd6..ea651b9 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -58,7 +58,7 @@ class UnsupportedCPU : public std::exception {
};
/* 16-bit matrix multiplication. */
-struct Generic_16bit {
+struct Int16 {
typedef int16_t Integer;
// A's size must be a multiple of 1x32.
@@ -90,7 +90,7 @@ struct Generic_16bit {
};
/* 8-bit matrix multiplication */
-struct Generic_8bit {
+struct Int8 {
typedef int8_t Integer;
// A's size must be a multiple of 1x64.
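
The `Integer` typedef is what keeps callers generic over backends: code templated on the routine class can allocate buffers of the right element type without hard-coding 8-bit versus 16-bit, which is how `TestMultiply<Routine>` in test.cc works. A sketch of the pattern with hypothetical stand-in structs:

```
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-ins exposing only the piece this sketch needs.
struct LikeInt16 { typedef int16_t Integer; };
struct LikeInt8 { typedef int8_t Integer; };

// The element type comes from the routine, not from the caller.
template <class Routine> std::size_t PreparedBytes(int rows, int cols) {
  std::vector<typename Routine::Integer> prepared(rows * cols);
  return prepared.size() * sizeof(typename Routine::Integer);
}

int main() {
  // 16-bit prepared matrices take twice the space of 8-bit ones.
  return PreparedBytes<LikeInt16>(4, 8) == 2 * PreparedBytes<LikeInt8>(4, 8) ? 0 : 1;
}
```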
diff --git a/test.cc b/test.cc
index 58e12e0..bd0ccb6 100644
--- a/test.cc
+++ b/test.cc
@@ -220,12 +220,12 @@ template <class Routine> void TestMultiply(int A_rows, int width, int B_cols) {
}
void TestBoth(int A_rows, int width, int B_cols) {
- if (Generic_16bit::Quantize == AVX512_16bit::Quantize) {
+ if (Int16::Quantize == AVX512_16bit::Quantize) {
TestMultiply<AVX512_16bit>(A_rows, width, B_cols);
}
TestMultiply<AVX2_16bit>(A_rows, width, B_cols);
TestMultiply<SSE2_16bit>(A_rows, width, B_cols);
- if (Generic_16bit::Quantize == AVX512_16bit::Quantize) {
+ if (Int16::Quantize == AVX512_16bit::Quantize) {
TestMultiply<AVX512_8bit>(A_rows, width, B_cols);
}
TestMultiply<AVX2_8bit>(A_rows, width, B_cols);
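
Comparing the dispatched pointer against a specific backend's function, as `TestBoth` does here, is how the tests detect at runtime whether `ChooseCPU` actually selected the AVX512 path, so those tests only run on hardware that has it. A minimal sketch of the idiom with hypothetical names:

```
#include <cstdio>

void QuantizeAVX512() {}
void QuantizeSSE2() {}

// Imagine this was bound by CPU dispatch at startup; SSE2 here for the sketch.
void (*Quantize)() = QuantizeSSE2;

int main() {
  // Run backend-specific tests only if dispatch picked that backend.
  if (Quantize == QuantizeAVX512) {
    std::puts("testing AVX512 path");
  } else {
    std::puts("AVX512 not selected; skipping those tests");
  }
  return 0;
}
```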