Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <kheafiel@amazon.com> 2020-02-24 20:41:29 +0300
committer Kenneth Heafield <kheafiel@amazon.com> 2020-02-24 20:41:29 +0300
commit 2985958d1d9554789c5cc3004c162d43ad80e361 (patch)
tree dcc959bd94f1532eba4327b5a8a4a60fb9075e35
parent 1db4a86d5736d09d5b2e7f1965a99057b03ba7af (diff)
MaxAbsolute with arbitrary many arguments
-rw-r--r-- multiply.h 26
-rw-r--r-- test/multiply_test.cc 24
2 files changed, 30 insertions, 20 deletions
diff --git a/multiply.h b/multiply.h
index 0aa86aa..9a15e0e 100644
--- a/multiply.h
+++ b/multiply.h
@@ -562,20 +562,28 @@ INTGEMM_SSSE3 inline static void InnerINTGEMM_SSSE3(
} \
#define INTGEMM_MAXABSOLUTE(Register, target) \
-target static float MaxAbsolute(const float *begin_float, const float *end_float) { \
+target static inline float MaxAbsolute(const float *begin_float, const float *end_float) { \
assert(end_float > begin_float); \
- assert((end_float - begin_float) % (sizeof(Register) / sizeof(float)) == 0); \
+ assert(reinterpret_cast<uintptr_t>(begin_float) % sizeof(Register) == 0); \
const Register *begin = reinterpret_cast<const Register*>(begin_float); \
- const Register *end = reinterpret_cast<const Register*>(end_float); \
- union {float f; int32_t i;} float_convert; \
- float_convert.i = 0x7fffffff; \
- Register and_me = set1_ps<Register>(float_convert.f); \
- Register highest = and_ps(and_me, *begin); \
- for (++begin; begin != end; ++begin) { \
+ const float *end_reg = end_float - (reinterpret_cast<uintptr_t>(end_float) % sizeof(Register)) / sizeof(float); \
+ const Register *end = reinterpret_cast<const Register*>(end_reg); \
+ union {float f; int32_t i;} and_convert, float_convert; \
+ and_convert.i = 0x7fffffff; \
+ Register and_me = set1_ps<Register>(and_convert.f); \
+ Register highest = setzero_ps<Register>(); \
+ for (; begin < end; ++begin) { \
Register reg = and_ps(and_me, *begin); \
highest = max_ps(highest, reg); \
} \
- return MaxFloat32(highest); \
+ float ret = MaxFloat32(highest); \
+ /* Overhang: this would be more efficient if done in a single SIMD operation with some zeroing */ \
+ for (const float *i = end_reg; i < end_float; ++i) { \
+ float_convert.f = *i; \
+ float_convert.i &= and_convert.i; \
+ ret = std::max(ret, float_convert.f); \
+ } \
+ return ret; \
} \
} // namespace intgemm
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index 59c62a9..97f68a3 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -194,18 +194,20 @@ void CompareMaxAbs(const float *begin, const float *end, float test) {
template <float (*Backend) (const float *, const float *)> void TestMaxAbsolute() {
std::mt19937 gen;
std::uniform_real_distribution<float> dist(-8.0, 8.0);
- AlignedVector<float> test(64);
- // 64 tries.
- for (int t = 0; t < 64; ++t) {
- // Fill with [-8, 8).
- for (auto& it : test) {
- it = dist(gen);
+ const std::size_t kLengthMax = 65;
+ AlignedVector<float> test(kLengthMax);
+ for (std::size_t len = 1; len < kLengthMax; ++len) {
+ for (int t = 0; t < len; ++t) {
+ // Fill with [-8, 8).
+ for (auto& it : test) {
+ it = dist(gen);
+ }
+ CompareMaxAbs(test.begin(), test.begin() + len, Backend(test.begin(), test.begin() + len));
+ test[t] = -32.0;
+ CompareMaxAbs(test.begin(), test.begin() + len, Backend(test.begin(), test.begin() + len));
+ test[t] = 32.0;
+ CompareMaxAbs(test.begin(), test.begin() + len, Backend(test.begin(), test.begin() + len));
}
- CompareMaxAbs(test.begin(), test.end(), Backend(test.begin(), test.end()));
- test[t] = -32.0;
- CompareMaxAbs(test.begin(), test.end(), Backend(test.begin(), test.end()));
- test[t] = 32.0;
- CompareMaxAbs(test.begin(), test.end(), Backend(test.begin(), test.end()));
}
}