github.com/marian-nmt/intgemm.git
author     kpu <github@kheafield.com>  2018-06-23 19:43:02 +0300
committer  kpu <github@kheafield.com>  2018-06-23 19:43:02 +0300
commit     faf1ac0293ce8c5c2b5e4eb25b785bdafd77fdd0 (patch)
tree       f03981a2b1c95e50f1dd3ab259db3e00d7ad9bee
parent     559fb9b392bcf52e5b77c08fcd49d0f439582413 (diff)
Simplify aligned allocation, reduce C++ version
-rw-r--r--  Makefile           2
-rw-r--r--  aligned.h         33
-rw-r--r--  dispatch.cc        2
-rw-r--r--  dispatch.h         2
-rw-r--r--  quantize_test.cc   6
-rw-r--r--  test.cc           30
6 files changed, 32 insertions, 43 deletions
diff --git a/Makefile b/Makefile
index 0f73af9..a6f0750 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
CXX := g++
-CXXFLAGS := -Wall -Werror -fPIC -O3 -march=native -std=c++11 -DNDEBUG
+CXXFLAGS := -Wall -Werror -fPIC -O3 -march=native -DNDEBUG
SRC := avx512_gemm.cc avx2_gemm.cc sse2_gemm.cc stop_watch.cc dispatch.cc
OBJ := ${SRC:.cc=.o}
diff --git a/aligned.h b/aligned.h
index a1a3ba1..733477a 100644
--- a/aligned.h
+++ b/aligned.h
@@ -1,38 +1,27 @@
#pragma once
-// Define allocation like:
-// free_ptr<Integer> quantized(AlignedArray<Integer>(rows * cols));
+// Aligned vector of things.
// This is only used by tests.
#include <cstdlib>
-#include <memory>
namespace intgemm {
-struct DeleteWithFree {
- template <class T> void operator() (T *t) const {
-// This requires newer C++
-// std::free(const_cast<std::remove_const_t<T>* >(t));
- std::free(t);
- }
-};
-template <class T> using free_ptr = std::unique_ptr<T, DeleteWithFree>;
-// Return memory suitably aligned for SIMD.
-template <class T> T* AlignedArray(std::size_t size) {
- return static_cast<T*>(aligned_alloc(64, size * sizeof(T)));
-}
-
template <class T> class AlignedVector {
public:
- explicit AlignedVector(std::size_t size) : mem_(AlignedArray<T>(size)) {}
+ explicit AlignedVector(std::size_t size)
+ : mem_(static_cast<T*>(aligned_alloc(64, size * sizeof(T)))) {}
+
+ ~AlignedVector() { std::free(mem_); }
+
+ T &operator[](std::size_t offset) { return mem_[offset]; }
+ const T &operator[](std::size_t offset) const { return mem_[offset]; }
- T &operator[](std::size_t offset) { return mem_.get()[offset]; }
- const T &operator[](std::size_t offset) const { return mem_.get()[offset]; }
+ T *get() { return mem_; }
+ const T *get() const { return mem_; }
- T *get() { return mem_.get(); }
- const T *get() const { return mem_.get(); }
private:
- free_ptr<T> mem_;
+ T *mem_;
};
} // namespace intgemm
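
For reference, a standalone sketch (not part of the commit) of the allocation pattern the tests switch to, assuming glibc's aligned_alloc is visible through <cstdlib> when building without -std=c++11:

// sketch.cc -- hypothetical example; mirrors the AlignedVector added above.
#include <cstdlib>
#include <cstdio>

namespace intgemm {
template <class T> class AlignedVector {
  public:
    explicit AlignedVector(std::size_t size)
      : mem_(static_cast<T*>(aligned_alloc(64, size * sizeof(T)))) {}
    ~AlignedVector() { std::free(mem_); }
    T &operator[](std::size_t offset) { return mem_[offset]; }
    const T &operator[](std::size_t offset) const { return mem_[offset]; }
    T *get() { return mem_; }
    const T *get() const { return mem_; }
  private:
    T *mem_;
};
} // namespace intgemm

int main() {
  // Allocation and free are handled by the constructor and destructor,
  // replacing the old free_ptr<T>(AlignedArray<T>(n)) pairing.
  intgemm::AlignedVector<float> input(64);  // 64 * sizeof(float) is a multiple of the 64-byte alignment
  for (std::size_t i = 0; i < 64; ++i) input[i] = static_cast<float>(i);
  std::printf("%f\n", input.get()[10]);
  return 0;
}

Note that aligned_alloc (C11) requires the requested size to be a multiple of the alignment, so callers are presumably expected to pass SIMD-friendly sizes, as the tests below do.
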
diff --git a/dispatch.cc b/dispatch.cc
index 318242a..e4b40e0 100644
--- a/dispatch.cc
+++ b/dispatch.cc
@@ -8,7 +8,7 @@ namespace intgemm {
UnsupportedCPU::UnsupportedCPU() {}
-UnsupportedCPU::~UnsupportedCPU() {}
+UnsupportedCPU::~UnsupportedCPU() throw() {}
const char *UnsupportedCPU::what() const throw() {
return "Integer matrix multiplication has not been efficiently implemented for your CPU.";
diff --git a/dispatch.h b/dispatch.h
index a08ad8e..f191ed3 100644
--- a/dispatch.h
+++ b/dispatch.h
@@ -50,7 +50,7 @@ class UnsupportedCPU : public std::exception {
public:
UnsupportedCPU();
- ~UnsupportedCPU();
+ ~UnsupportedCPU() throw();
const char *what() const throw();
};
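
The throw() added to the destructor above is presumably needed once -std=c++11 is dropped: in C++98, std::exception's virtual destructor is declared throw(), and an override may not have a looser exception specification. A minimal illustration (hypothetical Example class, not from this repository):

#include <exception>

class Example : public std::exception {
  public:
    // Matches the base class's throw() specification, so this compiles
    // under pre-C++11 dialects as well as C++11/14.
    ~Example() throw() {}
    const char *what() const throw() { return "example"; }
};

int main() { Example e; (void)e; return 0; }

(Dynamic exception specifications such as throw() were deprecated in C++11 and removed in C++17.)
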
diff --git a/quantize_test.cc b/quantize_test.cc
index 36745de..21bb48b 100644
--- a/quantize_test.cc
+++ b/quantize_test.cc
@@ -42,11 +42,11 @@ template <class I> bool IsOff(float from, I ref, I test) {
template <class Backend> bool Test(const float *input_unaligned, float quant_mult, std::size_t size) {
typedef typename Backend::Integer Integer;
bool success = true;
- free_ptr<float> input(AlignedArray<float>(size));
+ AlignedVector<float> input(size);
std::memcpy(input.get(), input_unaligned, sizeof(float) * size);
- free_ptr<Integer> ref(AlignedArray<Integer>(size));
- free_ptr<Integer> test(AlignedArray<Integer>(size));
+ AlignedVector<Integer> ref(size);
+ AlignedVector<Integer> test(size);
QuantizeRef(input.get(), ref.get(), quant_mult, size);
Backend::Quantize(input.get(), test.get(), quant_mult, size);
for (std::size_t i = 0; i < size; ++i) {
diff --git a/test.cc b/test.cc
index 9ceb583..e263698 100644
--- a/test.cc
+++ b/test.cc
@@ -45,11 +45,11 @@ template <class V> void SlowTranspose(const V *from, V *to, int rows, int cols)
}
void TestTranspose16() {
- free_ptr<int16_t> input(AlignedArray<int16_t>(8 * 8));
+ AlignedVector<int16_t> input(8 * 8);
for (int16_t i = 0; i < 64; ++i) {
input.get()[i] = i;
}
- free_ptr<int16_t> ref(AlignedArray<int16_t>(8 * 8));
+ AlignedVector<int16_t> ref(8 * 8);
SlowTranspose(input.get(), ref.get(), 8, 8);
// Overwrite input.
@@ -64,11 +64,11 @@ void TestTranspose16() {
}
void TestTranspose8() {
- free_ptr<int8_t> input(AlignedArray<int8_t>(16 * 16));
+ AlignedVector<int8_t> input(16 * 16);
for (int i = 0; i < 16 * 16; ++i) {
input.get()[i] = i;
}
- free_ptr<int8_t> ref(AlignedArray<int8_t>(16 * 16));
+ AlignedVector<int8_t> ref(16 * 16);
SlowTranspose(input.get(), ref.get(), 16, 16);
// Overwrite input.
@@ -93,7 +93,7 @@ template <class T> void PrintMatrix(const T *mem, int rows, int cols) {
template <class Routine> void TestPrepare(int rows = 32, int cols = 16) {
// Create array.
- free_ptr<float> input(AlignedArray<float>(rows * cols));
+ AlignedVector<float> input(rows * cols);
for (int i = 0; i < rows * cols; ++i) {
input.get()[i] = //(i > 127) ? (i - 256) : i;
(float)rand() / (float)RAND_MAX * 256.0 - 127.0;
@@ -101,13 +101,13 @@ template <class Routine> void TestPrepare(int rows = 32, int cols = 16) {
typedef typename Routine::Integer Integer;
// Call Prepare
- free_ptr<Integer> test(AlignedArray<Integer>(rows * cols));
+ AlignedVector<Integer> test(rows * cols);
Routine::PrepareB(input.get(), test.get(), 1, rows, cols);
// Compute reference output.
- free_ptr<Integer> quantized(AlignedArray<Integer>(rows * cols));
+ AlignedVector<Integer> quantized(rows * cols);
Routine::Quantize(input.get(), quantized.get(), 1, rows * cols);
- free_ptr<Integer> reference(AlignedArray<Integer>(rows * cols));
+ AlignedVector<Integer> reference(rows * cols);
SlowRearrange<Integer>(quantized.get(), reference.get(), Routine::kBTileRow, Routine::kBTileCol, rows, cols);
if (memcmp(reference.get(), test.get(), rows * cols * sizeof(Integer))) {
@@ -188,8 +188,8 @@ template <class Routine> void TestMultiply(int A_rows, int width, int B_cols) {
std::cout << Routine::Name() << "\t" << A_rows << '\t' << width << '\t' << B_cols << '\n';
// Initialize A and B.
- free_ptr<float> A(AlignedArray<float>(A_rows * width));
- free_ptr<float> B(AlignedArray<float>(width * B_cols));
+ AlignedVector<float> A(A_rows * width);
+ AlignedVector<float> B(width * B_cols);
for (int i = 0; i < A_rows * width; i++) {
A.get()[i] = ((float)rand()/(float)RAND_MAX)*2.0f - 1.0f;
}
@@ -200,20 +200,20 @@ template <class Routine> void TestMultiply(int A_rows, int width, int B_cols) {
float quant_mult = (sizeof(Integer) == 2) ? 1024 : 64;
float unquant_mult = 1.0/(quant_mult*quant_mult);
- free_ptr<Integer> A_prep(AlignedArray<Integer>(A_rows * width)), B_prep(AlignedArray<Integer>(width * B_cols));
+ AlignedVector<Integer> A_prep(A_rows * width), B_prep(width * B_cols);
Routine::PrepareA(A.get(), A_prep.get(), quant_mult, A_rows, width);
Routine::PrepareB(B.get(), B_prep.get(), quant_mult, width, B_cols);
- free_ptr<float> test_C(AlignedArray<float>(A_rows * B_cols));
+ AlignedVector<float> test_C(A_rows * B_cols);
Routine::Multiply(A_prep.get(), B_prep.get(), test_C.get(), unquant_mult, A_rows, width, B_cols);
- free_ptr<Integer> B_quant(AlignedArray<Integer>(width * B_cols));
+ AlignedVector<Integer> B_quant(width * B_cols);
Routine::Quantize(B.get(), B_quant.get(), quant_mult, width * B_cols);
- free_ptr<float> slowint_C(AlignedArray<float>(A_rows * B_cols));
+ AlignedVector<float> slowint_C(A_rows * B_cols);
// Assuming A is just quantization here.
SlowRefInt(A_prep.get(), B_quant.get(), slowint_C.get(), unquant_mult, A_rows, width, B_cols);
- free_ptr<float> float_C(AlignedArray<float>(A_rows * B_cols));
+ AlignedVector<float> float_C(A_rows * B_cols);
SlowRefFloat(A.get(), B.get(), float_C.get(), A_rows, width, B_cols);
Compare(float_C.get(), slowint_C.get(), test_C.get(), A_rows * B_cols);