Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-12-16 08:08:06 +0300
committer Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-12-16 08:10:16 +0300
commit 233ad7e9cd282965dfe82639771e588599e624c3 (patch)
tree 9ef8083aa99a31c8b9e8e5076ccb755bf935443d
parent b891e08e0594788d0de1d5ba414a237dd46a1293 (diff)
parent 4b04b639796a78a3c755aa94c2f3a09b0e365a54 (diff)
Merge remote-tracking branch 'origin/master' into big-refactoring (big-refactoring)
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- backends/avx2.h 12
-rw-r--r-- backends/avx512.h 8
-rw-r--r-- backends/avx512vnni.h 2
-rw-r--r-- backends/backend.h 2
-rw-r--r-- backends/sse2.h 8
-rw-r--r-- backends/ssse3.h 10
-rw-r--r-- benchmarks/benchmark.cc 25
-rw-r--r-- benchmarks/biasmultiply.cc 4
-rw-r--r-- callbacks.h 24
-rw-r--r-- callbacks/avx2.h 13
-rw-r--r-- callbacks/avx512.h 19
-rw-r--r-- callbacks/implementations.inl 10
-rw-r--r-- callbacks/output_buffer_info.h 2
-rw-r--r-- callbacks/sse2.h 13
-rw-r--r-- kernels.h 25
-rw-r--r-- kernels/avx2.h 13
-rw-r--r-- kernels/avx512.h 19
-rw-r--r-- kernels/implementations.inl 7
-rw-r--r-- kernels/sse2.h 13
-rw-r--r-- multiply.h 7
-rw-r--r-- test/add127_test.cc 2
-rw-r--r-- test/kernels/add_bias_test.cc 6
-rw-r--r-- test/kernels/bitwise_not_test.cc 6
-rw-r--r-- test/kernels/downcast_test.cc 6
-rw-r--r-- test/kernels/exp_test.cc 6
-rw-r--r-- test/kernels/floor_test.cc 6
-rw-r--r-- test/kernels/multiply_sat_test.cc 6
-rw-r--r-- test/kernels/multiply_test.cc 6
-rw-r--r-- test/kernels/quantize_test.cc 6
-rw-r--r-- test/kernels/relu_test.cc 6
-rw-r--r-- test/kernels/rescale_test.cc 6
-rw-r--r-- test/kernels/sigmoid_test.cc 6
-rw-r--r-- test/kernels/tanh_test.cc 6
-rw-r--r-- test/kernels/unquantize_test.cc 6
-rw-r--r-- test/kernels/upcast_test.cc 6
-rw-r--r-- test/kernels/write_test.cc 6
-rw-r--r-- test/multiply_test.cc 12
-rw-r--r-- test/quantize_test.cc 6
-rw-r--r-- test/test.cc 2
-rw-r--r-- test/test.h 6
-rw-r--r-- test/utils_test.cc 4
42 files changed, 153 insertions, 206 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f5c0e8d..eedc2f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,7 +39,6 @@ endif()
# Generate configure file
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/intgemm_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/intgemm_config.h)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
foreach(exe benchmark biasmultiply)
diff --git a/backends/avx2.h b/backends/avx2.h
index 67c56b6..e650084 100644
--- a/backends/avx2.h
+++ b/backends/avx2.h
@@ -1,9 +1,9 @@
#pragma once
-#include "interleave.h"
-#include "kernels.h"
-#include "multiply.h"
-#include "types.h"
+#include "../interleave.h"
+#include "../kernels.h"
+#include "../multiply.h"
+#include "../types.h"
#include "backend.h"
#include <cstdint>
@@ -52,7 +52,7 @@ class QuantizeTile16 {
template <>
struct Backend<CPUType::AVX2, int16_t> {
- static inline const char* const Name() { return "16-bit INTGEMM_AVX2"; };
+ static inline const char* const Name() { return "16-bit AVX2"; };
// Currently A is prepared by quantization but this could theoretically change.
INTGEMM_AVX2 static inline void PrepareA(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) {
@@ -171,7 +171,7 @@ INTGEMM_MAXABSOLUTE(__m256, INTGEMM_AVX2)
template <>
struct Backend<CPUType::AVX2, int8_t> {
- static inline const char* const Name() { return "8-bit INTGEMM_AVX2"; };
+ static inline const char* const Name() { return "8-bit AVX2"; };
// Currently A is prepared by quantization but this could theoretically change.
INTGEMM_AVX2 static inline void PrepareA(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) {
diff --git a/backends/avx512.h b/backends/avx512.h
index 093489d..e50a324 100644
--- a/backends/avx512.h
+++ b/backends/avx512.h
@@ -4,10 +4,10 @@
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
-#include "interleave.h"
-#include "kernels.h"
-#include "multiply.h"
-#include "types.h"
+#include "../interleave.h"
+#include "../kernels.h"
+#include "../multiply.h"
+#include "../types.h"
#include "backend.h"
#include <cassert>
diff --git a/backends/avx512vnni.h b/backends/avx512vnni.h
index 790e62c..96e000b 100644
--- a/backends/avx512vnni.h
+++ b/backends/avx512vnni.h
@@ -4,7 +4,7 @@
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
#include "avx512.h"
-#include "types.h"
+#include "../types.h"
#include "backend.h"
namespace intgemm {
diff --git a/backends/backend.h b/backends/backend.h
index 437595d..f9d8b85 100644
--- a/backends/backend.h
+++ b/backends/backend.h
@@ -1,6 +1,6 @@
#pragma once
-#include "types.h"
+#include "../types.h"
namespace intgemm {
diff --git a/backends/sse2.h b/backends/sse2.h
index 21e41cf..30c2d4a 100644
--- a/backends/sse2.h
+++ b/backends/sse2.h
@@ -1,8 +1,8 @@
#pragma once
-#include "kernels.h"
-#include "multiply.h"
-#include "types.h"
+#include "../kernels.h"
+#include "../multiply.h"
+#include "../types.h"
#include "backend.h"
#include <cstdint>
@@ -50,7 +50,7 @@ INTGEMM_MAXABSOLUTE(__m128, INTGEMM_SSE2)
// This should be pure INTGEMM_SSE2 (and below).
template <>
struct Backend<CPUType::SSE2, int16_t> {
- static inline const char* const Name() { return "16-bit INTGEMM_SSE2"; };
+ static inline const char* const Name() { return "16-bit SSE2"; };
// Currently A is prepared by quantization but this could theoretically change.
INTGEMM_SSE2 static inline void PrepareA(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) {
diff --git a/backends/ssse3.h b/backends/ssse3.h
index 7d87877..4b7c1b4 100644
--- a/backends/ssse3.h
+++ b/backends/ssse3.h
@@ -1,9 +1,9 @@
#pragma once
-#include "interleave.h"
-#include "kernels.h"
-#include "multiply.h"
-#include "types.h"
+#include "../interleave.h"
+#include "../kernels.h"
+#include "../multiply.h"
+#include "../types.h"
#include "backend.h"
#include <cstdint>
@@ -97,7 +97,7 @@ class QuantizeTile8 {
// pmaddubsw (the 8-bit multiply) is INTGEMM_SSSE3, so pedantically that's the version we need.
template <>
struct Backend<CPUType::SSSE3, int8_t> {
- static inline const char* const Name() { return "8-bit INTGEMM_SSSE3"; };
+ static inline const char* const Name() { return "8-bit SSSE3"; };
// Currently A is prepared by quantization but this could theoretically change.
INTGEMM_SSSE3 static inline void PrepareA(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) {
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index 11141e5..87cc61f 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -1,9 +1,9 @@
-#include "aligned.h"
+#include "../aligned.h"
#include "intgemm_config.h"
-#include "backends.h"
-#include "intgemm.h"
-#include "stop_watch.h"
-#include "callbacks.h"
+#include "../backends.h"
+#include "../intgemm.h"
+#include "../stop_watch.h"
+#include "../callbacks.h"
#include <algorithm>
#include <cassert>
@@ -98,6 +98,7 @@ struct BackendStats {
std::vector<std::vector<uint64_t>> ssse3_8bit;
std::vector<std::vector<uint64_t>> avx2_8bit;
std::vector<std::vector<uint64_t>> avx512_8bit;
+ std::vector<std::vector<uint64_t>> avx512vnni_8bit;
std::vector<std::vector<uint64_t>> sse2_16bit;
std::vector<std::vector<uint64_t>> avx2_16bit;
std::vector<std::vector<uint64_t>> avx512_16bit;
@@ -119,12 +120,12 @@ void Summarize(std::vector<uint64_t> &stats) {
stddev += off * off;
}
stddev = sqrt(stddev / (keep - stats.begin() - 1));
- std::cout << std::setw(8) << *std::min_element(stats.begin(), stats.end()) << '\t' << std::setw(8) << avg << '\t' << std::setw(8) << stddev;
+ std::cout << std::setw(10) << *std::min_element(stats.begin(), stats.end()) << '\t' << std::setw(8) << avg << '\t' << std::setw(8) << stddev;
}
template <class Backend> void Print(std::vector<std::vector<uint64_t>> &stats, int index) {
if (stats.empty()) return;
- std::cout << Backend::Name() << '\t';
+ std::cout << std::setw(16) << Backend::Name() << '\t';
Summarize(stats[index]);
std::cout << '\n';
}
@@ -205,6 +206,13 @@ int main(int argc, char ** argv) {
RunAll<AVX512_16bit>(matrices, end, stats.avx512_16bit);
}
#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ std::cerr << "AVX512VNNI 8bit, 100 samples..." << std::endl;
+ for (int samples = 0; samples < kSamples; ++samples) {
+ RandomMatrices *end = (samples < 4) ? matrices_end : full_sample;
+ RunAll<AVX512VNNI_8bit>(matrices, end, stats.avx512vnni_8bit);
+ }
+#endif
if (stats.sse2_16bit.empty()) {
std::cerr << "No CPU support." << std::endl;
@@ -217,6 +225,9 @@ int main(int argc, char ** argv) {
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
Print<AVX512_8bit>(stats.avx512_8bit, i);
#endif
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ Print<AVX512VNNI_8bit>(stats.avx512vnni_8bit, i);
+#endif
Print<SSE2_16bit>(stats.sse2_16bit, i);
Print<AVX2_16bit>(stats.avx2_16bit, i);
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
diff --git a/benchmarks/biasmultiply.cc b/benchmarks/biasmultiply.cc
index ec147ce..ec8ca95 100644
--- a/benchmarks/biasmultiply.cc
+++ b/benchmarks/biasmultiply.cc
@@ -1,5 +1,5 @@
-#include "intgemm.h"
-#include "aligned.h"
+#include "../intgemm.h"
+#include "../aligned.h"
#include <chrono>
#include <random>
#include <iostream>
diff --git a/callbacks.h b/callbacks.h
index da3e88f..c8a29df 100644
--- a/callbacks.h
+++ b/callbacks.h
@@ -3,6 +3,24 @@
#include "callbacks/configs.h"
#include "callbacks/output_buffer_info.h"
-#include "callbacks/sse2.h"
-#include "callbacks/avx2.h"
-#include "callbacks/avx512.h"
+#include "intgemm_config.h"
+#include "intrinsics.h"
+#include "kernels.h"
+#include "types.h"
+#include "utils.h"
+#include "vec_traits.h"
+
+#define CALLBACKS_THIS_IS_SSE2
+#include "callbacks/implementations.inl"
+#undef CALLBACKS_THIS_IS_SSE2
+
+#define CALLBACKS_THIS_IS_AVX2
+#include "callbacks/implementations.inl"
+#undef CALLBACKS_THIS_IS_AVX2
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#define CALLBACKS_THIS_IS_AVX512BW
+#include "callbacks/implementations.inl"
+#undef CALLBACKS_THIS_IS_AVX512BW
+#endif
+
diff --git a/callbacks/avx2.h b/callbacks/avx2.h
deleted file mode 100644
index 76b2605..0000000
--- a/callbacks/avx2.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#define CALLBACKS_THIS_IS_AVX2
-#include "callbacks/implementations.inl"
-#undef CALLBACKS_THIS_IS_AVX2
-
-namespace intgemm {
-namespace callbacks {
-
-// Put here callbacks supported only by AVX2...
-
-}
-}
diff --git a/callbacks/avx512.h b/callbacks/avx512.h
deleted file mode 100644
index 3e101dd..0000000
--- a/callbacks/avx512.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-#include "intgemm_config.h"
-
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
-
-#define CALLBACKS_THIS_IS_AVX512BW
-#include "callbacks/implementations.inl"
-#undef CALLBACKS_THIS_IS_AVX512BW
-
-namespace intgemm {
-namespace callbacks {
-
-// Put here callbacks supported only by AVX512BW...
-
-}
-}
-
-#endif
diff --git a/callbacks/implementations.inl b/callbacks/implementations.inl
index 4541664..dce89b2 100644
--- a/callbacks/implementations.inl
+++ b/callbacks/implementations.inl
@@ -1,12 +1,4 @@
-#include "callbacks/configs.h"
-#include "callbacks/output_buffer_info.h"
-
-#include "intrinsics.h"
-#include "kernels.h"
-#include "types.h"
-#include "utils.h"
-#include "vec_traits.h"
-
+/* This file is included multiple times, once per architecture. */
#if defined(CALLBACKS_THIS_IS_SSE2)
#define CPU_NAME SSE2
#define CPU_ATTR INTGEMM_SSE2
diff --git a/callbacks/output_buffer_info.h b/callbacks/output_buffer_info.h
index fa86587..213aef4 100644
--- a/callbacks/output_buffer_info.h
+++ b/callbacks/output_buffer_info.h
@@ -1,6 +1,6 @@
#pragma once
-#include "types.h"
+#include "../types.h"
namespace intgemm {
namespace callbacks {
diff --git a/callbacks/sse2.h b/callbacks/sse2.h
deleted file mode 100644
index a53b8ef..0000000
--- a/callbacks/sse2.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#define CALLBACKS_THIS_IS_SSE2
-#include "callbacks/implementations.inl"
-#undef CALLBACKS_THIS_IS_SSE2
-
-namespace intgemm {
-namespace callbacks {
-
-// Put here callbacks supported only by SSE2...
-
-}
-}
diff --git a/kernels.h b/kernels.h
index 4ab937c..ef63fec 100644
--- a/kernels.h
+++ b/kernels.h
@@ -1,5 +1,24 @@
#pragma once
-#include "kernels/sse2.h"
-#include "kernels/avx2.h"
-#include "kernels/avx512.h"
+#include "intgemm_config.h"
+#include "intrinsics.h"
+#include "types.h"
+#include "utils.h"
+#include "vec_traits.h"
+
+#include <cstdlib>
+
+#define KERNELS_THIS_IS_SSE2
+#include "kernels/implementations.inl"
+#undef KERNELS_THIS_IS_SSE2
+
+#define KERNELS_THIS_IS_AVX2
+#include "kernels/implementations.inl"
+#undef KERNELS_THIS_IS_AVX2
+
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+#define KERNELS_THIS_IS_AVX512BW
+#include "kernels/implementations.inl"
+#undef KERNELS_THIS_IS_AVX512BW
+#endif
+
diff --git a/kernels/avx2.h b/kernels/avx2.h
deleted file mode 100644
index c7f29ca..0000000
--- a/kernels/avx2.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#define KERNELS_THIS_IS_AVX2
-#include "kernels/implementations.inl"
-#undef KERNELS_THIS_IS_AVX2
-
-namespace intgemm {
-namespace kernels {
-
-// Put here kernels supported only by AVX2...
-
-}
-}
diff --git a/kernels/avx512.h b/kernels/avx512.h
deleted file mode 100644
index e472422..0000000
--- a/kernels/avx512.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-#include "intgemm_config.h"
-
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
-
-#define KERNELS_THIS_IS_AVX512BW
-#include "kernels/implementations.inl"
-#undef KERNELS_THIS_IS_AVX512BW
-
-namespace intgemm {
-namespace kernels {
-
-// Put here kernels supported only by AVX512BW...
-
-}
-}
-
-#endif
diff --git a/kernels/implementations.inl b/kernels/implementations.inl
index fda4a04..80347fc 100644
--- a/kernels/implementations.inl
+++ b/kernels/implementations.inl
@@ -1,9 +1,4 @@
-#include "intrinsics.h"
-#include "types.h"
-#include "utils.h"
-#include "vec_traits.h"
-
-#include <cstdlib>
+/* This file is included multiple times, once for each backend instruction set. */
#if defined(KERNELS_THIS_IS_SSE2)
#define CPU_NAME SSE2
diff --git a/kernels/sse2.h b/kernels/sse2.h
deleted file mode 100644
index 322fd37..0000000
--- a/kernels/sse2.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#define KERNELS_THIS_IS_SSE2
-#include "kernels/implementations.inl"
-#undef KERNELS_THIS_IS_SSE2
-
-namespace intgemm {
-namespace kernels {
-
-// Put here kernels supported only by SSE2...
-
-}
-}
diff --git a/multiply.h b/multiply.h
index 1d4113c..823fa6d 100644
--- a/multiply.h
+++ b/multiply.h
@@ -50,8 +50,11 @@ INTGEMM_AVX512BW static inline __m256i PermuteSummer(__m512i pack0123, __m512i p
}
// Find the maximum float.
-static inline INTGEMM_AVX512DQ float MaxFloat32(__m512 a) {
- return MaxFloat32(max_ps(_mm512_castps512_ps256(a), _mm512_extractf32x8_ps(a, 1)));
+static inline INTGEMM_AVX512F float MaxFloat32(__m512 a) {
+ // _mm512_extractf32x8_ps is AVX512DQ but we don't care about masking.
+ // So cast to pd, do AVX512F _mm512_extractf64x4_pd, then cast to ps.
+ __m256 upper = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a), 1));
+ return MaxFloat32(max_ps(_mm512_castps512_ps256(a), upper));
}
#endif
diff --git a/test/add127_test.cc b/test/add127_test.cc
index 4dc9ea1..0f73c09 100644
--- a/test/add127_test.cc
+++ b/test/add127_test.cc
@@ -1,4 +1,4 @@
-#include "test/test.h"
+#include "test.h"
namespace intgemm {
diff --git a/test/kernels/add_bias_test.cc b/test/kernels/add_bias_test.cc
index 3c4a593..4a2060e 100644
--- a/test/kernels/add_bias_test.cc
+++ b/test/kernels/add_bias_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/bitwise_not_test.cc b/test/kernels/bitwise_not_test.cc
index 3b78aa8..889e1bb 100644
--- a/test/kernels/bitwise_not_test.cc
+++ b/test/kernels/bitwise_not_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/downcast_test.cc b/test/kernels/downcast_test.cc
index 056c1e7..b25889f 100644
--- a/test/kernels/downcast_test.cc
+++ b/test/kernels/downcast_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/exp_test.cc b/test/kernels/exp_test.cc
index 2e4fecc..d4e100e 100644
--- a/test/kernels/exp_test.cc
+++ b/test/kernels/exp_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/floor_test.cc b/test/kernels/floor_test.cc
index 8f21af3..3f4fdf3 100644
--- a/test/kernels/floor_test.cc
+++ b/test/kernels/floor_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/multiply_sat_test.cc b/test/kernels/multiply_sat_test.cc
index 86bf581..83ce5ac 100644
--- a/test/kernels/multiply_sat_test.cc
+++ b/test/kernels/multiply_sat_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/multiply_test.cc b/test/kernels/multiply_test.cc
index 9673e89..90607f5 100644
--- a/test/kernels/multiply_test.cc
+++ b/test/kernels/multiply_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/quantize_test.cc b/test/kernels/quantize_test.cc
index 29a5ecc..e666654 100644
--- a/test/kernels/quantize_test.cc
+++ b/test/kernels/quantize_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/relu_test.cc b/test/kernels/relu_test.cc
index 7631623..fdf7c0e 100644
--- a/test/kernels/relu_test.cc
+++ b/test/kernels/relu_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/rescale_test.cc b/test/kernels/rescale_test.cc
index 9c0d581..1d7f556 100644
--- a/test/kernels/rescale_test.cc
+++ b/test/kernels/rescale_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/sigmoid_test.cc b/test/kernels/sigmoid_test.cc
index f38e890..e0e008e 100644
--- a/test/kernels/sigmoid_test.cc
+++ b/test/kernels/sigmoid_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/tanh_test.cc b/test/kernels/tanh_test.cc
index 3a4294a..7407a11 100644
--- a/test/kernels/tanh_test.cc
+++ b/test/kernels/tanh_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/unquantize_test.cc b/test/kernels/unquantize_test.cc
index b23e7bd..439970e 100644
--- a/test/kernels/unquantize_test.cc
+++ b/test/kernels/unquantize_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/upcast_test.cc b/test/kernels/upcast_test.cc
index bef4e41..5c13dfd 100644
--- a/test/kernels/upcast_test.cc
+++ b/test/kernels/upcast_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/kernels/write_test.cc b/test/kernels/write_test.cc
index 8d85600..53a0ea6 100644
--- a/test/kernels/write_test.cc
+++ b/test/kernels/write_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "kernels.h"
+#include "../test.h"
+#include "../../aligned.h"
+#include "../../kernels.h"
#include <numeric>
diff --git a/test/multiply_test.cc b/test/multiply_test.cc
index 5768976..49dff21 100644
--- a/test/multiply_test.cc
+++ b/test/multiply_test.cc
@@ -1,9 +1,9 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "interleave.h"
-#include "intgemm.h"
-#include "multiply.h"
-#include "callbacks.h"
+#include "test.h"
+#include "../aligned.h"
+#include "../interleave.h"
+#include "../intgemm.h"
+#include "../multiply.h"
+#include "../callbacks.h"
#include <algorithm>
#include <cassert>
diff --git a/test/quantize_test.cc b/test/quantize_test.cc
index 448f694..059bde7 100644
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -1,6 +1,6 @@
-#include "test/test.h"
-#include "aligned.h"
-#include "backends.h"
+#include "test.h"
+#include "../aligned.h"
+#include "../backends.h"
#include <cstring>
#include <iostream>
diff --git a/test/test.cc b/test/test.cc
index cb45b73..88daaa2 100644
--- a/test/test.cc
+++ b/test/test.cc
@@ -1,5 +1,5 @@
#define CATCH_CONFIG_RUNNER
-#include "test/test.h"
+#include "test.h"
int main(int argc, char ** argv) {
return Catch::Session().run(argc, argv);
diff --git a/test/test.h b/test/test.h
index 8fe5d9b..fc47da5 100644
--- a/test/test.h
+++ b/test/test.h
@@ -1,9 +1,9 @@
#pragma once
-#include "3rd_party/catch.hpp"
+#include "../3rd_party/catch.hpp"
#include <sstream>
-#include "intgemm.h"
-#include "aligned.h"
+#include "../intgemm.h"
+#include "../aligned.h"
#include "intgemm_config.h"
diff --git a/test/utils_test.cc b/test/utils_test.cc
index 580a872..782027e 100644
--- a/test/utils_test.cc
+++ b/test/utils_test.cc
@@ -1,5 +1,5 @@
-#include "test/test.h"
-#include "utils.h"
+#include "test.h"
+#include "../utils.h"
namespace intgemm {
namespace {