Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Soumith Chintala <soumith@gmail.com> 2017-03-03 23:37:26 +0300
committer GitHub <noreply@github.com> 2017-03-03 23:37:26 +0300
commit 226484acc0e27fcd132ab28a7c3c794ce8e1299b (patch)
tree 7e735a103a68d53ff77339766bb4fa4c0dabe50d
parent 5e235ad7978389fa11865f8fa02f53d74b3bc361 (diff)
parent 948e753486468adcd4985a5301ce32668d5acaca (diff)
Merge pull request #961 from torch/avxfix
TH CMake cleanup + AVX/AVX2 only on appropriate files
-rw-r--r-- lib/TH/CMakeLists.txt 207
-rw-r--r-- lib/TH/THGeneral.h.in 7
-rw-r--r-- lib/TH/THVector.c 8
-rw-r--r-- lib/TH/generic/THVectorDispatch.c 18
-rw-r--r-- lib/TH/generic/simd/convolve.c 6
-rw-r--r-- lib/TH/vector/AVX.c 37
-rw-r--r-- lib/TH/vector/AVX.h 23
-rw-r--r-- lib/TH/vector/AVX2.c 7
-rw-r--r-- lib/TH/vector/AVX2.h 9
9 files changed, 172 insertions, 150 deletions
diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt
index 20f6bd6..351594d 100644
--- a/lib/TH/CMakeLists.txt
+++ b/lib/TH/CMakeLists.txt
@@ -20,11 +20,21 @@ IF(NOT TH_INSTALL_BIN_SUBDIR
SET(TH_INSTALL_CMAKE_SUBDIR "share/cmake/TH" CACHE PATH "TH install cmake subdirectory")
ENDIF()
-# flags
+#######################################################################
+##### flags section
+######################################################################
IF(MSVC)
- # respect the standard
- ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
+ # MSVC now supports C99 since VS2013/VS2015
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c99")
+ELSE(MSVC)
+ # use gnu99 rather than strict c99 because we rely on
+ # POSIX/GNU extensions such as posix_memalign
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
+ENDIF(MSVC)
+
+IF(MSVC)
+ ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) # respect the standard
ENDIF(MSVC)
IF(UNIX)
@@ -114,82 +124,25 @@ IF(NOT NO_GCC_EBX_FPIC_BUG)
ENDIF(NOT NO_GCC_EBX_FPIC_BUG)
-FIND_PACKAGE(SSE)
+FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2
IF(C_SSE2_FOUND)
+ MESSAGE(STATUS "SSE2 Found")
SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
ENDIF(C_SSE2_FOUND)
IF(C_SSE3_FOUND)
+ MESSAGE(STATUS "SSE3 Found")
SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}")
ENDIF(C_SSE3_FOUND)
+# we don't set the AVX and AVX2 flags globally, only for specific files
IF(C_AVX_FOUND)
- SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} -DUSE_AVX ${CMAKE_C_FLAGS}")
+ MESSAGE(STATUS "AVX Found")
+ # SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} ${CMAKE_C_FLAGS}")
ENDIF(C_AVX_FOUND)
IF(C_AVX2_FOUND)
- SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} -DUSE_AVX2 ${CMAKE_C_FLAGS}")
+ MESSAGE(STATUS "AVX2 Found")
+ # SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} ${CMAKE_C_FLAGS}")
ENDIF(C_AVX2_FOUND)
-IF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
- SET(simd generic/simd/convolve.c)
- IF(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve.c PROPERTIES COMPILE_FLAGS "/std:c99")
- ELSE(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve.c PROPERTIES COMPILE_FLAGS "-std=c99")
- ENDIF(MSVC)
-ENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
-
-IF(C_SSE4_1_FOUND)
- SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${CMAKE_C_FLAGS}")
-ENDIF(C_SSE4_1_FOUND)
-IF(C_SSE4_2_FOUND)
- SET(CMAKE_C_FLAGS "${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}")
-ENDIF(C_SSE4_2_FOUND)
-
-IF(C_SSE4_1_FOUND OR C_SSE4_2_FOUND)
- IF(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast /std:c99")
- ELSE(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math -std=c99")
- ENDIF(MSVC)
- SET(simd ${simd} generic/simd/convolve5x5_sse.c)
-ENDIF(C_SSE4_1_FOUND OR C_SSE4_2_FOUND)
-
-IF(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/THTensorMath.c PROPERTIES COMPILE_FLAGS "/std:c99")
-ELSE(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/THTensorMath.c PROPERTIES COMPILE_FLAGS "-std=c99")
-ENDIF(MSVC)
-
-IF(C_AVX_FOUND OR C_AVX2_FOUND)
- SET(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}")
- IF(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast /arch:AVX /std:c99")
- ELSE(MSVC)
- SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math -mavx -std=c99")
- ENDIF(MSVC)
- SET(simd ${simd} generic/simd/convolve5x5_avx.c)
-ENDIF(C_AVX_FOUND OR C_AVX2_FOUND)
-
-SET(hdr
- THGeneral.h THHalf.h THAllocator.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h
- THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )
-
-SET(src
- THGeneral.c THHalf.c THAllocator.c THStorage.c THTensor.c THBlas.c THLapack.c
- THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
-
-SET(src ${src} ${hdr} ${simd})
-ADD_LIBRARY(TH SHARED ${src})
-if(BUILD_STATIC)
- ADD_LIBRARY(TH_static STATIC ${src})
-endif()
-
-IF(NOT TH_SO_VERSION)
- SET(TH_SO_VERSION 0)
-ENDIF(NOT TH_SO_VERSION)
-MESSAGE(STATUS "TH_SO_VERSION: ${TH_SO_VERSION}")
-SET_TARGET_PROPERTIES(TH PROPERTIES
- VERSION ${TH_SO_VERSION}
- SOVERSION ${TH_SO_VERSION})
CHECK_C_SOURCE_RUNS("
#include <stdatomic.h>
@@ -233,6 +186,72 @@ int main()
" HAS_GCC_ATOMICS)
ENDIF()
+#######################################################################
+##### sources section
+######################################################################
+
+# IF ANY SIMD FOUND
+IF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
+ SET(simd generic/simd/convolve.c)
+ENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
+
+# IF SSE4 FOUND
+IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)
+ SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}")
+ IF(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast")
+ ELSE(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math")
+ ENDIF(MSVC)
+ SET(simd ${simd} generic/simd/convolve5x5_sse.c)
+ENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)
+
+# IF AVX FOUND
+IF(C_AVX_FOUND)
+ IF(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(vector/AVX.c generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX_FLAGS}")
+ ELSE(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(vector/AVX.c generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX_FLAGS}")
+ ENDIF(MSVC)
+ SET(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c)
+ENDIF(C_AVX_FOUND)
+
+IF(C_AVX2_FOUND)
+ IF(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX2_FLAGS}")
+ ELSE(MSVC)
+ SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX2_FLAGS}")
+ ENDIF(MSVC)
+ SET(simd ${simd} vector/AVX2.c)
+ENDIF(C_AVX2_FOUND)
+
+SET(hdr
+ THGeneral.h THHalf.h THAllocator.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h
+ THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )
+
+SET(src
+ THGeneral.c THHalf.c THAllocator.c THStorage.c THTensor.c THBlas.c THLapack.c
+ THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
+
+SET(src ${src} ${hdr} ${simd})
+
+#######################################################################
+##### build section
+######################################################################
+
+ADD_LIBRARY(TH SHARED ${src})
+if(BUILD_STATIC)
+ ADD_LIBRARY(TH_static STATIC ${src})
+endif()
+
+IF(NOT TH_SO_VERSION)
+ SET(TH_SO_VERSION 0)
+ENDIF(NOT TH_SO_VERSION)
+MESSAGE(STATUS "TH_SO_VERSION: ${TH_SO_VERSION}")
+SET_TARGET_PROPERTIES(TH PROPERTIES
+ VERSION ${TH_SO_VERSION}
+ SOVERSION ${TH_SO_VERSION})
+
IF(HAS_C11_ATOMICS)
ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1)
MESSAGE(STATUS "Atomics: using C11 intrinsics")
@@ -264,10 +283,6 @@ IF(LAPACK_FOUND)
TARGET_LINK_LIBRARIES(TH ${LAPACK_LIBRARIES})
ENDIF(LAPACK_FOUND)
-IF(BLAS_IS_ACCELERATE)
- MESSAGE(STATUS "BLAS FOUND IS ACCELERATE: Fix for sdot")
-ENDIF()
-
IF (UNIX AND NOT APPLE)
INCLUDE(CheckLibraryExists)
# https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
@@ -284,6 +299,7 @@ IF(UNIX)
IF(HAVE_MMAP)
ADD_DEFINITIONS(-DHAVE_MMAP=1)
ENDIF(HAVE_MMAP)
+ # needed so that lseek uses a 64-bit off_t: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html
ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64)
CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN)
IF(HAVE_SHM_OPEN)
@@ -299,47 +315,10 @@ IF(UNIX)
ENDIF(HAVE_MALLOC_USABLE_SIZE)
ENDIF(UNIX)
-
-
IF(NOT MSVC)
TARGET_LINK_LIBRARIES(TH m)
ENDIF(NOT MSVC)
-SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
-FOREACH(KEYWORD "inline" "__inline__" "__inline")
- IF(NOT DEFINED C_INLINE)
-
- SET(CMAKE_REQUIRED_FLAGS "-Dinline=${KEYWORD} ${CMAKE_C_FLAGS}")
- CHECK_C_SOURCE_RUNS("
- static inline int static_foo()
- {
- return 0;
- }
-
- int main(int argc, char *argv[])
- {
- static_foo();
- return 0;
- }" C_HAS_${KEYWORD})
-
- IF(C_HAS_${KEYWORD})
- SET(C_INLINE TRUE)
-# Right now i put it in THGeneral.h -- debatable
-# ADD_DEFINITIONS("-Dinline=${KEYWORD}")
- SET(TH_INLINE ${KEYWORD})
- MESSAGE(STATUS "C inline is supported (${KEYWORD})")
- ENDIF(C_HAS_${KEYWORD})
- ENDIF(NOT DEFINED C_INLINE)
-ENDFOREACH(KEYWORD)
-SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
-
-IF(NOT DEFINED C_INLINE)
- MESSAGE(STATUS "C inline seems not supported")
-# Right now i put it in THGeneral.h -- debatable
-# ADD_DEFINITIONS("-Dinline=")
-SET(TH_INLINE "")
-ENDIF(NOT DEFINED C_INLINE)
-
# Is __thread supported?
IF(NOT MSVC)
CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD)
@@ -355,6 +334,11 @@ ENDIF(NOT C_HAS_THREAD)
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
CONFIGURE_FILE(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h")
+
+#######################################################################
+##### install section
+######################################################################
+
INSTALL(TARGETS TH
EXPORT TH-exports
RUNTIME DESTINATION "${TH_INSTALL_BIN_SUBDIR}"
@@ -389,6 +373,11 @@ INSTALL(FILES
DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH")
INSTALL(FILES
+ vector/AVX.h
+ vector/AVX2.h
+ DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/vector")
+
+INSTALL(FILES
generic/THBlas.c
generic/THBlas.h
generic/THLapack.c
diff --git a/lib/TH/THGeneral.h.in b/lib/TH/THGeneral.h.in
index bc7e448..de11f1b 100644
--- a/lib/TH/THGeneral.h.in
+++ b/lib/TH/THGeneral.h.in
@@ -13,7 +13,6 @@
#cmakedefine USE_BLAS
#cmakedefine USE_LAPACK
-#cmakedefine BLAS_IS_ACCELERATE
#cmakedefine BLAS_F2C
#ifdef __cplusplus
@@ -32,12 +31,6 @@
# define TH_API TH_EXTERNC
#endif
-#define TH_INLINE @TH_INLINE@
-
-#ifndef __cplusplus
-#define inline @TH_INLINE@
-#endif
-
#ifndef M_PI
# define M_PI 3.14159265358979323846
#endif
diff --git a/lib/TH/THVector.c b/lib/TH/THVector.c
index 1c9ea24..d8493a5 100644
--- a/lib/TH/THVector.c
+++ b/lib/TH/THVector.c
@@ -15,12 +15,12 @@
#include "vector/SSE.c"
#endif
-#if defined(USE_AVX) || defined(USE_AVX2)
-#include "vector/AVX.c"
+#if defined(__AVX__) || defined(__AVX2__)
+#include "vector/AVX.h"
#endif
-#if defined(USE_AVX2)
-#include "vector/AVX2.c"
+#if defined(__AVX2__)
+#include "vector/AVX2.h"
#endif
#include "generic/THVectorDefault.c"
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 5b88852..6220dd6 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -26,7 +26,7 @@ static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX),
#endif
@@ -52,13 +52,13 @@ static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX2)
+ #if defined(__AVX2__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2),
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),
#endif
@@ -91,7 +91,7 @@ static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
#endif
@@ -119,7 +119,7 @@ static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),
#endif
@@ -152,7 +152,7 @@ static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
#endif
@@ -179,7 +179,7 @@ static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),
#endif
@@ -206,7 +206,7 @@ static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
#endif
#endif
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
#endif
@@ -227,7 +227,7 @@ void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
static FunctionDescription THVector_(copy_DISPATCHTABLE)[] = {
- #if defined(USE_AVX)
+ #if defined(__AVX__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX),
#endif
diff --git a/lib/TH/generic/simd/convolve.c b/lib/TH/generic/simd/convolve.c
index 842af17..bf07bbe 100644
--- a/lib/TH/generic/simd/convolve.c
+++ b/lib/TH/generic/simd/convolve.c
@@ -1,4 +1,4 @@
-#if defined(USE_AVX)
+#if defined(__AVX__)
#ifdef _MSC_VER
#include <intrin.h>
@@ -113,7 +113,7 @@ void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows,
void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) {
-#if defined(USE_AVX)
+#if defined(__AVX__)
int avx = haveCPUFeature(kCPUFeature_AVX);
if (avx)
{
@@ -124,4 +124,4 @@ void convolve_5x5(float* output, float* input, float* kernel, long outRows, long
{
convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols);
}
-} \ No newline at end of file
+}
diff --git a/lib/TH/vector/AVX.c b/lib/TH/vector/AVX.c
index 1f902cc..b7d5dd1 100644
--- a/lib/TH/vector/AVX.c
+++ b/lib/TH/vector/AVX.c
@@ -1,10 +1,13 @@
+#if defined(__AVX__)
#ifndef _MSC_VER
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
-static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
+#include "AVX.h"
+
+void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
ptrdiff_t i;
ptrdiff_t off;
for (i=0; i<=((n)-8); i+=8) {
@@ -17,7 +20,7 @@ static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t
}
}
-static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
ptrdiff_t off;
__m256d YMM0 = _mm256_set_pd(c, c, c, c);
@@ -33,7 +36,7 @@ static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n
}
}
-static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
+void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM0, YMM1, YMM2, YMM3;
for (i=0; i<=((n)-8); i+=8) {
@@ -51,7 +54,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -68,7 +71,7 @@ static void THDoubleVector_divs_AVX(double *y, const double *x, const double c,
}
}
-static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
+void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM0, YMM1, YMM2, YMM3;
for (i=0; i<=((n)-8); i+=8) {
@@ -86,7 +89,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -103,7 +106,7 @@ static void THDoubleVector_muls_AVX(double *y, const double *x, const double c,
}
}
-static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
+void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1, YMM2, YMM3;
@@ -119,7 +122,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -136,7 +139,7 @@ static void THDoubleVector_adds_AVX(double *y, const double *x, const double c,
}
}
-static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
+void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
ptrdiff_t i;
ptrdiff_t off;
for (i=0; i<=((n)-16); i+=16) {
@@ -149,7 +152,7 @@ static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n)
}
}
-static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
ptrdiff_t off;
__m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c);
@@ -165,7 +168,7 @@ static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
}
}
-static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
+void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM0, YMM1, YMM2, YMM3;
for (i=0; i<=((n)-16); i+=16) {
@@ -183,7 +186,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
@@ -200,7 +203,7 @@ static void THFloatVector_divs_AVX(float *y, const float *x, const float c, cons
}
}
-static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
+void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM0, YMM1, YMM2, YMM3;
for (i=0; i<=((n)-16); i+=16) {
@@ -218,7 +221,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
@@ -235,7 +238,7 @@ static void THFloatVector_muls_AVX(float *y, const float *x, const float c, cons
}
}
-static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
+void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1, YMM2, YMM3;
@@ -251,7 +254,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
@@ -267,3 +270,5 @@ static void THFloatVector_adds_AVX(float *y, const float *x, const float c, cons
y[i] = x[i] + c;
}
}
+
+#endif // defined(__AVX__)
diff --git a/lib/TH/vector/AVX.h b/lib/TH/vector/AVX.h
new file mode 100644
index 0000000..bfaeaa6
--- /dev/null
+++ b/lib/TH/vector/AVX.h
@@ -0,0 +1,23 @@
+#ifndef TH_AVX_H
+#define TH_AVX_H
+
+#include <stddef.h>
+
+void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
+void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
+void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
+void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
+void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
+void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
+void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
+void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
+void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+
+#endif
diff --git a/lib/TH/vector/AVX2.c b/lib/TH/vector/AVX2.c
index 3ccfc82..082a680 100644
--- a/lib/TH/vector/AVX2.c
+++ b/lib/TH/vector/AVX2.c
@@ -1,10 +1,12 @@
+#if defined(__AVX2__)
#ifndef _MSC_VER
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
+#include "AVX2.h"
-static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
+void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1, YMM2, YMM3;
@@ -23,7 +25,7 @@ static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y
}
}
-static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
+void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1, YMM2, YMM3;
@@ -42,3 +44,4 @@ static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, co
}
}
+#endif // defined(__AVX2__)
diff --git a/lib/TH/vector/AVX2.h b/lib/TH/vector/AVX2.h
new file mode 100644
index 0000000..85a9e93
--- /dev/null
+++ b/lib/TH/vector/AVX2.h
@@ -0,0 +1,9 @@
+#ifndef TH_AVX2_H
+#define TH_AVX2_H
+
+#include <stddef.h>
+
+void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
+void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
+
+#endif