Merge pull request #961 from torch/avxfix

TH CMake cleanup + AVX/AVX2 only on appropriate files
author: Soumith Chintala <soumith@gmail.com> 2017-03-03 23:37:26 +0300
committer: GitHub <noreply@github.com> 2017-03-03 23:37:26 +0300
commit: 226484acc0e27fcd132ab28a7c3c794ce8e1299b (patch)
tree: 7e735a103a68d53ff77339766bb4fa4c0dabe50d
parent: 5e235ad7978389fa11865f8fa02f53d74b3bc361 (diff)
parent: 948e753486468adcd4985a5301ce32668d5acaca (diff)
9 files changed, 172 insertions, 150 deletions
diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt
index 20f6bd6..351594d 100644
--- a/lib/TH/CMakeLists.txt
+++ b/lib/TH/CMakeLists.txt
@@ -20,11 +20,21 @@ IF(NOT TH_INSTALL_BIN_SUBDIR
   SET(TH_INSTALL_CMAKE_SUBDIR "share/cmake/TH" CACHE PATH "TH install cmake subdirectory")
 ENDIF()
 
-# flags
+#######################################################################
+##### flags section
+######################################################################
 
 IF(MSVC)
-  # respect the standard
-  ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
+  # MSVC has supported most of C99 since VS2013/VS2015
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c99")
+ELSE(MSVC)
+  # use gnu99 rather than c99 because strict c99 hides the
+  # POSIX/GNU extensions we rely on, such as posix_memalign
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
+ENDIF(MSVC)
+
+IF(MSVC)
+  ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)  # respect the standard
 ENDIF(MSVC)
 
 IF(UNIX)
@@ -114,82 +124,25 @@ IF(NOT NO_GCC_EBX_FPIC_BUG)
 ENDIF(NOT NO_GCC_EBX_FPIC_BUG)
 
 
-FIND_PACKAGE(SSE)
+FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2
 IF(C_SSE2_FOUND)
+  MESSAGE(STATUS "SSE2 Found")
   SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
 ENDIF(C_SSE2_FOUND)
 IF(C_SSE3_FOUND)
+  MESSAGE(STATUS "SSE3 Found")
   SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}")
 ENDIF(C_SSE3_FOUND)
+# we don't set the AVX and AVX2 flags globally, only on the specific files that need them
 IF(C_AVX_FOUND)
-  SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} -DUSE_AVX ${CMAKE_C_FLAGS}")
+  MESSAGE(STATUS "AVX Found")
+  #   SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} ${CMAKE_C_FLAGS}")
 ENDIF(C_AVX_FOUND)
 IF(C_AVX2_FOUND)
-  SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} -DUSE_AVX2 ${CMAKE_C_FLAGS}")
+  MESSAGE(STATUS "AVX2 Found")
+  #   SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} ${CMAKE_C_FLAGS}")
 ENDIF(C_AVX2_FOUND)
 
-IF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
-  SET(simd generic/simd/convolve.c)
-  IF(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve.c PROPERTIES COMPILE_FLAGS "/std:c99")
-  ELSE(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve.c PROPERTIES COMPILE_FLAGS "-std=c99")
-  ENDIF(MSVC)
-ENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
-
-IF(C_SSE4_1_FOUND)
-  SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${CMAKE_C_FLAGS}")
-ENDIF(C_SSE4_1_FOUND)
-IF(C_SSE4_2_FOUND)
-  SET(CMAKE_C_FLAGS "${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}")
-ENDIF(C_SSE4_2_FOUND)
-
-IF(C_SSE4_1_FOUND OR C_SSE4_2_FOUND)
-  IF(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast /std:c99")
-  ELSE(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math -std=c99")
-  ENDIF(MSVC)
-  SET(simd ${simd} generic/simd/convolve5x5_sse.c)
-ENDIF(C_SSE4_1_FOUND OR C_SSE4_2_FOUND)
-
-IF(MSVC)
-  SET_SOURCE_FILES_PROPERTIES(generic/THTensorMath.c PROPERTIES COMPILE_FLAGS "/std:c99")
-ELSE(MSVC)
-  SET_SOURCE_FILES_PROPERTIES(generic/THTensorMath.c PROPERTIES COMPILE_FLAGS "-std=c99")
-ENDIF(MSVC)
-
-IF(C_AVX_FOUND OR C_AVX2_FOUND)
-  SET(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}")
-  IF(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast /arch:AVX /std:c99")
-  ELSE(MSVC)
-    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math -mavx -std=c99")
-  ENDIF(MSVC)
-  SET(simd ${simd} generic/simd/convolve5x5_avx.c)
-ENDIF(C_AVX_FOUND OR C_AVX2_FOUND)
-
-SET(hdr
-  THGeneral.h THHalf.h THAllocator.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h
-  THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )
-
-SET(src
-  THGeneral.c THHalf.c THAllocator.c THStorage.c THTensor.c THBlas.c THLapack.c
-  THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
-
-SET(src ${src} ${hdr} ${simd})
-ADD_LIBRARY(TH SHARED ${src})
-if(BUILD_STATIC)
-  ADD_LIBRARY(TH_static STATIC ${src})
-endif()
-
-IF(NOT TH_SO_VERSION)
-  SET(TH_SO_VERSION 0)
-ENDIF(NOT TH_SO_VERSION)
-MESSAGE(STATUS "TH_SO_VERSION: ${TH_SO_VERSION}")
-SET_TARGET_PROPERTIES(TH PROPERTIES
-  VERSION   ${TH_SO_VERSION}
-  SOVERSION ${TH_SO_VERSION})
 
 CHECK_C_SOURCE_RUNS("
 #include <stdatomic.h>
@@ -233,6 +186,72 @@ int main()
 " HAS_GCC_ATOMICS)
 ENDIF()
 
+#######################################################################
+##### sources section
+######################################################################
+
+# IF ANY SIMD FOUND
+IF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
+  SET(simd generic/simd/convolve.c)
+ENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)
+
+# IF BOTH SSE4.1 AND SSE4.2 FOUND
+IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)
+  SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}")
+  IF(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast")
+  ELSE(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math")
+  ENDIF(MSVC)
+  SET(simd ${simd} generic/simd/convolve5x5_sse.c)
+ENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)
+
+# IF AVX FOUND
+IF(C_AVX_FOUND)
+  IF(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(vector/AVX.c generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX_FLAGS}")
+  ELSE(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(vector/AVX.c generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX_FLAGS}")
+  ENDIF(MSVC)
+  SET(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c)
+ENDIF(C_AVX_FOUND)
+
+IF(C_AVX2_FOUND)
+  IF(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX2_FLAGS}")
+  ELSE(MSVC)
+    SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX2_FLAGS}")
+  ENDIF(MSVC)
+  SET(simd ${simd} vector/AVX2.c)
+ENDIF(C_AVX2_FOUND)
+
+SET(hdr
+  THGeneral.h THHalf.h THAllocator.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h
+  THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )
+
+SET(src
+  THGeneral.c THHalf.c THAllocator.c THStorage.c THTensor.c THBlas.c THLapack.c
+  THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
+
+SET(src ${src} ${hdr} ${simd})
+
+#######################################################################
+##### build section
+######################################################################
+
+ADD_LIBRARY(TH SHARED ${src})
+if(BUILD_STATIC)
+  ADD_LIBRARY(TH_static STATIC ${src})
+endif()
+
+IF(NOT TH_SO_VERSION)
+  SET(TH_SO_VERSION 0)
+ENDIF(NOT TH_SO_VERSION)
+MESSAGE(STATUS "TH_SO_VERSION: ${TH_SO_VERSION}")
+SET_TARGET_PROPERTIES(TH PROPERTIES
+  VERSION   ${TH_SO_VERSION}
+  SOVERSION ${TH_SO_VERSION})
+
 IF(HAS_C11_ATOMICS)
   ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1)
   MESSAGE(STATUS "Atomics: using C11 intrinsics")
@@ -264,10 +283,6 @@ IF(LAPACK_FOUND)
   TARGET_LINK_LIBRARIES(TH ${LAPACK_LIBRARIES})
 ENDIF(LAPACK_FOUND)
 
-IF(BLAS_IS_ACCELERATE)
-  MESSAGE(STATUS "BLAS FOUND IS ACCELERATE: Fix for sdot")
-ENDIF()
-
 IF (UNIX AND NOT APPLE)
    INCLUDE(CheckLibraryExists)
    # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
@@ -284,6 +299,7 @@ IF(UNIX)
   IF(HAVE_MMAP)
     ADD_DEFINITIONS(-DHAVE_MMAP=1)
   ENDIF(HAVE_MMAP)
+  # use 64-bit file offsets (off_t), needed for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html
   ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64)
   CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN)
   IF(HAVE_SHM_OPEN)
@@ -299,47 +315,10 @@ IF(UNIX)
   ENDIF(HAVE_MALLOC_USABLE_SIZE)
 ENDIF(UNIX)
 
-
-
 IF(NOT MSVC)
   TARGET_LINK_LIBRARIES(TH m)
 ENDIF(NOT MSVC)
 
-SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
-FOREACH(KEYWORD "inline" "__inline__" "__inline")
-  IF(NOT DEFINED C_INLINE)
-
-    SET(CMAKE_REQUIRED_FLAGS "-Dinline=${KEYWORD} ${CMAKE_C_FLAGS}")
-    CHECK_C_SOURCE_RUNS("
-       static inline int static_foo()
-       {
-         return 0;
-       }
-
-       int main(int argc, char *argv[])
-       {
-         static_foo();
-         return 0;
-       }" C_HAS_${KEYWORD})
-
-    IF(C_HAS_${KEYWORD})
-      SET(C_INLINE TRUE)
-# Right now i put it in THGeneral.h -- debatable
-#      ADD_DEFINITIONS("-Dinline=${KEYWORD}")
-      SET(TH_INLINE ${KEYWORD})
-      MESSAGE(STATUS "C inline is supported (${KEYWORD})")
-    ENDIF(C_HAS_${KEYWORD})
-  ENDIF(NOT DEFINED C_INLINE)
-ENDFOREACH(KEYWORD)
-SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
-
-IF(NOT DEFINED C_INLINE)
-  MESSAGE(STATUS "C inline seems not supported")
-# Right now i put it in THGeneral.h -- debatable
-#  ADD_DEFINITIONS("-Dinline=")
-SET(TH_INLINE "")
-ENDIF(NOT DEFINED C_INLINE)
-
 # Is __thread supported?
 IF(NOT MSVC)
   CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD)
@@ -355,6 +334,11 @@ ENDIF(NOT C_HAS_THREAD)
 INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
 CONFIGURE_FILE(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h")
 
+
+#######################################################################
+##### install section
+######################################################################
+
 INSTALL(TARGETS TH
   EXPORT TH-exports
   RUNTIME DESTINATION "${TH_INSTALL_BIN_SUBDIR}"
@@ -389,6 +373,11 @@ INSTALL(FILES
   DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH")
 
 INSTALL(FILES
+  vector/AVX.h
+  vector/AVX2.h
+  DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/vector")
+
+INSTALL(FILES
   generic/THBlas.c
   generic/THBlas.h
   generic/THLapack.c
diff --git a/lib/TH/THGeneral.h.in b/lib/TH/THGeneral.h.in
index bc7e448..de11f1b 100644
--- a/lib/TH/THGeneral.h.in
+++ b/lib/TH/THGeneral.h.in
@@ -13,7 +13,6 @@
 
 #cmakedefine USE_BLAS
 #cmakedefine USE_LAPACK
-#cmakedefine BLAS_IS_ACCELERATE
 #cmakedefine BLAS_F2C
 
 #ifdef __cplusplus
@@ -32,12 +31,6 @@
 # define TH_API TH_EXTERNC
 #endif
 
-#define TH_INLINE @TH_INLINE@
-
-#ifndef __cplusplus
-#define inline @TH_INLINE@
-#endif
-
 #ifndef M_PI
 # define M_PI 3.14159265358979323846
 #endif
diff --git a/lib/TH/THVector.c b/lib/TH/THVector.c
index 1c9ea24..d8493a5 100644
--- a/lib/TH/THVector.c
+++ b/lib/TH/THVector.c
@@ -15,12 +15,12 @@
 #include "vector/SSE.c"
 #endif
 
-#if defined(USE_AVX) || defined(USE_AVX2)
-#include "vector/AVX.c"
+#if defined(__AVX__) || defined(__AVX2__)
+#include "vector/AVX.h"
 #endif
 
-#if defined(USE_AVX2)
-#include "vector/AVX2.c"
+#if defined(__AVX2__)
+#include "vector/AVX2.h"
 #endif
 
 #include "generic/THVectorDefault.c"
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 5b88852..6220dd6 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -26,7 +26,7 @@ static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX),
     #endif
@@ -52,13 +52,13 @@ static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX2)
+  #if defined(__AVX2__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2),
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),
     #endif
@@ -91,7 +91,7 @@ static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
     #endif
@@ -119,7 +119,7 @@ static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),
     #endif
@@ -152,7 +152,7 @@ static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
     #endif
@@ -179,7 +179,7 @@ static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),
     #endif
@@ -206,7 +206,7 @@ static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
     #endif
   #endif
 
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
     #endif
@@ -227,7 +227,7 @@ void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
 
 static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
 static FunctionDescription THVector_(copy_DISPATCHTABLE)[] = {
-  #if defined(USE_AVX)
+  #if defined(__AVX__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
       FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX),
     #endif
diff --git a/lib/TH/generic/simd/convolve.c b/lib/TH/generic/simd/convolve.c
index 842af17..bf07bbe 100644
--- a/lib/TH/generic/simd/convolve.c
+++ b/lib/TH/generic/simd/convolve.c
@@ -1,4 +1,4 @@
-#if defined(USE_AVX)
+#if defined(__AVX__)
 
 #ifdef _MSC_VER
 #include <intrin.h>
@@ -113,7 +113,7 @@ void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows,
 void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
 
 void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) {
-#if defined(USE_AVX)
+#if defined(__AVX__)
   int avx = haveCPUFeature(kCPUFeature_AVX);
   if (avx)
   {
@@ -124,4 +124,4 @@ void convolve_5x5(float* output, float* input, float* kernel, long outRows, long
   {
     convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols);
   }
-}
-\ No newline at end of file
+}
diff --git a/lib/TH/vector/AVX.c b/lib/TH/vector/AVX.c
index 1f902cc..b7d5dd1 100644
--- a/lib/TH/vector/AVX.c
+++ b/lib/TH/vector/AVX.c
@@ -1,10 +1,13 @@
+#if defined(__AVX__)
 #ifndef _MSC_VER
 #include <x86intrin.h>
 #else
 #include <intrin.h>
 #endif
 
-static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
+#include "AVX.h"
+
+void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
   ptrdiff_t i;
   ptrdiff_t off;
   for (i=0; i<=((n)-8); i+=8) {
@@ -17,7 +20,7 @@ static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t
   }
 }
 
-static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   ptrdiff_t off;
   __m256d YMM0 = _mm256_set_pd(c, c, c, c);
@@ -33,7 +36,7 @@ static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n
   }
 }
 
-static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
+void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM0, YMM1, YMM2, YMM3;
   for (i=0; i<=((n)-8); i+=8) {
@@ -51,7 +54,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -68,7 +71,7 @@ static void THDoubleVector_divs_AVX(double *y, const double *x, const double c,
   }
 }
 
-static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
+void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM0, YMM1, YMM2, YMM3;
   for (i=0; i<=((n)-8); i+=8) {
@@ -86,7 +89,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -103,7 +106,7 @@ static void THDoubleVector_muls_AVX(double *y, const double *x, const double c,
   }
 }
 
-static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
+void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1, YMM2, YMM3;
@@ -119,7 +122,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -136,7 +139,7 @@ static void THDoubleVector_adds_AVX(double *y, const double *x, const double c,
   }
 }
 
-static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
+void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
   ptrdiff_t i;
   ptrdiff_t off;
   for (i=0; i<=((n)-16); i+=16) {
@@ -149,7 +152,7 @@ static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n)
   }
 }
 
-static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   ptrdiff_t off;
   __m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c);
@@ -165,7 +168,7 @@ static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
   }
 }
 
-static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
+void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM0, YMM1, YMM2, YMM3;
   for (i=0; i<=((n)-16); i+=16) {
@@ -183,7 +186,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
@@ -200,7 +203,7 @@ static void THFloatVector_divs_AVX(float *y, const float *x, const float c, cons
   }
 }
 
-static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
+void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM0, YMM1, YMM2, YMM3;
   for (i=0; i<=((n)-16); i+=16) {
@@ -218,7 +221,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
@@ -235,7 +238,7 @@ static void THFloatVector_muls_AVX(float *y, const float *x, const float c, cons
   }
 }
 
-static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
+void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1, YMM2, YMM3;
@@ -251,7 +254,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
@@ -267,3 +270,5 @@ static void THFloatVector_adds_AVX(float *y, const float *x, const float c, cons
     y[i] = x[i] + c;
   }
 }
+
+#endif // defined(__AVX__)
diff --git a/lib/TH/vector/AVX.h b/lib/TH/vector/AVX.h
new file mode 100644
index 0000000..bfaeaa6
--- /dev/null
+++ b/lib/TH/vector/AVX.h
@@ -0,0 +1,23 @@
+#ifndef TH_AVX_H
+#define TH_AVX_H
+
+#include <stddef.h>
+
+void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
+void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
+void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
+void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
+void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
+void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
+void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
+void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
+void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
+void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
+
+#endif
diff --git a/lib/TH/vector/AVX2.c b/lib/TH/vector/AVX2.c
index 3ccfc82..082a680 100644
--- a/lib/TH/vector/AVX2.c
+++ b/lib/TH/vector/AVX2.c
@@ -1,10 +1,12 @@
+#if defined(__AVX2__)
 #ifndef _MSC_VER
 #include <x86intrin.h>
 #else
 #include <intrin.h>
 #endif
+#include "AVX2.h"
 
-static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
+void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1, YMM2, YMM3;
@@ -23,7 +25,7 @@ static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y
   }
 }
 
-static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
+void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1, YMM2, YMM3;
@@ -42,3 +44,4 @@ static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, co
   }
 }
 
+#endif // defined(__AVX2__)
diff --git a/lib/TH/vector/AVX2.h b/lib/TH/vector/AVX2.h
new file mode 100644
index 0000000..85a9e93
--- /dev/null
+++ b/lib/TH/vector/AVX2.h
@@ -0,0 +1,9 @@
+#ifndef TH_AVX2_H
+#define TH_AVX2_H
+
+#include <stddef.h>
+
+void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
+void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
+
+#endif
author	Soumith Chintala <soumith@gmail.com>	2017-03-03 23:37:26 +0300
committer	GitHub <noreply@github.com>	2017-03-03 23:37:26 +0300
commit	226484acc0e27fcd132ab28a7c3c794ce8e1299b (patch)
tree	7e735a103a68d53ff77339766bb4fa4c0dabe50d
parent	5e235ad7978389fa11865f8fa02f53d74b3bc361 (diff)
parent	948e753486468adcd4985a5301ce32668d5acaca (diff)