diff options
author | jokeren <robinho364@gmail.com> | 2016-11-27 11:30:43 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2017-02-23 13:32:28 +0300 |
commit | 8a049f482a1413720434607e22f2790056b98965 (patch) | |
tree | 2e89f0d0d58e710f5e4d7b7d9131ec1cec6e5583 | |
parent | 0a9d7903964aa0039dba8993c9548596552d4ed5 (diff) |
Fix compilation for simd tensor add
-rw-r--r-- | generic/Tensor.c | 1 | ||||
-rw-r--r-- | generic/TensorOperator.c | 3 | ||||
-rw-r--r-- | lib/TH/CMakeLists.txt | 6 | ||||
-rw-r--r-- | lib/TH/THTensor.c | 1 | ||||
-rw-r--r-- | lib/TH/cmake/FindSSE.cmake | 3 | ||||
-rw-r--r-- | lib/TH/generic/THTensorMath.c | 4 | ||||
-rw-r--r-- | lib/TH/generic/THTensorMath.h | 2 | ||||
-rw-r--r-- | lib/TH/generic/THTensorMathDispatch.c | 47 | ||||
-rw-r--r-- | lib/TH/generic/THVectorDispatch.c | 10 |
9 files changed, 20 insertions, 57 deletions
diff --git a/generic/Tensor.c b/generic/Tensor.c index c2417fe..aabbbdc 100644 --- a/generic/Tensor.c +++ b/generic/Tensor.c @@ -1355,7 +1355,6 @@ void torch_Tensor_(init)(lua_State *L) #ifndef TH_REAL_IS_HALF THVector_(vectorDispatchInit)(); #endif - } #endif diff --git a/generic/TensorOperator.c b/generic/TensorOperator.c index e131c57..8aecc09 100644 --- a/generic/TensorOperator.c +++ b/generic/TensorOperator.c @@ -181,8 +181,11 @@ static const struct luaL_Reg torch_TensorOperator_(_) [] = { {NULL, NULL} }; +extern int THTensor_(cpuDispatchInit)(); + void torch_TensorOperator_(init)(lua_State *L) { + THTensor_(cpuDispatchInit)(); luaT_pushmetatable(L, torch_Tensor); luaT_setfuncs(L, torch_TensorOperator_(_), 0); lua_pop(L, 1); diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt index 7a55568..0fa2c6d 100644 --- a/lib/TH/CMakeLists.txt +++ b/lib/TH/CMakeLists.txt @@ -121,6 +121,9 @@ ENDIF(C_SSE2_FOUND) IF(C_SSE3_FOUND) SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}") ENDIF(C_SSE3_FOUND) +IF(C_AVX_FOUND) + SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} -DUSE_AVX ${CMAKE_C_FLAGS}") +ENDIF(C_AVX_FOUND) IF(C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND) SET(simd generic/simd/convolve.c) @@ -394,13 +397,12 @@ INSTALL(FILES generic/THTensorLapack.c generic/THTensorLapack.h generic/THTensorMath.c + generic/THTensorMathDispatch.c generic/THTensorMath.h generic/THTensorRandom.c generic/THTensorRandom.h generic/THVectorDispatch.c generic/THVector.h - generic/THTensorMathDispatch.c - generic/THTensorMathDispatch.h DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/generic") diff --git a/lib/TH/THTensor.c b/lib/TH/THTensor.c index 071cdb9..7f0f0e2 100644 --- a/lib/TH/THTensor.c +++ b/lib/TH/THTensor.c @@ -1,6 +1,7 @@ #include "THAtomic.h" #include "THTensor.h" #include "THVector.h" +#include "generic/simd/simd.h" #include "THBlas.h" #include "THLapack.h" diff --git a/lib/TH/cmake/FindSSE.cmake b/lib/TH/cmake/FindSSE.cmake index f6aac07..d03cc19 100644 --- a/lib/TH/cmake/FindSSE.cmake +++ b/lib/TH/cmake/FindSSE.cmake @@ -62,7 +62,7 @@ SET(AVX_CODE " int main() { - __m256 a; + __m256 a; a = _mm256_set1_ps(0); return 0; } @@ -109,3 +109,4 @@ CHECK_SSE(CXX "SSE2" " ;-msse2;/arch:SSE2") CHECK_SSE(CXX "SSE3" " ;-msse3;/arch:SSE3") CHECK_SSE(CXX "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4") CHECK_SSE(CXX "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4") +CHECK_SSE(CXX "AVX" " ;-mavx;/arch:AVX") diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c index 83f3ccc..6e34792 100644 --- a/lib/TH/generic/THTensorMath.c +++ b/lib/TH/generic/THTensorMath.c @@ -2,6 +2,8 @@ #define TH_GENERIC_FILE "generic/THTensorMath.c" #else +#include <immintrin.h> + #define TH_OMP_OVERHEAD_THRESHOLD 100000 void THTensor_(fill)(THTensor *r_, real value) @@ -460,7 +462,7 @@ accreal THTensor_(prodall)(THTensor *tensor) return prod; } -void THTensor_(add_Default)(THTensor *r_, THTensor *t, real value) +void THTensor_(add)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { diff --git a/lib/TH/generic/THTensorMath.h b/lib/TH/generic/THTensorMath.h index 9730536..3eab1da 100644 --- a/lib/TH/generic/THTensorMath.h +++ b/lib/TH/generic/THTensorMath.h @@ -192,4 +192,6 @@ TH_API int THTensor_(logicalany)(THTensor *self); #endif /* TH_REAL_IS_BYTE */ +TH_API int THTensor_(tensorMathDispatchInit)(); + #endif diff --git a/lib/TH/generic/THTensorMathDispatch.c b/lib/TH/generic/THTensorMathDispatch.c deleted file mode 100644 index 9ed71a2..0000000 --- a/lib/TH/generic/THTensorMathDispatch.c +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorMathDispatch.c" -#else - -#include "simd/simd.h" - -// This file will contain static function pointers that will be initialized by -// the initialization call. It will also have globally linked dispatch stubs -// which delegate to the function pointers. The dispatch stubs will be the symbols -// called by clients - -// Dispatch pointers. These guys will be set to point to the most-optimized implementation -// for the host. -static void (*THTensor_(dispatchPtrAdd))(THTensor*, THTensor*, real) = NULL; - -// Dispatch stubs that just call the pointers -TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(dispatchPtrAdd)(r_, t, value); -} - - -// Dispatch tables: each optimized implementation of a function -// is described in a table, and the tables are used to initialize -// the function pointers for dynamic dispatch - -#if defined(TH_REAL_IS_DOUBLE) -FunctionDescription THTensor_(dispatchTblAdd)[] = { - FUNCTION_IMPL(THTensor_(add_AVX), SIMDExtension_AVX), - FUNCTION_IMPL(THTensor_(add_Default), SIMDExtension_DEFAULT) -}; -#else -FunctionDescription THTensor_(dispatchTblAdd)[] = { - FUNCTION_IMPL(THTensor_(add_Default), SIMDExtension_DEFAULT) -}; -#endif - -int THTensor_(cpuDispatchInit)() -{ - uint32_t hostSimdExts = detectHostSIMDExtensions(); - - // Initialize the dispatch pointers to point to the correct functions - INIT_DISPATCH_PTR(Add); - return 0; -} - -#endif diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c index 2436a12..535f11d 100644 --- a/lib/TH/generic/THVectorDispatch.c +++ b/lib/TH/generic/THVectorDispatch.c @@ -159,11 +159,11 @@ void THVector_(mul)(real *y, const real *x, const ptrdiff_t n) { void THVector_(vectorDispatchInit)(void) { uint32_t hostSimdExts = detectHostSIMDExtensions(); - INIT_DISPATCH_PTR(fill); - INIT_DISPATCH_PTR(add); - INIT_DISPATCH_PTR(diff); - INIT_DISPATCH_PTR(scale); - INIT_DISPATCH_PTR(mul); + INIT_VECTOR_DISPATCH_PTR(fill); + INIT_VECTOR_DISPATCH_PTR(add); + INIT_VECTOR_DISPATCH_PTR(diff); + INIT_VECTOR_DISPATCH_PTR(scale); + INIT_VECTOR_DISPATCH_PTR(mul); } #endif |