diff options
author | Soumith Chintala <soumith@gmail.com> | 2017-02-28 20:10:47 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2017-02-28 20:10:47 +0300 |
commit | 5f9d30769789bc36ec628e3eec2f2fb85a1e6b50 (patch) | |
tree | 851111b7baf0a55c46092bf95e2a9f076960095d | |
parent | 641d9c508e027c0cd550ff435f5d6cfd02c7cecd (diff) |
THVector_(add),(mul),(div) -> (adds),(muls),(divs) [simd-opt]
-rw-r--r-- | lib/TH/generic/THTensorMath.c | 7 | ||||
-rw-r--r-- | lib/TH/generic/THVector.h | 6 | ||||
-rw-r--r-- | lib/TH/generic/THVectorDefault.c | 6 | ||||
-rw-r--r-- | lib/TH/generic/THVectorDispatch.c | 58 | ||||
-rw-r--r-- | lib/TH/vector/AVX.c | 12 | ||||
-rw-r--r-- | lib/TH/vector/NEON.c | 6 | ||||
-rw-r--r-- | lib/TH/vector/SSE.c | 12 |
7 files changed, 54 insertions, 53 deletions
diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c index f698e25..9beb3f5 100644 --- a/lib/TH/generic/THTensorMath.c +++ b/lib/TH/generic/THTensorMath.c @@ -97,6 +97,7 @@ void THTensor_(fill)(THTensor *r_, real value) TH_TENSOR_APPLY(real, r_, if (r__stride == 1) { THVector_(fill)(r__data, value, r__size); + r__i = r__size; r__data += r__stride * r__size; break; } else { @@ -559,7 +560,7 @@ void THTensor_(add)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(add)(r__data, t_data, value, r__len);); + TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;); } @@ -574,7 +575,7 @@ void THTensor_(mul)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(mul)(r__data, t_data, value, r__len);); + TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;); } @@ -584,7 +585,7 @@ void THTensor_(div)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(div)(r__data, t_data, value, r__len);); + TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;); } diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h index 17d4253..7d36854 100644 --- 
a/lib/TH/generic/THVector.h +++ b/lib/TH/generic/THVector.h @@ -4,11 +4,11 @@ TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n); TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n); -TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n); +TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n); -TH_API void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n); +TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n); -TH_API void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n); +TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n); /* Initialize the dispatch pointers */ diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c index 2cb3ec0..3388e0d 100644 --- a/lib/TH/generic/THVectorDefault.c +++ b/lib/TH/generic/THVectorDefault.c @@ -48,7 +48,7 @@ void THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c z[i] = x[i] + c * y[i]; } -void THVector_(add_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) +void THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) { ptrdiff_t i = 0; @@ -80,7 +80,7 @@ void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdif z[i] = x[i] * y[i]; } -void THVector_(mul_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) +void THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) { ptrdiff_t i = 0; @@ -112,7 +112,7 @@ void THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdif z[i] = x[i] / y[i]; } 
-void THVector_(div_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) +void THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) { ptrdiff_t i = 0; diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c index 900bb2a..5b88852 100644 --- a/lib/TH/generic/THVectorDispatch.c +++ b/lib/TH/generic/THVectorDispatch.c @@ -77,38 +77,38 @@ void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const THVector_(cadd_DISPATCHPTR)(z, x, y, c, n); } -static void (*THVector_(add_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(add_DEFAULT); -static FunctionDescription THVector_(add_DISPATCHTABLE)[] = { +static void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT); +static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = { #if defined(__NEON__) #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(add_NEON), SIMDExtension_NEON), + FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON), #endif #endif #if defined(__PPC64__) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(add_VSX), SIMDExtension_VSX), + FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX), #endif #endif #if defined(USE_AVX) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(add_AVX), SIMDExtension_AVX), + FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX), #endif #endif #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ || defined(USE_SSE4_1) || defined(USE_SSE4_2) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(add_SSE), SIMDExtension_SSE), + FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE), #endif #endif - FUNCTION_IMPL(THVector_(add_DEFAULT), SIMDExtension_DEFAULT) + FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT) }; // Dispatch stubs that just call the pointers -TH_API void 
THVector_(add)(real *r_, const real *t, const real value, const ptrdiff_t n) { - THVector_(add_DISPATCHPTR)(r_, t, value, n); +TH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) { + THVector_(adds_DISPATCHPTR)(r_, t, value, n); } static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT); @@ -138,37 +138,37 @@ void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) { THVector_(cmul_DISPATCHPTR)(z, x, y, n); } -static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(mul_DEFAULT); -static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = { +static void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT); +static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = { #if defined(__NEON__) #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON), + FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON), #endif #endif #if defined(__PPC64__) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_VSX), SIMDExtension_VSX), + FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX), #endif #endif #if defined(USE_AVX) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_AVX), SIMDExtension_AVX), + FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX), #endif #endif #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ || defined(USE_SSE4_1) || defined(USE_SSE4_2) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE), + FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE), #endif #endif - FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT) + FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT) }; -void THVector_(mul)(real *y, const real *x, const real c, 
const ptrdiff_t n) { - THVector_(mul_DISPATCHPTR)(y, x, c, n); +void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) { + THVector_(muls_DISPATCHPTR)(y, x, c, n); } static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT); @@ -198,31 +198,31 @@ void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) { THVector_(cdiv_DISPATCHPTR)(z, x, y, n); } -static void (*THVector_(div_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(div_DEFAULT); -static FunctionDescription THVector_(div_DISPATCHTABLE)[] = { +static void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT); +static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = { #if defined(__NEON__) #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(div_NEON), SIMDExtension_NEON), + FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON), #endif #endif #if defined(USE_AVX) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(div_AVX), SIMDExtension_AVX), + FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX), #endif #endif #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ || defined(USE_SSE4_1) || defined(USE_SSE4_2) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(div_SSE), SIMDExtension_SSE), + FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE), #endif #endif - FUNCTION_IMPL(THVector_(div_DEFAULT), SIMDExtension_DEFAULT) + FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT) }; -void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n) { - THVector_(div_DISPATCHPTR)(y, x, c, n); +void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) { + THVector_(divs_DISPATCHPTR)(y, x, c, n); } static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = 
&THVector_(copy_DEFAULT); @@ -251,11 +251,11 @@ void THVector_(vectorDispatchInit)(void) uint32_t hostSimdExts = detectHostSIMDExtensions(); INIT_DISPATCH_PTR(fill); INIT_DISPATCH_PTR(cadd); - INIT_DISPATCH_PTR(add); + INIT_DISPATCH_PTR(adds); INIT_DISPATCH_PTR(cmul); - INIT_DISPATCH_PTR(mul); + INIT_DISPATCH_PTR(muls); INIT_DISPATCH_PTR(cdiv); - INIT_DISPATCH_PTR(div); + INIT_DISPATCH_PTR(divs); INIT_DISPATCH_PTR(copy); } diff --git a/lib/TH/vector/AVX.c b/lib/TH/vector/AVX.c index 3356a74..1f902cc 100644 --- a/lib/TH/vector/AVX.c +++ b/lib/TH/vector/AVX.c @@ -51,7 +51,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, } } -static void THDoubleVector_div_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM15 = _mm256_set_pd(c, c, c, c); __m256d YMM0, YMM1; @@ -86,7 +86,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, } } -static void THDoubleVector_mul_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM15 = _mm256_set_pd(c, c, c, c); __m256d YMM0, YMM1; @@ -119,7 +119,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, } } -static void THDoubleVector_add_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM15 = _mm256_set_pd(c, c, c, c); __m256d YMM0, YMM1; @@ -183,7 +183,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con } } -static void THFloatVector_div_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const 
ptrdiff_t n) { ptrdiff_t i; __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); __m256 YMM0, YMM1; @@ -218,7 +218,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con } } -static void THFloatVector_mul_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); __m256 YMM0, YMM1; @@ -251,7 +251,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con } } -static void THFloatVector_add_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); __m256 YMM0, YMM1; diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c index 0efbf51..7920fb1 100644 --- a/lib/TH/vector/NEON.c +++ b/lib/TH/vector/NEON.c @@ -29,7 +29,7 @@ static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, co z[i] = x[i] * y[i]; } -static void THFloatVector_mul_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { long i = 0; for(; i < n-4; i += 4) @@ -59,7 +59,7 @@ static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, co z[i] = x[i] + c * y[i]; } -static void THFloatVector_add_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) @@ -89,7 +89,7 @@ static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, co z[i] = x[i] / y[i]; } -static void THFloatVector_div_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_divs_NEON(float *y, const 
float *x, const float c, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c index 521e29f..01ac789 100644 --- a/lib/TH/vector/SSE.c +++ b/lib/TH/vector/SSE.c @@ -36,7 +36,7 @@ static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y, } } -static void THDoubleVector_add_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM7 = _mm_set1_pd(c); __m128d XMM0, XMM2; @@ -78,7 +78,7 @@ static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, } } -static void THDoubleVector_mul_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM15 = _mm_set1_pd(c); for (i=0; i<=((n)-8); i+=8) { @@ -118,7 +118,7 @@ static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y, } } -static void THDoubleVector_div_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { +static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM7 = _mm_set1_pd(c); __m128d XMM0, XMM1; @@ -168,7 +168,7 @@ static void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, con } } -static void THFloatVector_add_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM7 = _mm_set1_ps(c); __m128 XMM0, XMM2; @@ -210,7 +210,7 @@ static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, con } } -static void THFloatVector_mul_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) 
{ ptrdiff_t i; __m128 XMM15 = _mm_set_ps1(c); for (i=0; i<=((n)-16); i+=16) { @@ -250,7 +250,7 @@ static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, con } } -static void THFloatVector_div_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { +static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM7 = _mm_set1_ps(c); __m128 XMM0, XMM1; |