diff options
author | jokeren <robinho364@gmail.com> | 2016-12-05 05:13:43 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2017-02-23 13:40:33 +0300 |
commit | 1ad347c00ed2224ff13fb93ad0d45e5e709b8310 (patch) | |
tree | 1f04fd6c10296d7a83e9707e2a470dc8d73266a3 | |
parent | 41732e69fe9ed3c79223cfdbf3601d30936d2a58 (diff) |
Change THVector mul to cmul
-rw-r--r-- | lib/TH/generic/THTensorMathSIMD.c | 37 | ||||
-rw-r--r-- | lib/TH/generic/THVector.h | 2 | ||||
-rw-r--r-- | lib/TH/generic/THVectorDefault.c | 2 | ||||
-rw-r--r-- | lib/TH/generic/THVectorDispatch.c | 16 | ||||
-rw-r--r-- | lib/TH/vector/NEON.c | 2 | ||||
-rw-r--r-- | lib/TH/vector/SSE.c | 4 |
6 files changed, 13 insertions, 50 deletions
diff --git a/lib/TH/generic/THTensorMathSIMD.c b/lib/TH/generic/THTensorMathSIMD.c index 9dd254e..ee56663 100644 --- a/lib/TH/generic/THTensorMathSIMD.c +++ b/lib/TH/generic/THTensorMathSIMD.c @@ -92,43 +92,6 @@ void THTensor_(div_AVX)(THTensor *r_, THTensor *t, real value) #if defined(TH_REAL_IS_FLOAT) -void THTensor_(add_AVX)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *rp = THTensor_(data)(r_); - real *tp = THTensor_(data)(t); - ptrdiff_t sz = THTensor_(nElement)(t); - #pragma omp parallel if(sz > TH_OMP_OVERHEAD_THRESHOLD) - { - #ifdef _OPENMP - size_t num_threads = omp_get_num_threads(); - size_t tid = omp_get_thread_num(); - #else - size_t num_threads = 1; - size_t tid = 0; - #endif - ptrdiff_t i = tid * (sz / num_threads); - ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads; - __m256 YMM15 = _mm256_set_ps(value, value, value, value, value, value, value, value); - __m256 YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7; - for (; i<=((i_end)-16); i+=16) { - YMM0 = _mm256_loadu_ps(tp+i); - YMM1 = _mm256_loadu_ps(tp+i+8); - YMM4 = _mm256_add_ps(YMM0, YMM15); - YMM5 = _mm256_add_ps(YMM1, YMM15); - _mm256_storeu_ps(rp+i, YMM4); - _mm256_storeu_ps(rp+i+8, YMM5); - } - for (; i<i_end; i++) { - rp[i] = tp[i] + value; - } - } - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;); - } -} - void THTensor_(mul_AVX)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h index 426e3d8..3edb83f 100644 --- a/lib/TH/generic/THVector.h +++ b/lib/TH/generic/THVector.h @@ -7,7 +7,7 @@ TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(diff)(real *z, const real *x, const real *y, const ptrdiff_t n); TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n); -TH_API void THVector_(mul)(real *y, const real *x, const ptrdiff_t n); +TH_API void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n); /* Initialize the dispatch pointers */ TH_API void THVector_(vectorDispatchInit)(void); diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c index 348343c..89e007f 100644 --- a/lib/TH/generic/THVectorDefault.c +++ b/lib/TH/generic/THVectorDefault.c @@ -81,7 +81,7 @@ void THVector_(scale_DEFAULT)(real *y, const real c, const ptrdiff_t n) y[i] *= c; } -void THVector_(mul_DEFAULT)(real *y, const real *x, const ptrdiff_t n) +void THVector_(cmul_DEFAULT)(real *y, const real *x, const ptrdiff_t n) { ptrdiff_t i = 0; diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c index 0ff05c8..9f70e75 100644 --- a/lib/TH/generic/THVectorDispatch.c +++ b/lib/TH/generic/THVectorDispatch.c @@ -136,11 +136,11 @@ TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n) { } -static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(mul_DEFAULT); -static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = { +static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT); +static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = { #if defined(__NEON__) #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON), + FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON), #endif #endif @@ -153,14 +153,14 @@ static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = { #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ || defined(USE_SSE4_1) || defined(USE_SSE4_2) #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE), + FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE), #endif #endif - FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT) + FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT) }; -void THVector_(mul)(real *y, const real *x, const ptrdiff_t n) { - THVector_(mul_DISPATCHPTR); +void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n) { + THVector_(cmul_DISPATCHPTR); } /* This needs to be called in order to initialize the dispatch pointers at runtime. @@ -178,7 +178,7 @@ void THVector_(vectorDispatchInit)(void) INIT_VECTOR_DISPATCH_PTR(add); INIT_VECTOR_DISPATCH_PTR(diff); INIT_VECTOR_DISPATCH_PTR(scale); - INIT_VECTOR_DISPATCH_PTR(mul); + INIT_VECTOR_DISPATCH_PTR(cmul); } #endif diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c index a2c46aa..94a3907 100644 --- a/lib/TH/vector/NEON.c +++ b/lib/TH/vector/NEON.c @@ -47,7 +47,7 @@ static void THFloatVector_scale_NEON(float *y, const float c, const ptrdiff_t n) y[i] *= c; } -static void THFloatVector_mul_NEON(float *y, const float *x, const ptrdiff_t n) { +static void THFloatVector_cmul_NEON(float *y, const float *x, const ptrdiff_t n) { long i = 0; for(; i < n-4; i += 4) diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c index da40282..4735e63 100644 --- a/lib/TH/vector/SSE.c +++ b/lib/TH/vector/SSE.c @@ -84,7 +84,7 @@ static void THDoubleVector_scale_SSE(double *y, const double c, const ptrdiff_t } -static void THDoubleVector_mul_SSE(double *y, const double *x, const ptrdiff_t n) { +static void THDoubleVector_cmul_SSE(double *y, const double *x, const ptrdiff_t n) { ptrdiff_t i; for (i=0; i<=((n)-8); i+=8) { __m128d XMM0 = _mm_loadu_pd((x)+i ); @@ -189,7 +189,7 @@ static void THFloatVector_scale_SSE(float *y, const float c, const ptrdiff_t n) } } -static void THFloatVector_mul_SSE(float *y, const float *x, const ptrdiff_t n) { +static void THFloatVector_cmul_SSE(float *y, const float *x, const ptrdiff_t n) { ptrdiff_t i; for (i=0; i<=((n)-16); i+=16) { __m128 XMM0 = _mm_loadu_ps((x)+i ); |