Rename THVector mul to cmul and remove THTensor add_AVX

author: jokeren <robinho364@gmail.com> 2016-12-05 05:13:43 +0300
committer: Soumith Chintala <soumith@gmail.com> 2017-02-23 13:40:33 +0300
commit: 1ad347c00ed2224ff13fb93ad0d45e5e709b8310 (patch)
tree: 1f04fd6c10296d7a83e9707e2a470dc8d73266a3
parent: 41732e69fe9ed3c79223cfdbf3601d30936d2a58 (diff)
6 files changed, 13 insertions, 50 deletions
diff --git a/lib/TH/generic/THTensorMathSIMD.c b/lib/TH/generic/THTensorMathSIMD.c
index 9dd254e..ee56663 100644
--- a/lib/TH/generic/THTensorMathSIMD.c
+++ b/lib/TH/generic/THTensorMathSIMD.c
@@ -92,43 +92,6 @@ void THTensor_(div_AVX)(THTensor *r_, THTensor *t, real value)
 
 #if defined(TH_REAL_IS_FLOAT)
 
-void THTensor_(add_AVX)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
-    real *rp = THTensor_(data)(r_);
-    real *tp = THTensor_(data)(t);
-    ptrdiff_t sz = THTensor_(nElement)(t);
-    #pragma omp parallel if(sz > TH_OMP_OVERHEAD_THRESHOLD)
-    {
-      #ifdef _OPENMP
-      size_t num_threads = omp_get_num_threads();
-      size_t tid = omp_get_thread_num();
-      #else
-      size_t num_threads = 1;
-      size_t tid = 0;
-      #endif
-      ptrdiff_t i = tid * (sz / num_threads);
-      ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads;
-      __m256 YMM15 = _mm256_set_ps(value, value, value, value, value, value, value, value);
-      __m256 YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7;
-      for (; i<=((i_end)-16); i+=16) {
-        YMM0 = _mm256_loadu_ps(tp+i);
-        YMM1 = _mm256_loadu_ps(tp+i+8);
-        YMM4 = _mm256_add_ps(YMM0, YMM15);
-        YMM5 = _mm256_add_ps(YMM1, YMM15);
-        _mm256_storeu_ps(rp+i, YMM4);
-        _mm256_storeu_ps(rp+i+8, YMM5);
-      }
-      for (; i<i_end; i++) {
-        rp[i] = tp[i] + value;
-      }
-    }
-  } else {
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);
-  }
-}
-
 void THTensor_(mul_AVX)(THTensor *r_, THTensor *t, real value)
 {
   THTensor_(resizeAs)(r_, t);
diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h
index 426e3d8..3edb83f 100644
--- a/lib/TH/generic/THVector.h
+++ b/lib/TH/generic/THVector.h
@@ -7,7 +7,7 @@ TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c,
 TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n);
 TH_API void THVector_(diff)(real *z, const real *x, const real *y, const ptrdiff_t n);
 TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n);
-TH_API void THVector_(mul)(real *y, const real *x, const ptrdiff_t n);
+TH_API void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n);
 
 /* Initialize the dispatch pointers */
 TH_API void THVector_(vectorDispatchInit)(void);
diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c
index 348343c..89e007f 100644
--- a/lib/TH/generic/THVectorDefault.c
+++ b/lib/TH/generic/THVectorDefault.c
@@ -81,7 +81,7 @@ void THVector_(scale_DEFAULT)(real *y, const real c, const ptrdiff_t n)
     y[i] *= c;
 }
 
-void THVector_(mul_DEFAULT)(real *y, const real *x, const ptrdiff_t n)
+void THVector_(cmul_DEFAULT)(real *y, const real *x, const ptrdiff_t n)
 {
   ptrdiff_t i = 0;
 
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 0ff05c8..9f70e75 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -136,11 +136,11 @@ TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n) {
 }
 
 
-static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(mul_DEFAULT);
-static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = {
+static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
+static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
   #if defined(__NEON__)
     #if defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON),
+      FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON),
     #endif
   #endif
 
@@ -153,14 +153,14 @@ static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = {
   #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
           || defined(USE_SSE4_1) || defined(USE_SSE4_2)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE),
+      FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE),
     #endif
   #endif
 
-  FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT)
+  FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT)
 };
-void THVector_(mul)(real *y, const real *x, const ptrdiff_t n) {
-  THVector_(mul_DISPATCHPTR);
+void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n) { /* public entry point: forwards y, x, n to the implementation held in cmul_DISPATCHPTR */
+  THVector_(cmul_DISPATCHPTR)(y, x, n); /* actually invoke the pointer; a bare `ptr;` expression statement is a no-op */
 }
 
 /* This needs to be called in order to initialize the dispatch pointers at runtime.
@@ -178,7 +178,7 @@ void THVector_(vectorDispatchInit)(void)
   INIT_VECTOR_DISPATCH_PTR(add);
   INIT_VECTOR_DISPATCH_PTR(diff);
   INIT_VECTOR_DISPATCH_PTR(scale);
-  INIT_VECTOR_DISPATCH_PTR(mul);
+  INIT_VECTOR_DISPATCH_PTR(cmul);
 }
 
 #endif
diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c
index a2c46aa..94a3907 100644
--- a/lib/TH/vector/NEON.c
+++ b/lib/TH/vector/NEON.c
@@ -47,7 +47,7 @@ static void THFloatVector_scale_NEON(float *y, const float c, const ptrdiff_t n)
     y[i] *= c;
 }
 
-static void THFloatVector_mul_NEON(float *y, const float *x, const ptrdiff_t n) {
+static void THFloatVector_cmul_NEON(float *y, const float *x, const ptrdiff_t n) {
   long i = 0;
 
   for(; i < n-4; i += 4)
diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c
index da40282..4735e63 100644
--- a/lib/TH/vector/SSE.c
+++ b/lib/TH/vector/SSE.c
@@ -84,7 +84,7 @@ static void THDoubleVector_scale_SSE(double *y, const double c, const ptrdiff_t
 }
 
 
-static void THDoubleVector_mul_SSE(double *y, const double *x, const ptrdiff_t n) {
+static void THDoubleVector_cmul_SSE(double *y, const double *x, const ptrdiff_t n) {
   ptrdiff_t i;
   for (i=0; i<=((n)-8); i+=8) {
     __m128d XMM0 = _mm_loadu_pd((x)+i  );
@@ -189,7 +189,7 @@ static void THFloatVector_scale_SSE(float *y, const float c, const ptrdiff_t n)
   }
 }
 
-static void THFloatVector_mul_SSE(float *y, const float *x, const ptrdiff_t n) {
+static void THFloatVector_cmul_SSE(float *y, const float *x, const ptrdiff_t n) {
   ptrdiff_t i;
   for (i=0; i<=((n)-16); i+=16) {
     __m128 XMM0 = _mm_loadu_ps((x)+i   );
author	jokeren <robinho364@gmail.com>	2016-12-05 05:13:43 +0300
committer	Soumith Chintala <soumith@gmail.com>	2017-02-23 13:40:33 +0300
commit	1ad347c00ed2224ff13fb93ad0d45e5e709b8310 (patch)
tree	1f04fd6c10296d7a83e9707e2a470dc8d73266a3
parent	41732e69fe9ed3c79223cfdbf3601d30936d2a58 (diff)