THVector_(add),(mul),(div) -> (adds),(muls),(divs) [simd-opt]

author: Soumith Chintala <soumith@gmail.com> 2017-02-28 20:10:47 +0300
committer: Soumith Chintala <soumith@gmail.com> 2017-02-28 20:10:47 +0300
commit: 5f9d30769789bc36ec628e3eec2f2fb85a1e6b50 (patch)
tree: 851111b7baf0a55c46092bf95e2a9f076960095d
parent: 641d9c508e027c0cd550ff435f5d6cfd02c7cecd (diff)
7 files changed, 54 insertions, 53 deletions
diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c
index f698e25..9beb3f5 100644
--- a/lib/TH/generic/THTensorMath.c
+++ b/lib/TH/generic/THTensorMath.c
@@ -97,6 +97,7 @@ void THTensor_(fill)(THTensor *r_, real value)
     TH_TENSOR_APPLY(real, r_,
       if (r__stride == 1) {
         THVector_(fill)(r__data, value, r__size);
+	r__i = r__size;
 	r__data += r__stride * r__size;
 	break;
       } else {
@@ -559,7 +560,7 @@ void THTensor_(add)(THTensor *r_, THTensor *t, real value)
 {
   THTensor_(resizeAs)(r_, t);
   if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(add)(r__data, t_data, value, r__len););
+    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len););
   } else {
     TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);
   }
@@ -574,7 +575,7 @@ void THTensor_(mul)(THTensor *r_, THTensor *t, real value)
 {
   THTensor_(resizeAs)(r_, t);
   if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(mul)(r__data, t_data, value, r__len););
+    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len););
   } else {
     TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;);
   }
@@ -584,7 +585,7 @@ void THTensor_(div)(THTensor *r_, THTensor *t, real value)
 {
   THTensor_(resizeAs)(r_, t);
   if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(div)(r__data, t_data, value, r__len););
+    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len););
   } else {
     TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;);
   }
diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h
index 17d4253..7d36854 100644
--- a/lib/TH/generic/THVector.h
+++ b/lib/TH/generic/THVector.h
@@ -4,11 +4,11 @@
 
 TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n);
 TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n);
-TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n);
 TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n);
 TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n);
 TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n);
 
 /* Initialize the dispatch pointers */
diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c
index 2cb3ec0..3388e0d 100644
--- a/lib/TH/generic/THVectorDefault.c
+++ b/lib/TH/generic/THVectorDefault.c
@@ -48,7 +48,7 @@ void THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c
     z[i] = x[i] + c * y[i];
 }
 
-void THVector_(add_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
 {
   ptrdiff_t i = 0;
 
@@ -80,7 +80,7 @@ void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdif
     z[i] = x[i] * y[i];
 }
 
-void THVector_(mul_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
 {
   ptrdiff_t i = 0;
 
@@ -112,7 +112,7 @@ void THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdif
     z[i] = x[i] / y[i];
 }
 
-void THVector_(div_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
 {
   ptrdiff_t i = 0;
 
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 900bb2a..5b88852 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -77,38 +77,38 @@ void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const
   THVector_(cadd_DISPATCHPTR)(z, x, y, c, n);
 }
 
-static void (*THVector_(add_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(add_DEFAULT);
-static FunctionDescription THVector_(add_DISPATCHTABLE)[] = {
+static void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT);
+static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
   #if defined(__NEON__)
     #if defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(add_NEON), SIMDExtension_NEON),
+      FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON),
     #endif
   #endif
 
   #if defined(__PPC64__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(add_VSX), SIMDExtension_VSX),
+      FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX),
     #endif
   #endif
 
   #if defined(USE_AVX)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(add_AVX), SIMDExtension_AVX),
+      FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
     #endif
   #endif
 
   #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
           || defined(USE_SSE4_1) || defined(USE_SSE4_2)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(add_SSE), SIMDExtension_SSE),
+      FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE),
     #endif
   #endif
 
-  FUNCTION_IMPL(THVector_(add_DEFAULT), SIMDExtension_DEFAULT)
+  FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT)
 };
 // Dispatch stubs that just call the pointers
-TH_API void THVector_(add)(real *r_, const real *t, const real value, const ptrdiff_t n) {
-  THVector_(add_DISPATCHPTR)(r_, t, value, n);
+TH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) {
+  THVector_(adds_DISPATCHPTR)(r_, t, value, n);
 }
 
 static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
@@ -138,37 +138,37 @@ void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) {
   THVector_(cmul_DISPATCHPTR)(z, x, y, n);
 }
 
-static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(mul_DEFAULT);
-static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = {
+static void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT);
+static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
   #if defined(__NEON__)
     #if defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON),
+      FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON),
     #endif
   #endif
 
   #if defined(__PPC64__)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_VSX), SIMDExtension_VSX),
+      FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX),
     #endif
   #endif
 
   #if defined(USE_AVX)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_AVX), SIMDExtension_AVX),
+      FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
     #endif
   #endif
 
   #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
           || defined(USE_SSE4_1) || defined(USE_SSE4_2)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE),
+      FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE),
     #endif
   #endif
 
-  FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT)
+  FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT)
 };
-void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n) {
-  THVector_(mul_DISPATCHPTR)(y, x, c, n);
+void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) {
+  THVector_(muls_DISPATCHPTR)(y, x, c, n);
 }
 
 static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT);
@@ -198,31 +198,31 @@ void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) {
   THVector_(cdiv_DISPATCHPTR)(z, x, y, n);
 }
 
-static void (*THVector_(div_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(div_DEFAULT);
-static FunctionDescription THVector_(div_DISPATCHTABLE)[] = {
+static void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT);
+static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
   #if defined(__NEON__)
     #if defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(div_NEON), SIMDExtension_NEON),
+      FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON),
     #endif
   #endif
 
   #if defined(USE_AVX)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(div_AVX), SIMDExtension_AVX),
+      FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
     #endif
   #endif
 
   #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
           || defined(USE_SSE4_1) || defined(USE_SSE4_2)
     #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
-      FUNCTION_IMPL(THVector_(div_SSE), SIMDExtension_SSE),
+      FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE),
     #endif
   #endif
 
-  FUNCTION_IMPL(THVector_(div_DEFAULT), SIMDExtension_DEFAULT)
+  FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT)
 };
-void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n) {
-  THVector_(div_DISPATCHPTR)(y, x, c, n);
+void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
+  THVector_(divs_DISPATCHPTR)(y, x, c, n);
 }
 
 static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
@@ -251,11 +251,11 @@ void THVector_(vectorDispatchInit)(void)
   uint32_t hostSimdExts = detectHostSIMDExtensions();
   INIT_DISPATCH_PTR(fill);
   INIT_DISPATCH_PTR(cadd);
-  INIT_DISPATCH_PTR(add);
+  INIT_DISPATCH_PTR(adds);
   INIT_DISPATCH_PTR(cmul);
-  INIT_DISPATCH_PTR(mul);
+  INIT_DISPATCH_PTR(muls);
   INIT_DISPATCH_PTR(cdiv);
-  INIT_DISPATCH_PTR(div);
+  INIT_DISPATCH_PTR(divs);
   INIT_DISPATCH_PTR(copy);
 }
 
diff --git a/lib/TH/vector/AVX.c b/lib/TH/vector/AVX.c
index 3356a74..1f902cc 100644
--- a/lib/TH/vector/AVX.c
+++ b/lib/TH/vector/AVX.c
@@ -51,7 +51,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_div_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -86,7 +86,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_mul_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -119,7 +119,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_add_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256d YMM15 = _mm256_set_pd(c, c, c, c);
   __m256d YMM0, YMM1;
@@ -183,7 +183,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_div_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
@@ -218,7 +218,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_mul_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
@@ -251,7 +251,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_add_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
   __m256 YMM0, YMM1;
diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c
index 0efbf51..7920fb1 100644
--- a/lib/TH/vector/NEON.c
+++ b/lib/TH/vector/NEON.c
@@ -29,7 +29,7 @@ static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, co
     z[i] = x[i] * y[i];
 }
 
-static void THFloatVector_mul_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
   long i = 0;
 
   for(; i < n-4; i += 4)
@@ -59,7 +59,7 @@ static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, co
     z[i] = x[i] + c * y[i];
 }
 
-static void THFloatVector_add_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
   long i = 0;
 
   for(;i < n-4; i += 4)
@@ -89,7 +89,7 @@ static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, co
     z[i] = x[i] / y[i];
 }
 
-static void THFloatVector_div_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
   long i = 0;
 
   for(;i < n-4; i += 4)
diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c
index 521e29f..01ac789 100644
--- a/lib/TH/vector/SSE.c
+++ b/lib/TH/vector/SSE.c
@@ -36,7 +36,7 @@ static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_add_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128d XMM7 = _mm_set1_pd(c);
   __m128d XMM0, XMM2;
@@ -78,7 +78,7 @@ static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_mul_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128d XMM15 = _mm_set1_pd(c);
   for (i=0; i<=((n)-8); i+=8) {
@@ -118,7 +118,7 @@ static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y,
   }
 }
 
-static void THDoubleVector_div_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128d XMM7 = _mm_set1_pd(c);
   __m128d XMM0, XMM1;
@@ -168,7 +168,7 @@ static void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_add_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128 XMM7 = _mm_set1_ps(c);
   __m128 XMM0, XMM2;
@@ -210,7 +210,7 @@ static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_mul_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128 XMM15 = _mm_set_ps1(c);
   for (i=0; i<=((n)-16); i+=16) {
@@ -250,7 +250,7 @@ static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, con
   }
 }
 
-static void THFloatVector_div_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
   ptrdiff_t i;
   __m128 XMM7 = _mm_set1_ps(c);
   __m128 XMM0, XMM1;
author	Soumith Chintala <soumith@gmail.com>	2017-02-28 20:10:47 +0300
committer	Soumith Chintala <soumith@gmail.com>	2017-02-28 20:10:47 +0300
commit	5f9d30769789bc36ec628e3eec2f2fb85a1e6b50 (patch)
tree	851111b7baf0a55c46092bf95e2a9f076960095d
parent	641d9c508e027c0cd550ff435f5d6cfd02c7cecd (diff)