Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Soumith Chintala <soumith@gmail.com>, 2017-02-28 20:10:47 +0300
committer: Soumith Chintala <soumith@gmail.com>, 2017-02-28 20:10:47 +0300
commit: 5f9d30769789bc36ec628e3eec2f2fb85a1e6b50 (patch)
tree: 851111b7baf0a55c46092bf95e2a9f076960095d
parent: 641d9c508e027c0cd550ff435f5d6cfd02c7cecd (diff)
THVector_(add),(mul),(div) -> (adds),(muls),(divs) [simd-opt]
-rw-r--r-- lib/TH/generic/THTensorMath.c 7
-rw-r--r-- lib/TH/generic/THVector.h 6
-rw-r--r-- lib/TH/generic/THVectorDefault.c 6
-rw-r--r-- lib/TH/generic/THVectorDispatch.c 58
-rw-r--r-- lib/TH/vector/AVX.c 12
-rw-r--r-- lib/TH/vector/NEON.c 6
-rw-r--r-- lib/TH/vector/SSE.c 12
7 files changed, 54 insertions, 53 deletions
diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c
index f698e25..9beb3f5 100644
--- a/lib/TH/generic/THTensorMath.c
+++ b/lib/TH/generic/THTensorMath.c
@@ -97,6 +97,7 @@ void THTensor_(fill)(THTensor *r_, real value)
TH_TENSOR_APPLY(real, r_,
if (r__stride == 1) {
THVector_(fill)(r__data, value, r__size);
+ r__i = r__size;
r__data += r__stride * r__size;
break;
} else {
@@ -559,7 +560,7 @@ void THTensor_(add)(THTensor *r_, THTensor *t, real value)
{
THTensor_(resizeAs)(r_, t);
if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
- TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(add)(r__data, t_data, value, r__len););
+ TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len););
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);
}
@@ -574,7 +575,7 @@ void THTensor_(mul)(THTensor *r_, THTensor *t, real value)
{
THTensor_(resizeAs)(r_, t);
if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
- TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(mul)(r__data, t_data, value, r__len););
+ TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len););
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;);
}
@@ -584,7 +585,7 @@ void THTensor_(div)(THTensor *r_, THTensor *t, real value)
{
THTensor_(resizeAs)(r_, t);
if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
- TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(div)(r__data, t_data, value, r__len););
+ TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len););
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;);
}
diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h
index 17d4253..7d36854 100644
--- a/lib/TH/generic/THVector.h
+++ b/lib/TH/generic/THVector.h
@@ -4,11 +4,11 @@
TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n);
TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n);
-TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n);
TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n);
TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n);
+TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n);
TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n);
/* Initialize the dispatch pointers */
diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c
index 2cb3ec0..3388e0d 100644
--- a/lib/TH/generic/THVectorDefault.c
+++ b/lib/TH/generic/THVectorDefault.c
@@ -48,7 +48,7 @@ void THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c
z[i] = x[i] + c * y[i];
}
-void THVector_(add_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
{
ptrdiff_t i = 0;
@@ -80,7 +80,7 @@ void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdif
z[i] = x[i] * y[i];
}
-void THVector_(mul_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
{
ptrdiff_t i = 0;
@@ -112,7 +112,7 @@ void THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdif
z[i] = x[i] / y[i];
}
-void THVector_(div_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
+void THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
{
ptrdiff_t i = 0;
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 900bb2a..5b88852 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -77,38 +77,38 @@ void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const
THVector_(cadd_DISPATCHPTR)(z, x, y, c, n);
}
-static void (*THVector_(add_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(add_DEFAULT);
-static FunctionDescription THVector_(add_DISPATCHTABLE)[] = {
+static void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT);
+static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
#if defined(__NEON__)
#if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(add_NEON), SIMDExtension_NEON),
+ FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON),
#endif
#endif
#if defined(__PPC64__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(add_VSX), SIMDExtension_VSX),
+ FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX),
#endif
#endif
#if defined(USE_AVX)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(add_AVX), SIMDExtension_AVX),
+ FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
#endif
#endif
#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
|| defined(USE_SSE4_1) || defined(USE_SSE4_2)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(add_SSE), SIMDExtension_SSE),
+ FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE),
#endif
#endif
- FUNCTION_IMPL(THVector_(add_DEFAULT), SIMDExtension_DEFAULT)
+ FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT)
};
// Dispatch stubs that just call the pointers
-TH_API void THVector_(add)(real *r_, const real *t, const real value, const ptrdiff_t n) {
- THVector_(add_DISPATCHPTR)(r_, t, value, n);
+TH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) {
+ THVector_(adds_DISPATCHPTR)(r_, t, value, n);
}
static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
@@ -138,37 +138,37 @@ void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) {
THVector_(cmul_DISPATCHPTR)(z, x, y, n);
}
-static void (*THVector_(mul_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(mul_DEFAULT);
-static FunctionDescription THVector_(mul_DISPATCHTABLE)[] = {
+static void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT);
+static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
#if defined(__NEON__)
#if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(mul_NEON), SIMDExtension_NEON),
+ FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON),
#endif
#endif
#if defined(__PPC64__)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(mul_VSX), SIMDExtension_VSX),
+ FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX),
#endif
#endif
#if defined(USE_AVX)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(mul_AVX), SIMDExtension_AVX),
+ FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
#endif
#endif
#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
|| defined(USE_SSE4_1) || defined(USE_SSE4_2)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(mul_SSE), SIMDExtension_SSE),
+ FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE),
#endif
#endif
- FUNCTION_IMPL(THVector_(mul_DEFAULT), SIMDExtension_DEFAULT)
+ FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT)
};
-void THVector_(mul)(real *y, const real *x, const real c, const ptrdiff_t n) {
- THVector_(mul_DISPATCHPTR)(y, x, c, n);
+void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) {
+ THVector_(muls_DISPATCHPTR)(y, x, c, n);
}
static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT);
@@ -198,31 +198,31 @@ void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) {
THVector_(cdiv_DISPATCHPTR)(z, x, y, n);
}
-static void (*THVector_(div_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(div_DEFAULT);
-static FunctionDescription THVector_(div_DISPATCHTABLE)[] = {
+static void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT);
+static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
#if defined(__NEON__)
#if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(div_NEON), SIMDExtension_NEON),
+ FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON),
#endif
#endif
#if defined(USE_AVX)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(div_AVX), SIMDExtension_AVX),
+ FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
#endif
#endif
#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
|| defined(USE_SSE4_1) || defined(USE_SSE4_2)
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(div_SSE), SIMDExtension_SSE),
+ FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE),
#endif
#endif
- FUNCTION_IMPL(THVector_(div_DEFAULT), SIMDExtension_DEFAULT)
+ FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT)
};
-void THVector_(div)(real *y, const real *x, const real c, const ptrdiff_t n) {
- THVector_(div_DISPATCHPTR)(y, x, c, n);
+void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
+ THVector_(divs_DISPATCHPTR)(y, x, c, n);
}
static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
@@ -251,11 +251,11 @@ void THVector_(vectorDispatchInit)(void)
uint32_t hostSimdExts = detectHostSIMDExtensions();
INIT_DISPATCH_PTR(fill);
INIT_DISPATCH_PTR(cadd);
- INIT_DISPATCH_PTR(add);
+ INIT_DISPATCH_PTR(adds);
INIT_DISPATCH_PTR(cmul);
- INIT_DISPATCH_PTR(mul);
+ INIT_DISPATCH_PTR(muls);
INIT_DISPATCH_PTR(cdiv);
- INIT_DISPATCH_PTR(div);
+ INIT_DISPATCH_PTR(divs);
INIT_DISPATCH_PTR(copy);
}
diff --git a/lib/TH/vector/AVX.c b/lib/TH/vector/AVX.c
index 3356a74..1f902cc 100644
--- a/lib/TH/vector/AVX.c
+++ b/lib/TH/vector/AVX.c
@@ -51,7 +51,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_div_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -86,7 +86,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_mul_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -119,7 +119,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_add_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
__m256d YMM0, YMM1;
@@ -183,7 +183,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_div_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
@@ -218,7 +218,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_mul_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
@@ -251,7 +251,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_add_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
__m256 YMM0, YMM1;
diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c
index 0efbf51..7920fb1 100644
--- a/lib/TH/vector/NEON.c
+++ b/lib/TH/vector/NEON.c
@@ -29,7 +29,7 @@ static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, co
z[i] = x[i] * y[i];
}
-static void THFloatVector_mul_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(; i < n-4; i += 4)
@@ -59,7 +59,7 @@ static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, co
z[i] = x[i] + c * y[i];
}
-static void THFloatVector_add_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
@@ -89,7 +89,7 @@ static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, co
z[i] = x[i] / y[i];
}
-static void THFloatVector_div_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
long i = 0;
for(;i < n-4; i += 4)
diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c
index 521e29f..01ac789 100644
--- a/lib/TH/vector/SSE.c
+++ b/lib/TH/vector/SSE.c
@@ -36,7 +36,7 @@ static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_add_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m128d XMM7 = _mm_set1_pd(c);
__m128d XMM0, XMM2;
@@ -78,7 +78,7 @@ static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_mul_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m128d XMM15 = _mm_set1_pd(c);
for (i=0; i<=((n)-8); i+=8) {
@@ -118,7 +118,7 @@ static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y,
}
}
-static void THDoubleVector_div_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
+static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
ptrdiff_t i;
__m128d XMM7 = _mm_set1_pd(c);
__m128d XMM0, XMM1;
@@ -168,7 +168,7 @@ static void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_add_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m128 XMM7 = _mm_set1_ps(c);
__m128 XMM0, XMM2;
@@ -210,7 +210,7 @@ static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_mul_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m128 XMM15 = _mm_set_ps1(c);
for (i=0; i<=((n)-16); i+=16) {
@@ -250,7 +250,7 @@ static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, con
}
}
-static void THFloatVector_div_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
+static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
ptrdiff_t i;
__m128 XMM7 = _mm_set1_ps(c);
__m128 XMM0, XMM1;