Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jokeren <robinho364@gmail.com> 2016-12-05 08:34:35 +0300
committer: Soumith Chintala <soumith@gmail.com> 2017-02-23 13:40:33 +0300
commit: 15cccf3cad04862c8c25c710b5109b6d650d5085 (patch)
tree: 65b2ca8c12c9450e47354cfde29a92c0f4e206fe
parent: 1ad347c00ed2224ff13fb93ad0d45e5e709b8310 (diff)
Merge THVector cmul
-rw-r--r-- lib/TH/generic/THTensorMath.c | 17
-rw-r--r-- lib/TH/generic/THVector.h | 2
-rw-r--r-- lib/TH/generic/THVectorDefault.c | 12
-rw-r--r-- lib/TH/generic/THVectorDispatch.c | 7
-rw-r--r-- lib/TH/vector/NEON.c | 12
-rw-r--r-- lib/TH/vector/SSE.c | 30
6 files changed, 43 insertions, 37 deletions
diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c
index 54b43c8..9f0493f 100644
--- a/lib/TH/generic/THTensorMath.c
+++ b/lib/TH/generic/THTensorMath.c
@@ -776,10 +776,19 @@ void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src)
real *sp = THTensor_(data)(src);
real *rp = THTensor_(data)(r_);
ptrdiff_t sz = THTensor_(nElement)(t);
- ptrdiff_t i;
- #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
- for (i=0; i<sz; i++)
- rp[i] = tp[i] * sp[i];
+ #pragma omp parallel if(sz > TH_OMP_OVERHEAD_THRESHOLD)
+ {
+ #ifdef _OPENMP
+ size_t num_threads = omp_get_num_threads();
+ size_t tid = omp_get_thread_num();
+ #else
+ size_t num_threads = 1;
+ size_t tid = 0;
+ #endif
+ ptrdiff_t i = tid * (sz / num_threads);
+ ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads;
+ THVector_(cmul)(rp+i, tp+i, sp+i, i_end-i);
+ }
} else {
TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * *src_data;);
}
diff --git a/lib/TH/generic/THVector.h b/lib/TH/generic/THVector.h
index 3edb83f..271868f 100644
--- a/lib/TH/generic/THVector.h
+++ b/lib/TH/generic/THVector.h
@@ -7,7 +7,7 @@ TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c,
TH_API void THVector_(add)(real *y, const real *x, const real c, const ptrdiff_t n);
TH_API void THVector_(diff)(real *z, const real *x, const real *y, const ptrdiff_t n);
TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n);
-TH_API void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n);
+TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n);
/* Initialize the dispatch pointers */
TH_API void THVector_(vectorDispatchInit)(void);
diff --git a/lib/TH/generic/THVectorDefault.c b/lib/TH/generic/THVectorDefault.c
index 89e007f..2603ed0 100644
--- a/lib/TH/generic/THVectorDefault.c
+++ b/lib/TH/generic/THVectorDefault.c
@@ -81,20 +81,20 @@ void THVector_(scale_DEFAULT)(real *y, const real c, const ptrdiff_t n)
y[i] *= c;
}
-void THVector_(cmul_DEFAULT)(real *y, const real *x, const ptrdiff_t n)
+void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n)
{
ptrdiff_t i = 0;
for(; i < n-4; i += 4)
{
- y[i] *= x[i];
- y[i+1] *= x[i+1];
- y[i+2] *= x[i+2];
- y[i+3] *= x[i+3];
+ z[i] = x[i] * y[i];
+ z[i+1] = x[i+1] * y[i+1];
+ z[i+2] = x[i+2] * y[i+2];
+ z[i+3] = x[i+3] * y[i+3];
}
for(; i < n; i++)
- y[i] *= x[i];
+ z[i] = x[i] * y[i];
}
#endif
diff --git a/lib/TH/generic/THVectorDispatch.c b/lib/TH/generic/THVectorDispatch.c
index 9f70e75..bc87d78 100644
--- a/lib/TH/generic/THVectorDispatch.c
+++ b/lib/TH/generic/THVectorDispatch.c
@@ -107,7 +107,6 @@ void THVector_(diff)(real *z, const real *x, const real *y, const ptrdiff_t n) {
THVector_(diff_DISPATCHPTR)(z, x, y, n);
}
-
static void (*THVector_(scale_DISPATCHPTR))(real *, const real, const ptrdiff_t) = &THVector_(scale_DEFAULT);
static FunctionDescription THVector_(scale_DISPATCHTABLE)[] = {
#if defined(__NEON__)
@@ -136,7 +135,7 @@ TH_API void THVector_(scale)(real *y, const real c, const ptrdiff_t n) {
}
-static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
+static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
#if defined(__NEON__)
#if defined(TH_REAL_IS_FLOAT)
@@ -159,8 +158,8 @@ static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT)
};
-void THVector_(cmul)(real *y, const real *x, const ptrdiff_t n) {
- THVector_(cmul_DISPATCHPTR);
+void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) {
+ THVector_(cmul_DISPATCHPTR)(z, x, y, n);
}
/* This needs to be called in order to initialize the dispatch pointers at runtime.
diff --git a/lib/TH/vector/NEON.c b/lib/TH/vector/NEON.c
index 94a3907..3f2aaf2 100644
--- a/lib/TH/vector/NEON.c
+++ b/lib/TH/vector/NEON.c
@@ -47,19 +47,19 @@ static void THFloatVector_scale_NEON(float *y, const float c, const ptrdiff_t n)
y[i] *= c;
}
-static void THFloatVector_cmul_NEON(float *y, const float *x, const ptrdiff_t n) {
+static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {
long i = 0;
for(; i < n-4; i += 4)
{
- y[i] *= x[i];
- y[i+1] *= x[i+1];
- y[i+2] *= x[i+2];
- y[i+3] *= x[i+3];
+ z[i] = x[i] * y[i];
+ z[i+1] = x[i+1] * y[i+1];
+ z[i+2] = x[i+2] * y[i+2];
+ z[i+3] = x[i+3] * y[i+3];
}
for(; i < n; i++)
- y[i] *= x[i];
+ z[i] = x[i] * y[i];
}
static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
diff --git a/lib/TH/vector/SSE.c b/lib/TH/vector/SSE.c
index 4735e63..f253aef 100644
--- a/lib/TH/vector/SSE.c
+++ b/lib/TH/vector/SSE.c
@@ -84,7 +84,7 @@ static void THDoubleVector_scale_SSE(double *y, const double c, const ptrdiff_t
}
-static void THDoubleVector_cmul_SSE(double *y, const double *x, const ptrdiff_t n) {
+static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) {
ptrdiff_t i;
for (i=0; i<=((n)-8); i+=8) {
__m128d XMM0 = _mm_loadu_pd((x)+i );
@@ -99,14 +99,13 @@ static void THDoubleVector_cmul_SSE(double *y, const double *x, const ptrdiff_t
XMM5 = _mm_mul_pd(XMM5, XMM1);
XMM6 = _mm_mul_pd(XMM6, XMM2);
XMM7 = _mm_mul_pd(XMM7, XMM3);
- _mm_storeu_pd((y)+i , XMM4);
- _mm_storeu_pd((y)+i+2, XMM5);
- _mm_storeu_pd((y)+i+4, XMM6);
- _mm_storeu_pd((y)+i+6, XMM7);
+ _mm_storeu_pd((z)+i , XMM4);
+ _mm_storeu_pd((z)+i+2, XMM5);
+ _mm_storeu_pd((z)+i+4, XMM6);
+ _mm_storeu_pd((z)+i+6, XMM7);
}
- ptrdiff_t off = (n) - ((n)%8);
- for (i=0; i<((n)%8); i++) {
- y[off+i] *= x[off+i];
+ for (; i<(n); i++) {
+ z[i] = x[i] * y[i];
}
}
@@ -189,7 +188,7 @@ static void THFloatVector_scale_SSE(float *y, const float c, const ptrdiff_t n)
}
}
-static void THFloatVector_cmul_SSE(float *y, const float *x, const ptrdiff_t n) {
+static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) {
ptrdiff_t i;
for (i=0; i<=((n)-16); i+=16) {
__m128 XMM0 = _mm_loadu_ps((x)+i );
@@ -204,14 +203,13 @@ static void THFloatVector_cmul_SSE(float *y, const float *x, const ptrdiff_t n)
XMM5 = _mm_mul_ps(XMM5, XMM1);
XMM6 = _mm_mul_ps(XMM6, XMM2);
XMM7 = _mm_mul_ps(XMM7, XMM3);
- _mm_storeu_ps((y)+i , XMM4);
- _mm_storeu_ps((y)+i+ 4, XMM5);
- _mm_storeu_ps((y)+i+ 8, XMM6);
- _mm_storeu_ps((y)+i+12, XMM7);
+ _mm_storeu_ps((z)+i , XMM4);
+ _mm_storeu_ps((z)+i+ 4, XMM5);
+ _mm_storeu_ps((z)+i+ 8, XMM6);
+ _mm_storeu_ps((z)+i+12, XMM7);
}
- ptrdiff_t off = (n) - ((n)%16);
- for (i=0; i<((n)%16); i++) {
- y[off+i] *= x[off+i];
+ for (; i<(n); i++) {
+ z[i] = x[i] * y[i];
}
}