BLI: add BLI_simd.h header to wrap SSE includes

In preparation for adding Neon support. Ref D8237, T78710
author: Brecht Van Lommel <brecht@blender.org> 2021-02-14 16:20:51 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-02-17 18:26:24 +0300
commit: 859118d8f6ff022a16acbc6435488883424bad25 (patch)
tree: e0f708929a884070f7c3758f7fb58527cbdfd555
parent: ac680c569e1b979f20c2e81dbd4f232085141aad (diff)
13 files changed, 60 insertions, 43 deletions
diff --git a/source/blender/blenkernel/intern/lattice_deform.c b/source/blender/blenkernel/intern/lattice_deform.c
index 81c2f492f7e..2651042939f 100644
--- a/source/blender/blenkernel/intern/lattice_deform.c
+++ b/source/blender/blenkernel/intern/lattice_deform.c
@@ -31,6 +31,7 @@
 #include "MEM_guardedalloc.h"
 
 #include "BLI_math.h"
+#include "BLI_simd.h"
 #include "BLI_task.h"
 #include "BLI_utildefines.h"
 
@@ -49,10 +50,6 @@
 
 #include "BKE_deform.h"
 
-#ifdef __SSE2__
-#  include <emmintrin.h>
-#endif
-
 /* -------------------------------------------------------------------- */
 /** \name Lattice Deform API
  * \{ */
@@ -171,7 +168,7 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data,
   /* vgroup influence */
   float co_prev[4] = {0}, weight_blend = 0.0f;
   copy_v3_v3(co_prev, co);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 co_vec = _mm_loadu_ps(co_prev);
 #endif
 
@@ -232,7 +229,7 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data,
         u = v * tu[uu - ui + 1];
         idx_u = CLAMPIS(uu, 0, idx_u_max);
         const int idx = idx_w + idx_v + idx_u;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
         {
           __m128 weight_vec = _mm_set1_ps(u);
           /* We need to address special case for last item to avoid accessing invalid memory. */
@@ -256,7 +253,7 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data,
       }
     }
   }
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   {
     copy_v3_v3(co, (float *)&co_vec);
   }
diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h
new file mode 100644
index 00000000000..1518b6c1de2
--- /dev/null
+++ b/source/blender/blenlib/BLI_simd.h
@@ -0,0 +1,28 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * SIMD instruction support.
+ */
+
+#if defined(__SSE2__)
+#  include <emmintrin.h>
+#  define BLI_HAVE_SSE2
+#endif
diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c
index 28aa81e5858..39945960e68 100644
--- a/source/blender/blenlib/intern/math_base_inline.c
+++ b/source/blender/blenlib/intern/math_base_inline.c
@@ -31,11 +31,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#ifdef __SSE2__
-#  include <emmintrin.h>
-#endif
-
 #include "BLI_math_base.h"
+#include "BLI_simd.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -685,10 +682,10 @@ MINLINE int integer_digits_i(const int i)
 
 /* Internal helpers for SSE2 implementation.
  *
- * NOTE: Are to be called ONLY from inside `#ifdef __SSE2__` !!!
+ * NOTE: Are to be called ONLY from inside `#ifdef BLI_HAVE_SSE2` !!!
  */
 
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
 
 /* Calculate initial guess for arg^exp based on float representation
  * This method gives a constant bias, which can be easily compensated by
@@ -769,7 +766,7 @@ MALWAYS_INLINE __m128 _bli_math_blend_sse(const __m128 mask, const __m128 a, con
   return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
 }
 
-#endif /* __SSE2__ */
+#endif /* BLI_HAVE_SSE2 */
 
 /* Low level conversion functions */
 MINLINE unsigned char unit_float_to_uchar_clamp(float val)
diff --git a/source/blender/blenlib/intern/math_color_inline.c b/source/blender/blenlib/intern/math_color_inline.c
index 26828cb8717..4c50c1c7af8 100644
--- a/source/blender/blenlib/intern/math_color_inline.c
+++ b/source/blender/blenlib/intern/math_color_inline.c
@@ -34,7 +34,7 @@
 
 /******************************** Color Space ********************************/
 
-#  ifdef __SSE2__
+#  ifdef BLI_HAVE_SSE2
 
 MALWAYS_INLINE __m128 srgb_to_linearrgb_v4_simd(const __m128 c)
 {
@@ -75,7 +75,7 @@ MINLINE void linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3])
   srgb[2] = r[2];
 }
 
-#  else  /* __SSE2__ */
+#  else  /* BLI_HAVE_SSE2 */
 
 MINLINE void srgb_to_linearrgb_v3_v3(float linear[3], const float srgb[3])
 {
@@ -90,7 +90,7 @@ MINLINE void linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3])
   srgb[1] = linearrgb_to_srgb(linear[1]);
   srgb[2] = linearrgb_to_srgb(linear[2]);
 }
-#  endif /* __SSE2__ */
+#  endif /* BLI_HAVE_SSE2 */
 
 MINLINE void srgb_to_linearrgb_v4(float linear[4], const float srgb[4])
 {
diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c
index 5d78bb0b901..b7e94e6c512 100644
--- a/source/blender/blenlib/intern/math_geom.c
+++ b/source/blender/blenlib/intern/math_geom.c
@@ -5907,7 +5907,7 @@ static float ff_quad_form_factor(float *p, float *n, float *q0, float *q1, float
 
 #if 0
 
-#  include <xmmintrin.h>
+#  include "BLI_simd.h"
 
 static __m128 sse_approx_acos(__m128 x)
 {
diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c
index 6b5efc3f8c4..b460d75d77f 100644
--- a/source/blender/blenlib/intern/math_matrix.c
+++ b/source/blender/blenlib/intern/math_matrix.c
@@ -277,7 +277,7 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
   BLI_assert(!ELEM(R, A, B));
 
   /* matrix product: R[j][k] = A[j][i] . B[i][k] */
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 A0 = _mm_loadu_ps(A[0]);
   __m128 A1 = _mm_loadu_ps(A[1]);
   __m128 A2 = _mm_loadu_ps(A[2]);
diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp
index 3fe154c397e..612a71037f7 100644
--- a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp
+++ b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp
@@ -88,7 +88,7 @@ float *BlurBaseOperation::make_gausstab(float rad, int size)
   return gausstab;
 }
 
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
 __m128 *BlurBaseOperation::convert_gausstab_sse(const float *gausstab, int size)
 {
   int n = 2 * size + 1;
diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.h b/source/blender/compositor/operations/COM_BlurBaseOperation.h
index c452b2e4ea1..56dacc96710 100644
--- a/source/blender/compositor/operations/COM_BlurBaseOperation.h
+++ b/source/blender/compositor/operations/COM_BlurBaseOperation.h
@@ -23,16 +23,14 @@
 
 #define MAX_GAUSSTAB_RADIUS 30000
 
-#ifdef __SSE2__
-#  include <emmintrin.h>
-#endif
+#include "BLI_simd.h"
 
 class BlurBaseOperation : public NodeOperation, public QualityStepHelper {
  private:
  protected:
   BlurBaseOperation(DataType data_type);
   float *make_gausstab(float rad, int size);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 *convert_gausstab_sse(const float *gausstab, int size);
 #endif
   float *make_dist_fac_inverse(float rad, int size, int falloff);
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
index e08d30e5ddf..90333f7dd79 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
@@ -26,7 +26,7 @@
 GaussianXBlurOperation::GaussianXBlurOperation() : BlurBaseOperation(COM_DT_COLOR)
 {
   this->m_gausstab = nullptr;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   this->m_gausstab_sse = nullptr;
 #endif
   this->m_filtersize = 0;
@@ -55,7 +55,7 @@ void GaussianXBlurOperation::initExecution()
 
     /* TODO(sergey): De-duplicate with the case below and Y blur. */
     this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
     this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
 #endif
   }
@@ -70,7 +70,7 @@ void GaussianXBlurOperation::updateGauss()
     m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
 
     this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
     this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
 #endif
   }
@@ -95,7 +95,7 @@ void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *d
   int offsetadd = getOffsetAdd();
   int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
 
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 accum_r = _mm_load_ps(color_accum);
   for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax;
        nx += step, index += step) {
@@ -162,7 +162,7 @@ void GaussianXBlurOperation::deinitExecution()
     MEM_freeN(this->m_gausstab);
     this->m_gausstab = nullptr;
   }
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   if (this->m_gausstab_sse) {
     MEM_freeN(this->m_gausstab_sse);
     this->m_gausstab_sse = nullptr;
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
index 9348c05f906..b2bcd79e716 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
@@ -24,7 +24,7 @@
 class GaussianXBlurOperation : public BlurBaseOperation {
  private:
   float *m_gausstab;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 *m_gausstab_sse;
 #endif
   int m_filtersize;
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
index 7710b065ccd..c5b3cf24239 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
@@ -26,7 +26,7 @@
 GaussianYBlurOperation::GaussianYBlurOperation() : BlurBaseOperation(COM_DT_COLOR)
 {
   this->m_gausstab = nullptr;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   this->m_gausstab_sse = nullptr;
 #endif
   this->m_filtersize = 0;
@@ -54,7 +54,7 @@ void GaussianYBlurOperation::initExecution()
     m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
 
     this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
     this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
 #endif
   }
@@ -69,7 +69,7 @@ void GaussianYBlurOperation::updateGauss()
     m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
 
     this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
     this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
 #endif
   }
@@ -94,7 +94,7 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d
   int step = getStep();
   const int bufferIndexx = ((xmin - bufferstartx) * 4);
 
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 accum_r = _mm_load_ps(color_accum);
   for (int ny = ymin; ny < ymax; ny += step) {
     index = (ny - y) + this->m_filtersize;
@@ -162,7 +162,7 @@ void GaussianYBlurOperation::deinitExecution()
     MEM_freeN(this->m_gausstab);
     this->m_gausstab = nullptr;
   }
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   if (this->m_gausstab_sse) {
     MEM_freeN(this->m_gausstab_sse);
     this->m_gausstab_sse = nullptr;
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
index 7ab4ecb5506..d921780876a 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
@@ -24,7 +24,7 @@
 class GaussianYBlurOperation : public BlurBaseOperation {
  private:
   float *m_gausstab;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 *m_gausstab_sse;
 #endif
   int m_filtersize;
diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c
index 4bd306e7679..a94dd6da477 100644
--- a/source/blender/modifiers/intern/MOD_meshdeform.c
+++ b/source/blender/modifiers/intern/MOD_meshdeform.c
@@ -24,6 +24,7 @@
 #include "BLI_utildefines.h"
 
 #include "BLI_math.h"
+#include "BLI_simd.h"
 #include "BLI_task.h"
 
 #include "BLT_translation.h"
@@ -61,10 +62,6 @@
 #include "MOD_ui_common.h"
 #include "MOD_util.h"
 
-#ifdef __SSE2__
-#  include <emmintrin.h>
-#endif
-
 static void initData(ModifierData *md)
 {
   MeshDeformModifierData *mmd = (MeshDeformModifierData *)md;
@@ -188,7 +185,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
   float gridvec[3], dvec[3], ivec[3], wx, wy, wz;
   float weight, cageweight, totweight, *cageco;
   int i, j, a, x, y, z, size;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   __m128 co = _mm_setzero_ps();
 #else
   float co[3] = {0.0f, 0.0f, 0.0f};
@@ -243,7 +240,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
     for (j = 0; j < cell->totinfluence; j++, inf++) {
       cageco = dco[inf->vertex];
       cageweight = weight * inf->weight;
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
       {
         __m128 cageweight_r = _mm_set1_ps(cageweight);
         /* This will load one extra element, this is ok because
@@ -261,7 +258,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
     }
   }
 
-#ifdef __SSE2__
+#ifdef BLI_HAVE_SSE2
   copy_v3_v3(vec, (float *)&co);
 #else
   copy_v3_v3(vec, co);
author	Brecht Van Lommel <brecht@blender.org>	2021-02-14 16:20:51 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-02-17 18:26:24 +0300
commit	859118d8f6ff022a16acbc6435488883424bad25 (patch)
tree	e0f708929a884070f7c3758f7fb58527cbdfd555
parent	ac680c569e1b979f20c2e81dbd4f232085141aad (diff)