From 859118d8f6ff022a16acbc6435488883424bad25 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 14 Feb 2021 14:20:51 +0100 Subject: BLI: add BLI_simd.h header to wrap SSE includes In preparation of adding Neon support. Ref D8237, T78710 --- source/blender/blenkernel/intern/lattice_deform.c | 11 ++++----- source/blender/blenlib/BLI_simd.h | 28 ++++++++++++++++++++++ source/blender/blenlib/intern/math_base_inline.c | 11 ++++----- source/blender/blenlib/intern/math_color_inline.c | 6 ++--- source/blender/blenlib/intern/math_geom.c | 2 +- source/blender/blenlib/intern/math_matrix.c | 2 +- .../operations/COM_BlurBaseOperation.cpp | 2 +- .../compositor/operations/COM_BlurBaseOperation.h | 6 ++--- .../operations/COM_GaussianXBlurOperation.cpp | 10 ++++---- .../operations/COM_GaussianXBlurOperation.h | 2 +- .../operations/COM_GaussianYBlurOperation.cpp | 10 ++++---- .../operations/COM_GaussianYBlurOperation.h | 2 +- source/blender/modifiers/intern/MOD_meshdeform.c | 11 ++++----- 13 files changed, 60 insertions(+), 43 deletions(-) create mode 100644 source/blender/blenlib/BLI_simd.h diff --git a/source/blender/blenkernel/intern/lattice_deform.c b/source/blender/blenkernel/intern/lattice_deform.c index 81c2f492f7e..2651042939f 100644 --- a/source/blender/blenkernel/intern/lattice_deform.c +++ b/source/blender/blenkernel/intern/lattice_deform.c @@ -31,6 +31,7 @@ #include "MEM_guardedalloc.h" #include "BLI_math.h" +#include "BLI_simd.h" #include "BLI_task.h" #include "BLI_utildefines.h" @@ -49,10 +50,6 @@ #include "BKE_deform.h" -#ifdef __SSE2__ -# include <emmintrin.h> -#endif - /* -------------------------------------------------------------------- */ /** \name Lattice Deform API * \{ */ @@ -171,7 +168,7 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data, /* vgroup influence */ float co_prev[4] = {0}, weight_blend = 0.0f; copy_v3_v3(co_prev, co); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 co_vec = _mm_loadu_ps(co_prev); #endif @@ -232,7 +229,7 @@ 
void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data, u = v * tu[uu - ui + 1]; idx_u = CLAMPIS(uu, 0, idx_u_max); const int idx = idx_w + idx_v + idx_u; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 { __m128 weight_vec = _mm_set1_ps(u); /* We need to address special case for last item to avoid accessing invalid memory. */ @@ -256,7 +253,7 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data, } } } -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 { copy_v3_v3(co, (float *)&co_vec); } diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h new file mode 100644 index 00000000000..1518b6c1de2 --- /dev/null +++ b/source/blender/blenlib/BLI_simd.h @@ -0,0 +1,28 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#pragma once + +/** \file + * \ingroup bli + * + * SIMD instruction support. 
+ */ + +#if defined(__SSE2__) +# include <emmintrin.h> +# define BLI_HAVE_SSE2 +#endif diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c index 28aa81e5858..39945960e68 100644 --- a/source/blender/blenlib/intern/math_base_inline.c +++ b/source/blender/blenlib/intern/math_base_inline.c @@ -31,11 +31,8 @@ #include #include -#ifdef __SSE2__ -# include <emmintrin.h> -#endif - #include "BLI_math_base.h" +#include "BLI_simd.h" #ifdef __cplusplus extern "C" { @@ -685,10 +682,10 @@ MINLINE int integer_digits_i(const int i) /* Internal helpers for SSE2 implementation. * - * NOTE: Are to be called ONLY from inside `#ifdef __SSE2__` !!! + * NOTE: Are to be called ONLY from inside `#ifdef BLI_HAVE_SSE2` !!! */ -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 /* Calculate initial guess for arg^exp based on float representation * This method gives a constant bias, which can be easily compensated by @@ -769,7 +766,7 @@ MALWAYS_INLINE __m128 _bli_math_blend_sse(const __m128 mask, const __m128 a, con return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); } -#endif /* __SSE2__ */ +#endif /* BLI_HAVE_SSE2 */ /* Low level conversion functions */ MINLINE unsigned char unit_float_to_uchar_clamp(float val) diff --git a/source/blender/blenlib/intern/math_color_inline.c b/source/blender/blenlib/intern/math_color_inline.c index 26828cb8717..4c50c1c7af8 100644 --- a/source/blender/blenlib/intern/math_color_inline.c +++ b/source/blender/blenlib/intern/math_color_inline.c @@ -34,7 +34,7 @@ /******************************** Color Space ********************************/ -# ifdef __SSE2__ +# ifdef BLI_HAVE_SSE2 MALWAYS_INLINE __m128 srgb_to_linearrgb_v4_simd(const __m128 c) { @@ -75,7 +75,7 @@ MINLINE void linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3]) srgb[2] = r[2]; } -# else /* __SSE2__ */ +# else /* BLI_HAVE_SSE2 */ MINLINE void srgb_to_linearrgb_v3_v3(float linear[3], const float srgb[3]) { @@ -90,7 +90,7 @@ MINLINE void 
linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3]) srgb[1] = linearrgb_to_srgb(linear[1]); srgb[2] = linearrgb_to_srgb(linear[2]); } -# endif /* __SSE2__ */ +# endif /* BLI_HAVE_SSE2 */ MINLINE void srgb_to_linearrgb_v4(float linear[4], const float srgb[4]) { diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c index 5d78bb0b901..b7e94e6c512 100644 --- a/source/blender/blenlib/intern/math_geom.c +++ b/source/blender/blenlib/intern/math_geom.c @@ -5907,7 +5907,7 @@ static float ff_quad_form_factor(float *p, float *n, float *q0, float *q1, float #if 0 -# include +# include "BLI_simd.h" static __m128 sse_approx_acos(__m128 x) { diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c index 6b5efc3f8c4..b460d75d77f 100644 --- a/source/blender/blenlib/intern/math_matrix.c +++ b/source/blender/blenlib/intern/math_matrix.c @@ -277,7 +277,7 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]) BLI_assert(!ELEM(R, A, B)); /* matrix product: R[j][k] = A[j][i] . 
B[i][k] */ -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 A0 = _mm_loadu_ps(A[0]); __m128 A1 = _mm_loadu_ps(A[1]); __m128 A2 = _mm_loadu_ps(A[2]); diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp index 3fe154c397e..612a71037f7 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.cpp +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.cpp @@ -88,7 +88,7 @@ float *BlurBaseOperation::make_gausstab(float rad, int size) return gausstab; } -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 *BlurBaseOperation::convert_gausstab_sse(const float *gausstab, int size) { int n = 2 * size + 1; diff --git a/source/blender/compositor/operations/COM_BlurBaseOperation.h b/source/blender/compositor/operations/COM_BlurBaseOperation.h index c452b2e4ea1..56dacc96710 100644 --- a/source/blender/compositor/operations/COM_BlurBaseOperation.h +++ b/source/blender/compositor/operations/COM_BlurBaseOperation.h @@ -23,16 +23,14 @@ #define MAX_GAUSSTAB_RADIUS 30000 -#ifdef __SSE2__ -# include <emmintrin.h> -#endif +#include "BLI_simd.h" class BlurBaseOperation : public NodeOperation, public QualityStepHelper { private: protected: BlurBaseOperation(DataType data_type); float *make_gausstab(float rad, int size); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 *convert_gausstab_sse(const float *gausstab, int size); #endif float *make_dist_fac_inverse(float rad, int size, int falloff); diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp index e08d30e5ddf..90333f7dd79 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp @@ -26,7 +26,7 @@ GaussianXBlurOperation::GaussianXBlurOperation() : BlurBaseOperation(COM_DT_COLOR) { this->m_gausstab = nullptr; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 
this->m_gausstab_sse = nullptr; #endif this->m_filtersize = 0; @@ -55,7 +55,7 @@ void GaussianXBlurOperation::initExecution() /* TODO(sergey): De-duplicate with the case below and Y blur. */ this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize); #endif } @@ -70,7 +70,7 @@ void GaussianXBlurOperation::updateGauss() m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize); #endif } @@ -95,7 +95,7 @@ void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *d int offsetadd = getOffsetAdd(); int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 accum_r = _mm_load_ps(color_accum); for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax; nx += step, index += step) { @@ -162,7 +162,7 @@ void GaussianXBlurOperation::deinitExecution() MEM_freeN(this->m_gausstab); this->m_gausstab = nullptr; } -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 if (this->m_gausstab_sse) { MEM_freeN(this->m_gausstab_sse); this->m_gausstab_sse = nullptr; diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h index 9348c05f906..b2bcd79e716 100644 --- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h @@ -24,7 +24,7 @@ class GaussianXBlurOperation : public BlurBaseOperation { private: float *m_gausstab; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 *m_gausstab_sse; #endif int m_filtersize; diff --git 
a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp index 7710b065ccd..c5b3cf24239 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp @@ -26,7 +26,7 @@ GaussianYBlurOperation::GaussianYBlurOperation() : BlurBaseOperation(COM_DT_COLOR) { this->m_gausstab = nullptr; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 this->m_gausstab_sse = nullptr; #endif this->m_filtersize = 0; @@ -54,7 +54,7 @@ void GaussianYBlurOperation::initExecution() m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize); #endif } @@ -69,7 +69,7 @@ void GaussianYBlurOperation::updateGauss() m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS); this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize); #endif } @@ -94,7 +94,7 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d int step = getStep(); const int bufferIndexx = ((xmin - bufferstartx) * 4); -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 accum_r = _mm_load_ps(color_accum); for (int ny = ymin; ny < ymax; ny += step) { index = (ny - y) + this->m_filtersize; @@ -162,7 +162,7 @@ void GaussianYBlurOperation::deinitExecution() MEM_freeN(this->m_gausstab); this->m_gausstab = nullptr; } -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 if (this->m_gausstab_sse) { MEM_freeN(this->m_gausstab_sse); this->m_gausstab_sse = nullptr; diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h index 
7ab4ecb5506..d921780876a 100644 --- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h +++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h @@ -24,7 +24,7 @@ class GaussianYBlurOperation : public BlurBaseOperation { private: float *m_gausstab; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 *m_gausstab_sse; #endif int m_filtersize; diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c index 4bd306e7679..a94dd6da477 100644 --- a/source/blender/modifiers/intern/MOD_meshdeform.c +++ b/source/blender/modifiers/intern/MOD_meshdeform.c @@ -24,6 +24,7 @@ #include "BLI_utildefines.h" #include "BLI_math.h" +#include "BLI_simd.h" #include "BLI_task.h" #include "BLT_translation.h" @@ -61,10 +62,6 @@ #include "MOD_ui_common.h" #include "MOD_util.h" -#ifdef __SSE2__ -# include <emmintrin.h> -#endif - static void initData(ModifierData *md) { MeshDeformModifierData *mmd = (MeshDeformModifierData *)md; @@ -188,7 +185,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3 float gridvec[3], dvec[3], ivec[3], wx, wy, wz; float weight, cageweight, totweight, *cageco; int i, j, a, x, y, z, size; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 __m128 co = _mm_setzero_ps(); #else float co[3] = {0.0f, 0.0f, 0.0f}; @@ -243,7 +240,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3 for (j = 0; j < cell->totinfluence; j++, inf++) { cageco = dco[inf->vertex]; cageweight = weight * inf->weight; -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 { __m128 cageweight_r = _mm_set1_ps(cageweight); /* This will load one extra element, this is ok because @@ -261,7 +258,7 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3 } } -#ifdef __SSE2__ +#ifdef BLI_HAVE_SSE2 copy_v3_v3(vec, (float *)&co); #else copy_v3_v3(vec, co); -- cgit v1.2.3