diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2016-08-08 20:41:46 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2016-08-08 20:41:46 +0300 |
commit | 274f136311dfe6c516ad9f3402a4d698e3b6d9ad (patch) | |
tree | 02cd7afd1e0101250f250ea0f29fbc44d6397398 | |
parent | 5163ab3c740d6611b87296d12f27b9208d13a01a (diff) |
speed up float too
-rw-r--r-- | celt/vq.c | 8 |
1 file changed, 4 insertions, 4 deletions
@@ -181,7 +181,6 @@ static float compute_search_vec(const float *X, float *y, int *iy, int pulsesLef
    __m128i count;
    __m128i pos;
 #else
-   float tmp[4];
    __m128 count;
    __m128 pos;
 #endif
@@ -236,9 +235,10 @@ static float compute_search_vec(const float *X, float *y, int *iy, int pulsesLef
          pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
          best_id = _mm_cvtsi128_si32(pos);
 #else
-         int mask = _mm_movemask_ps(_mm_cmpeq_ps(max, max2));
-         _mm_storeu_ps(&tmp[0], pos);
-         best_id = _mm_cvtss_si32(_mm_load_ss(&tmp[31-__builtin_clz(mask)]));
+         pos = _mm_and_ps(pos, _mm_cmpeq_ps(max, max2));
+         pos = _mm_max_ps(pos, _mm_shuffle_ps(pos, pos, _MM_SHUFFLE(1, 0, 3, 2)));
+         pos = _mm_max_ps(pos, _mm_shuffle_ps(pos, pos, _MM_SHUFFLE(2, 3, 0, 1)));
+         best_id = _mm_cvt_ss2si(pos);
 #endif
       }
       /* Updating the sums of the new pulse(s) */