From ec64b3c5b7abd621dfddee6b4cc115298e5d6803 Mon Sep 17 00:00:00 2001 From: Felicia Lim Date: Thu, 9 Dec 2021 12:54:43 -0800 Subject: Fix buffer overflow in xcorr_kernel_sse4_1 Previously, an overflow could occur in the last loop if `len` was not a multiple of 4, because OP_CVTEPI16_EPI32_M64 tries to load 64 bits but there are insufficient bits allocated in `x`. --- celt/x86/pitch_sse4_1.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/celt/x86/pitch_sse4_1.c b/celt/x86/pitch_sse4_1.c index a092c68b..58db6c7f 100644 --- a/celt/x86/pitch_sse4_1.c +++ b/celt/x86/pitch_sse4_1.c @@ -117,6 +117,11 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 __m128i sum0, sum1, sum2, sum3, vecSum; __m128i initSum; +#ifdef OPUS_CHECK_ASM + opus_val32 sum_c[4]={0,0,0,0}; + xcorr_kernel_c(x, y, sum_c, len); +#endif + celt_assert(len >= 3); sum0 = _mm_setzero_si128(); @@ -177,19 +182,56 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 vecSum = _mm_add_epi32(vecSum, sum2); } - for (;j