diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c')
-rw-r--r-- | Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c | 208 |
1 files changed, 93 insertions, 115 deletions
diff --git a/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c b/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c index 6eb5b6e5b..d715d98f1 100644 --- a/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c +++ b/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c @@ -3,13 +3,13 @@ * Title: arm_cmplx_dot_prod_q31.c * Description: Q31 complex dot product * - * $Date: 27. January 2017 - * $Revision: V.1.5.1 + * $Date: 18. March 2019 + * $Revision: V1.6.0 * * Target Processor: Cortex-M cores * -------------------------------------------------------------------- */ /* - * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -29,147 +29,125 @@ #include "arm_math.h" /** - * @ingroup groupCmplxMath + @ingroup groupCmplxMath */ /** - * @addtogroup cmplx_dot_prod - * @{ + @addtogroup cmplx_dot_prod + @{ */ /** - * @brief Q31 complex dot product - * @param *pSrcA points to the first input vector - * @param *pSrcB points to the second input vector - * @param numSamples number of complex samples in each vector - * @param *realResult real part of the result returned here - * @param *imagResult imaginary part of the result returned here - * @return none. - * - * <b>Scaling and Overflow Behavior:</b> - * \par - * The function is implemented using an internal 64-bit accumulator. - * The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format. - * The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits. - * Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768. - * The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format. - * Input down scaling is not required. + @brief Q31 complex dot product. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] numSamples number of samples in each vector + @param[out] realResult real part of the result returned here + @param[out] imagResult imaginary part of the result returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using an internal 64-bit accumulator. + The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format. + The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits. + Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768. + The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format. + Input down scaling is not required. */ void arm_cmplx_dot_prod_q31( - q31_t * pSrcA, - q31_t * pSrcB, - uint32_t numSamples, - q63_t * realResult, - q63_t * imagResult) + const q31_t * pSrcA, + const q31_t * pSrcB, + uint32_t numSamples, + q63_t * realResult, + q63_t * imagResult) { - q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */ - q31_t a0,b0,c0,d0; - -#if defined (ARM_MATH_DSP) - - /* Run the below code for Cortex-M4 and Cortex-M3 */ - uint32_t blkCnt; /* loop counter */ + uint32_t blkCnt; /* Loop counter */ + q63_t real_sum = 0, imag_sum = 0; /* Temporary result variables */ + q31_t a0,b0,c0,d0; +#if defined (ARM_MATH_LOOPUNROLL) - /*loop Unrolling */ + /* Loop unrolling: Compute 4 outputs at a time */ blkCnt = numSamples >> 2U; - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ while (blkCnt > 0U) { - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - /* Decrement the loop counter */ - blkCnt--; + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + /* Decrement loop counter */ + blkCnt--; } - /* If the numSamples is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ + /* Loop unrolling: Compute remaining outputs */ blkCnt = numSamples % 0x4U; - while (blkCnt > 0U) - { - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - /* Decrement the loop counter */ - blkCnt--; - } - #else - /* Run the below code for Cortex-M0 */ + /* Initialize blkCnt with number of samples */ + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - while (numSamples > 0U) + while (blkCnt > 0U) { - a0 = *pSrcA++; - b0 = *pSrcA++; - c0 = *pSrcB++; - d0 = *pSrcB++; - - real_sum += ((q63_t)a0 * c0) >> 14; - imag_sum += ((q63_t)a0 * d0) >> 14; - real_sum -= ((q63_t)b0 * d0) >> 14; - imag_sum += ((q63_t)b0 * c0) >> 14; - - /* Decrement the loop counter */ - numSamples--; + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + /* Decrement loop counter */ + blkCnt--; } -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Store the real and imaginary results in 16.48 format */ + /* Store real and imaginary result in 16.48 format */ *realResult = real_sum; *imagResult = imag_sum; } /** - * @} end of cmplx_dot_prod group + @} end of cmplx_dot_prod group */ |