Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/Flipper-Zero/STM32CubeWB.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c')
-rw-r--r--Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c230
1 files changed, 93 insertions, 137 deletions
diff --git a/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c b/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
index 8c7ca313b..9651999e3 100644
--- a/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
+++ b/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
@@ -3,13 +3,13 @@
* Title: arm_cmplx_mult_real_f32.c
* Description: Floating-point complex by real multiplication
*
- * $Date: 27. January 2017
- * $Revision: V.1.5.1
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -29,185 +29,141 @@
#include "arm_math.h"
/**
- * @ingroup groupCmplxMath
+ @ingroup groupCmplxMath
*/
/**
- * @defgroup CmplxByRealMult Complex-by-Real Multiplication
- *
- * Multiplies a complex vector by a real vector and generates a complex result.
- * The data in the complex arrays is stored in an interleaved fashion
- * (real, imag, real, imag, ...).
- * The parameter <code>numSamples</code> represents the number of complex
- * samples processed. The complex arrays have a total of <code>2*numSamples</code>
- * real values while the real array has a total of <code>numSamples</code>
- * real values.
- *
- * The underlying algorithm is used:
- *
- * <pre>
- * for(n=0; n<numSamples; n++) {
- * pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
- * pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
- * }
- * </pre>
- *
- * There are separate functions for floating-point, Q15, and Q31 data types.
+ @defgroup CmplxByRealMult Complex-by-Real Multiplication
+
+ Multiplies a complex vector by a real vector and generates a complex result.
+ The data in the complex arrays is stored in an interleaved fashion
+ (real, imag, real, imag, ...).
+ The parameter <code>numSamples</code> represents the number of complex
+ samples processed. The complex arrays have a total of <code>2*numSamples</code>
+ real values while the real array has a total of <code>numSamples</code>
+ real values.
+
+ The underlying algorithm is used:
+
+ <pre>
+ for (n = 0; n < numSamples; n++) {
+ pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
+ pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
+ }
+ </pre>
+
+ There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
- * @addtogroup CmplxByRealMult
- * @{
+ @addtogroup CmplxByRealMult
+ @{
*/
-
/**
- * @brief Floating-point complex-by-real multiplication
- * @param[in] *pSrcCmplx points to the complex input vector
- * @param[in] *pSrcReal points to the real input vector
- * @param[out] *pCmplxDst points to the complex output vector
- * @param[in] numSamples number of samples in each vector
- * @return none.
+ @brief Floating-point complex-by-real multiplication.
+ @param[in] pSrcCmplx points to complex input vector
+ @param[in] pSrcReal points to real input vector
+ @param[out] pCmplxDst points to complex output vector
+ @param[in] numSamples number of samples in each vector
+ @return none
*/
void arm_cmplx_mult_real_f32(
- float32_t * pSrcCmplx,
- float32_t * pSrcReal,
- float32_t * pCmplxDst,
- uint32_t numSamples)
+ const float32_t * pSrcCmplx,
+ const float32_t * pSrcReal,
+ float32_t * pCmplxDst,
+ uint32_t numSamples)
{
- float32_t in; /* Temporary variable to store input value */
- uint32_t blkCnt; /* loop counters */
-
-#if defined (ARM_MATH_DSP)
-
- /* Run the below code for Cortex-M4 and Cortex-M3 */
- float32_t inA1, inA2, inA3, inA4; /* Temporary variables to hold input data */
- float32_t inA5, inA6, inA7, inA8; /* Temporary variables to hold input data */
- float32_t inB1, inB2, inB3, inB4; /* Temporary variables to hold input data */
- float32_t out1, out2, out3, out4; /* Temporary variables to hold output data */
- float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */
-
- /* loop Unrolling */
- blkCnt = numSamples >> 2U;
-
- /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while (blkCnt > 0U)
- {
- /* C[2 * i] = A[2 * i] * B[i]. */
- /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
- /* read input from complex input buffer */
- inA1 = pSrcCmplx[0];
- inA2 = pSrcCmplx[1];
- /* read input from real input buffer */
- inB1 = pSrcReal[0];
-
- /* read input from complex input buffer */
- inA3 = pSrcCmplx[2];
-
- /* multiply complex buffer real input with real buffer input */
- out1 = inA1 * inB1;
-
- /* read input from complex input buffer */
- inA4 = pSrcCmplx[3];
+ uint32_t blkCnt; /* Loop counter */
+ float32_t in; /* Temporary variable */
- /* multiply complex buffer imaginary input with real buffer input */
- out2 = inA2 * inB1;
+#if defined(ARM_MATH_NEON)
+ float32x4_t r;
+ float32x4x2_t ab,outCplx;
- /* read input from real input buffer */
- inB2 = pSrcReal[1];
- /* read input from complex input buffer */
- inA5 = pSrcCmplx[4];
+ /* Compute 4 outputs at a time */
+ blkCnt = numSamples >> 2U;
- /* multiply complex buffer real input with real buffer input */
- out3 = inA3 * inB2;
+ while (blkCnt > 0U)
+ {
+ ab = vld2q_f32(pSrcCmplx); // load & separate real/imag pSrcA (de-interleave 2)
+ r = vld1q_f32(pSrcReal); // load & separate real/imag pSrcB
- /* read input from complex input buffer */
- inA6 = pSrcCmplx[5];
- /* read input from real input buffer */
- inB3 = pSrcReal[2];
+ /* Increment pointers */
+ pSrcCmplx += 8;
+ pSrcReal += 4;
- /* multiply complex buffer imaginary input with real buffer input */
- out4 = inA4 * inB2;
+ outCplx.val[0] = vmulq_f32(ab.val[0], r);
+ outCplx.val[1] = vmulq_f32(ab.val[1], r);
- /* read input from complex input buffer */
- inA7 = pSrcCmplx[6];
+ vst2q_f32(pCmplxDst, outCplx);
+ pCmplxDst += 8;
- /* multiply complex buffer real input with real buffer input */
- out5 = inA5 * inB3;
+ blkCnt--;
+ }
- /* read input from complex input buffer */
- inA8 = pSrcCmplx[7];
-
- /* multiply complex buffer imaginary input with real buffer input */
- out6 = inA6 * inB3;
-
- /* read input from real input buffer */
- inB4 = pSrcReal[3];
-
- /* store result to destination bufer */
- pCmplxDst[0] = out1;
-
- /* multiply complex buffer real input with real buffer input */
- out7 = inA7 * inB4;
-
- /* store result to destination bufer */
- pCmplxDst[1] = out2;
-
- /* multiply complex buffer imaginary input with real buffer input */
- out8 = inA8 * inB4;
+ /* Tail */
+ blkCnt = numSamples & 3;
+#else
+#if defined (ARM_MATH_LOOPUNROLL)
- /* store result to destination bufer */
- pCmplxDst[2] = out3;
- pCmplxDst[3] = out4;
- pCmplxDst[4] = out5;
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = numSamples >> 2U;
- /* incremnet complex input buffer by 8 to process next samples */
- pSrcCmplx += 8U;
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i ] = A[2 * i ] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
- /* store result to destination bufer */
- pCmplxDst[5] = out6;
+ in = *pSrcReal++;
+ /* store result in destination buffer. */
+ *pCmplxDst++ = *pSrcCmplx++ * in;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
- /* increment real input buffer by 4 to process next samples */
- pSrcReal += 4U;
+ in = *pSrcReal++;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
- /* store result to destination bufer */
- pCmplxDst[6] = out7;
- pCmplxDst[7] = out8;
+ in = *pSrcReal++;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
- /* increment destination buffer by 8 to process next sampels */
- pCmplxDst += 8U;
+ in = *pSrcReal++;
+ *pCmplxDst++ = *pSrcCmplx++* in;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
- /* Decrement the numSamples loop counter */
+ /* Decrement loop counter */
blkCnt--;
}
- /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
+ /* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
- /* Run the below code for Cortex-M0 */
+ /* Initialize blkCnt with number of samples */
blkCnt = numSamples;
-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
- /* C[2 * i] = A[2 * i] * B[i]. */
- /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ /* C[2 * i ] = A[2 * i ] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+
in = *pSrcReal++;
- /* store the result in the destination buffer. */
- *pCmplxDst++ = (*pSrcCmplx++) * (in);
- *pCmplxDst++ = (*pSrcCmplx++) * (in);
+ /* store result in destination buffer. */
+ *pCmplxDst++ = *pSrcCmplx++ * in;
+ *pCmplxDst++ = *pSrcCmplx++ * in;
- /* Decrement the numSamples loop counter */
+ /* Decrement loop counter */
blkCnt--;
}
+
}
/**
- * @} end of CmplxByRealMult group
+ @} end of CmplxByRealMult group
*/