1 files changed, 119 insertions, 118 deletions
diff --git a/Drivers/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c b/Drivers/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
index 4a006aa89..a5840b733 100644
--- a/Drivers/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
+++ b/Drivers/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
@@ -3,13 +3,13 @@
  * Title:        arm_correlate_fast_q31.c
  * Description:  Fast Q31 Correlation
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
  *
  * Target Processor: Cortex-M cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,61 +29,58 @@
 #include "arm_math.h"
 
 /**
- * @ingroup groupFilters
+  @ingroup groupFilters
  */
 
 /**
- * @addtogroup Corr
- * @{
+  @addtogroup Corr
+  @{
  */
 
 /**
- * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
- * @param[in] *pSrcA points to the first input sequence.
- * @param[in] srcALen length of the first input sequence.
- * @param[in] *pSrcB points to the second input sequence.
- * @param[in] srcBLen length of the second input sequence.
- * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
- * @return none.
- *
- * @details
- * <b>Scaling and Overflow Behavior:</b>
- *
- * \par
- * This function is optimized for speed at the expense of fixed-point precision and overflow protection.
- * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
- * These intermediate results are accumulated in a 32-bit register in 2.30 format.
- * Finally, the accumulator is saturated and converted to a 1.31 result.
- *
- * \par
- * The fast version has the same overflow behavior as the standard version but provides less precision since it discards the low 32 bits of each multiplication result.
- * In order to avoid overflows completely the input signals must be scaled down.
- * The input signals should be scaled down to avoid intermediate overflows.
- * Scale down one of the inputs by 1/min(srcALen, srcBLen)to avoid overflows since a
- * maximum of min(srcALen, srcBLen) number of additions is carried internally.
- *
- * \par
- * See <code>arm_correlate_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.
+  @brief         Correlation of Q31 sequences (fast version).
+  @param[in]     pSrcA      points to the first input sequence
+  @param[in]     srcALen    length of the first input sequence
+  @param[in]     pSrcB      points to the second input sequence
+  @param[in]     srcBLen    length of the second input sequence
+  @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   This function is optimized for speed at the expense of fixed-point precision and overflow protection.
+                   The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
+                   These intermediate results are accumulated in a 32-bit register in 2.30 format.
+                   Finally, the accumulator is saturated and converted to a 1.31 result.
+  @par
+                   The fast version has the same overflow behavior as the standard version but provides less precision since it discards the low 32 bits of each multiplication result.
+                   In order to avoid overflows completely the input signals must be scaled down.
+                   The input signals should be scaled down to avoid intermediate overflows.
+                   Scale down one of the inputs by 1/min(srcALen, srcBLen) to avoid overflows since a
+                   maximum of min(srcALen, srcBLen) number of additions is carried internally.
+
+  @remark
+                   Refer to \ref arm_correlate_q31() for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.
  */
 
 void arm_correlate_fast_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst)
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst)
 {
-  q31_t *pIn1;                                   /* inputA pointer               */
-  q31_t *pIn2;                                   /* inputB pointer               */
-  q31_t *pOut = pDst;                            /* output pointer               */
-  q31_t *px;                                     /* Intermediate inputA pointer  */
-  q31_t *py;                                     /* Intermediate inputB pointer  */
-  q31_t *pSrc1;                                  /* Intermediate pointers        */
-  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                  */
-  q31_t x0, x1, x2, x3, c0;                      /* temporary variables for holding input and coefficient values */
-  uint32_t j, k = 0U, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counter                 */
-  int32_t inc = 1;                               /* Destination address modifier */
-
+  const q31_t *pIn1;                                   /* InputA pointer */
+  const q31_t *pIn2;                                   /* InputB pointer */
+        q31_t *pOut = pDst;                            /* Output pointer */
+  const q31_t *px;                                     /* Intermediate inputA pointer */
+  const q31_t *py;                                     /* Intermediate inputB pointer */
+  const q31_t *pSrc1;                                  /* Intermediate pointers */
+        q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators */
+        q31_t x0, x1, x2, x3, c0;                      /* Temporary variables for holding input and coefficient values */
+        uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
+        uint32_t j, k, count, blkCnt;                  /* Loop counters */
+        uint32_t outBlockSize;
+        int32_t inc = 1;                               /* Destination address modifier */
 
   /* The algorithm implementation is based on the lengths of the inputs. */
   /* srcB is always made to slide across srcA. */
@@ -91,10 +88,10 @@ void arm_correlate_fast_q31(
   if (srcALen >= srcBLen)
   {
     /* Initialization of inputA pointer */
-    pIn1 = (pSrcA);
+    pIn1 = pSrcA;
 
     /* Initialization of inputB pointer */
-    pIn2 = (pSrcB);
+    pIn2 = pSrcB;
 
     /* Number of output samples is calculated */
     outBlockSize = (2U * srcALen) - 1U;
@@ -112,10 +109,10 @@ void arm_correlate_fast_q31(
   else
   {
     /* Initialization of inputA pointer */
-    pIn1 = (pSrcB);
+    pIn1 = pSrcB;
 
     /* Initialization of inputB pointer */
-    pIn2 = (pSrcA);
+    pIn2 = pSrcA;
 
     /* srcBLen is always considered as shorter or equal to srcALen */
     j = srcBLen;
@@ -132,14 +129,15 @@ void arm_correlate_fast_q31(
   }
 
   /* The function is internally
-   * divided into three parts according to the number of multiplications that has to be
-   * taken place between inputA samples and inputB samples. In the first part of the
+   * divided into three stages according to the number of multiplications that have to
+   * take place between inputA samples and inputB samples. In the first stage of the
    * algorithm, the multiplications increase by one for every iteration.
-   * In the second part of the algorithm, srcBLen number of multiplications are done.
-   * In the third part of the algorithm, the multiplications decrease by one
-   * for every iteration.*/
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.
+   * In the third stage of the algorithm, the multiplications decrease by one
+   * for every iteration. */
+
   /* The algorithm is implemented in three stages.
-   * The loop counters of each stage is initiated here. */
+     The loop counters of each stage are initialized here. */
   blockSize1 = srcBLen - 1U;
   blockSize2 = srcALen - (srcBLen - 1U);
   blockSize3 = blockSize1;
@@ -176,7 +174,7 @@ void arm_correlate_fast_q31(
     sum = 0;
 
     /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = count >> 2;
+    k = count >> 2U;
 
     /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
      ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@@ -184,18 +182,21 @@ void arm_correlate_fast_q31(
     {
       /* x[0] * y[srcBLen - 4] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* x[1] * y[srcBLen - 3] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* x[2] * y[srcBLen - 2] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* x[3] * y[srcBLen - 1] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
 
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       k--;
     }
 
@@ -205,12 +206,12 @@ void arm_correlate_fast_q31(
 
     while (k > 0U)
     {
-      /* Perform the multiply-accumulates */
+      /* Perform the multiply-accumulate */
       /* x[0] * y[srcBLen - 1] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
 
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       k--;
     }
 
@@ -223,10 +224,10 @@ void arm_correlate_fast_q31(
     py = pSrc1 - count;
     px = pIn1;
 
-    /* Increment the MAC count */
+    /* Increment MAC count */
     count++;
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     blockSize1--;
   }
 
@@ -270,9 +271,9 @@ void arm_correlate_fast_q31(
       acc3 = 0;
 
       /* read x[0], x[1], x[2] samples */
-      x0 = *(px++);
-      x1 = *(px++);
-      x2 = *(px++);
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;
 
       /* Apply loop unrolling and compute 4 MACs simultaneously. */
       k = srcBLen >> 2U;
@@ -282,10 +283,9 @@ void arm_correlate_fast_q31(
       do
       {
         /* Read y[0] sample */
-        c0 = *(py++);
-
+        c0 = *py++;
         /* Read x[3] sample */
-        x3 = *(px++);
+        x3 = *px++;
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[0] * y[0] */
@@ -297,13 +297,13 @@ void arm_correlate_fast_q31(
         /* acc3 +=  x[3] * y[0] */
         acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);
 
-        /* Read y[1] sample */
-        c0 = *(py++);
 
+        /* Read y[1] sample */
+        c0 = *py++;
         /* Read x[4] sample */
-        x0 = *(px++);
+        x0 = *px++;
 
-        /* Perform the multiply-accumulates */
+        /* Perform the multiply-accumulate */
         /* acc0 +=  x[1] * y[1] */
         acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x1 * c0)) >> 32);
         /* acc1 +=  x[2] * y[1] */
@@ -313,11 +313,11 @@ void arm_correlate_fast_q31(
         /* acc3 +=  x[4] * y[1] */
         acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);
 
-        /* Read y[2] sample */
-        c0 = *(py++);
 
+        /* Read y[2] sample */
+        c0 = *py++;
         /* Read x[5] sample */
-        x1 = *(px++);
+        x1 = *px++;
 
         /* Perform the multiply-accumulates */
         /* acc0 +=  x[2] * y[2] */
@@ -329,11 +329,11 @@ void arm_correlate_fast_q31(
         /* acc3 +=  x[5] * y[2] */
         acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);
 
-        /* Read y[3] sample */
-        c0 = *(py++);
 
+        /* Read y[3] sample */
+        c0 = *py++;
         /* Read x[6] sample */
-        x2 = *(px++);
+        x2 = *px++;
 
         /* Perform the multiply-accumulates */
         /* acc0 +=  x[3] * y[3] */
@@ -355,10 +355,9 @@ void arm_correlate_fast_q31(
       while (k > 0U)
       {
         /* Read y[4] sample */
-        c0 = *(py++);
-
+        c0 = *py++;
         /* Read x[7] sample */
-        x3 = *(px++);
+        x3 = *px++;
 
         /* Perform the multiply-accumulates */
         /* acc0 +=  x[4] * y[4] */
@@ -375,7 +374,7 @@ void arm_correlate_fast_q31(
         x1 = x2;
         x2 = x3;
 
-        /* Decrement the loop counter */
+        /* Decrement loop counter */
         k--;
       }
 
@@ -400,8 +399,7 @@ void arm_correlate_fast_q31(
       px = pIn1 + count;
       py = pIn2;
 
-
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       blkCnt--;
     }
 
@@ -423,15 +421,15 @@ void arm_correlate_fast_q31(
       {
         /* Perform the multiply-accumulates */
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
 
-        /* Decrement the loop counter */
+        /* Decrement loop counter */
         k--;
       }
 
@@ -443,9 +441,9 @@ void arm_correlate_fast_q31(
       {
         /* Perform the multiply-accumulate */
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
 
-        /* Decrement the loop counter */
+        /* Decrement loop counter */
         k--;
       }
 
@@ -454,15 +452,14 @@ void arm_correlate_fast_q31(
       /* Destination pointer is updated according to the address modifier, inc */
       pOut += inc;
 
-      /* Increment the MAC count */
+      /* Increment MAC count */
       count++;
 
       /* Update the inputA and inputB pointers for next MAC calculation */
       px = pIn1 + count;
       py = pIn2;
 
-
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       blkCnt--;
     }
   }
@@ -477,16 +474,16 @@ void arm_correlate_fast_q31(
       /* Accumulator is made zero for every iteration */
       sum = 0;
 
-      /* Loop over srcBLen */
+      /* srcBLen number of MACs should be performed */
       k = srcBLen;
 
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
         sum = (q31_t) ((((q63_t) sum << 32) +
-                        ((q63_t) * px++ * (*py++))) >> 32);
+                        ((q63_t) *px++ * (*py++))) >> 32);
 
-        /* Decrement the loop counter */
+        /* Decrement loop counter */
         k--;
       }
 
@@ -495,18 +492,19 @@ void arm_correlate_fast_q31(
       /* Destination pointer is updated according to the address modifier, inc */
       pOut += inc;
 
-      /* Increment the MAC count */
+      /* Increment MAC count */
       count++;
 
       /* Update the inputA and inputB pointers for next MAC calculation */
       px = pIn1 + count;
       py = pIn2;
 
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       blkCnt--;
     }
   }
 
+
   /* --------------------------
    * Initializations of stage3
    * -------------------------*/
@@ -545,21 +543,24 @@ void arm_correlate_fast_q31(
      ** a second loop below computes MACs for the remaining 1 to 3 samples. */
     while (k > 0U)
     {
-      /* Perform the multiply-accumulates */
+      /* Perform the multiply-accumulate */
       /* sum += x[srcALen - srcBLen + 4] * y[3] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* sum += x[srcALen - srcBLen + 3] * y[2] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* sum += x[srcALen - srcBLen + 2] * y[1] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
+
       /* sum += x[srcALen - srcBLen + 1] * y[0] */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
 
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       k--;
     }
 
@@ -569,11 +570,11 @@ void arm_correlate_fast_q31(
 
     while (k > 0U)
     {
-      /* Perform the multiply-accumulates */
+      /* Perform the multiply-accumulate */
       sum = (q31_t) ((((q63_t) sum << 32) +
-                      ((q63_t) * px++ * (*py++))) >> 32);
+                      ((q63_t) *px++ * (*py++))) >> 32);
 
-      /* Decrement the loop counter */
+      /* Decrement loop counter */
       k--;
     }
 
@@ -586,15 +587,15 @@ void arm_correlate_fast_q31(
     px = ++pSrc1;
     py = pIn2;
 
-    /* Decrement the MAC count */
+    /* Decrement MAC count */
     count--;
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     blockSize3--;
   }
 
 }
 
 /**
- * @} end of Corr group
+  @} end of Corr group
  */