Merge pull request #1378 from Eugene8388608/master

Fix some math8 functions, improve qmul8(), minor extensions, fix typos
author: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com> 2022-04-10 18:29:27 +0300
committer: GitHub <noreply@github.com> 2022-04-10 18:29:27 +0300
commit: c38c2ce4aaed669f077fb23cc57cd49a5c8f4e9d (patch)
tree: c9a69cac68258cd630a462139bcc96bdf45f18a1
parent: 0ad135ca0f342b94b3b0b35720a5253d4400f277 (diff)
parent: 34d9061317aa83595488ddb3cc8f6913e07367cf (diff)
2 files changed, 88 insertions, 29 deletions
diff --git a/src/lib8tion.h b/src/lib8tion.h
index 80e27100..43dedbc0 100644
--- a/src/lib8tion.h
+++ b/src/lib8tion.h
@@ -33,7 +33,7 @@ FASTLED_NAMESPACE_BEGIN
      qsub8( i, j) == MAX( (i - j), 0 )
 
  - Saturating signed 8-bit ("7-bit") add.
-     qadd7( i, j) == MIN( (i + j), 0x7F)
+     qadd7( i, j) == MAX( MIN( (i + j), 0x7F), -0x80)
 
 
  - Scaling (down) of unsigned 8- and 16- bit values.
@@ -99,7 +99,7 @@ FASTLED_NAMESPACE_BEGIN
 
 
  - Fast 8-bit "easing in/out" function.
-     ease8InOutCubic(x) == 3(x^i) - 2(x^3)
+     ease8InOutCubic(x) == 3(x^2) - 2(x^3)
      ease8InOutApprox(x) ==
        faster, rougher, approximation of cubic easing
      ease8InOutQuad(x) == quadratic (vs cubic) easing
@@ -208,8 +208,10 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define SUB8_C 1
 #define EASE8_C 1
 #define AVG8_C 1
+#define AVG8R_C 1
 #define AVG7_C 1
 #define AVG16_C 1
+#define AVG16R_C 1
 #define AVG15_C 1
 #define BLEND8_C 1
 
@@ -231,8 +233,10 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define SUB8_C 1
 #define EASE8_C 1
 #define AVG8_C 1
+#define AVG8R_C 1
 #define AVG7_C 1
 #define AVG16_C 1
+#define AVG16R_C 1
 #define AVG15_C 1
 #define BLEND8_C 1
 
@@ -249,8 +253,10 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define ADD8_C 0
 #define SUB8_C 0
 #define AVG8_C 0
+#define AVG8R_C 0
 #define AVG7_C 0
 #define AVG16_C 0
+#define AVG16R_C 0
 #define AVG15_C 0
 
 #define QADD8_AVRASM 1
@@ -260,8 +266,10 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define ADD8_AVRASM 1
 #define SUB8_AVRASM 1
 #define AVG8_AVRASM 1
+#define AVG8R_AVRASM 1
 #define AVG7_AVRASM 1
 #define AVG16_AVRASM 1
+#define AVG16R_AVRASM 1
 #define AVG15_AVRASM 1
 
 // Note: these require hardware MUL instruction
@@ -319,8 +327,10 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define SUB8_C 1
 #define EASE8_C 1
 #define AVG8_C 1
+#define AVG8R_C 1
 #define AVG7_C 1
 #define AVG16_C 1
+#define AVG16R_C 1
 #define AVG15_C 1
 #define BLEND8_C 1
 
@@ -732,9 +742,9 @@ LIB8STATIC uint8_t ease8InOutApprox( fract8 i)
 
         "Ldone_%=:               \n\t"
 
-        : [i] "+&a" (i)
+        : [i] "+a" (i)
         :
-        : "r0", "r1"
+        : "r0"
         );
     return i;
 }
@@ -744,7 +754,7 @@ LIB8STATIC uint8_t ease8InOutApprox( fract8 i)
 
 
 
-/// triwave8: triangle (sawtooth) wave generator.  Useful for
+/// triwave8: triangle wave generator.  Useful for
 ///           turning a one-byte ever-increasing value into a
 ///           one-byte value that oscillates up and down.
 ///
diff --git a/src/lib8tion/math8.h b/src/lib8tion/math8.h
index f95697bd..19a5ad77 100644
--- a/src/lib8tion/math8.h
+++ b/src/lib8tion/math8.h
@@ -50,27 +50,32 @@ LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
 #endif
 }
 
-/// Add one byte to another, saturating at 0x7F
+/// Add one byte to another, saturating at 0x7F and -0x80
 /// @param i - first byte to add
 /// @param j - second byte to add
-/// @returns the sum of i & j, capped at 0xFF
+/// @returns the sum of i & j, capped at 0x7F and -0x80
 LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
 {
 #if QADD7_C == 1
     int16_t t = i + j;
     if( t > 127) t = 127;
+    else if( t < -128) t = -128;
     return t;
 #elif QADD7_AVRASM == 1
     asm volatile(
-        /* First, add j to i, conditioning the V flag */
+        /* First, add j to i, conditioning the V and C flags */
         "add %0, %1    \n\t"
 
         /* Now test the V flag.
-        If V is clear, we branch around a load of 0x7F into i.
+        If V is clear, we branch to end.
         If V is set, we go ahead and load 0x7F into i.
         */
         "brvc L_%=     \n\t"
         "ldi %0, 0x7F  \n\t"
+
+        /* C is set only when both operands were negative.
+        Adding it turns 0x7F into 0x80 (-128), the negative limit. */
+        "adc %0, __zero_reg__\n\t"
         "L_%=: "
         : "+a" (i)
         : "a"  (j)
@@ -129,7 +134,7 @@ LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
 #endif
 }
 
-/// add one byte to another, with one byte result
+/// add a one-byte value to a two-byte value, with a two-byte result
 LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
 {
 #if ADD8_C == 1
@@ -213,15 +218,68 @@ LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
 #endif
 }
 
+/// Calculate an integer average of two unsigned
+///       8-bit integer values (uint8_t).
+///       Fractional results are rounded up, e.g. avg8r(20,41) = 31
+LIB8STATIC_ALWAYS_INLINE uint8_t avg8r( uint8_t i, uint8_t j)
+{
+#if AVG8R_C == 1
+    return (i + j + 1) >> 1;
+#elif AVG8R_AVRASM == 1
+    asm volatile(
+        /* First, add j to i, 9th bit overflows into C flag */
+        "add %0, %1          \n\t"
+        /* Divide by two, moving C flag into high 8th bit, old 1st bit now in C */
+        "ror %0              \n\t"
+        /* Add C flag (the bit just shifted out) to round up */
+        "adc %0, __zero_reg__\n\t"
+        : "+a" (i)
+        : "a"  (j)
+    );
+    return i;
+#else
+#error "No implementation for avg8r available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+///       16-bit integer values (uint16_t).
+///       Fractional results are rounded up, e.g. avg16r(20,41) = 31
+LIB8STATIC_ALWAYS_INLINE uint16_t avg16r( uint16_t i, uint16_t j)
+{
+#if AVG16R_C == 1
+    return (uint32_t)((uint32_t)(i) + (uint32_t)(j) + 1) >> 1;
+#elif AVG16R_AVRASM == 1
+    asm volatile(
+        /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
+        "add %A[i], %A[j]    \n\t"
+        /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
+        "adc %B[i], %B[j]    \n\t"
+        /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
+        "ror %B[i]        \n\t"
+        /* Divide iLo by two, moving C flag into high 8th bit, old 1st bit now in C */
+        "ror %A[i]        \n\t"
+        /* Add C flag (the bit just shifted out) to round up */
+        "adc %A[i], __zero_reg__\n\t"
+        "adc %B[i], __zero_reg__\n\t"
+        : [i] "+a" (i)
+        : [j] "a"  (j)
+    );
+    return i;
+#else
+#error "No implementation for avg16r available."
+#endif
+}
+
 
 /// Calculate an integer average of two signed 7-bit
 ///       integers (int8_t)
 ///       If the first argument is even, result is rounded down.
-///       If the first argument is odd, result is result up.
+///       If the first argument is odd, result is rounded up.
 LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
 {
 #if AVG7_C == 1
-    return ((i + j) >> 1) + (i & 0x1);
+    return (i>>1) + (j>>1) + (i & 0x1);
 #elif AVG7_AVRASM == 1
     asm volatile(
         "asr %1        \n\t"
@@ -239,11 +297,11 @@ LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
 /// Calculate an integer average of two signed 15-bit
 ///       integers (int16_t)
 ///       If the first argument is even, result is rounded down.
-///       If the first argument is odd, result is result up.
+///       If the first argument is odd, result is rounded up.
 LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
 {
 #if AVG15_C == 1
-    return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
+    return (i>>1) + (j>>1) + (i & 0x1);
 #elif AVG15_AVRASM == 1
     asm volatile(
         /* first divide j by 2, throwing away lowest bit */
@@ -321,13 +379,6 @@ LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
 ///          Subtract two numbers, and calculate the modulo
 ///          of the difference and a third number, M.
 ///          In other words, it returns (A-B) % M.
-///          It is designed as a compact mechanism for
-///          incrementing a 'mode' switch and wrapping
-///          around back to 'mode 0' when the switch
-///          goes past the end of the available range.
-///          e.g. if you have seven modes, this switches
-///          to the next one and wraps around if needed:
-///            mode = addmod8( mode, 1, 7);
 ///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
 LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
 {
@@ -376,23 +427,21 @@ LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
 LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
 {
 #if QMUL8_C == 1
-    int p = ((int)i * (int)(j) );
+    unsigned p = (unsigned)i * (unsigned)j;
     if( p > 255) p = 255;
     return p;
 #elif QMUL8_AVRASM == 1
     asm volatile(
         /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
         "  mul %0, %1          \n\t"
+        /* Extract the LOW 8-bits (r0) */
+        "  mov %0, r0          \n\t"
         /* If high byte of result is zero, all is well. */
         "  tst r1              \n\t"
         "  breq Lnospill_%=    \n\t"
-        /* If high byte of result > 0, saturate low byte to 0xFF */
-        "  ldi %0,0xFF         \n\t"
-        "  rjmp Ldone_%=       \n\t"
+        /* If high byte of result > 0, saturate to 0xFF */
+        "  ldi %0, 0xFF         \n\t"
         "Lnospill_%=:          \n\t"
-        /* Extract the LOW 8-bits (r0) */
-        "  mov %0, r0          \n\t"
-        "Ldone_%=:             \n\t"
         /* Restore r1 to "0"; it's expected to always be that */
         "  clr __zero_reg__    \n\t"
         : "+a" (i)
@@ -461,7 +510,7 @@ LIB8STATIC uint8_t sqrt16(uint16_t x)
     return low - 1;
 }
 
-/// blend a variable proproportion(0-255) of one byte to another
+/// blend a variable proportion(0-255) of one byte to another
 /// @param a - the starting byte value
 /// @param b - the byte value to blend toward
 /// @param amountOfB - the proportion (0-255) of b to blend
author	Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>	2022-04-10 18:29:27 +0300
committer	GitHub <noreply@github.com>	2022-04-10 18:29:27 +0300
commit	c38c2ce4aaed669f077fb23cc57cd49a5c8f4e9d (patch)
tree	c9a69cac68258cd630a462139bcc96bdf45f18a1
parent	0ad135ca0f342b94b3b0b35720a5253d4400f277 (diff)
parent	34d9061317aa83595488ddb3cc8f6913e07367cf (diff)