12% speedup in noise on AVR (net with previous commit) by using inline avg15 on AVRs with MUL. Code size goes up 8 bytes for 16-bit, 3-D noise.

author: Mark Kriegsman <kriegsman@tr.org> 2016-01-07 23:25:10 +0300
committer: Mark Kriegsman <kriegsman@tr.org> 2016-01-07 23:25:10 +0300
commit: 7edd233f79e0ed2b6af0ae6251f212680b31a6b6 (patch)
tree: 2adc915cc2f5cb9c3a5d0bf568f9d8d9d7b7485c
parent: 3a8bb4c09bac0bbfb13cbc749e83396d417d0ccc (diff)
1 files changed, 24 insertions, 0 deletions
diff --git a/noise.cpp b/noise.cpp
index 72b0211f..8b1f6a4d 100644
--- a/noise.cpp
+++ b/noise.cpp
@@ -24,8 +24,32 @@ FL_PROGMEM static uint8_t const p[] = { 151,160,137,91,90,15,
 #if FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
 #define AVG15(U,V) (((U)+(V)) >> 1)
 #else
+// See if we should use the inlined avg15 for AVR with MUL instruction
+#if defined(__AVR__) && (LIB8_ATTINY == 0)
+#define AVG15(U,V) (avg15_inline_avr_mul((U),(V)))
+// Inlined copy of avg15 (cloned from math8.h) for AVR with MUL instruction.
+// Returns (i >> 1) + (j >> 1) + (i & 1), so the sum never overflows 16 bits.
+// Forced inline: 12% faster 3-D 16bit noise overall, for +8 bytes of net code size.
+static int16_t inline __attribute__((always_inline))  avg15_inline_avr_mul( int16_t i, int16_t j)
+{
+    asm volatile(
+                 /* first halve j (arithmetic shift), throwing away its lowest bit */
+                 "asr %B[j]          \n\t"
+                 "ror %A[j]          \n\t"
+                 /* now halve i, with its lowest bit going into the carry flag C */
+                 "asr %B[i]          \n\t"
+                 "ror %A[i]          \n\t"
+                 /* 16-bit add of j + C into i: low byte first, then high byte */
+                 "adc %A[i], %A[j]   \n\t"
+                 "adc %B[i], %B[j]   \n\t"
+                 : [i] "+a" (i),   /* read-write: holds the result */
+                   [j] "+a" (j) ); /* read-write: the asm shifts j in place */
+    return i;
+}
+#else // not an AVR with MUL: use the shared avg15() from math8.h
 #define AVG15(U,V) (avg15((U),(V)))
 #endif
+#endif // FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
 
 //
 // #define FADE_12
author	Mark Kriegsman <kriegsman@tr.org>	2016-01-07 23:25:10 +0300
committer	Mark Kriegsman <kriegsman@tr.org>	2016-01-07 23:25:10 +0300
commit	7edd233f79e0ed2b6af0ae6251f212680b31a6b6 (patch)
tree	2adc915cc2f5cb9c3a5d0bf568f9d8d9d7b7485c
parent	3a8bb4c09bac0bbfb13cbc749e83396d417d0ccc (diff)