diff options
author | Mark Kriegsman <kriegsman@tr.org> | 2016-01-07 23:25:10 +0300 |
---|---|---|
committer | Mark Kriegsman <kriegsman@tr.org> | 2016-01-07 23:25:10 +0300 |
commit | 7edd233f79e0ed2b6af0ae6251f212680b31a6b6 (patch) | |
tree | 2adc915cc2f5cb9c3a5d0bf568f9d8d9d7b7485c | |
parent | 3a8bb4c09bac0bbfb13cbc749e83396d417d0ccc (diff) |
12% speedup in noise on AVR (net with previous commit) by using inline avg15 on AVRs with MUL. Code size goes up 8 bytes for 16-bit, 3-D noise.
-rw-r--r-- | noise.cpp | 24 |
1 files changed, 24 insertions, 0 deletions
@@ -24,8 +24,32 @@ FL_PROGMEM static uint8_t const p[] = { 151,160,137,91,90,15,
 #if FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
 #define AVG15(U,V) (((U)+(V)) >> 1)
 #else
+// See if we should use the inlined avg15 for AVR with MUL instruction
+#if defined(__AVR__) && (LIB8_ATTINY == 0)
+#define AVG15(U,V) (avg15_inline_avr_mul((U),(V)))
+// Inlined copy of avg15 (cloned from math8.h): returns (i>>1) + (j>>1) + (i&1).
+// Forcing this inline in the 3-D 16bit noise produces a 12% speedup overall,
+// at a cost of just +8 bytes of net code size.
+static int16_t inline __attribute__((always_inline)) avg15_inline_avr_mul( int16_t i, int16_t j)
+{
+    asm volatile(
+        /* first divide j by 2, throwing away lowest bit */
+        "asr %B[j] \n\t"
+        "ror %A[j] \n\t"
+        /* now divide i by 2, with lowest bit going into C */
+        "asr %B[i] \n\t"
+        "ror %A[i] \n\t"
+        /* add j + C to i */
+        "adc %A[i], %A[j] \n\t"
+        "adc %B[i], %B[j] \n\t"
+        /* j is shifted in place above, so it must be a read-write operand, not input-only */
+        : [i] "+a" (i), [j] "+a" (j) );
+    return i;
+}
+#else
 #define AVG15(U,V) (avg15((U),(V)))
 #endif
+#endif
 // // #define FADE_12 |