Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FastLED/FastLED.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Garcia <danielgarcia@gmail.com>2016-01-07 23:28:22 +0300
committerDaniel Garcia <danielgarcia@gmail.com>2016-01-07 23:28:22 +0300
commit1c76daaa8c18ed161c1d34f4c095ed2789c33fbd (patch)
tree8b988c396547b044f253fc3d2330044c71051c02
parent840e5123994676870238b9b573434823bd8c976a (diff)
parent7edd233f79e0ed2b6af0ae6251f212680b31a6b6 (diff)
Merge branch 'master' of https://github.com/FastLED/FastLED
-rw-r--r--lib8tion/scale8.h18
-rw-r--r--noise.cpp40
2 files changed, 54 insertions, 4 deletions
diff --git a/lib8tion/scale8.h b/lib8tion/scale8.h
index 239e9dea..e6bdeefd 100644
--- a/lib8tion/scale8.h
+++ b/lib8tion/scale8.h
@@ -388,8 +388,7 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
return result;
#elif SCALE16_AVRASM == 1
- uint32_t result = 0;
- const uint8_t zero = 0;
+ uint32_t result;
asm volatile(
// result.A-B = i.A x scale.A
" mul %A[i], %A[scale] \n\t"
@@ -406,12 +405,26 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
// well, in case we want to use this code for
// a generic 16x16 multiply somewhere.
+ : [result] "=r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+
+ asm volatile(
// result.C-D = i.B x scale.B
" mul %B[i], %B[scale] \n\t"
//" mov %C[result], r0 \n\t"
//" mov %D[result], r1 \n\t"
" movw %C[result], r0 \n\t"
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale)
+ : "r0", "r1"
+ );
+ const uint8_t zero = 0;
+ asm volatile(
// result.B-D += i.B x scale.A
" mul %B[i], %A[scale] \n\t"
@@ -435,6 +448,7 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
[zero] "r" (zero)
: "r0", "r1"
);
+
result = result >> 16;
return result;
#else
diff --git a/noise.cpp b/noise.cpp
index 49aa6e7c..8b1f6a4d 100644
--- a/noise.cpp
+++ b/noise.cpp
@@ -24,8 +24,32 @@ FL_PROGMEM static uint8_t const p[] = { 151,160,137,91,90,15,
#if FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
#define AVG15(U,V) (((U)+(V)) >> 1)
#else
+// See if we should use the inlined avg15 for AVR with MUL instruction
+#if defined(__AVR__) && (LIB8_ATTINY == 0)
+#define AVG15(U,V) (avg15_inline_avr_mul((U),(V)))
+// avg15_inline_avr_mul: always-inlined AVR-assembly copy of avg15 (cloned from math8.h), selected by AVG15 when __AVR__ is defined and LIB8_ATTINY == 0.
+// Returns (i >> 1) + (j >> 1) + (i & 1): both inputs are halved with arithmetic shifts, then the low bit dropped from i is added back via the carry flag.
+// Forcing this inline in the 3-D 16bit noise produces a 12% speedup overall, at a cost of just +8 bytes of net code size (figures as reported by the original author).
+static int16_t inline __attribute__((always_inline)) avg15_inline_avr_mul( int16_t i, int16_t j)
+{
+ asm volatile(
+ /* first divide j by 2 (arithmetic shift of the high byte, then rotate the dropped bit into the low byte), throwing away lowest bit */
+ "asr %B[j] \n\t"
+ "ror %A[j] \n\t"
+ /* now divide i by 2 the same way, with lowest bit going into the carry flag C */
+ "asr %B[i] \n\t"
+ "ror %A[i] \n\t"
+ /* add j + C to i: low bytes first (consuming C), then high bytes plus the carry out of the low-byte add */
+ "adc %A[i], %A[j] \n\t"
+ "adc %B[i], %B[j] \n\t"
+ : [i] "+a" (i) // i is both read and written in place
+ : [j] "a" (j) ); // NOTE(review): j is declared input-only, yet the asr/ror above modify its registers -- GCC's extended-asm rules say inputs must not be modified; confirm whether this should be a "+a" operand
+ return i;
+}
+#else
#define AVG15(U,V) (avg15((U),(V)))
#endif
+#endif
//
// #define FADE_12
@@ -297,7 +321,13 @@ uint16_t inoise16(uint32_t x, uint32_t y, uint32_t z) {
int32_t ans = inoise16_raw(x,y,z);
ans = ans + 19052L;
uint32_t pan = ans;
- return (pan*220L)>>7;
+ // pan = (ans * 220L) >> 7. That's the same as:
+ // pan = (ans * 440L) >> 8. And this way avoids a 7X four-byte shift-loop on AVR.
+ // Identical math, except for the highest bit, which we don't care about
+ // since we're returning the 'middle' 16 out of a 32-bit value anyway.
+ pan *= 440L;
+ return (pan>>8);
+
// // return scale16by8(pan,220)<<1;
// return ((inoise16_raw(x,y,z)+19052)*220)>>7;
// return scale16by8(inoise16_raw(x,y,z)+19052,220)<<1;
@@ -340,7 +370,13 @@ uint16_t inoise16(uint32_t x, uint32_t y) {
int32_t ans = inoise16_raw(x,y);
ans = ans + 17308L;
uint32_t pan = ans;
- return (pan*242L)>>7;
+ // pan = (ans * 242L) >> 7. That's the same as:
+ // pan = (ans * 484L) >> 8. And this way avoids a 7X four-byte shift-loop on AVR.
+ // Identical math, except for the highest bit, which we don't care about
+ // since we're returning the 'middle' 16 out of a 32-bit value anyway.
+ pan *= 484L;
+ return (pan>>8);
+
// return (uint32_t)(((int32_t)inoise16_raw(x,y)+(uint32_t)17308)*242)>>7;
// return scale16by8(inoise16_raw(x,y)+17308,242)<<1;
}