diff options
author | Mark Kriegsman <kriegsman@tr.org> | 2017-01-15 20:56:45 +0300 |
---|---|---|
committer | Mark Kriegsman <kriegsman@tr.org> | 2017-01-15 20:56:45 +0300 |
commit | 94e46e3978b9f2bce8f76383ff9a19879b245ad0 (patch) | |
tree | f15860df57167dac6b3dc3515b631610af9b0f6d | |
parent | d90a4524714c75884256db201beda2d7c4d10516 (diff) |
Updated scale16 and scale16by8 for AVR when FASTLED_SCALE8_FIXED is 1. scale16(foo,65535) now equals foo. Updated DemoReel100 to stay within range of NUM_LEDS on non-AVR platforms. Fixes #368.
-rw-r--r-- | examples/DemoReel100/DemoReel100.ino | 4 |
-rw-r--r-- | lib8tion/scale8.h | 113 |
2 files changed, 114 insertions, 3 deletions
diff --git a/examples/DemoReel100/DemoReel100.ino b/examples/DemoReel100/DemoReel100.ino index ffa3a9fd..03534a91 100644 --- a/examples/DemoReel100/DemoReel100.ino +++ b/examples/DemoReel100/DemoReel100.ino @@ -99,7 +99,7 @@ void sinelon() { // a colored dot sweeping back and forth, with fading trails fadeToBlackBy( leds, NUM_LEDS, 20); - int pos = beatsin16(13,0,NUM_LEDS); + int pos = beatsin16( 13, 0, NUM_LEDS-1 ); leds[pos] += CHSV( gHue, 255, 192); } @@ -119,7 +119,7 @@ void juggle() { fadeToBlackBy( leds, NUM_LEDS, 20); byte dothue = 0; for( int i = 0; i < 8; i++) { - leds[beatsin16(i+7,0,NUM_LEDS)] |= CHSV(dothue, 200, 255); + leds[beatsin16( i+7, 0, NUM_LEDS-1 )] |= CHSV(dothue, 200, 255); dothue += 32; } } diff --git a/lib8tion/scale8.h b/lib8tion/scale8.h index ed5e2f0b..56392258 100644 --- a/lib8tion/scale8.h +++ b/lib8tion/scale8.h @@ -424,6 +424,34 @@ LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) #endif return result; #elif SCALE16BY8_AVRASM == 1 +#if FASTLED_SCALE8_FIXED == 1 + uint16_t result = 0; + asm volatile( + // result.A = HighByte( (i.A x scale) + i.A ) + " mul %A[i], %[scale] \n\t" + " add r0, %A[i] \n\t" + // " adc r1, [zero] \n\t" + // " mov %A[result], r1 \n\t" + " adc %A[result], r1 \n\t" + + // result.A-B += i.B x scale + " mul %B[i], %[scale] \n\t" + " add %A[result], r0 \n\t" + " adc %B[result], r1 \n\t" + + // cleanup r1 + " clr __zero_reg__ \n\t" + + // result.A-B += i.B + " add %A[result], %B[i] \n\t" + " adc %B[result], __zero_reg__ \n\t" + + : [result] "+r" (result) + : [i] "r" (i), [scale] "r" (scale) + : "r0", "r1" + ); + return result; +#else uint16_t result = 0; asm volatile( // result.A = HighByte(i.A x j ) @@ -444,6 +472,7 @@ LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) : "r0", "r1" ); return result; +#endif #else #error "No implementation for scale16by8 available." 
#endif @@ -464,6 +493,14 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) #endif return result; #elif SCALE16_AVRASM == 1 +#if FASTLED_SCALE8_FIXED == 1 + // implemented sort of like + // result = ((i * scale) + i ) / 65536 + // + // why not like this, you may ask? + // result = (i * (scale+1)) / 65536 + // the answer is that if scale is 65535, then scale+1 + // will be zero, which is not what we want. uint32_t result; asm volatile( // result.A-B = i.A x scale.A @@ -474,7 +511,80 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) //" mov %B[result], r1 \n\t" // which can be written as... " movw %A[result], r0 \n\t" - // We actually need to do anything with r0, + // Because we're going to add i.A-B to + // result.A-D, we DO need to keep both + // the r0 and r1 portions of the product + // UNlike in the 'unfixed scale8' version. + // So the movw here is needed. + : [result] "=r" (result) + : [i] "r" (i), + [scale] "r" (scale) + : "r0", "r1" + ); + + asm volatile( + // result.C-D = i.B x scale.B + " mul %B[i], %B[scale] \n\t" + //" mov %C[result], r0 \n\t" + //" mov %D[result], r1 \n\t" + " movw %C[result], r0 \n\t" + : [result] "+r" (result) + : [i] "r" (i), + [scale] "r" (scale) + : "r0", "r1" + ); + + const uint8_t zero = 0; + asm volatile( + // result.B-D += i.B x scale.A + " mul %B[i], %A[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // result.B-D += i.A x scale.B + " mul %A[i], %B[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // cleanup r1 + " clr r1 \n\t" + + : [result] "+r" (result) + : [i] "r" (i), + [scale] "r" (scale), + [zero] "r" (zero) + : "r0", "r1" + ); + + asm volatile( + // result.A-D += i.A-B + " add %A[result], %A[i] \n\t" + " adc %B[result], %B[i] \n\t" + " adc %C[result], %[zero] \n\t" + " adc %D[result], %[zero] \n\t" + : [result] "+r" (result) + : [i] "r" (i), + [zero] "r" (zero) + ); + + result = 
result >> 16; + return result; +#else + uint32_t result; + asm volatile( + // result.A-B = i.A x scale.A + " mul %A[i], %A[scale] \n\t" + // save results... + // basic idea: + //" mov %A[result], r0 \n\t" + //" mov %B[result], r1 \n\t" + // which can be written as... + " movw %A[result], r0 \n\t" + // We actually don't need to do anything with r0, // as result.A is never used again here, so we // could just move the high byte, but movw is // one clock cycle, just like mov, so might as @@ -527,6 +637,7 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) result = result >> 16; return result; +#endif #else #error "No implementation for scale16 available." #endif |