diff options
author | kriegsman@gmail.com <kriegsman@gmail.com@4ad4ec5c-605d-bd5c-5796-512c9b60011b> | 2013-05-25 18:45:26 +0400 |
---|---|---|
committer | kriegsman@gmail.com <kriegsman@gmail.com@4ad4ec5c-605d-bd5c-5796-512c9b60011b> | 2013-05-25 18:45:26 +0400 |
commit | 34074fbc46ff68a871dab0f4485b2bfb9b6c20e7 (patch) | |
tree | 2a9d4757313db0df7d1c3f44bdfd2d4bf87ffc7c /lib8tion.h | |
parent | f364ba365ec7dfa23e7a798a72c20bdc9d0988eb (diff) |
MEK: RENAMED THINGS; your code will need name changes. Renamed hsv2rgb methods. Added fixed point types, 16-bit CRGB and CHSV types, 16-bit hsv2rgb, scale16( uint16_t, fract16), fixed-to-float and float-to-fixed convenience functions, CRGB::nMaximizeBrightness method of dubious ultimate value. Cleaned up some AVR assembly functions.
Diffstat (limited to 'lib8tion.h')
-rw-r--r-- | lib8tion.h | 275 |
1 file changed, 216 insertions, 59 deletions
@@ -101,13 +101,13 @@ - Linear interpolation between two values, with the fraction between them expressed as an 8- or 16-bit - fixed point fraction (Q8 or Q16). - lerp8by8( fromU8, toU8, fracQ8 ) - lerp16by8( fromU16, toU16, fracQ8 ) - lerp15by8( fromS16, toS16, fracQ8 ) - == from + (( to - from ) * fracQ8) / 256) - lerp16by16( fromU16, toU16, fracQ16 ) - == from + (( to - from ) * fracQ16) / 65536) + fixed point fraction (fract8 or fract16). + lerp8by8( fromU8, toU8, fract8 ) + lerp16by8( fromU16, toU16, fract8 ) + lerp15by8( fromS16, toS16, fract8 ) + == from + (( to - from ) * fract8) / 256) + lerp16by16( fromU16, toU16, fract16 ) + == from + (( to - from ) * fract16) / 65536) - Optimized memmove, memcpy, and memset, that are faster than standard avr-libc 1.8. @@ -149,6 +149,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun #define QSUB8_C 1 #define SCALE8_C 1 #define SCALE16BY8_C 1 +#define SCALE16_C 1 #define ABS8_C 1 #define MUL8_C 1 #define QMUL8_C 1 @@ -180,11 +181,13 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun #if !defined(LIB8_ATTINY) #define SCALE8_C 0 #define SCALE16BY8_C 0 +#define SCALE16_C 0 #define MUL8_C 0 #define QMUL8_C 0 #define EASE8_C 0 #define SCALE8_AVRASM 1 #define SCALE16BY8_AVRASM 1 +#define SCALE16_AVRASM 1 #define MUL8_AVRASM 1 #define QMUL8_AVRASM 1 #define EASE8_AVRASM 1 @@ -193,11 +196,13 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun // On ATtiny, we just use C implementations #define SCALE8_C 1 #define SCALE16BY8_C 1 +#define SCALE16_C 0 #define MUL8_C 1 #define QMUL8_C 1 #define EASE8_C 1 #define SCALE8_AVRASM 0 #define SCALE16BY8_AVRASM 0 +#define SCALE16_AVRASM 0 #define MUL8_AVRASM 0 #define QMUL8_AVRASM 0 #define EASE8_AVRASM 0 @@ -212,6 +217,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun #define QSUB8_C 1 #define SCALE8_C 1 #define SCALE16BY8_C 1 +#define SCALE16_C 1 #define ABS8_C 1 #define MUL8_C 1 #define ADD8_C 1 @@ -223,21 +229,71 @@ Lib8tion is pronounced like 'libation': 
lie-BAY-shun /////////////////////////////////////////////////////////////////////// // -// typdefs for fast fixed-point fractional types. +// typdefs for fixed-point fractional types. // -// Q7 should be interpreted as signed 128ths. -// Q8 should be interpreted as unsigned 256ths. -// Q15 should be interpreted as signed 32768ths. -// Q16 should be interpreted as unsigned 65536ths. +// sfract7 should be interpreted as signed 128ths. +// fract8 should be interpreted as unsigned 256ths. +// sfract15 should be interpreted as signed 32768ths. +// fract16 should be interpreted as unsigned 65536ths. // -// Example: if a Q8 has the is "128", that should be interpreted -// as 128 256ths, or one-half. +// Example: if a fract8 has the value "64", that should be interpreted +// as 64/256ths, or one-quarter. // +// +// fract8 range is 0 to 0.99609375 +// in steps of 0.00390625 +// +// sfract7 range is -0.9921875 to 0.9921875 +// in steps of 0.0078125 +// +// fract16 range is 0 to 0.99998474121 +// in steps of 0.00001525878 +// +// sfract15 range is -0.99996948242 to 0.99996948242 +// in steps of 0.00003051757 +// + +typedef uint8_t fract8; // ANSI: unsigned short _Fract +typedef int8_t sfract7; // ANSI: signed short _Fract +typedef uint16_t fract16; // ANSI: unsigned _Fract +typedef int16_t sfract15; // ANSI: signed _Fract + + +// accumXY types should be interpreted as X bits of integer, +// and Y bits of fraction. 
+// E.g., accum88 has 8 bits of int, 8 bits of fraction + +typedef uint16_t accum88; // ANSI: unsigned short _Accum +typedef int16_t saccum78; // ANSI: signed short _Accum +typedef uint32_t accum1616;// ANSI: signed _Accum +typedef int32_t saccum1516;//ANSI: signed _Accum +typedef uint16_t accum124; // no direct ANSI counterpart +typedef int32_t saccum114;// no direct ANSI counterpart + + +// typedef for IEEE754 "binary32" float type internals + +typedef union { + uint32_t i; + float f; + struct { + uint32_t mantissa: 23; + uint32_t exponent: 8; + uint32_t signbit: 1; + }; + struct { + uint32_t mant7 : 7; + uint32_t mant16: 16; + uint32_t exp_ : 8; + uint32_t sb_ : 1; + }; + struct { + uint32_t mant_lo8 : 8; + uint32_t mant_hi16_exp_lo1 : 16; + uint32_t sb_exphi7 : 8; + }; +} IEEE754binary32_t; -typedef int8_t Q7; -typedef uint8_t Q8; -typedef int16_t Q15; -typedef uint16_t Q16; /////////////////////////////////////////////////////////////////////// @@ -370,18 +426,18 @@ LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j) // the numerator of a fraction whose denominator is 256 // In other words, it computes i * (scale / 256) // 4 clocks AVR, 2 clocks ARM -LIB8STATIC uint8_t scale8( uint8_t i, uint8_t scale) +LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) { #if SCALE8_C == 1 return ((int)i * (int)(scale) ) >> 8; #elif SCALE8_AVRASM == 1 asm volatile( /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ - "mul %0, %1 \n\t" + "mul %0, %1 \n\t" /* Move the high 8-bits of the product (r1) back to i */ - "mov %0, r1 \n\t" + "mov %0, r1 \n\t" /* Restore r1 to "0"; it's expected to always be that */ - "eor r1, r1 \n\t" + "clr __zero_reg__ \n\t" : "+a" (i) /* writes to i */ : "a" (scale) /* uses scale */ @@ -400,7 +456,7 @@ LIB8STATIC uint8_t scale8( uint8_t i, uint8_t scale) // inputs are non-zero, the output is guaranteed to be non-zero. // This makes for better 'video'/LED dimming, at the cost of // several additional cycles. 
-LIB8STATIC uint8_t scale8_video( uint8_t i, uint8_t scale) +LIB8STATIC uint8_t scale8_video( uint8_t i, fract8 scale) { #if SCALE8_C == 1 uint8_t nonzeroscale = (scale != 0) ? 1 : 0; @@ -410,12 +466,12 @@ LIB8STATIC uint8_t scale8_video( uint8_t i, uint8_t scale) uint8_t nonzeroscale = (scale != 0) ? 1 : 0; asm volatile( - " tst %0 \n" - " breq L_%= \n" - " mul %0, %1 \n" - " mov %0, r1 \n" - " add %0, %2 \n" - "L_%=: eor r1, r1 \n" + " tst %0 \n" + " breq L_%= \n" + " mul %0, %1 \n" + " mov %0, r1 \n" + " add %0, %2 \n" + "L_%=: clr __zero_reg__ \n" : "+a" (i) : "a" (scale), "a" (nonzeroscale) @@ -432,7 +488,7 @@ LIB8STATIC uint8_t scale8_video( uint8_t i, uint8_t scale) // This version of scale8 does not clean up the R1 register on AVR // If you are doing several 'scale8's in a row, use this, and // then explicitly call cleanup_R1. -LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, uint8_t scale) +LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) { #if SCALE8_C == 1 return ((int)i * (int)(scale) ) >> 8; @@ -443,7 +499,7 @@ LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, uint8_t scale) /* Move the high 8-bits of the product (r1) back to i */ "mov %0, r1 \n\t" /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ - /* "eor r1, r1 \n\t" */ + /* "clr __zero_reg__ \n\t" */ : "+a" (i) /* writes to i */ : "a" (scale) /* uses scale */ @@ -458,7 +514,7 @@ LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, uint8_t scale) // THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENT DIRECTLY IN PLACE -LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, uint8_t scale) +LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale) { #if SCALE8_C == 1 i = ((int)i * (int)(scale) ) >> 8; @@ -469,7 +525,7 @@ LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, uint8_t scale) /* Move the high 8-bits of the product (r1) back to i */ "mov %0, r1 \n\t" /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ - /* "eor r1, r1 \n\t" */ + /* "clr 
__zero_reg__ \n\t" */ : "+a" (i) /* writes to i */ : "a" (scale) /* uses scale */ @@ -481,7 +537,7 @@ LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, uint8_t scale) -LIB8STATIC uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, uint8_t scale) +LIB8STATIC uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) { #if SCALE8_C == 1 uint8_t nonzeroscale = (scale != 0) ? 1 : 0; @@ -516,7 +572,7 @@ LIB8STATIC void cleanup_R1() { #if CLEANUP_R1_AVRASM == 1 // Restore r1 to "0"; it's expected to always be that - asm volatile( "eor r1, r1\n\t" : : : "r1" ); + asm volatile( "clr __zero_reg__ \n\t" : : : "r1" ); #endif } @@ -527,7 +583,7 @@ LIB8STATIC void cleanup_R1() // // THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE -LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, uint8_t scale) +LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale) { #if SCALE8_C == 1 r = ((int)r * (int)(scale) ) >> 8; @@ -544,7 +600,7 @@ LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, uint8_t scale) } -LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, uint8_t scale) +LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale) { #if SCALE8_C == 1 uint8_t nonzeroscale = (scale != 0) ? 1 : 0; @@ -566,21 +622,21 @@ LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, uint8_t sca // is 256. 
In other words, it computes i * (scale / 256) #if SCALE16BY8_C == 1 -LIB8STATIC uint16_t scale16by8( uint16_t i, uint8_t scale ) +LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale ) { uint16_t result; result = (i * scale) / 256; return result; } #elif SCALE16BY8_AVRASM == 1 -LIB8STATIC uint16_t scale16by8( uint16_t i, uint8_t scale ) +LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale ) { uint16_t result; asm volatile( // result.A = HighByte(i.A x j ) " mul %A[i], %[scale] \n\t" " mov %A[result], r1 \n\t" - " eor %B[result], %B[result] \n\t" + " clr %B[result] \n\t" // result.A-B += i.B x j " mul %B[i], %[scale] \n\t" @@ -588,9 +644,9 @@ LIB8STATIC uint16_t scale16by8( uint16_t i, uint8_t scale ) " adc %B[result], r1 \n\t" // cleanup r1 - " eor r1, r1 \n\t" + " clr __zero_reg__ \n\t" - : [result] "+r" (result) + : [result] "=r" (result) : [i] "r" (i), [scale] "r" (scale) : "r0", "r1" ); @@ -600,6 +656,76 @@ LIB8STATIC uint16_t scale16by8( uint16_t i, uint8_t scale ) #error "No implementation for scale16by8 available." #endif +// scale16: scale a 16-bit unsigned value by a 16-bit value, +// considered as numerator of a fraction whose denominator +// is 65536. In other words, it computes i * (scale / 65536) + +#if SCALE16_C == 1 +LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) +{ + uint16_t result; + result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; + return result; +} +#elif SCALE16_AVRASM == 1 +LIB8STATIC +uint16_t scale16( uint16_t i, fract16 scale ) +{ + uint32_t result; + const uint8_t zero = 0; + asm volatile( + // result.A-B = i.A x scale.A + " mul %A[i], %A[scale] \n\t" + // save results... + // basic idea: + //" mov %A[result], r0 \n\t" + //" mov %B[result], r1 \n\t" + // which can be written as... 
+ " movw %A[result], r0 \n\t" + // We actually need to do anything with r0, + // as result.A is never used again here, so we + // could just move the high byte, but movw is + // one clock cycle, just like mov, so might as + // well, in case we want to use this code for + // a generic 16x16 multiply somewhere. + + // result.C-D = i.B x scale.B + " mul %B[i], %B[scale] \n\t" + //" mov %C[result], r0 \n\t" + //" mov %D[result], r1 \n\t" + " movw %C[result], r0 \n\t" + + // result.B-D += i.B x scale.A + " mul %B[i], %A[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // result.B-D += i.A x scale.B + " mul %A[i], %B[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // cleanup r1 + " clr r1 \n\t" + + : [result] "+r" (result) + : [i] "r" (i), + [scale] "r" (scale), + [zero] "r" (zero) + : "r0", "r1" + ); + result = result >> 16; + return result; +} +#else +#error "No implementation for scale16 available." +#endif + + // mul8: 8x8 bit multiplication, with 8 bit result LIB8STATIC uint8_t mul8( uint8_t i, uint8_t j) @@ -609,11 +735,11 @@ LIB8STATIC uint8_t mul8( uint8_t i, uint8_t j) #elif MUL8_AVRASM == 1 asm volatile( /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ - "mul %0, %1 \n\t" + "mul %0, %1 \n\t" /* Extract the LOW 8-bits (r0) */ - "mov %0, r0 \n\t" + "mov %0, r0 \n\t" /* Restore r1 to "0"; it's expected to always be that */ - "eor r1, r1 \n\t" + "clr __zero_reg__ \n\t" : "+a" (i) : "a" (j) : "r0", "r1"); @@ -635,17 +761,17 @@ LIB8STATIC uint8_t qmul8( uint8_t i, uint8_t j) #elif QMUL8_AVRASM == 1 asm volatile( /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ - " mul %0, %1 \n\t" + " mul %0, %1 \n\t" /* If high byte of result is zero, all is well. 
*/ - " cpi r1, 0 \n\t" - " breq L_%= \n\t" + " cpi r1, 0 \n\t" + " breq L_%= \n\t" /* If high byte of result > 0, saturate low byte to 0xFF */ - " ldi r0, 255 \n\t" - "L_%=: \n\t" + " ldi r0, 255 \n\t" + "L_%=: \n\t" /* Extract the LOW 8-bits (r0) */ - " mov %0, r0 \n\t" + " mov %0, r0 \n\t" /* Restore r1 to "0"; it's expected to always be that */ - " eor r1, r1 \n\t" + " clr __zero_reg__ \n\t" : "+a" (i) : "a" (j) : "r0", "r1"); @@ -680,6 +806,35 @@ LIB8STATIC int8_t abs8( int8_t i) #endif } + +/////////////////////////////////////////////////////////////////////// +// +// float-to-fixed and fixed-to-float conversions +// +// Note that anything involving a 'float' on AVR will be slower. + +// floatToSfract15: conversion from IEEE754 float in the range (-1,1) +// to 16-bit fixed point. Note that the extremes of +// one and negative one are NOT representable. The +// representable range is basically +// +// sfract15ToFloat: conversion from sfract15 fixed point to +// IEEE754 32-bit float. 
+ +LIB8STATIC +float sfract15ToFloat( sfract15 y) +{ + return y / 32768.0; +} + +LIB8STATIC +sfract15 floatToSfract15( float f) +{ + return f * 32768.0; +} + + + /////////////////////////////////////////////////////////////////////// // Dimming and brightening functions @@ -912,7 +1067,7 @@ void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinl // linear interpolation between two unsigned 8-bit values, // with 8-bit fraction -LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, Q8 frac) +LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac) { uint8_t delta = b - a; uint8_t scaled = scale8( delta, frac); @@ -922,7 +1077,7 @@ LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, Q8 frac) // linear interpolation between two unsigned 16-bit values, // with 16-bit fraction -LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, Q16 frac) +LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac) { uint16_t delta = b - a; uint32_t prod = (uint32_t)delta * (uint32_t)frac; @@ -943,7 +1098,7 @@ LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, Q16 frac) // linear interpolation between two unsigned 16-bit values, // with 8-bit fraction -LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, Q8 frac) +LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac) { uint16_t result; if( b > a) { @@ -960,7 +1115,7 @@ LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, Q8 frac) // linear interpolation between two signed 15-bit values, // with 8-bit fraction -LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, Q8 frac) +LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac) { int16_t result; if( b > a) { @@ -983,7 +1138,7 @@ LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, Q8 frac) // ease8InOuCubic: 8-bit cubic ease-in / ease-out function // Takes around 18 cycles on AVR -LIB8STATIC uint8_t ease8InOutCubic( uint8_t i) +LIB8STATIC fract8 ease8InOutCubic( fract8 i) { uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i); 
uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i); @@ -1012,7 +1167,7 @@ LIB8STATIC uint8_t ease8InOutCubic( uint8_t i) // Asm version takes around 7 cycles on AVR. #if EASE8_C == 1 -LIB8STATIC uint8_t ease8InOutApprox( uint8_t i) +LIB8STATIC fract8 ease8InOutApprox( fract8 i) { if( i < 64) { // start with slope 0.5 @@ -1033,7 +1188,7 @@ LIB8STATIC uint8_t ease8InOutApprox( uint8_t i) } #elif EASE8_AVRASM == 1 -LIB8STATIC uint8_t ease8InOutApprox( uint8_t i) +LIB8STATIC uint8_t ease8InOutApprox( fract8 i) { // takes around 7 cycles on AVR asm volatile ( @@ -1067,4 +1222,6 @@ LIB8STATIC uint8_t ease8InOutApprox( uint8_t i) + + #endif |