diff options
author | Daniel Garcia <danielgarcia@gmail.com> | 2013-11-28 13:45:51 +0400 |
---|---|---|
committer | Daniel Garcia <danielgarcia@gmail.com> | 2013-11-28 13:45:51 +0400 |
commit | 16204092116ebc56dac507b422cf1764d4f6d711 (patch) | |
tree | 751d55c39a705ebb4e024efec83582a74f5d32e0 /clockless_trinket.h | |
parent | 29e9e4a26a810f3f8742ed44139b33a1fd1cf6af (diff) |
Roll the trinket definitions into the standard definitions, so library users see no change. Clean up the code a bit more. Allow arbitrary delay points, for future slotted work. Minor warning cleanup.
Diffstat (limited to 'clockless_trinket.h')
-rw-r--r-- | clockless_trinket.h | 231 |
1 files changed, 99 insertions, 132 deletions
diff --git a/clockless_trinket.h b/clockless_trinket.h index 1869a72c..a7d4bee6 100644 --- a/clockless_trinket.h +++ b/clockless_trinket.h @@ -3,46 +3,35 @@ #include "controller.h" #include "lib8tion.h" +#include "delay.h" #include <avr/interrupt.h> // for cli/se definitions -// Macro to convert from nano-seconds to clocks and clocks to nano-seconds -// #define NS(_NS) (_NS / (1000 / (F_CPU / 1000000L))) -#if F_CPU < 96000000 -#define NS(_NS) ( (_NS * (F_CPU / 1000000L))) / 1000 -#define CLKS_TO_MICROS(_CLKS) ((long)(_CLKS)) / (F_CPU / 1000000L) -#else -#define NS(_NS) ( (_NS * (F_CPU / 2000000L))) / 1000 -#define CLKS_TO_MICROS(_CLKS) ((long)(_CLKS)) / (F_CPU / 2000000L) -#endif - -// Macro for making sure there's enough time available -#define NO_TIME(A, B, C) (NS(A) < 3 || NS(B) < 3 || NS(C) < 6) - -#if defined(__MK20DX128__) - extern volatile uint32_t systick_millis_count; -# define MS_COUNTER systick_millis_count -#else -# if defined(CORE_TEENSY) - extern volatile unsigned long timer0_millis_count; -# define MS_COUNTER timer0_millis_count -# else - extern volatile unsigned long timer0_millis; -# define MS_COUNTER timer0_millis -# endif -#endif // Scaling macro choice -#if defined(LIB8_ATTINY) -# define INLINE_SCALE(B, SCALE) delaycycles<3>() -# warning "No hardware multiply, inline brightness scaling disabled" -#else -# define INLINE_SCALE(B, SCALE) B = scale8_LEAVING_R1_DIRTY(B, SCALE) -#endif - #ifndef TRINKET_SCALE #define TRINKET_SCALE 1 #endif +// Variations on the functions in delay.h - w/a loop var passed in to preserve registers across calls by the optimizer/compiler +template<int CYCLES> inline void _dc(register uint8_t & loopvar); + +template<int _LOOP, int PAD> inline void _dc_AVR(register uint8_t & loopvar) { + _dc<PAD>(loopvar); + asm __volatile__ ( "LDI %[loopvar], %[_LOOP]\n\tL_%=: DEC %[loopvar]\n\t BRNE L_%=\n\t" : + [loopvar] "+a" (loopvar) : [_LOOP] "M" (_LOOP) : ); +} + +template<int CYCLES> __attribute__((always_inline)) inline void 
_dc(register uint8_t & loopvar) { + _dc_AVR<CYCLES/3,CYCLES%3>(loopvar); +} +template<> __attribute__((always_inline)) inline void _dc<0>(register uint8_t & loopvar) {} +template<> __attribute__((always_inline)) inline void _dc<1>(register uint8_t & loopvar) {asm __volatile__("cp r0,r0":::);} +template<> __attribute__((always_inline)) inline void _dc<2>(register uint8_t & loopvar) {asm __volatile__("rjmp .+0":::);} + +#define D1(ADJ) _dc<T1-(2+ADJ)>(loopvar); +#define D2(ADJ) _dc<T2-(1+ADJ)>(loopvar); +#define D3(ADJ) _dc<T3-(1+ADJ)>(loopvar); + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // // Base template for clockless controllers. These controllers have 3 control points in their cycle for each bit. The first point @@ -120,7 +109,8 @@ public: [b2] "+r" (b2), \ [count] "+x" (count), \ [scale_base] "+r" (scale_base), \ - [data] "+z" (data) \ + [data] "+z" (data), \ + [loopvar] "+a" (loopvar) \ : /* use variables */ \ [hi] "r" (hi), \ [lo] "r" (lo), \ @@ -133,22 +123,13 @@ public: [PORT] "M" (0x18) \ : /* clobber registers */ + // 1 cycle, write hi to the port #define HI1 asm __volatile__("out %[PORT], %[hi]" ASM_VARS ); // 1 cycle, write lo to the port #define LO1 asm __volatile__("out %[PORT], %[lo]" ASM_VARS ); // 2 cycles, sbrs on flipping the lne to lo if we're pushing out a 0 #define QLO2(B, N) asm __volatile__("sbrs %[" #B "], " #N ASM_VARS ); LO1; -// 0 cycle placeholder nop to keep code columns lined up -#define NOP0 -// 1 cycle nop/delay -#define NOP1 asm __volatile__("cp r0,r0" ASM_VARS ); -// 2 cycle nop/delay -#define NOP2 asm __volatile__("rjmp .+0" ASM_VARS ); -// 3 cycle nop/delay -#define NOP3 NOP1 NOP2 -// 4 cycle nop/delay -#define NOP4 NOP2 NOP2 // load a byte from ram into the given var with the given offset #define LD2(B,O) asm __volatile__("ldd %[" #B "], Z + %[" #O "]" ASM_VARS ); // 3 cycles - load a byte from ram into the scaling scratch space with the 
given offset, clear the target var @@ -176,6 +157,8 @@ public: // define the beginning of the loop #define LOOP asm __volatile__("1:" ASM_VARS ); #define DONE asm __volatile__("2:" ASM_VARS ); +// delay time + // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then // gcc will use register Y for the this pointer. @@ -197,6 +180,7 @@ public: // b0 = scale8(b0, scale); b1 = data[RGB_BYTE1(RGB_ORDER)]; b2 = 0; + register uint8_t loopvar; if(RGB_ORDER == RGB) { // If the rgb order is RGB, we can cut back on program space usage by making a much more compact @@ -210,39 +194,30 @@ public: LOOP // Sum of the clock counts across each row should be 10 for 8Mhz, WS2811 #if TRINKET_SCALE - // Inline scaling - HI1 NOP0 QLO2(b0, 7) LDSCL3(b1,O1) NOP1 LO1 SCALE2(b1,0) - HI1 NOP0 QLO2(b0, 6) RORSC4(b1,1) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 5) SCROR4(b1,2) LO1 SCALE2(b1,3) - HI1 NOP0 QLO2(b0, 4) RORSC4(b1,4) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 3) SCROR4(b1,5) LO1 SCALE2(b1,6) - HI1 NOP0 QLO2(b0, 2) RORSC4(b1,7) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 1) IDATA2 NOP2 LO1 DCOUNT2 - // The last bit is tricky. We do the 3 cycle hi, bit check, lo. Then we do a breq - // that if we don't branch, will be 1 cycle, then 3 cycles of nop, then 1 cycle out, then - // 2 cycles of jumping around the loop. 
If we do branch, then that's 2 cycles, we need to - // wait 2 more cycles, then do the final low and waiting - HI1 NOP0 QLO2(b0, 0) - BRLOOP1 - MOV1(b0, b1) NOP2 LO1 - JMPLOOP2 + // Inline scaling, RGB ordering matches byte ordering + HI1 D1(0) QLO2(b0, 7) LDSCL3(b1,O1) D2(3) LO1 SCALE2(b1,0) D3(2) + HI1 D1(0) QLO2(b0, 6) RORSC4(b1,1) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 5) SCROR4(b1,2) D2(4) LO1 SCALE2(b1,3) D3(2) + HI1 D1(0) QLO2(b0, 4) RORSC4(b1,4) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 3) SCROR4(b1,5) D2(4) LO1 SCALE2(b1,6) D3(2) + HI1 D1(0) QLO2(b0, 2) RORSC4(b1,7) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 1) IDATA2 D2(2) LO1 D3(0) + // In the last bit's first block, we decrement and branch to done if we decremented to 0 + HI1 D1(0) QLO2(b0, 0) DCOUNT2 BRLOOP1 MOV1(b0, b1) D2(4) LO1 D3(2) JMPLOOP2 #else - // no inline scaling - HI1 NOP0 QLO2(b0, 7) LD2(b1,O1) LO1 NOP2 - HI1 NOP0 QLO2(b0, 6) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 5) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 4) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 3) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 2) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 1) IDATA2 NOP2 LO1 DCOUNT2 - // The last bit is tricky. We do the 3 cycle hi, bit check, lo. Then we do a breq - // that if we don't branch, will be 1 cycle, then 3 cycles of nop, then 1 cycle out, then - // 2 cycles of jumping around the loop. 
If we do branch, then that's 2 cycles, we need to - // wait 2 more cycles, then do the final low and waiting - HI1 NOP0 QLO2(b2, 0) BRLOOP1 MOV1(b0,b1) NOP2 LO1 JMPLOOP2 + // no inline scaling, RGB ordering matches byte ordering + HI1 D1(0) QLO2(b0, 7) LD2(b1,O1) D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b0, 6) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 5) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 4) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 3) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 2) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 1) IDATA2 D2(2) LO1 D3(0) + // In the last bit's first block, we decrement and branch to done if we decremented to 0 + HI1 D1(0) QLO2(b2, 0) DCOUNT2 BRLOOP1 MOV1(b0,b1) D2(4) LO1 D3(2) JMPLOOP2 #endif DONE - NOP2 LO1 NOP2 + D2(4) LO1 D3(0) } } else @@ -252,68 +227,60 @@ public: LOOP // Sum of the clock counts across each row should be 10 for 8Mhz, WS2811 #if TRINKET_SCALE - // Inline scaling - HI1 NOP0 QLO2(b0, 7) LDSCL3(b1,O1) NOP1 LO1 SCALE2(b1,0) - HI1 NOP0 QLO2(b0, 6) RORSC4(b1,1) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 5) SCROR4(b1,2) LO1 SCALE2(b1,3) - HI1 NOP0 QLO2(b0, 4) RORSC4(b1,4) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 3) SCROR4(b1,5) LO1 SCALE2(b1,6) - HI1 NOP0 QLO2(b0, 2) RORSC4(b1,7) LO1 ROR1(b1) CLC1 - HI1 NOP0 QLO2(b0, 1) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 0) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 7) LDSCL3(b2,O2) NOP1 LO1 SCALE2(b2,0) - HI1 NOP0 QLO2(b1, 6) RORSC4(b2,1) LO1 ROR1(b2) CLC1 - HI1 NOP0 QLO2(b1, 5) SCROR4(b2,2) LO1 SCALE2(b2,3) - HI1 NOP0 QLO2(b1, 4) RORSC4(b2,4) LO1 ROR1(b2) CLC1 - HI1 NOP0 QLO2(b1, 3) SCROR4(b2,5) LO1 SCALE2(b2,6) - HI1 NOP0 QLO2(b1, 2) RORSC4(b2,7) LO1 ROR1(b2) CLC1 - HI1 NOP0 QLO2(b1, 1) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 0) IDATA2 NOP2 LO1 NOP2 - HI1 NOP0 QLO2(b2, 7) LDSCL3(b0,O0) NOP1 LO1 SCALE2(b0,0) - HI1 NOP0 QLO2(b2, 6) RORSC4(b0,1) LO1 ROR1(b0) CLC1 - HI1 NOP0 QLO2(b2, 5) SCROR4(b0,2) LO1 SCALE2(b0,3) - HI1 NOP0 QLO2(b2, 4) RORSC4(b0,4) LO1 ROR1(b0) CLC1 - HI1 NOP0 QLO2(b2, 3) SCROR4(b0,5) LO1 SCALE2(b0,6) - HI1 NOP0 QLO2(b2, 
2) RORSC4(b0,7) LO1 ROR1(b0) CLC1 - HI1 NOP0 QLO2(b2, 1) NOP4 LO1 DCOUNT2 - // The last bit is tricky. We do the 3 cycle hi, bit check, lo. Then we do a breq - // that if we don't branch, will be 1 cycle, then 3 cycles of nop, then 1 cycle out, then - // 2 cycles of jumping around the loop. If we do branch, then that's 2 cycles, we need to - // wait 2 more cycles, then do the final low and waiting - HI1 NOP0 QLO2(b2, 0) BRLOOP1 NOP3 LO1 JMPLOOP2 + // Inline scaling - RGB ordering + HI1 D1(0) QLO2(b0, 7) LDSCL3(b1,O1) D2(3) LO1 SCALE2(b1,0) D3(2) + HI1 D1(0) QLO2(b0, 6) RORSC4(b1,1) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 5) SCROR4(b1,2) D2(4) LO1 SCALE2(b1,3) D3(2) + HI1 D1(0) QLO2(b0, 4) RORSC4(b1,4) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 3) SCROR4(b1,5) D2(4) LO1 SCALE2(b1,6) D3(2) + HI1 D1(0) QLO2(b0, 2) RORSC4(b1,7) D2(4) LO1 ROR1(b1) CLC1 D3(2) + HI1 D1(0) QLO2(b0, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 0) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 7) LDSCL3(b2,O2) D2(3) LO1 SCALE2(b2,0) D3(2) + HI1 D1(0) QLO2(b1, 6) RORSC4(b2,1) D2(4) LO1 ROR1(b2) CLC1 D3(2) + HI1 D1(0) QLO2(b1, 5) SCROR4(b2,2) D2(4) LO1 SCALE2(b2,3) D3(2) + HI1 D1(0) QLO2(b1, 4) RORSC4(b2,4) D2(4) LO1 ROR1(b2) CLC1 D3(2) + HI1 D1(0) QLO2(b1, 3) SCROR4(b2,5) D2(4) LO1 SCALE2(b2,6) D3(2) + HI1 D1(0) QLO2(b1, 2) RORSC4(b2,7) D2(4) LO1 ROR1(b2) CLC1 D3(2) + HI1 D1(0) QLO2(b1, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 0) IDATA2 D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b2, 7) LDSCL3(b0,O0) D2(3) LO1 SCALE2(b0,0) D3(2) + HI1 D1(0) QLO2(b2, 6) RORSC4(b0,1) D2(4) LO1 ROR1(b0) CLC1 D3(2) + HI1 D1(0) QLO2(b2, 5) SCROR4(b0,2) D2(4) LO1 SCALE2(b0,3) D3(2) + HI1 D1(0) QLO2(b2, 4) RORSC4(b0,4) D2(4) LO1 ROR1(b0) CLC1 D3(2) + HI1 D1(0) QLO2(b2, 3) SCROR4(b0,5) D2(4) LO1 SCALE2(b0,6) D3(2) + HI1 D1(0) QLO2(b2, 2) RORSC4(b0,7) D2(4) LO1 ROR1(b0) CLC1 D3(2) + HI1 D1(0) QLO2(b2, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 0) DCOUNT2 BRLOOP1 D2(3) LO1 D3(2) JMPLOOP2 #else - // no inline scaling - HI1 NOP0 
QLO2(b0, 7) LD2(b1,O1) NOP2 LO1 NOP2 - HI1 NOP0 QLO2(b0, 6) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 5) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 4) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 3) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 2) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 1) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b0, 0) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 7) LD2(b2,O2) NOP2 LO1 NOP2 - HI1 NOP0 QLO2(b1, 6) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 5) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 4) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 3) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 2) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 1) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b1, 0) IDATA2 NOP2 LO1 NOP2 - HI1 NOP0 QLO2(b2, 7) LD2(b0,O0) LO1 NOP2 - HI1 NOP0 QLO2(b2, 6) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b2, 5) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b2, 4) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b2, 3) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b2, 2) NOP4 LO1 NOP2 - HI1 NOP0 QLO2(b2, 1) NOP4 LO1 DCOUNT2 - // The last bit is tricky. We do the 3 cycle hi, bit check, lo. Then we do a breq - // that if we don't branch, will be 1 cycle, then 3 cycles of nop, then 1 cycle out, then - // 2 cycles of jumping around the loop. 
If we do branch, then that's 2 cycles, we need to - // wait 2 more cycles, then do the final low and waiting - HI1 NOP0 QLO2(b2, 0) BRLOOP1 NOP3 LO1 JMPLOOP2 + // no inline scaling - non-straight RGB ordering + HI1 D1(0) QLO2(b0, 7) LD2(b1,O1) D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b0, 6) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 5) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 4) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 3) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 2) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b0, 0) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 7) LD2(b2,O2) D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b1, 6) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 5) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 4) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 3) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 2) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b1, 0) IDATA2 D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b2, 7) LD2(b0,O0) D2(2) LO1 D3(0) + HI1 D1(0) QLO2(b2, 6) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 5) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 4) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 3) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 2) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 1) D2(0) LO1 D3(0) + HI1 D1(0) QLO2(b2, 0) DCOUNT2 BRLOOP1 D2(3) LO1 D3(2) JMPLOOP2 #endif DONE - NOP2 LO1 NOP2 + D2(4) LO1 D3(0) } } } |