diff options
-rw-r--r-- | chipsets.h | 12 | ||||
-rw-r--r-- | clockless.h | 10 | ||||
-rw-r--r-- | delay.h | 16 | ||||
-rw-r--r-- | fastpin.h | 41 | ||||
-rw-r--r-- | lib8tion.h | 4 |
5 files changed, 59 insertions, 24 deletions
@@ -150,7 +150,6 @@ template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t S class P9813Controller : public CLEDController { typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; SPI mSPI; - CMinWait<24> mWaitDelay; void writeBoundary() { mSPI.writeWord(0); mSPI.writeWord(0); } @@ -186,7 +185,6 @@ public: mSPI.waitFully(); mSPI.release(); - mWaitDelay.mark(); } virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { @@ -203,9 +201,7 @@ public: #ifdef SUPPORT_ARGB virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { - mWaitDelay.wait(); mSPI.template writeBytes3<1, RGB_ORDER>((byte*)data, nLeds * 4, scale); - mWaitDelay.mark(); } #endif }; @@ -296,8 +292,6 @@ public: // UCS1903 - 500ns, 1500ns, 500ns template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> -class UCS1903Controller400Mhz : public ClocklessController<DATA_PIN, NS(500), NS(1500), NS(500), RGB_ORDER> {}; -template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, NS(500), NS(1500), NS(500), RGB_ORDER> {}; #if NO_TIME(500, 1500, 500) #warning "Not enough clock cycles available for the UCS103" @@ -305,8 +299,6 @@ class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, NS(500), NS // TM1809 - 312.5ns, 312.5ns, 325ns template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> -class TM1809Controller800Mhz : public ClocklessController<DATA_PIN, NS(350), NS(350), NS(550), RGB_ORDER> {}; -template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class TM1809Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS(350), NS(550), RGB_ORDER> {}; #if NO_TIME(350, 350, 550) #warning "Not enough clock cycles available for the TM1809" @@ -314,8 +306,6 @@ class TM1809Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS( // WS2811 - 350n, 350ns, 550ns template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> -class WS2811Controller800Mhz : public ClocklessController<DATA_PIN, NS(320), NS(320), NS(550), RGB_ORDER> {}; -template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class WS2811Controller800Khz : public ClocklessController<DATA_PIN, NS(320), NS(320), NS(550), RGB_ORDER> {}; template <uint8_t DATA_PIN, uint8_t DATA_PIN2, EOrder RGB_ORDER = RGB> @@ -327,8 +317,6 @@ class WS2811Controller2800Khz : public ClocklessController2<DATA_PIN, DATA_PIN2, // 750NS, 750NS, 750NS template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> -class TM1803Controller400Mhz : public ClocklessController<DATA_PIN, NS(750), NS(750), NS(750), RGB_ORDER> {}; -template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class TM1803Controller400Khz : public ClocklessController<DATA_PIN, NS(750), NS(750), NS(750), RGB_ORDER> {}; #if NO_TIME(750, 750, 750) #warning "Not enough clock cycles available for the TM1803" diff --git a/clockless.h b/clockless.h index 2bd449bc..e13cd153 100644 --- a/clockless.h +++ b/clockless.h @@ -43,8 +43,8 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Convinience macros to wrap around the toggling of hi vs. lo -#define SET_LO FLIP ? FastPin<DATA_PIN>::fastset(port, hi) : FastPin<DATA_PIN>::fastset(port, lo) -#define SET_HI FLIP ? FastPin<DATA_PIN>::fastset(port, lo) : FastPin<DATA_PIN>::fastset(port, hi) +#define SET_LO FLIP ? FastPin<DATA_PIN>::fastset(port, hi) : FastPin<DATA_PIN>::fastset(port, lo); +#define SET_HI FLIP ? FastPin<DATA_PIN>::fastset(port, lo) : FastPin<DATA_PIN>::fastset(port, hi); template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, bool FLIP = false, int WAIT_TIME = 50> class ClocklessController : public CLEDController { @@ -145,12 +145,12 @@ public: // gcc will use register Y for the this pointer. template<int SKIP, bool ADVANCE> static void showRGBInternal(register int nLeds, register uint8_t scale, register const byte *rgbdata) { register byte *data = (byte*)rgbdata; - register data_t mask = FastPin<DATA_PIN>::mask(); + data_t mask = FastPin<DATA_PIN>::mask(); register data_ptr_t port = FastPin<DATA_PIN>::port(); nLeds *= (3 + SKIP); register uint8_t *end = data + nLeds; - register data_t hi = *port | mask; - register data_t lo = *port & ~mask; + register data_t hi = FastPin<DATA_PIN>::hival(); + register data_t lo = FastPin<DATA_PIN>::loval();; *port = lo; #if defined(FASTLED_ARM) @@ -40,7 +40,23 @@ template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() { _delaycycles_AVR<CYCLES / 3, CYCLES % 3>(); } #else +// template<int LOOP, int PAD> inline void _delaycycles_ARM() { +// delaycycles<PAD>(); +// // the loop below is 3 cycles * LOOP. the LDI is one cycle, +// // the DEC is 1 cycle, the BRNE is 2 cycles if looping back and +// // 1 if not (the LDI balances out the BRNE being 1 cycle on exit) +// __asm__ __volatile__ ( +// " mov.w r9, %0\n" +// "L_%=: subs.w r9, r9, #1\n" +// " bne.n L_%=\n" +// : /* no outputs */ +// : "M" (LOOP) +// : "r9" +// ); +// } + template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() { + // _delaycycles_ARM<CYCLES / 3, CYCLES % 3>(); NOP; delaycycles<CYCLES-1>(); } #endif @@ -14,7 +14,7 @@ public: void wait() { long diff = micros() - mLastMicros; - if(diff < WAIT) { + if(diff > 0 && diff < WAIT) { delayMicroseconds(WAIT - diff); } } @@ -200,6 +200,35 @@ public: inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; } }; + +/// Template definition for DUE style ARM pins using bit banding, providing direct access to the various GPIO registers. GCC +/// does a poor job of optimizing around these accesses so they are not being used just yet. +template<uint8_t PIN, uint32_t _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PDDR> class _DUEPIN_BITBAND { +public: + typedef volatile uint32_t * port_ptr_t; + typedef uint32_t port_t; + + inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; } + inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; } + + inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; } + inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; } + inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } + + inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); } + + inline static void toggle() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() ^= 1; } + + inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } + inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } + inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } + + inline static port_t hival() __attribute__ ((always_inline)) { return 1; } + inline static port_t loval() __attribute__ ((always_inline)) { return 0; } + inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); } + inline static port_t mask() __attribute__ ((always_inline)) { return 1; } +}; + /// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers. Note that this /// uses the full port GPIO registers. In theory, in some way, bit-band register access -should- be faster, however I have found /// that something about the way gcc does register allocation results in the bit-band code being slower. It will need more fine tuning. @@ -248,9 +277,9 @@ public: inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; } - inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 1; } - inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 0; } - inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } + inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } + inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } + inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } inline static port_t hival() __attribute__ ((always_inline)) { return 1; } inline static port_t loval() __attribute__ ((always_inline)) { return 0; } @@ -280,13 +309,15 @@ typedef volatile uint32_t * ptr_reg32_t; #define DUE_IO32(L) _RD32(REG_PIO ## L ## _ODSR); _RD32(REG_PIO ## L ## _SODR); _RD32(REG_PIO ## L ## _CODR); _RD32(REG_PIO ## L ## _OER); -#define USE_BITBAND 0 +#define USE_BITBAND 1 #if USE_BITBAND == 0 #define _DEFPIN_DUE(PIN, BIT, L) template<> class FastPin<PIN> : public _DUEPIN<PIN, 1 << BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), \ _R(GPIO ## L ## _OER)> {}; #define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \ _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; #else + #define _DEFPIN_DUE(PIN, BIT, L) template<> class FastPin<PIN> : public _DUEPIN_BITBAND<PIN, BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), \ + _R(GPIO ## L ## _OER)> {}; #define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \ _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; #endif @@ -124,7 +124,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun #include <stdint.h> -#define LIB8STATIC __attribute__ ((unused)) static +#define LIB8STATIC __attribute__ ((unused)) static inline #if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) @@ -426,7 +426,7 @@ LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j) // the numerator of a fraction whose denominator is 256 // In other words, it computes i * (scale / 256) // 4 clocks AVR, 2 clocks ARM -LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) +LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) { #if SCALE8_C == 1 return ((int)i * (int)(scale) ) >> 8; |