From 9cbddfc41dea1f25e2668fb3843e0eca430fc14a Mon Sep 17 00:00:00 2001 From: "danielgarcia@gmail.com" Date: Thu, 20 Jun 2013 18:53:53 +0000 Subject: cleanup andcheckpointing --- chipsets.h | 12 ------------ clockless.h | 10 +++++----- delay.h | 16 ++++++++++++++++ fastpin.h | 41 ++++++++++++++++++++++++++++++++++++----- lib8tion.h | 4 ++-- 5 files changed, 59 insertions(+), 24 deletions(-) diff --git a/chipsets.h b/chipsets.h index 325e8b5b..6302a2e3 100644 --- a/chipsets.h +++ b/chipsets.h @@ -150,7 +150,6 @@ template SPI; SPI mSPI; - CMinWait<24> mWaitDelay; void writeBoundary() { mSPI.writeWord(0); mSPI.writeWord(0); } @@ -186,7 +185,6 @@ public: mSPI.waitFully(); mSPI.release(); - mWaitDelay.mark(); } virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { @@ -203,9 +201,7 @@ public: #ifdef SUPPORT_ARGB virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { - mWaitDelay.wait(); mSPI.template writeBytes3<1, RGB_ORDER>((byte*)data, nLeds * 4, scale); - mWaitDelay.mark(); } #endif }; @@ -296,8 +292,6 @@ public: // UCS1903 - 500ns, 1500ns, 500ns template -class UCS1903Controller400Mhz : public ClocklessController {}; -template class UCS1903Controller400Khz : public ClocklessController {}; #if NO_TIME(500, 1500, 500) #warning "Not enough clock cycles available for the UCS103" @@ -305,8 +299,6 @@ class UCS1903Controller400Khz : public ClocklessController -class TM1809Controller800Mhz : public ClocklessController {}; -template class TM1809Controller800Khz : public ClocklessController {}; #if NO_TIME(350, 350, 550) #warning "Not enough clock cycles available for the TM1809" @@ -314,8 +306,6 @@ class TM1809Controller800Khz : public ClocklessController -class WS2811Controller800Mhz : public ClocklessController {}; -template class WS2811Controller800Khz : public ClocklessController {}; template @@ -327,8 +317,6 @@ class WS2811Controller2800Khz : public ClocklessController2 -class TM1803Controller400Mhz : public ClocklessController {}; -template class TM1803Controller400Khz : public ClocklessController {}; #if NO_TIME(750, 750, 750) #warning "Not enough clock cycles available for the TM1803" diff --git a/clockless.h b/clockless.h index 2bd449bc..e13cd153 100644 --- a/clockless.h +++ b/clockless.h @@ -43,8 +43,8 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Convinience macros to wrap around the toggling of hi vs. lo -#define SET_LO FLIP ? FastPin::fastset(port, hi) : FastPin::fastset(port, lo) -#define SET_HI FLIP ? FastPin::fastset(port, lo) : FastPin::fastset(port, hi) +#define SET_LO FLIP ? FastPin::fastset(port, hi) : FastPin::fastset(port, lo); +#define SET_HI FLIP ? FastPin::fastset(port, lo) : FastPin::fastset(port, hi); template class ClocklessController : public CLEDController { @@ -145,12 +145,12 @@ public: // gcc will use register Y for the this pointer. template static void showRGBInternal(register int nLeds, register uint8_t scale, register const byte *rgbdata) { register byte *data = (byte*)rgbdata; - register data_t mask = FastPin::mask(); + data_t mask = FastPin::mask(); register data_ptr_t port = FastPin::port(); nLeds *= (3 + SKIP); register uint8_t *end = data + nLeds; - register data_t hi = *port | mask; - register data_t lo = *port & ~mask; + register data_t hi = FastPin::hival(); + register data_t lo = FastPin::loval();; *port = lo; #if defined(FASTLED_ARM) diff --git a/delay.h b/delay.h index 937e9b2a..554705fe 100644 --- a/delay.h +++ b/delay.h @@ -40,7 +40,23 @@ template __attribute__((always_inline)) inline void delaycycles() { _delaycycles_AVR(); } #else +// template inline void _delaycycles_ARM() { +// delaycycles(); +// // the loop below is 3 cycles * LOOP. the LDI is one cycle, +// // the DEC is 1 cycle, the BRNE is 2 cycles if looping back and +// // 1 if not (the LDI balances out the BRNE being 1 cycle on exit) +// __asm__ __volatile__ ( +// " mov.w r9, %0\n" +// "L_%=: subs.w r9, r9, #1\n" +// " bne.n L_%=\n" +// : /* no outputs */ +// : "M" (LOOP) +// : "r9" +// ); +// } + template __attribute__((always_inline)) inline void delaycycles() { + // _delaycycles_ARM(); NOP; delaycycles(); } #endif diff --git a/fastpin.h b/fastpin.h index 488f0fb9..e68d04f2 100644 --- a/fastpin.h +++ b/fastpin.h @@ -14,7 +14,7 @@ public: void wait() { long diff = micros() - mLastMicros; - if(diff < WAIT) { + if(diff > 0 && diff < WAIT) { delayMicroseconds(WAIT - diff); } } @@ -200,6 +200,35 @@ public: inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; } }; + +/// Template definition for DUE style ARM pins using bit banding, providing direct access to the various GPIO registers. GCC +/// does a poor job of optimizing around these accesses so they are not being used just yet. +template class _DUEPIN_BITBAND { +public: + typedef volatile uint32_t * port_ptr_t; + typedef uint32_t port_t; + + inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; } + inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; } + + inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; } + inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; } + inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } + + inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); } + + inline static void toggle() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() ^= 1; } + + inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } + inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } + inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } + + inline static port_t hival() __attribute__ ((always_inline)) { return 1; } + inline static port_t loval() __attribute__ ((always_inline)) { return 0; } + inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); } + inline static port_t mask() __attribute__ ((always_inline)) { return 1; } +}; + /// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers. Note that this /// uses the full port GPIO registers. In theory, in some way, bit-band register access -should- be faster, however I have found /// that something about the way gcc does register allocation results in the bit-band code being slower. It will need more fine tuning. @@ -248,9 +277,9 @@ public: inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; } - inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 1; } - inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 0; } - inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } + inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); } + inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); } + inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; } inline static port_t hival() __attribute__ ((always_inline)) { return 1; } inline static port_t loval() __attribute__ ((always_inline)) { return 0; } @@ -280,13 +309,15 @@ typedef volatile uint32_t * ptr_reg32_t; #define DUE_IO32(L) _RD32(REG_PIO ## L ## _ODSR); _RD32(REG_PIO ## L ## _SODR); _RD32(REG_PIO ## L ## _CODR); _RD32(REG_PIO ## L ## _OER); -#define USE_BITBAND 0 +#define USE_BITBAND 1 #if USE_BITBAND == 0 #define _DEFPIN_DUE(PIN, BIT, L) template<> class FastPin : public _DUEPIN {}; #define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin : public _ARMPIN {}; #else + #define _DEFPIN_DUE(PIN, BIT, L) template<> class FastPin : public _DUEPIN_BITBAND {}; #define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin : public _ARMPIN_BITBAND {}; #endif diff --git a/lib8tion.h b/lib8tion.h index 33a0abcc..0cb097f0 100644 --- a/lib8tion.h +++ b/lib8tion.h @@ -124,7 +124,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun #include -#define LIB8STATIC __attribute__ ((unused)) static +#define LIB8STATIC __attribute__ ((unused)) static inline #if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) @@ -426,7 +426,7 @@ LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j) // the numerator of a fraction whose denominator is 256 // In other words, it computes i * (scale / 256) // 4 clocks AVR, 2 clocks ARM -LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) +LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) { #if SCALE8_C == 1 return ((int)i * (int)(scale) ) >> 8; -- cgit v1.2.3