diff options
-rw-r--r-- | platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h | 214 | ||||
-rw-r--r-- | platforms/arm/mxrt1062/clockless_arm_mxrt1062.h | 45 | ||||
-rw-r--r-- | platforms/arm/mxrt1062/fastled_arm_mxrt1062.h | 2 | ||||
-rw-r--r-- | platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h | 5 |
4 files changed, 248 insertions, 18 deletions
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
new file mode 100644
index 00000000..de09087c
--- /dev/null
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -0,0 +1,214 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+#define __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a multi-lane (parallel output) block clockless controller for the teensy4: init() claims up to LANES pins, starting at FIRST_PIN, and every bit time is written to all of them at once.
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY4)
+
+#define __FL_T4_MASK ((1<<(LANES))-1)
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
+
+    uint8_t m_bitOffsets[16];    // per-lane port bit index; made relative to m_nLowBit at the end of init()
+    uint8_t m_nActualLanes;      // number of lanes init() actually claimed (<= LANES)
+    uint8_t m_nLowBit;           // lowest port bit index used by any lane
+    uint8_t m_nHighBit;          // highest port bit index used by any lane
+    uint32_t m_nWriteMask;       // OR of FastPin<P>::mask() over all claimed lanes
+    uint8_t m_nOutBlocks;        // number of 8-bit groups needed to cover m_nLowBit..m_nHighBit
+    uint32_t m_offsets[3];       // DWT cycle counts: [0] = T1+T2+T3 (full bit), [1] = T3, [2] = T2+T3
+    CMinWait<WAIT_TIME> mWait;
+public:
+
+    virtual int size() { return CLEDController::size() * m_nActualLanes; }
+
+// For each pin, if we've hit our lane count, break, otherwise set the pin to output,
+// store the bit offset in our offset array, add this pin to the write mask, and if this
+// pin ends a block sequence, then break out of the switch as well
+#define _BLOCK_PIN(P) case P: { \
+    if(m_nActualLanes == LANES) break; \
+    FastPin<P>::setOutput(); \
+    m_bitOffsets[m_nActualLanes++] = FastPin<P>::pinbit(); \
+    m_nWriteMask |= FastPin<P>::mask(); \
+    if( P == 27 || P == 7 || P == 30) break; \
+}
+
+    virtual void init() {
+        // pre-initialize: no lanes claimed yet; m_nLowBit starts at 33 (above any 32-bit port index) so the min/max scan below works
+        memset(m_bitOffsets,0,16);
+        m_nActualLanes = 0;
+        m_nLowBit = 33;
+        m_nHighBit = 0;
+        m_nWriteMask = 0;
+
+        // setup the bits and data tracking for parallel output; the switch is entered at FIRST_PIN and falls through pin by pin
+        switch(FIRST_PIN) {
+            // GPIO6 block output
+            _BLOCK_PIN( 1);
+            _BLOCK_PIN( 0);
+            _BLOCK_PIN(24);
+            _BLOCK_PIN(25);
+            _BLOCK_PIN(19);
+            _BLOCK_PIN(18);
+            _BLOCK_PIN(14);
+            _BLOCK_PIN(15);
+            _BLOCK_PIN(17);
+            _BLOCK_PIN(16);
+            _BLOCK_PIN(22);
+            _BLOCK_PIN(23);
+            _BLOCK_PIN(20);
+            _BLOCK_PIN(21);
+            _BLOCK_PIN(26);
+            _BLOCK_PIN(27);
+            // GPIO7 block output
+            _BLOCK_PIN(10);
+            _BLOCK_PIN(12);
+            _BLOCK_PIN(11);
+            _BLOCK_PIN(13);
+            _BLOCK_PIN( 6);
+            _BLOCK_PIN( 9);
+            _BLOCK_PIN(32);
+            _BLOCK_PIN( 8);
+            _BLOCK_PIN( 7);
+            // GPIO 37 block output (NOTE(review): label looks like a typo for the third GPIO block - confirm the GPIO number in fastpin_arm_mxrt1062.h)
+            _BLOCK_PIN(37);
+            _BLOCK_PIN(36);
+            _BLOCK_PIN(35);
+            _BLOCK_PIN(34);
+            _BLOCK_PIN(39);
+            _BLOCK_PIN(38);
+            _BLOCK_PIN(28);
+            _BLOCK_PIN(31);
+            _BLOCK_PIN(30);
+        }
+
+        for(int i = 0; i < m_nActualLanes; i++) {
+            if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
+            if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
+        }
+
+        m_nOutBlocks = (m_nHighBit - m_nLowBit + 8)/8; // 1..4 whenever at least one lane was claimed
+
+        for(int i = 0; i < m_nActualLanes; i++) {
+            m_bitOffsets[i] -= m_nLowBit;
+        }
+    }
+
+    // Waits out the minimum inter-frame gap, writes one frame, then marks the time for the next gap.
+    virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
+        mWait.wait();
+        #if FASTLED_ALLOW_INTERRUPTS == 0
+        uint32_t clocks = showRGBInternal(pixels);
+        // Adjust the timer
+        long microsTaken = CLKS_TO_MICROS(clocks);
+        MS_COUNTER += (1 + (microsTaken / 1000));
+        #else
+        showRGBInternal(pixels);
+        #endif
+
+        mWait.mark();
+    }
+
+    typedef union {
+        uint8_t bytes[32];
+        uint8_t bg[4][8];
+        uint16_t shorts[16];
+        uint32_t raw[8];
+    } _outlines;
+
+    // Transposes the pending byte of every lane into per-bit port words, clocks out 8 bit times on all lanes, and meanwhile loads byte PX of each lane into b for the next call.
+    template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
+        _outlines b2;
+        switch(m_nOutBlocks) { // m_nOutBlocks is a count (1..4): enter at the count and fall through so groups [0, m_nOutBlocks) are all transposed
+            case 4: transpose8x1_noinline(b.bg[3], b2.bg[3]); // fall through
+            case 3: transpose8x1_noinline(b.bg[2], b2.bg[2]); // fall through
+            case 2: transpose8x1_noinline(b.bg[1], b2.bg[1]); // fall through
+            case 1: transpose8x1_noinline(b.bg[0], b2.bg[0]);
+        }
+
+        register uint8_t d = pixels.template getd<PX>(pixels);
+        register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+        int x = 0;
+        for(uint32_t i = 8; i > 0;) {
+            i--;
+            while(ARM_DWT_CYCCNT < next_mark);
+            next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+            *FastPin<FIRST_PIN>::sport() = m_nWriteMask; // start of bit: raise every lane
+
+            uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+            out <<= m_nLowBit; // lane bits were stored relative to m_nLowBit; shift back to port position
+
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]); // T1 elapsed (T2+T3 left): same point where the single-lane controller ends a 0 bit
+            *FastPin<FIRST_PIN>::cport() = ((~out) & m_nWriteMask); // drop only the lanes sending a 0
+
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]); // T1+T2 elapsed (T3 left): same point where the single-lane controller ends a 1 bit
+            *FastPin<FIRST_PIN>::cport() = m_nWriteMask; // drop the remaining lanes
+
+            // Read and store up to two bytes
+            if (x < m_nActualLanes) {
+                b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+                x++;
+                if (x < m_nActualLanes) {
+                    b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+                    x++;
+                }
+            }
+        }
+    }
+
+    uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, __FL_T4_MASK> &allpixels) { // writes one frame; returns the DWT cycles elapsed since entry
+        allpixels.preStepFirstByteDithering();
+        _outlines b0;
+        uint32_t start = ARM_DWT_CYCCNT;
+
+        for(int i = 0; i < m_nActualLanes; i++) {
+            b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
+        }
+
+        cli();
+        m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+        m_offsets[1] = _FASTLED_NS_TO_DWT(T3);
+        m_offsets[2] = _FASTLED_NS_TO_DWT(T2+T3);
+        uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
+
+        uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+        while(allpixels.has(1)) {
+            allpixels.stepDithering();
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            cli();
+            // if interrupts took longer than 45µs, punt on the current frame
+            if(ARM_DWT_CYCCNT > next_mark) {
+                if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
+            }
+            #endif
+
+            // Write first byte, read next byte
+            writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+            // Write second byte, read 3rd byte
+            writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+            allpixels.advanceData();
+
+            // Write third byte
+            writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            sei();
+            #endif
+        }
+
+        sei();
+
+        return ARM_DWT_CYCCNT - start;
+    }
+};
+
+#endif //defined(FASTLED_TEENSY4)
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index ce0d972e..d9175f85 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -19,7 +19,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     data_t mPinMask;
     data_ptr_t mPort;
     CMinWait<WAIT_TIME> mWait;
+    uint32_t off[3]; // DWT cycle counts set by showRGBInternal: [0] = T1+T2+T3 (full bit), [1] = T3, [2] = T2+T3
+
 public:
+    static constexpr int __DATA_PIN() { return DATA_PIN; }
+    static constexpr int __T1() { return T1; }
+    static constexpr int __T2() { return T2; }
+    static constexpr int __T3() { return T3; }
+    static constexpr EOrder __RGB_ORDER() { return RGB_ORDER; }
+    static constexpr int __XTRA0() { return XTRA0; }
+    static constexpr bool __FLIP() { return FLIP; }
+    static constexpr int __WAIT_TIME() { return WAIT_TIME; }
+
     virtual void init() {
         FastPin<DATA_PIN>::setOutput();
         mPinMask = FastPin<DATA_PIN>::mask();
@@ -38,46 +49,48 @@ protected:
         mWait.mark();
     }
 
-    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint32_t off1, register uint32_t off2, register uint32_t off3, register uint32_t & b) {
+    template<int BITS> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register uint32_t & b) {
         for(register uint32_t i = BITS-1; i > 0; i--) {
             while(ARM_DWT_CYCCNT < next_mark);
-            next_mark = ARM_DWT_CYCCNT + off1;
+            next_mark = ARM_DWT_CYCCNT + off[0];
             FastPin<DATA_PIN>::hi();
             if(b&0x80) {
-                while((next_mark - ARM_DWT_CYCCNT) > off2);
+                while((next_mark - ARM_DWT_CYCCNT) > off[1]);
                 FastPin<DATA_PIN>::lo();
             } else {
-                while((next_mark - ARM_DWT_CYCCNT) > off3);
+                while((next_mark - ARM_DWT_CYCCNT) > off[2]);
                 FastPin<DATA_PIN>::lo();
             }
             b <<= 1;
         }
 
         while(ARM_DWT_CYCCNT < next_mark);
-        next_mark = ARM_DWT_CYCCNT + off1;
+        next_mark = ARM_DWT_CYCCNT + off[0]; // last bit keeps the full bit period, same mapping as the loop (off1 -> off[0])
         FastPin<DATA_PIN>::hi();
 
         if(b&0x80) {
-            while((next_mark - ARM_DWT_CYCCNT) > off2);
+            while((next_mark - ARM_DWT_CYCCNT) > off[1]); // 1 bit: stay high until T3 is left (off2 -> off[1])
             FastPin<DATA_PIN>::lo();
         } else {
-            while((next_mark - ARM_DWT_CYCCNT) > off3);
+            while((next_mark - ARM_DWT_CYCCNT) > off[2]); // 0 bit: drop once T2+T3 is left (off3 -> off[2])
             FastPin<DATA_PIN>::lo();
         }
     }
 
     uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+        uint32_t start = ARM_DWT_CYCCNT; // cycle counter at entry; every return path reports cycles elapsed since here
+
         // Setup the pixel controller and load/scale the first byte
         pixels.preStepFirstByteDithering();
         register uint32_t b = pixels.loadAndScale0();
 
         cli();
-        uint32_t off1 = _FASTLED_NS_TO_DWT(T1+T2+T3);
-        uint32_t off2 = _FASTLED_NS_TO_DWT(T3);
-        uint32_t off3 = _FASTLED_NS_TO_DWT(T2+T3);
+        off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+        off[1] = _FASTLED_NS_TO_DWT(T3);
+        off[2] = _FASTLED_NS_TO_DWT(T2+T3);
         uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
 
-        uint32_t next_mark = ARM_DWT_CYCCNT + off1;
+        uint32_t next_mark = ARM_DWT_CYCCNT + off[0];
 
         while(pixels.has(1)) {
             pixels.stepDithering();
@@ -85,19 +98,19 @@ protected:
             cli();
             // if interrupts took longer than 45µs, punt on the current frame
             if(ARM_DWT_CYCCNT > next_mark) {
-                if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return 0; }
+                if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
             }
             #endif
             // Write first byte, read next byte
-            writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+            writeBits<8+XTRA0>(next_mark, b);
             b = pixels.loadAndScale1();
 
             // Write second byte, read 3rd byte
-            writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+            writeBits<8+XTRA0>(next_mark, b);
             b = pixels.loadAndScale2();
 
             // Write third byte, read 1st byte of next pixel
-            writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+            writeBits<8+XTRA0>(next_mark, b);
             b = pixels.advanceAndLoadAndScale0();
             #if (FASTLED_ALLOW_INTERRUPTS == 1)
             sei();
@@ -105,7 +118,7 @@ protected:
         };
 
         sei();
-        return ARM_DWT_CYCCNT;
+        return ARM_DWT_CYCCNT - start;
     }
 };
 #endif
diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
index 313ab0d3..0814c7fa 100644
--- a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
@@ -4,4 +4,6 @@
 #include "fastpin_arm_mxrt1062.h"
 #include "fastspi_arm_mxrt1062.h"
 #include "clockless_arm_mxrt1062.h"
+#include "block_clockless_arm_mxrt1062.h"
+
 #endif
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
index bfb1cb47..e1b15674 100644
--- a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -13,7 +13,7 @@ FASTLED_NAMESPACE_BEGIN
 /// Template definition for teensy 4.0 style ARM pins, providing direct access to the various GPIO registers. Note that this
 /// uses the full port GPIO registers. It calls through to pinMode for setting input/output on pins
 /// The registers are data output, set output, clear output, toggle output, input, and direction
-template<uint8_t PIN, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
+template<uint8_t PIN, uint32_t _BIT, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
 public:
     typedef volatile uint32_t * port_ptr_t;
     typedef uint32_t port_t;
@@ -39,6 +39,7 @@ public:
     inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO_DR_SET::r(); }
     inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO_DR_CLEAR::r(); }
     inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static uint32_t pinbit() __attribute__ ((always_inline)) { return _BIT; } // bit index of this pin within its port (mask() is 1 << pinbit())
 };
 
 
@@ -51,7 +52,7 @@ public:
 // at https://forum.pjrc.com/threads/54711-Teensy-4-0-First-Beta-Test?p=193716&viewfull=1#post193716
 // refer to GPIO1-4, we're going to use GPIO6-9 in the definitions below because the fast registers are what
 // the teensy core is using internally
-#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
+#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
 
 #if defined(FASTLED_TEENSY4) && defined(CORE_TEENSY)
 _IO32(1); _IO32(2); _IO32(3); _IO32(4); _IO32(5);