diff options
-rw-r--r-- | FastSPI_LED2.h | 10 | ||||
-rw-r--r-- | examples/Fast2Dev/Fast2Dev.ino | 46 | ||||
-rw-r--r-- | fastpin.h | 2 | ||||
-rw-r--r-- | fastspi.h | 242 | ||||
-rw-r--r-- | preview_changes.txt | 3 |
5 files changed, 280 insertions, 23 deletions
diff --git a/FastSPI_LED2.h b/FastSPI_LED2.h index 22d645e4..4866aa6e 100644 --- a/FastSPI_LED2.h +++ b/FastSPI_LED2.h @@ -36,7 +36,7 @@ public: __attribute__((always_inline)) inline static uint8_t adjust(register uint8_t data) { return (data>>1) | 0x80; } }; -template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SELECT_PIN, uint8_t SPI_SPEED = 0 > +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SELECT_PIN, uint8_t SPI_SPEED = 2 > class LPD8806Controller : public CLEDController { typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; SPI mSPI; @@ -76,7 +76,7 @@ public: // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SELECT_PIN, uint8_t SPI_SPEED = 1> +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SELECT_PIN, uint8_t SPI_SPEED = 3> class WS2801Controller : public CLEDController { typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; SPI mSPI; @@ -94,9 +94,9 @@ public: } virtual void showRGB(uint8_t *data, int nLeds) { - mWaitDelay.wait(); + // mWaitDelay.wait(); mSPI.writeBytes3(data, nLeds * 3); - mWaitDelay.mark(); + // mWaitDelay.mark(); } #ifdef SUPPORT_ARGB @@ -120,7 +120,7 @@ class SM16716Controller : public CLEDController { #if defined(__MK20DX128__) // for Teensy 3.0 // Have to force software SPI for the teensy 3.0 right now because it doesn't deal well // with flipping in and out of hardware SPI - typedef SoftwareSPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; + typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; #else typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; #endif diff --git a/examples/Fast2Dev/Fast2Dev.ino b/examples/Fast2Dev/Fast2Dev.ino index 5628ea71..749e00d3 100644 --- a/examples/Fast2Dev/Fast2Dev.ino +++ b/examples/Fast2Dev/Fast2Dev.ino @@ -17,7 +17,7 @@ // ////////////////////////////////////////////////// -#define NUM_LEDS 10 +#define NUM_LEDS 160 struct CRGB { byte g; 
byte r; byte b; }; @@ -25,11 +25,12 @@ struct CRGB leds[NUM_LEDS]; // gdn clk data pwr // Note: timing values in the code below are stale/out of date +// Hardware SPI Teensy 3 - .362ms for an 86 led frame // Hardware SPI - .652ms for an 86 led frame @8Mhz (3.1Mbps?), .913ms @4Mhz 1.434ms @2Mhz // Hardware SPIr2 - .539ms @8Mhz, .799 @4Mhz, 1.315ms @2Mhz // With the wait ordering reversed, .520ms at 8Mhz, .779ms @4Mhz, 1.3ms @2Mhz -// LPD8806Controller<11, 13, 10> LED; -SM16716Controller<11, 13, 10> LED; +LPD8806Controller<11, 13, 10> LED; +// SM16716Controller<11, 13, 10> LED; //LPD8806Controller<11, 13, 14> LED; // LPD8806Controller<2, 1, 0> LED; // teensy pins @@ -53,7 +54,7 @@ SM16716Controller<11, 13, 10> LED; // WS2801Controller<11, 13, 10, 0> LED; // Same Port, non-hardware SPI - 1.2ms for an 86 led frame, 1.12ms with large switch -// WS2801Controller<12, 13, 10> LED; +// WS2801Controller<11, 13, 10, 0> LED; // Different Port, non-hardware SPI - 1.47ms for an 86 led frame // WS2801Controller<7, 13, 10> LED; @@ -108,6 +109,9 @@ void setup() { #endif } +int count = 0; +long start = millis(); + void loop() { #if 0 memset(leds, 255, NUM_LEDS * sizeof(struct CRGB)); @@ -123,22 +127,30 @@ void loop() { case 2: leds[iLed].b = 128; break; } + if(count == 0) { + start = millis(); + } + + if(count++ == 1000) { + count = 0; + DPRINT("Time for 1000 frames: "); DPRINTLN(millis() - start); + } LED.showRGB((byte*)leds, NUM_LEDS);; //DPRINTLN("waiting"); - delay(20); +// delay(20); } } - for(int i = 0; i < 64; i++) { - memset(leds, i, NUM_LEDS * 3); - LED.showRGB((byte*)leds, NUM_LEDS);; - // DPRINTLN("waiting"); - delay(40); - } - for(int i = 64; i >= 0; i--) { - memset(leds, i, NUM_LEDS * 3); - LED.showRGB((byte*)leds, NUM_LEDS);; - // DPRINTLN("waiting"); - delay(40); - } + // for(int i = 0; i < 64; i++) { + // memset(leds, i, NUM_LEDS * 3); + // LED.showRGB((byte*)leds, NUM_LEDS);; + // // DPRINTLN("waiting"); + // delay(40); + // } + // for(int i = 64; i >= 0; i--) { + 
// memset(leds, i, NUM_LEDS * 3); + // LED.showRGB((byte*)leds, NUM_LEDS);; + // // DPRINTLN("waiting"); + // delay(40); + // } #endif }
\ No newline at end of file @@ -343,7 +343,7 @@ _DEFPIN_ARM(28, 8, C); _DEFPIN_ARM(29, 10, C); _DEFPIN_ARM(30, 11, C); _DEFPIN_A _DEFPIN_ARM(32, 18, B); _DEFPIN_ARM(33, 4, A); #define SPI_DATA 11 -#define SPI_DATA 13 +#define SPI_CLOCK 13 #else @@ -675,6 +675,240 @@ public: }; +#if defined(__MK20DX128__) && defined(CORE_TEENSY) + +template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_SPEED> +class ARMHardwareSPIOutput { + Selectable *m_pSelect; +public: + ARMHardwareSPIOutput() { m_pSelect = NULL; } + ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } + void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } + + static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) { + if (SPI0_CTAR0 == ctar) return; + uint32_t mcr = SPI0_MCR; + if (mcr & SPI_MCR_MDIS) { + SPI0_CTAR0 = ctar; + } else { + SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT; + SPI0_CTAR0 = ctar; + SPI0_MCR = mcr; + } + } + + static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) { + if (SPI0_CTAR1 == ctar) return; + uint32_t mcr = SPI0_MCR; + if (mcr & SPI_MCR_MDIS) { + SPI0_CTAR1 = ctar; + } else { + SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT; + SPI0_CTAR1 = ctar; + SPI0_MCR = mcr; + + } + } + + static inline void set_ctar1_bits(int bits) { + // Set ctar1 to 16 bits + int ctar = SPI0_CTAR1; + + // clear the FMSZ bits + ctar &= SPI_CTAR_FMSZ(0x0F); + ctar |= SPI_CTAR_FMSZ((bits-1) & 0x0F); + + update_ctar1(ctar); + } + + void init() { + uint8_t clr; + + // set the pins to output + FastPin<_DATA_PIN>::setOutput(); + FastPin<_CLOCK_PIN>::setOutput(); + release(); + + SPCR |= ((1<<SPE) | (1<<MSTR) ); // enable SPI as master + SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); // clear out the prescaler bits + + clr = SPSR; // clear SPI status register + clr = SPDR; // clear SPI data register + + bool b2x = false; + int hiBit = 0; + int spd = _SPI_SPEED; + while(spd >>= 1) { hiBit++; } + + // Speed mappings are a little different, here they are 
based on the highest bit set in the speed parameter. + // If bit 8 is set, it's at osc/128, bit 7, then osc/64, etc... down the line. + switch(hiBit) { + /* fosc/2 */ case 0: // no bits set + case 1: // speed set to 1 + case 2: // speed set to 2 + b2x=true; break; + /* fosc/4 */ case 3: break; + /* fosc/8 */ case 4: SPCR |= (1<<SPR0); b2x=true; break; + /* fosc/16 */ case 5: SPCR |= (1<<SPR0); break; + /* fosc/32 */ case 6: SPCR |= (1<<SPR1); b2x=true; break; + /* fosc/64 */ case 7: SPCR |= (1<<SPR1); break; + // /* fosc/64 */ case 6: SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); b2x=true; break; + /* fosc/128 */ default: SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); break; + } + if(b2x) { SPSR |= (1<<SPI2X); } + else { SPSR &= ~ (1<<SPI2X); } + + // force speed faster + switch(_SPI_SPEED) { + case 0: // ~20Mbps + { + uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(0) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0) | SPI_CTAR_DBR; + uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(0) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0) | SPI_CTAR_DBR; + update_ctar0(ctar0); + update_ctar1(ctar1); + break; + } + case 1: // ~15Mbps + { + uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0) | SPI_CTAR_DBR; + uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0) | SPI_CTAR_DBR; + update_ctar0(ctar0); + update_ctar1(ctar1); + break; + } + case 2: // ~10Mbps + { + uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(0) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0); + uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(0) | SPI_CTAR_BR(0) | SPI_CTAR_CSSCK(0); + update_ctar0(ctar0); + update_ctar1(ctar1); + break; + } + default: + { + // Configure ctar1 to be 16 bits based off of ctar0's settings + update_ctar1(SPI0_CTAR0); + + set_ctar1_bits(16); + } + } + + // push 192 0s to prime the spi stuff + select(); + writeByteNoWait(0); + for(int i = 0; i < 191; i++) { + writeByte(0); writeByte(0); writeByte(0); + } + waitFully(); + release(); + } + + static void 
waitFully() __attribute__((always_inline)) { + while( (SPI0_SR & 0xF000) > 0); + while (!(SPI0_SR & SPI_SR_TCF)); + SPI0_SR |= SPI_SR_TCF; + } + static void wait() __attribute__((always_inline)) { while( (SPI0_SR & 0xF000) >= 0x4000); } + + + static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI0_PUSHR_CTAS(1) | (w & 0xFFFF); } + + static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPI0_PUSHR = (b & 0xFF); } + static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = (b & 0xFF); wait(); } + static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = (b & 0xFF); } + + // not the most efficient mechanism in the world - but should be enough for sm16716 and friends + template <uint8_t BIT> inline static void writeBit(uint8_t b) { + uint32_t ctar1_save = SPI0_CTAR1; + + // Clear out the FMSZ bits, reset them for 9 bits transferred for the start bit + uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8); + update_ctar1(ctar1); + + writeWord( (b & (1 << BIT)) != 0); + + update_ctar1(ctar1_save); + } + + void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } + void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } + + void writeBytesValue(uint8_t value, int len) { + select(); + while(len--) { + writeByte(value); + } + waitFully(); + release(); + } + + // Write a block of n uint8_ts out + template <class D> void writeBytes(register uint8_t *data, int len) { + uint8_t *end = data + len; + select(); + while(data != end) { + writeByte(D::adjust(*data++)); + } + waitFully(); + release(); + } + + void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); } + + // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. 
The template + // parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping + template <uint8_t SKIP, class D> void writeBytes3(register uint8_t *data, int len) { + uint8_t *end = data + len; + select(); + if(false && !SKIP && ((len % 2) == 0)) { + switch(len % 8) { + case 0: while(data != end) { + writeWord(D::adjust(*data++) << 8 | D::adjust(*data++)); + case 6: + writeWord(D::adjust(*data++) << 8 | D::adjust(*data++)); + case 4: + writeWord(D::adjust(*data++) << 8 | D::adjust(*data++)); + case 2: + writeWord(D::adjust(*data++) << 8 | D::adjust(*data++)); + wait(); + } + } + waitFully(); + } else if(SKIP & FLAG_START_BIT) { + uint32_t ctar1_save = SPI0_CTAR1; + + // Clear out the FMSZ bits, reset them for 9 bits transferred for the start bit + uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8); + update_ctar1(ctar1); + + while(data != end) { + data += (MASK_SKIP_BITS & SKIP); + writeWord( 0x100 | D::adjust(*data++)); + writeByte(D::adjust(*data++)); + writeByte(D::adjust(*data++)); + } + waitFully(); + + // restore ctar1 + update_ctar1(ctar1_save); + } else { + while(data != end) { + data += (MASK_SKIP_BITS & SKIP); + writeByte(D::adjust(*data++)); + writeWord(D::adjust(*data++) << 8 | D::adjust(*data++)); + } + waitFully(); + } + release(); + } + + template <uint8_t SKIP> void writeBytes3(register uint8_t *data, int len) { writeBytes3<SKIP, DATA_NOP>(data, len); } + template <class D> void writeBytes3(register uint8_t *data, int len) { writeBytes3<0, D>(data, len); } + void writeBytes3(register uint8_t *data, int len) { writeBytes3<0, DATA_NOP>(data, len); } + +}; +#endif + // Clock speed dividers #define SPEED_DIV_2 2 #define SPEED_DIV_4 4 @@ -701,9 +935,17 @@ class SoftwareSPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SP #ifndef FORCE_SOFTWARE_SPI #if defined(SPI_DATA) && defined(SPI_CLOCK) +#if defined(__MK20DX128__) && defined(CORE_TEENSY) + +template<uint8_t SPI_SPEED> +class 
SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {}; + +#else + template<uint8_t SPI_SPEED> class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public AVRHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {}; +#endif #else #warning "No hardware SPI pins defined. All SPI access will default to bitbanged output" diff --git a/preview_changes.txt b/preview_changes.txt index 50d7aef9..464b9777 100644 --- a/preview_changes.txt +++ b/preview_changes.txt @@ -1,3 +1,6 @@ +Preview 3 +* True hardware SPI support for teensy (up to 20Mbit output!) +* Minor bug fixes/tweaks Preview 2 * Rename pin class to FastPin * Replace latch with select, more accurate description of what it does |