Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FastLED/FastLED.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Kris Couck <kriscouck@gmail.com> 2016-11-29 03:47:38 +0300
committer Kris Couck <kriscouck@gmail.com> 2017-01-16 01:41:40 +0300
commit cf5bae12293c482a98ba8f0cd7002816693dae16 (patch)
tree f299ab749a2fc411f9c13c872d27a129839097e3
parent 94e46e3978b9f2bce8f76383ff9a19879b245ad0 (diff)
Add K66 (Teensy 3.6)
-rw-r--r-- led_sysdefs.h 3
-rw-r--r-- platforms.h 3
-rw-r--r-- platforms/arm/k66/clockless_arm_k66.h 124
-rw-r--r-- platforms/arm/k66/clockless_block_arm_k66.h 334
-rw-r--r-- platforms/arm/k66/fastled_arm_k66.h 14
-rw-r--r-- platforms/arm/k66/fastpin_arm_k66.h 129
-rw-r--r-- platforms/arm/k66/fastspi_arm_k66.h 460
-rw-r--r-- platforms/arm/k66/led_sysdefs_arm_k66.h 46
8 files changed, 1113 insertions, 0 deletions
diff --git a/led_sysdefs.h b/led_sysdefs.h
index bd359ab9..57faad2f 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -10,6 +10,9 @@
#elif defined(__MK20DX128__) || defined(__MK20DX256__)
// Include k20/T3 headers
#include "platforms/arm/k20/led_sysdefs_arm_k20.h"
+#elif defined(__MK66FX1M0__)
+// Include k66/T3.6 headers
+#include "platforms/arm/k66/led_sysdefs_arm_k66.h"
#elif defined(__MKL26Z64__)
// Include kl26/T-LC headers
#include "platforms/arm/kl26/led_sysdefs_arm_kl26.h"
diff --git a/platforms.h b/platforms.h
index 29788644..7216de7c 100644
--- a/platforms.h
+++ b/platforms.h
@@ -10,6 +10,9 @@
#elif defined(__MK20DX128__) || defined(__MK20DX256__)
// Include k20/T3 headers
#include "platforms/arm/k20/fastled_arm_k20.h"
+#elif defined(__MK66FX1M0__)
+// Include k66/T3.6 headers
+#include "platforms/arm/k66/fastled_arm_k66.h"
#elif defined(__MKL26Z64__)
// Include kl26/T-LC headers
#include "platforms/arm/kl26/fastled_arm_kl26.h"
diff --git a/platforms/arm/k66/clockless_arm_k66.h b/platforms/arm/k66/clockless_arm_k66.h
new file mode 100644
index 00000000..bc2090b3
--- /dev/null
+++ b/platforms/arm/k66/clockless_arm_k66.h
@@ -0,0 +1,124 @@
+// Include guard for the k66 (Teensy 3.6) single-lane clockless controller. The guard is named after
+// this header so that it cannot collide with the guard used by the k20 header it was copied from.
+#ifndef __INC_CLOCKLESS_ARM_K66_H
+#define __INC_CLOCKLESS_ARM_K66_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the k66 family of chips, like that used in the teensy 3.6
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY3)
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// Single-lane clockless LED controller whose bit timing is derived from the ARM DWT cycle counter.
+//
+// Template parameters, as used in this class:
+//   DATA_PIN   - pin whose FastPin<> specialization supplies the output port and mask
+//   T1, T2, T3 - cycle counts; one bit period lasts T1+T2+T3 cycles (see writeBits)
+//   RGB_ORDER  - channel order handed to CPixelLEDController / PixelController
+//   XTRA0      - number of additional bit periods sent after each 8-bit channel (writeBits<8+XTRA0>)
+//   FLIP       - not referenced anywhere in this class
+//   WAIT_TIME  - passed to CMinWait and used as the retry delay (microseconds) in showPixels
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+ typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+ typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+ // Cached in init(); the transmit path below re-reads FastPin<DATA_PIN> directly.
+ data_t mPinMask;
+ data_ptr_t mPort;
+ CMinWait<WAIT_TIME> mWait;
+public:
+ // Switches DATA_PIN to output and caches its port pointer and bit mask.
+ virtual void init() {
+ FastPin<DATA_PIN>::setOutput();
+ mPinMask = FastPin<DATA_PIN>::mask();
+ mPort = FastPin<DATA_PIN>::port();
+ }
+
+ virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+ // Sends one frame. If showRGBInternal reports 0 (frame abandoned), interrupts are re-enabled for
+ // WAIT_TIME microseconds and the frame is attempted exactly once more; the result of the second
+ // attempt is not checked.
+ virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+ mWait.wait();
+ if(!showRGBInternal(pixels)) {
+ sei(); delayMicroseconds(WAIT_TIME); cli();
+ showRGBInternal(pixels);
+ }
+ mWait.mark();
+ }
+
+ // Clocks out BITS bits of b, most significant bit first (tests b&0x80, then shifts left).
+ // Each bit period starts by driving the port to `hi` once the cycle counter reaches next_mark, and
+ // next_mark is then advanced by T1+T2+T3. The port is driven to `lo` when the time left in the period
+ // drops to T3 (for a 1 bit) or T2+T3 (for a 0 bit), plus a small F_CPU-scaled allowance.
+ // The final bit is handled after the loop without shifting b again.
+ template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b) {
+ for(register uint32_t i = BITS-1; i > 0; i--) {
+ while(ARM_DWT_CYCCNT < next_mark);
+ next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+ FastPin<DATA_PIN>::fastset(port, hi);
+ if(b&0x80) {
+ while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+ FastPin<DATA_PIN>::fastset(port, lo);
+ } else {
+ while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+ FastPin<DATA_PIN>::fastset(port, lo);
+ }
+ b <<= 1;
+ }
+
+ // Last bit of this call: same waveform, no shift afterwards.
+ while(ARM_DWT_CYCCNT < next_mark);
+ next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+ FastPin<DATA_PIN>::fastset(port, hi);
+
+ if(b&0x80) {
+ while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+ FastPin<DATA_PIN>::fastset(port, lo);
+ } else {
+ while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+ FastPin<DATA_PIN>::fastset(port, lo);
+ }
+ }
+
+ // Transmits every pixel and returns the cycle counter value at the end, or 0 if the frame was
+ // abandoned because an interrupt delayed it too long (only possible when FASTLED_ALLOW_INTERRUPTS == 1).
+ // The method is static; the original rationale (keeping AVR register Y free of the this pointer) was
+ // written for the AVR implementation this code derives from.
+ // NOTE(review): pixels is taken by value, so the caller's controller is not advanced - presumably this
+ // is what allows showPixels to retry the frame from the start; confirm before changing to a reference.
+ static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+ // Get access to the clock
+ ARM_DEMCR |= ARM_DEMCR_TRCENA;
+ ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+ ARM_DWT_CYCCNT = 0;
+
+ // Precompute the full port values for "pin high" and "pin low", then start with the line low.
+ register data_ptr_t port = FastPin<DATA_PIN>::port();
+ register data_t hi = *port | FastPin<DATA_PIN>::mask();;
+ register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+ *port = lo;
+
+ // Setup the pixel controller and load/scale the first byte
+ pixels.preStepFirstByteDithering();
+ register uint8_t b = pixels.loadAndScale0();
+
+ cli();
+ uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+ while(pixels.has(1)) {
+ pixels.stepDithering();
+ #if (FASTLED_ALLOW_INTERRUPTS == 1)
+ cli();
+ // if interrupts delayed us by more than (WAIT_TIME-INTERRUPT_THRESHOLD)µs, punt on the current frame
+ if(ARM_DWT_CYCCNT > next_mark) {
+ if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+ }
+
+ // Other code may have changed the port while interrupts were enabled, so refresh hi/lo.
+ hi = *port | FastPin<DATA_PIN>::mask();
+ lo = *port & ~FastPin<DATA_PIN>::mask();
+ #endif
+ // Write first byte, read next byte
+ writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+ b = pixels.loadAndScale1();
+
+ // Write second byte, read 3rd byte
+ writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+ b = pixels.loadAndScale2();
+
+ // Write third byte, read 1st byte of next pixel
+ writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+ b = pixels.advanceAndLoadAndScale0();
+ #if (FASTLED_ALLOW_INTERRUPTS == 1)
+ sei();
+ #endif
+ };
+
+ sei();
+ return ARM_DWT_CYCCNT;
+ }
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/k66/clockless_block_arm_k66.h b/platforms/arm/k66/clockless_block_arm_k66.h
new file mode 100644
index 00000000..2a051d9b
--- /dev/null
+++ b/platforms/arm/k66/clockless_block_arm_k66.h
@@ -0,0 +1,334 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_K66_H
+#define __INC_BLOCK_CLOCKLESS_ARM_K66_H
+
+//
+// unmodified from k20 code
+//
+
+// Definitions for the multi-lane (parallel output) clockless controllers for the k66 family of chips, like that used in the teensy 3.6
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY3)
+#define FASTLED_HAS_BLOCKLESS 1
+
+// FIRST_PIN values that select a port: pin 15 is bit 0 of port C, pin 2 is bit 0 of port D.
+#define PORTC_FIRST_PIN 15
+#define PORTD_FIRST_PIN 2
+#define HAS_PORTDC 1
+
+// Bit mask of the lanes in use, capped at 8 bits when FIRST_PIN is 2 (port D) and 12 bits otherwise.
+// Expands in terms of the LANES and FIRST_PIN template parameters of the class using it.
+#define PORT_MASK (((1<<LANES)-1) & ((FIRST_PIN==2) ? 0xFF : 0xFFF))
+
+// NOTE: both arguments are evaluated twice; only pass side-effect-free expressions.
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y))
+// Number of lanes actually driven: LANES capped at 8 (FIRST_PIN == 2) or 12 (any other FIRST_PIN).
+#define USED_LANES ((FIRST_PIN==2) ? MIN(LANES,8) : MIN(LANES,12))
+
+#include "kinetis.h"
+
+FASTLED_NAMESPACE_BEGIN
+
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 40>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
+ typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+ typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+ data_t mPinMask;
+ data_ptr_t mPort;
+ CMinWait<WAIT_TIME> mWait;
+public:
+ virtual int size() { return CLEDController::size() * LANES; }
+
+ // Sends one frame on all lanes, honouring the minimum gap enforced by mWait.
+ // When interrupts are not allowed during output (FASTLED_ALLOW_INTERRUPTS == 0) the millisecond
+ // counter is advanced by the time the transfer took, since tick interrupts could not run.
+ // The macro name was previously misspelled, which made this compensation unconditional.
+ virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+ mWait.wait();
+ uint32_t clocks = showRGBInternal(pixels);
+ #if FASTLED_ALLOW_INTERRUPTS == 0
+ // Adjust the timer
+ long microsTaken = CLKS_TO_MICROS(clocks);
+ MS_COUNTER += (1 + (microsTaken / 1000));
+ #else
+ // Interrupts ran between pixels, so no compensation is applied.
+ (void)clocks;
+ #endif
+
+ mWait.mark();
+ }
+
+ // Switches the pins of every lane in use to output and caches the first pin's port and mask.
+ // Each switch deliberately falls through: entering at case N configures lanes N down to 1.
+ // Lane k drives bit k-1 of the selected port, so the pin listed for case k must be the pin that the
+ // FastPin table maps to that bit. On the k66, port C bits 8..11 are pins 35..38 (not 28/27/29/30 as
+ // on the k20, where this table was copied from).
+ // If FIRST_PIN is neither PORTC_FIRST_PIN nor PORTD_FIRST_PIN no pin is configured.
+ virtual void init() {
+ if(FIRST_PIN == PORTC_FIRST_PIN) { // PORTC
+ switch(USED_LANES) {
+ case 12: FastPin<38>::setOutput(); // C11
+ case 11: FastPin<37>::setOutput(); // C10
+ case 10: FastPin<36>::setOutput(); // C9
+ case 9: FastPin<35>::setOutput(); // C8
+ case 8: FastPin<12>::setOutput(); // C7
+ case 7: FastPin<11>::setOutput(); // C6
+ case 6: FastPin<13>::setOutput(); // C5
+ case 5: FastPin<10>::setOutput(); // C4
+ case 4: FastPin<9>::setOutput(); // C3
+ case 3: FastPin<23>::setOutput(); // C2
+ case 2: FastPin<22>::setOutput(); // C1
+ case 1: FastPin<15>::setOutput(); // C0
+ }
+ } else if(FIRST_PIN == PORTD_FIRST_PIN) { // PORTD
+ switch(USED_LANES) {
+ case 8: FastPin<5>::setOutput(); // D7
+ case 7: FastPin<21>::setOutput(); // D6
+ case 6: FastPin<20>::setOutput(); // D5
+ case 5: FastPin<6>::setOutput(); // D4
+ case 4: FastPin<8>::setOutput(); // D3
+ case 3: FastPin<7>::setOutput(); // D2
+ case 2: FastPin<14>::setOutput(); // D1
+ case 1: FastPin<2>::setOutput(); // D0
+ }
+ }
+ mPinMask = FastPin<FIRST_PIN>::mask();
+ mPort = FastPin<FIRST_PIN>::port();
+ }
+
+ virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+ // Per-lane byte buffer, also viewable as 16-bit and 32-bit words.
+ // Sized for 16 bytes even though at most 12 lanes are used: writeBits indexes shorts[] up to
+ // index 7 and hands bytes+8 to the 8-byte transpose when more than 8 lanes are in use, so a
+ // 12-byte union would be accessed out of bounds.
+ typedef union {
+ uint8_t bytes[16];
+ uint16_t shorts[8];
+ uint32_t raw[4];
+ } Lines;
+
+ // Emits 8 bit periods for the current channel on all lanes at once, and refills b with the next
+ // channel's (PX) scaled bytes while doing so.
+ // b is first transposed into b2 so that each element of b2 holds one bit position across the lanes.
+ // Every bit period: set all lane pins, clear the pins of lanes whose bit is 0 once the remaining time
+ // drops to T2+T3 (plus an F_CPU-scaled allowance), then clear all lane pins at T3 before the period ends.
+ // The BITS template parameter is not referenced in the body.
+ template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2) {
+ register Lines b2;
+ if(USED_LANES>8) {
+ transpose8<1,2>(b.bytes,b2.bytes);
+ transpose8<1,2>(b.bytes+8,b2.bytes+1);
+ } else {
+ transpose8x1(b.bytes,b2.bytes);
+ }
+ register uint8_t d = pixels.template getd<PX>(pixels);
+ register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+ // First USED_LANES/2 bit periods: after each one, load the next byte for two lanes.
+ for(register uint32_t i = 0; i < (USED_LANES/2); i++) {
+ while(ARM_DWT_CYCCNT < next_mark);
+ next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+ *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+
+ while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+ if(USED_LANES>8) {
+ *FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+ } else {
+ *FastPin<FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PORT_MASK);
+ }
+
+ while((next_mark - ARM_DWT_CYCCNT) > (T3));
+ *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+ b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+ b.bytes[i+(USED_LANES/2)] = pixels.template loadAndScale<PX>(pixels,i+(USED_LANES/2),d,scale);
+ }
+
+ // if folks use an odd number of lanes, get the last byte's value here
+ if(USED_LANES & 0x01) {
+ b.bytes[USED_LANES-1] = pixels.template loadAndScale<PX>(pixels,USED_LANES-1,d,scale);
+ }
+
+ // Remaining bit periods (up to 8 in total): output only, nothing left to load.
+ for(register uint32_t i = USED_LANES/2; i < 8; i++) {
+ while(ARM_DWT_CYCCNT < next_mark);
+ next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+ *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+ while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+ if(USED_LANES>8) {
+ *FastPin<FIRST_PIN>::cport() = ((~b2.shorts[i]) & PORT_MASK);
+ } else {
+ // b2.bytes[0] = 0;
+ *FastPin<FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PORT_MASK);
+ }
+
+ while((next_mark - ARM_DWT_CYCCNT) > (T3));
+ *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+
+ }
+ }
+
+
+
+ // Transmits every pixel on all lanes and returns the DWT cycle counter value at the point of return
+ // (the counter is reset to 0 on entry). When FASTLED_ALLOW_INTERRUPTS == 1 and an interrupt delayed
+ // the output by more than (WAIT_TIME-5) microseconds, the frame is abandoned part-way and the counter
+ // value is still returned.
+ // Interrupts are always re-enabled before returning, matching the other clockless controllers.
+ // The method is static; the original rationale (keeping AVR register Y free of the this pointer) was
+ // written for the AVR implementation this code derives from.
+ static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+ // Get access to the clock
+ ARM_DEMCR |= ARM_DEMCR_TRCENA;
+ ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+ ARM_DWT_CYCCNT = 0;
+
+ // Setup the pixel controller and load/scale the first byte
+ allpixels.preStepFirstByteDithering();
+ register Lines b0;
+
+ // NOTE(review): preStepFirstByteDithering() is invoked a second time here, unlike the single-lane
+ // controller which calls it once - confirm whether the repeat is intentional.
+ allpixels.preStepFirstByteDithering();
+ for(int i = 0; i < USED_LANES; i++) {
+ b0.bytes[i] = allpixels.loadAndScale0(i);
+ }
+
+ cli();
+ uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+ while(allpixels.has(1)) {
+ #if (FASTLED_ALLOW_INTERRUPTS == 1)
+ cli();
+ // if interrupts delayed us by more than (WAIT_TIME-5)µs, punt on the current frame
+ if(ARM_DWT_CYCCNT > next_mark) {
+ if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-5)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+ }
+ #endif
+ allpixels.stepDithering();
+
+ // Write first byte, read next byte
+ writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+ // Write second byte, read 3rd byte
+ writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+ allpixels.advanceData();
+
+ // Write third byte
+ writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+ #if (FASTLED_ALLOW_INTERRUPTS == 1)
+ sei();
+ #endif
+ };
+
+ // Balance the cli() issued before the loop. Without this, interrupts stay disabled after the call
+ // whenever FASTLED_ALLOW_INTERRUPTS != 1 or the loop body never runs.
+ sei();
+
+ return ARM_DWT_CYCCNT;
+ }
+};
+
+// Lane bit mask for the sixteen-way controller, expressed in terms of its LANES template parameter.
+// PMASK_LO is bits 0..7 (written to port D below), PMASK_HI is bits 8..15 shifted down (written to port C).
+#define PMASK ((1<<(LANES))-1)
+#define PMASK_HI (PMASK>>8 & 0xFF)
+#define PMASK_LO (PMASK & 0xFF)
+
+// Parallel clockless controller driving up to 16 lanes: lanes 1..8 on port D bits 0..7 and lanes 9..16
+// on port C bits 0..7 (see init and writeBits). T1/T2/T3 are cycle counts making up one bit period;
+// FLIP is not referenced in this class.
+template <uint8_t LANES, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PMASK> {
+ typedef typename FastPin<PORTC_FIRST_PIN>::port_ptr_t data_ptr_t;
+ typedef typename FastPin<PORTC_FIRST_PIN>::port_t data_t;
+
+ // Declared but not assigned anywhere in this class.
+ data_t mPinMask;
+ data_ptr_t mPort;
+ CMinWait<WAIT_TIME> mWait;
+public:
+ // Switches the pins of every lane in use to output.
+ // The switch deliberately falls through: entering at case N configures lanes N down to 1.
+ virtual void init() {
+ static_assert(LANES <= 16, "Maximum of 16 lanes for Teensy parallel controllers!");
+ // FastPin<30>::setOutput();
+ // FastPin<29>::setOutput();
+ // FastPin<27>::setOutput();
+ // FastPin<28>::setOutput();
+ switch(LANES) {
+ case 16: FastPin<12>::setOutput();
+ case 15: FastPin<11>::setOutput();
+ case 14: FastPin<13>::setOutput();
+ case 13: FastPin<10>::setOutput();
+ case 12: FastPin<9>::setOutput();
+ case 11: FastPin<23>::setOutput();
+ case 10: FastPin<22>::setOutput();
+ case 9: FastPin<15>::setOutput();
+
+ case 8: FastPin<5>::setOutput();
+ case 7: FastPin<21>::setOutput();
+ case 6: FastPin<20>::setOutput();
+ case 5: FastPin<6>::setOutput();
+ case 4: FastPin<8>::setOutput();
+ case 3: FastPin<7>::setOutput();
+ case 2: FastPin<14>::setOutput();
+ case 1: FastPin<2>::setOutput();
+ }
+ }
+
+ // Sends one frame, then advances the millisecond counter by the time the transfer took.
+ // NOTE(review): the macro tested below is spelled FASTLED_ALLOW_INTTERUPTS. Unless something defines
+ // that exact name the preprocessor evaluates it as 0, so the adjustment is always compiled in. That
+ // matches showRGBInternal, whose interrupt windows are disabled with "#if 0" - confirm before renaming.
+ virtual void showPixels(PixelController<RGB_ORDER, LANES, PMASK> & pixels) {
+ mWait.wait();
+ uint32_t clocks = showRGBInternal(pixels);
+ #if FASTLED_ALLOW_INTTERUPTS == 0
+ // Adjust the timer
+ long microsTaken = CLKS_TO_MICROS(clocks);
+ MS_COUNTER += (1 + (microsTaken / 1000));
+ #endif
+
+ mWait.mark();
+ }
+
+ // Per-lane byte buffer (one byte per lane), also viewable as 16-bit and 32-bit words.
+ typedef union {
+ uint8_t bytes[16];
+ uint16_t shorts[8];
+ uint32_t raw[4];
+ } Lines;
+
+ // Emits the current channel on all lanes and refills b with the next channel's (PX) scaled bytes.
+ // b.bytes[0..7] and b.bytes[8..15] are transposed separately; in bit period i, b2.bytes[7-i] is
+ // used for port D and b2.bytes[15-i] for port C.
+ // Every bit period: set all lane pins, clear the pins of lanes whose bit is 0 once the remaining time
+ // drops to T2+T3+6 cycles, then clear all lane pins at T3 before the period ends.
+ // The BITS template parameter is not referenced in the body.
+ // NOTE(review): the loop runs min(LANES, 8) times and emits one bit period per iteration, so fewer
+ // than 8 bit periods are sent per channel when LANES < 8 - confirm whether LANES < 8 is supported.
+ template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, PixelController<RGB_ORDER,LANES, PMASK> &pixels) { // , register uint32_t & b2) {
+ register Lines b2;
+ transpose8x1(b.bytes,b2.bytes);
+ transpose8x1(b.bytes+8,b2.bytes+8);
+ register uint8_t d = pixels.template getd<PX>(pixels);
+ register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+ for(register uint32_t i = 0; (i < LANES) && (i < 8); i++) {
+ while(ARM_DWT_CYCCNT < next_mark);
+ next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
+ *FastPin<PORTD_FIRST_PIN>::sport() = PMASK_LO;
+ *FastPin<PORTC_FIRST_PIN>::sport() = PMASK_HI;
+
+ while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+6));
+ *FastPin<PORTD_FIRST_PIN>::cport() = ((~b2.bytes[7-i]) & PMASK_LO);
+ *FastPin<PORTC_FIRST_PIN>::cport() = ((~b2.bytes[15-i]) & PMASK_HI);
+
+ while((next_mark - ARM_DWT_CYCCNT) > (T3));
+ *FastPin<PORTD_FIRST_PIN>::cport() = PMASK_LO;
+ *FastPin<PORTC_FIRST_PIN>::cport() = PMASK_HI;
+
+ // Load the next byte for lane i, and for lane i+8 when that lane exists.
+ b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+ if(LANES==16 || (LANES>8 && ((i+8) < LANES))) {
+ b.bytes[i+8] = pixels.template loadAndScale<PX>(pixels,i+8,d,scale);
+ }
+ }
+ }
+
+
+
+ // Transmits every pixel on all lanes and returns the DWT cycle counter value at the point of return
+ // (the counter is reset to 0 on entry). Both interrupt-window sections are compiled out by "#if 0",
+ // so interrupts stay disabled from the cli() before the loop until the sei() after it.
+ // The method is static; the original rationale (keeping AVR register Y free of the this pointer) was
+ // written for the AVR implementation this code derives from.
+ static uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, PMASK> &allpixels) {
+ // Get access to the clock
+ ARM_DEMCR |= ARM_DEMCR_TRCENA;
+ ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+ ARM_DWT_CYCCNT = 0;
+
+ // Setup the pixel controller and load/scale the first byte
+ allpixels.preStepFirstByteDithering();
+ register Lines b0;
+
+ // NOTE(review): preStepFirstByteDithering() is invoked a second time here, unlike the single-lane
+ // controller which calls it once - confirm whether the repeat is intentional.
+ allpixels.preStepFirstByteDithering();
+ for(int i = 0; i < LANES; i++) {
+ b0.bytes[i] = allpixels.loadAndScale0(i);
+ }
+
+ cli();
+ uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+ while(allpixels.has(1)) {
+ allpixels.stepDithering();
+ #if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+ cli();
+ // if interrupts delayed us by more than (WAIT_TIME-INTERRUPT_THRESHOLD)µs, punt on the current frame
+ if(ARM_DWT_CYCCNT > next_mark) {
+ if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+ }
+ #endif
+
+ // Write first byte, read next byte
+ writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+ // Write second byte, read 3rd byte
+ writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+ allpixels.advanceData();
+
+ // Write third byte
+ writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+ #if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+ sei();
+ #endif
+ };
+ sei();
+
+ return ARM_DWT_CYCCNT;
+ }
+};
+
+FASTLED_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/platforms/arm/k66/fastled_arm_k66.h b/platforms/arm/k66/fastled_arm_k66.h
new file mode 100644
index 00000000..7ef23c3f
--- /dev/null
+++ b/platforms/arm/k66/fastled_arm_k66.h
@@ -0,0 +1,14 @@
+#ifndef __INC_FASTLED_ARM_K66_H
+#define __INC_FASTLED_ARM_K66_H
+
+// Include the k66 headers
+#include "bitswap.h"
+#include "fastled_delay.h"
+#include "fastpin_arm_k66.h"
+#include "fastspi_arm_k66.h"
+//#include "octows2811_controller.h"
+//#include "smartmatrix_t3.h"
+#include "clockless_arm_k66.h"
+#include "clockless_block_arm_k66.h"
+
+#endif
\ No newline at end of file
diff --git a/platforms/arm/k66/fastpin_arm_k66.h b/platforms/arm/k66/fastpin_arm_k66.h
new file mode 100644
index 00000000..800ba268
--- /dev/null
+++ b/platforms/arm/k66/fastpin_arm_k66.h
@@ -0,0 +1,129 @@
+#ifndef __FASTPIN_ARM_K66_H
+#define __FASTPIN_ARM_K66_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+
+/// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers. Note that this
+/// uses the full port GPIO registers. In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower. It will need more fine tuning.
+/// The registers are data output, set output, clear output, toggle output, input, and direction
+// PIN is the board pin number handed to pinMode(); _MASK is the pin's bit within its GPIO port.
+// Each register parameter is a type whose static r() yields the corresponding port register.
+// _PDIR and _PDDR are accepted but not referenced by any member below.
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
+public:
+ typedef volatile uint32_t * port_ptr_t;
+ typedef uint32_t port_t;
+
+ // Direction changes go through pinMode() rather than writing the direction register directly.
+ inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+ inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+ // hi/lo/toggle write only this pin's mask to the set/clear/toggle register; set() overwrites the whole data output register.
+ inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; }
+ inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; }
+ inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; }
+
+ // Two toggles in a row: the pin ends in the state it started in.
+ inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+ inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; }
+
+ // Pointer-taking variants: hi/lo ignore the pointer; fastset writes val through it.
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+ inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+ // hival/loval: current data output register value with this pin's bit set / cleared.
+ inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; }
+ inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; }
+ // port/sport/cport: addresses of the data output, set output and clear output registers.
+ inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); }
+ inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_PSOR::r(); }
+ inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_PCOR::r(); }
+ inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+/// Template definition for teensy 3.0 style ARM pins using bit banding, providing direct access to the various GPIO registers. GCC
+/// does a poor job of optimizing around these accesses so they are not being used just yet.
+// PIN is the board pin number handed to pinMode(); _BIT is the pin's bit index within its GPIO port.
+// Each register parameter is a type whose rx<_BIT>() yields the bit-band alias address of that bit.
+// Only _PDOR and _PTOR are referenced below; unlike _ARMPIN this class has no sport()/cport().
+template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN_BITBAND {
+public:
+ typedef volatile uint32_t * port_ptr_t;
+ typedef uint32_t port_t;
+
+ // Direction changes go through pinMode() rather than writing the direction register directly.
+ inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+ inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+ // All writes go to the single-bit alias, so val is 1 or 0 rather than a full port value.
+ inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; }
+ inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; }
+ inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }
+
+ // Two toggles in a row: the pin ends in the state it started in.
+ inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+ inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; }
+
+ // Pointer-taking variants ignore the pointer and always write this pin's own alias.
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+ inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }
+
+ // Values to write for high/low, and the mask, are in terms of the single aliased bit.
+ inline static port_t hival() __attribute__ ((always_inline)) { return 1; }
+ inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
+ inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); }
+ inline static port_t mask() __attribute__ ((always_inline)) { return 1; }
+};
+
+// Macros for k66 pin access/definition
+// GPIO_BITBAND_ADDR maps (register, bit) to the address of that bit's word in the bit-band alias
+// region: (register address - 0x40000000) * 32 + bit * 4 + 0x42000000.
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+// _R(T) names the accessor struct for register T; _RD32(T) defines it, with r() returning the register
+// itself and rx<BIT>() returning the bit-band pointer for one of its bits.
+// _IO32(L) defines the accessor structs for the six GPIO registers of port letter L.
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+ template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+
+// _DEFPIN_ARM(PIN, BIT, L) specializes FastPin<PIN> (full-port access, mask 1 << BIT) and
+// FastPinBB<PIN> (bit-band access) for bit BIT of GPIO port letter L.
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+ _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; \
+ template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+ _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
+
+// Actual pin definitions
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+
+// Pin table: each entry is _DEFPIN_ARM(board pin number, bit within port, port letter).
+// Pins 0 through 63 are defined, so MAX_PIN is the highest pin number with a FastPin specialization.
+#define MAX_PIN 63
+_DEFPIN_ARM( 0, 16, B); _DEFPIN_ARM( 1, 17, B); _DEFPIN_ARM( 2, 0, D); _DEFPIN_ARM( 3, 12, A);
+_DEFPIN_ARM( 4, 13, A); _DEFPIN_ARM( 5, 7, D); _DEFPIN_ARM( 6, 4, D); _DEFPIN_ARM( 7, 2, D);
+_DEFPIN_ARM( 8, 3, D); _DEFPIN_ARM( 9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
+_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
+_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
+_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
+_DEFPIN_ARM(24, 26, E); _DEFPIN_ARM(25, 5, A); _DEFPIN_ARM(26, 14, A); _DEFPIN_ARM(27, 15, A);
+_DEFPIN_ARM(28, 16, A); _DEFPIN_ARM(29, 18, B); _DEFPIN_ARM(30, 19, B); _DEFPIN_ARM(31, 10, B);
+_DEFPIN_ARM(32, 11, B); _DEFPIN_ARM(33, 24, E); _DEFPIN_ARM(34, 25, E); _DEFPIN_ARM(35, 8, C);
+_DEFPIN_ARM(36, 9, C); _DEFPIN_ARM(37, 10, C); _DEFPIN_ARM(38, 11, C); _DEFPIN_ARM(39, 17, A);
+_DEFPIN_ARM(40, 28, A); _DEFPIN_ARM(41, 29, A); _DEFPIN_ARM(42, 26, A); _DEFPIN_ARM(43, 20, B);
+_DEFPIN_ARM(44, 22, B); _DEFPIN_ARM(45, 23, B); _DEFPIN_ARM(46, 21, B); _DEFPIN_ARM(47, 8, D);
+_DEFPIN_ARM(48, 9, D); _DEFPIN_ARM(49, 4, B); _DEFPIN_ARM(50, 5, B); _DEFPIN_ARM(51, 14, D);
+_DEFPIN_ARM(52, 13, D); _DEFPIN_ARM(53, 12, D); _DEFPIN_ARM(54, 15, D); _DEFPIN_ARM(55, 11, D);
+_DEFPIN_ARM(56, 10, E); _DEFPIN_ARM(57, 11, E); _DEFPIN_ARM(58, 0, E); _DEFPIN_ARM(59, 1, E);
+_DEFPIN_ARM(60, 2, E); _DEFPIN_ARM(61, 3, E); _DEFPIN_ARM(62, 4, E); _DEFPIN_ARM(63, 5, E);
+
+
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+#define SPI1 (*(SPI_t *)0x4002D000)
+
+#define SPI2_DATA 7
+#define SPI2_CLOCK 14
+
+#define FASTLED_TEENSY3
+#define ARM_HARDWARE_SPI
+#define HAS_HARDWARE_PIN_SUPPORT
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __FASTPIN_ARM_K66_H
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/platforms/arm/k66/fastspi_arm_k66.h
new file mode 100644
index 00000000..0f0abda8
--- /dev/null
+++ b/platforms/arm/k66/fastspi_arm_k66.h
@@ -0,0 +1,460 @@
+#ifndef __INC_FASTSPI_ARM_H
+#define __INC_FASTSPI_ARM_H
+
+//
+// unmodified from k20 code
+//
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+
+// Version 1.20 renamed SPI_t to KINETISK_SPI_t
+#if TEENSYDUINO >= 120
+#define SPI_t KINETISK_SPI_t
+#endif
+
+#ifndef KINETISK_SPI0
+#define KINETISK_SPI0 SPI0
+#endif
+
+#ifndef SPI_PUSHR_CONT
+#define SPI_PUSHR_CONT SPIX.PUSHR_CONT
+#define SPI_PUSHR_CTAS(X) SPIX.PUSHR_CTAS(X)
+#define SPI_PUSHR_EOQ SPIX.PUSHR_EOQ
+#define SPI_PUSHR_CTCNT SPIX.PUSHR_CTCNT
+#define SPI_PUSHR_PCS(X) SPIX.PUSHR_PCS(X)
+#endif
+
+// Template function that, on compilation, expands to a constant representing the highest bit set in a byte. Right now,
+// if no bits are set (value is 0), it returns 0, which is also the value returned if the lowest bit is the only bit
+// set (the zero-th bit). Unclear if I will want this to change at some point.
+// BitWork<VAL, BIT>::highestBit() yields the index of the highest set bit of VAL at or below BIT.
+// The search walks downwards one bit per template instantiation.
+template<int VAL, int BIT> class BitWork {
+public:
+ static int highestBit() __attribute__((always_inline)) {
+ if((VAL & (1 << BIT)) != 0) {
+ return BIT;
+ }
+ return BitWork<VAL, BIT-1>::highestBit();
+ }
+};
+// Recursion terminator: bit 0 is the last candidate, so the answer is 0 whether or not it is set.
+template<int VAL> class BitWork<VAL, 0> {
+public:
+ static int highestBit() __attribute__((always_inline)) {
+ return 0;
+ }
+};
+
+#define MAX(A, B) (( (A) > (B) ) ? (A) : (B))
+
+#define USE_CONT 0
+// intra-frame backup data
+// Snapshot of SPI and pin-mux settings taken by save_spi_state().
+struct SPIState {
+ // Saved values of the SPI peripheral's CTAR0 and CTAR1 registers.
+ uint32_t _ctar0,_ctar1;
+ // Saved pin config registers, in order: CORE_PIN7, CORE_PIN11, CORE_PIN13, CORE_PIN14.
+ uint32_t pins[4];
+};
+
+// extern SPIState gState;
+
+
+// Templated function to translate a clock divider value (VAL) into the prescalar, scalar, and clock
+// doubling settings, which setSPIRate() places in the PBR, BR/CSSCK and DBR fields of the CTAR registers.
+// All three outputs are written through the reference parameters; nothing is returned.
+// NOTE(review): the tables below presumably follow the Kinetis DSPI encoding (prescaler codes 0..3
+// meaning 2/3/5/7, scaler codes 0,1,2,3.. meaning 2/4/6/8..) - confirm against the reference manual.
+template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint32_t & dbl) {
+ switch(VAL) {
+ // Handle the dbl clock cases
+ case 0: case 1:
+ case 2: preScalar = 0; scalar = 0; dbl = 1; break;
+ case 3: preScalar = 1; scalar = 0; dbl = 1; break;
+ case 5: preScalar = 2; scalar = 0; dbl = 1; break;
+ case 7: preScalar = 3; scalar = 0; dbl = 1; break;
+
+ // Handle the scalar value 6 cases (since it's not a power of two, it won't get caught
+ // below)
+ case 9: preScalar = 1; scalar = 2; dbl = 1; break;
+ case 18: case 19: preScalar = 1; scalar = 2; dbl = 0; break;
+
+ case 15: preScalar = 2; scalar = 2; dbl = 1; break;
+ case 30: case 31: preScalar = 2; scalar = 2; dbl = 0; break;
+
+ case 21: case 22: case 23: preScalar = 3; scalar = 2; dbl = 1; break;
+ case 42: case 43: case 44: case 45: case 46: case 47: preScalar = 3; scalar = 2; dbl = 0; break;
+ default: {
+ // For each base B in {2,3,5,7}: pB is the highest set bit of VAL/B (searching bits 15..0),
+ // and wB = B * 2^pB is the candidate divider built from that base (0 when VAL/B is 0, except
+ // for base 2 which is always considered).
+ int p2 = BitWork<VAL/2, 15>::highestBit();
+ int p3 = BitWork<VAL/3, 15>::highestBit();
+ int p5 = BitWork<VAL/5, 15>::highestBit();
+ int p7 = BitWork<VAL/7, 15>::highestBit();
+
+ int w2 = 2 * (1 << p2);
+ int w3 = (VAL/3) > 0 ? 3 * (1 << p3) : 0;
+ int w5 = (VAL/5) > 0 ? 5 * (1 << p5) : 0;
+ int w7 = (VAL/7) > 0 ? 7 * (1 << p7) : 0;
+
+ // Pick the largest candidate; ties go to the smaller base because of the if/else order.
+ int maxval = MAX(MAX(w2, w3), MAX(w5, w7));
+
+ if(w2 == maxval) { preScalar = 0; scalar = p2; }
+ else if(w3 == maxval) { preScalar = 1; scalar = p3; }
+ else if(w5 == maxval) { preScalar = 2; scalar = p5; }
+ else if(w7 == maxval) { preScalar = 3; scalar = p7; }
+
+ // A power of 0 is expressed with the doubler instead; powers 1 and 2 are shifted down by one.
+ dbl = 0;
+ if(scalar == 0) { dbl = 1; }
+ else if(scalar < 3) { scalar--; }
+ }
+ }
+ return;
+}
+
+#define SPIX (*(SPI_t*)pSPIX)
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+class ARMHardwareSPIOutput {
+ Selectable *m_pSelect;
+ SPIState gState;
+
+ // Borrowed from the teensy3 SPSR emulation code -- note, enabling pin 7 disables pin 11 (and vice versa),
+ // and likewise enabling pin 14 disables pin 13 (and vice versa)
+ inline void enable_pins(void) __attribute__((always_inline)) {
+ //serial_print("enable_pins\n");
+ switch(_DATA_PIN) {
+ case 7:
+ CORE_PIN7_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ break;
+ case 11:
+ CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ break;
+ }
+
+ switch(_CLOCK_PIN) {
+ case 13:
+ CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ break;
+ case 14:
+ CORE_PIN14_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ break;
+ }
+ }
+
+ // Borrowed from the teensy3 SPSR emulation code. We disable the pins that we're using, and restore the state on the pins that we aren't using
+ inline void disable_pins(void) __attribute__((always_inline)) {
+ switch(_DATA_PIN) {
+ case 7: CORE_PIN7_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN11_CONFIG = gState.pins[1]; break;
+ case 11: CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN7_CONFIG = gState.pins[0]; break;
+ }
+
+ switch(_CLOCK_PIN) {
+ case 13: CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN14_CONFIG = gState.pins[3]; break;
+ case 14: CORE_PIN14_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); CORE_PIN13_CONFIG = gState.pins[2]; break;
+ }
+ }
+
+ static inline void update_ctars(uint32_t ctar0, uint32_t ctar1) __attribute__((always_inline)) {
+ if(SPIX.CTAR0 == ctar0 && SPIX.CTAR1 == ctar1) return;
+ uint32_t mcr = SPIX.MCR;
+ if(mcr & SPI_MCR_MDIS) {
+ SPIX.CTAR0 = ctar0;
+ SPIX.CTAR1 = ctar1;
+ } else {
+ SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+ SPIX.CTAR0 = ctar0;
+ SPIX.CTAR1 = ctar1;
+ SPIX.MCR = mcr;
+ }
+ }
+
+ static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) {
+ if (SPIX.CTAR0 == ctar) return;
+ uint32_t mcr = SPIX.MCR;
+ if (mcr & SPI_MCR_MDIS) {
+ SPIX.CTAR0 = ctar;
+ } else {
+ SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+ SPIX.CTAR0 = ctar;
+
+ SPIX.MCR = mcr;
+ }
+ }
+
+ static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) {
+ if (SPIX.CTAR1 == ctar) return;
+ uint32_t mcr = SPIX.MCR;
+ if (mcr & SPI_MCR_MDIS) {
+ SPIX.CTAR1 = ctar;
+ } else {
+ SPIX.MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+ SPIX.CTAR1 = ctar;
+ SPIX.MCR = mcr;
+
+ }
+ }
+
+ // Build the two CTAR words for the compile-time clock divider and hand them to
+ // update_ctars(). CTAR0 gets a frame size field of 7, CTAR1 of 15; every other
+ // field is identical between the two.
+ void setSPIRate() {
+     uint32_t prescaler = 0;
+     uint32_t baudScaler = 0;
+     uint32_t doubleRate = 0;
+
+     // getScalars (defined elsewhere) fills in the three values for _SPI_CLOCK_DIVIDER
+     getScalars<_SPI_CLOCK_DIVIDER>(prescaler, baudScaler, doubleRate);
+
+     // bits common to both CTARs; the CSSCK field reuses the baud scaler value
+     uint32_t common = SPI_CTAR_PBR(prescaler) | SPI_CTAR_BR(baudScaler) | SPI_CTAR_CSSCK(baudScaler);
+
+ #if USE_CONT == 1
+     common |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+ #endif
+
+     if(doubleRate) { common |= SPI_CTAR_DBR; }
+
+     update_ctars(SPI_CTAR_FMSZ(7) | common, SPI_CTAR_FMSZ(15) | common);
+ }
+ /**
+  * Record the SPI configuration that is live before this driver changes it: the two
+  * CTAR registers plus the pin configuration registers of pins 7, 11, 13 and 14.
+  * Everything is stored in gState. select() calls this as its first step.
+  */
+ void inline save_spi_state() __attribute__ ((always_inline)) {
+ // save ctar data
+ gState._ctar0 = SPIX.CTAR0;
+ gState._ctar1 = SPIX.CTAR1;
+
+ // save data for the not-us pins (slots 0..3 hold pins 7, 11, 13 and 14, in that order)
+ gState.pins[0] = CORE_PIN7_CONFIG;
+ gState.pins[1] = CORE_PIN11_CONFIG;
+ gState.pins[2] = CORE_PIN13_CONFIG;
+ gState.pins[3] = CORE_PIN14_CONFIG;
+ }
+ /**
+  * Write the CTAR values captured by save_spi_state() back through update_ctars().
+  * The saved pin configurations are deliberately not written here; release() calls
+  * disable_pins() before this function.
+  */
+ void inline restore_spi_state() __attribute__ ((always_inline)) {
+ // restore ctar data
+ update_ctars(gState._ctar0,gState._ctar1);
+
+ // restore data for the not-us pins (not necessary because disable_pins will do this)
+ // CORE_PIN7_CONFIG = gState.pins[0];
+ // CORE_PIN11_CONFIG = gState.pins[1];
+ // CORE_PIN13_CONFIG = gState.pins[2];
+ // CORE_PIN14_CONFIG = gState.pins[3];
+ }
+
+
+public:
+ ARMHardwareSPIOutput() { m_pSelect = NULL; } // no Selectable: select()/release() skip the m_pSelect calls
+ ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } // pSelect is invoked from select()/release(); the pointer is stored, not copied
+ void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } // replace the Selectable used by later select()/release() calls (NULL turns it off)
+
+ /**
+  * Hardware bring-up: makes the data and clock pins outputs, turns on the clock gate of
+  * the SPI module this instance drives when it is still off (writing a default CTAR0 in
+  * that case), then sets master mode and clears the module-disable and halt bits in MCR.
+  * Pin muxing and the real clock rate are not configured here; select() does that.
+  */
+ void init() {
+ // set the pins to output
+ FastPin<_DATA_PIN>::setOutput();
+ FastPin<_CLOCK_PIN>::setOutput();
+
+ // Enable the clock gate of whichever module pSPIX points at (SPI0 or SPI1), only if it is not on yet
+ uint32_t sim6 = SIM_SCGC6;
+ if((SPI_t*)pSPIX == &KINETISK_SPI0) {
+ if (!(sim6 & SIM_SCGC6_SPI0)) {
+ //serial_print("init1\n");
+ SIM_SCGC6 = sim6 | SIM_SCGC6_SPI0;
+ SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1); // default CTAR0 for a module that was just clocked
+ }
+ } else if((SPI_t*)pSPIX == &SPI1) {
+ if (!(sim6 & SIM_SCGC6_SPI1)) {
+ //serial_print("init1\n");
+ SIM_SCGC6 = sim6 | SIM_SCGC6_SPI1;
+ SPIX.CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1); // same default as the SPI0 branch
+ }
+ }
+
+ // Configure SPI as the master and enable
+ SPIX.MCR |= SPI_MCR_MSTR; // | SPI_MCR_CONT_SCKE);
+ SPIX.MCR &= ~(SPI_MCR_MDIS | SPI_MCR_HALT);
+
+ // pin/spi configuration happens on select
+ }
+ /**
+  * Wait for the pending output to finish; the block writers call this before release().
+  * Phase 1 spins, with interrupts disabled, while bits 12-15 of SR are non-zero
+  * (presumably the TX FIFO fill counter - confirm against the K66 reference manual),
+  * writing TCF back on every pass. Phase 2 waits for TCF to be set and then writes TCF
+  * and EOQF back.
+  * NOTE(review): sei() is called unconditionally, so interrupts are enabled on exit from
+  * phase 1 even if they were disabled when this function was entered.
+  */
+ static void waitFully() __attribute__((always_inline)) {
+ // Wait for the last byte to get shifted into the register
+ cli();
+ while( (SPIX.SR & 0xF000) > 0) {
+ // reset the TCF flag
+ SPIX.SR |= SPI_SR_TCF;
+ }
+ sei();
+
+ // wait for the TCF flag to get set
+ while (!(SPIX.SR & SPI_SR_TCF));
+ SPIX.SR |= (SPI_SR_TCF | SPI_SR_EOQF);
+ }
+ /**
+  * Cheap status polls on SR, used to throttle pushes into PUSHR.
+  * All three look at bits 12-15 of SR (presumably the TX FIFO fill counter - confirm
+  * against the K66 reference manual): needwait()/wait() test bit 14 (0x4000) alone,
+  * wait1() spins while the 4-bit field is 2 or more.
+  */
+ static bool needwait() __attribute__((always_inline)) { return (SPIX.SR & 0x4000); } // true while bit 14 of SR is set
+ static void wait() __attribute__((always_inline)) { while( (SPIX.SR & 0x4000) ); } // spin until bit 14 of SR clears
+ static void wait1() __attribute__((always_inline)) { while( (SPIX.SR & 0xF000) >= 0x2000); } // spin until the field drops below 2
+ /**
+  * Compile-time switches for the Write<> helper that follows, plus the two defaults
+  * (CM, WM) that writeBytesValueRaw() and writePixels() instantiate it with.
+  */
+ enum ECont { CONT, NOCONT }; // CONT: frames are pushed with SPI_PUSHR_CONT set
+ enum EWait { PRE, POST, NONE }; // where Write<> calls wait(): before the push, after it, or not at all
+ enum ELast { NOTLAST, LAST }; // LAST: frames are pushed with SPI_PUSHR_EOQ set
+ // CM follows the USE_CONT build setting; WM is fixed to PRE
+ #if USE_CONT == 1
+ #define CM CONT
+ #else
+ #define CM NOCONT
+ #endif
+ #define WM PRE
+
+ // Frame writer configured entirely at compile time:
+ //   CONT_STATE - CONT adds SPI_PUSHR_CONT to every frame
+ //   WAIT_STATE - PRE calls wait() before the push, POST after it, NONE not at all
+ //   LAST_STATE - LAST adds SPI_PUSHR_EOQ to every frame
+ // After each push TCF is written back to SR.
+ template<ECont CONT_STATE, EWait WAIT_STATE, ELast LAST_STATE> class Write {
+ public:
+     // push the 16 bits of w as one frame tagged SPI_PUSHR_CTAS(1)
+     static void writeWord(uint16_t w) __attribute__((always_inline)) {
+         uint32_t frame = SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+         if(CONT_STATE == CONT) { frame |= SPI_PUSHR_CONT; }
+         if(LAST_STATE == LAST) { frame |= SPI_PUSHR_EOQ; }
+
+         if(WAIT_STATE == PRE) { wait(); }
+         SPIX.PUSHR = frame;
+         SPIX.SR |= SPI_SR_TCF;
+         if(WAIT_STATE == POST) { wait(); }
+     }
+
+     // push the 8 bits of b as one frame tagged SPI_PUSHR_CTAS(0)
+     static void writeByte(uint8_t b) __attribute__((always_inline)) {
+         uint32_t frame = SPI_PUSHR_CTAS(0) | (b & 0xFF);
+         if(CONT_STATE == CONT) { frame |= SPI_PUSHR_CONT; }
+         if(LAST_STATE == LAST) { frame |= SPI_PUSHR_EOQ; }
+
+         if(WAIT_STATE == PRE) { wait(); }
+         SPIX.PUSHR = frame;
+         SPIX.SR |= SPI_SR_TCF;
+         if(WAIT_STATE == POST) { wait(); }
+     }
+ };
+ /**
+  * Fixed-configuration single-frame writers. Each one pushes one frame into PUSHR and
+  * then writes TCF back to SR. Naming scheme:
+  *   Word / Byte - 16-bit frame tagged SPI_PUSHR_CTAS(1) / 8-bit frame tagged SPI_PUSHR_CTAS(0)
+  *   Cont        - SPI_PUSHR_CONT is added to the frame
+  *   (no suffix) - wait() is called before the push
+  *   PostWait    - wait() is called after the push
+  *   NoWait      - wait() is not called at all
+  */
+ static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+ static void writeWordNoWait(uint16_t w) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+ // 8-bit variants
+ static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+ static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF);SPIX.SR |= SPI_SR_TCF; wait(); }
+ static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+ // 16-bit variants with SPI_PUSHR_CONT
+ static void writeWordCont(uint16_t w) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+ static void writeWordContNoWait(uint16_t w) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); SPIX.SR |= SPI_SR_TCF;}
+ // 8-bit variants with SPI_PUSHR_CONT
+ static void writeByteCont(uint8_t b) __attribute__((always_inline)) { wait(); SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+ static void writeByteContPostWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;wait(); }
+ static void writeByteContNoWait(uint8_t b) __attribute__((always_inline)) { SPIX.PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); SPIX.SR |= SPI_SR_TCF;}
+ /**
+  * Send bit number BIT of b as a frame of its own: CTAR1 is temporarily rewritten with its
+  * FMSZ field set to 0, writeWord() pushes 1 or 0 through CTAS(1), then the original CTAR1
+  * value is written back.
+  * NOTE(review): CTAR1 is restored right after the push, with no wait for the frame to
+  * finish in between - confirm this is safe on the hardware.
+  */
+ // not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+ template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+ uint32_t ctar1_save = SPIX.CTAR1;
+
+ // Clear out the FMSZ bits, reset them for 1 bit transferred for the start bit
+ uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(0);
+ update_ctar1(ctar1);
+
+ writeWord( (b & (1 << BIT)) != 0);
+
+ update_ctar1(ctar1_save);
+ }
+ /**
+  * Take over the SPI module for one transfer. Order: snapshot the current CTARs and pin
+  * configs, assert the optional Selectable, program this driver's CTARs, then call
+  * enable_pins(). Every select() is expected to be paired with a release().
+  */
+ void inline select() __attribute__((always_inline)) {
+ save_spi_state();
+ if(m_pSelect != NULL) { m_pSelect->select(); }
+ setSPIRate();
+ enable_pins();
+ }
+ /**
+  * Counterpart of select(): calls disable_pins(), releases the optional Selectable, and
+  * finally writes the CTAR values saved by select() back to the module.
+  */
+ void inline release() __attribute__((always_inline)) {
+ disable_pins();
+ if(m_pSelect != NULL) { m_pSelect->release(); }
+ restore_spi_state();
+ }
+
+ // Push `value` len times through the Write<CM, WM, NOTLAST> byte writer. "Raw" because
+ // there is no select()/release() bracket and no final waitFully() in here.
+ static void writeBytesValueRaw(uint8_t value, int len) {
+     for(; len != 0; --len) {
+         Write<CM, WM, NOTLAST>::writeByte(value);
+     }
+ }
+
+ // Send the same byte len times as one complete transaction: select(), the repeated
+ // writeByte() calls, waitFully() for the output to finish, release().
+ void writeBytesValue(uint8_t value, int len) {
+     select();
+     for(; len != 0; --len) { writeByte(value); }
+     waitFully();
+     release();
+ }
+
+ // Write a block of len uint8_ts out as one complete transaction.
+ //   D    - policy type: every byte is passed through D::adjust() before it is sent and
+ //          D::postBlock(len) is called once after the last byte has been pushed
+ //   data - first byte to send
+ //   len  - number of bytes to send
+ // The transfer is bracketed by select()/release(); waitFully() runs before release().
+ template <class D> void writeBytes(uint8_t *data, int len) {
+     uint8_t *end = data + len;
+     select();
+     // could be optimized to write 16bit words out instead of 8bit bytes
+     while(data != end) {
+         writeByte(D::adjust(*data++));
+     }
+     D::postBlock(len);
+     waitFully();
+     release();
+ }
+
+ // Same as above with the DATA_NOP policy.
+ void writeBytes(uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+ /**
+  * Send every pixel of a PixelController, three bytes per pixel, as one select()/release()
+  * transaction.
+  *   FLAGS     - when FLAG_START_BIT is set, each pixel is sent as a 9-bit frame (a 1 bit
+  *               followed by the first byte) and two 8-bit frames; otherwise the bytes are
+  *               packed into 16-bit frames, two pixels (three frames) per loop pass
+  *   D         - policy type: each byte goes through D::adjust(), and D::postBlock(len) is
+  *               called once after the last pixel
+  *   RGB_ORDER - channel order, forwarded to PixelController
+  */
+ // pixels is taken by value; the loops below advance this local copy
+ template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+ select();
+ int len = pixels.mLen;
+
+ // FLAGS is a template constant, so the two branches below are mutually exclusive per instantiation
+ if((FLAGS & FLAG_START_BIT) == 0) {
+ // No start bit needed: write out as many 16-bit blocks as we can
+ while(pixels.has(2)) {
+ // Load and write out the first two bytes
+ if(WM == NONE) { wait1(); }
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale0()) << 8 | D::adjust(pixels.loadAndScale1()));
+
+ // Load and write out the next two bytes (step dithering, advance data in between since we
+ // cross pixels here)
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale2()) << 8 | D::adjust(pixels.stepAdvanceAndLoadAndScale0()));
+
+ // Load and write out the next two bytes
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale1()) << 8 | D::adjust(pixels.loadAndScale2()));
+ pixels.stepDithering();
+ pixels.advanceData();
+ }
+
+ if(pixels.has(1)) {
+ if(WM == NONE) { wait1(); }
+ // write out the rest as alternating 16/8-bit blocks (likely to be just one)
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(pixels.loadAndScale0()) << 8 | D::adjust(pixels.loadAndScale1()));
+ Write<CM, WM, NOTLAST>::writeByte(D::adjust(pixels.loadAndScale2()));
+ }
+
+ D::postBlock(len);
+ waitFully();
+ } else if(FLAGS & FLAG_START_BIT) {
+ uint32_t ctar1_save = SPIX.CTAR1;
+
+ // Clear out the FMSZ bits, reset them for 9 bits transferred for the start bit
+ uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8);
+ update_ctar1(ctar1);
+
+ while(pixels.has(1)) {
+ writeWord( 0x100 | D::adjust(pixels.loadAndScale0())); // 0x100 is the start bit in front of the 8 data bits
+ writeByte(D::adjust(pixels.loadAndScale1()));
+ writeByte(D::adjust(pixels.loadAndScale2()));
+ pixels.advanceData();
+ pixels.stepDithering();
+ }
+ D::postBlock(len);
+ waitFully();
+
+ // restore ctar1
+ update_ctar1(ctar1_save);
+ }
+ release();
+ }
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/k66/led_sysdefs_arm_k66.h b/platforms/arm/k66/led_sysdefs_arm_k66.h
new file mode 100644
index 00000000..0b0c701c
--- /dev/null
+++ b/platforms/arm/k66/led_sysdefs_arm_k66.h
@@ -0,0 +1,46 @@
+#ifndef __INC_LED_SYSDEFS_ARM_K66_H
+#define __INC_LED_SYSDEFS_ARM_K66_H
+// Platform tags for the K66 (Teensy 3.6) port
+#define FASTLED_TEENSY3
+#define FASTLED_ARM
+// The tunables below only get a default when the build has not defined them already
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+// FASTLED_ACCURATE_CLOCK is defined whenever interrupts are allowed
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+// CLK_DBL is defined only when F_CPU is 192 MHz
+#if (F_CPU == 192000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile uint8_t RoReg; /**< 8-bit register type used for read-only access (volatile, but not const-qualified) */
+typedef volatile uint8_t RwReg; /**< 8-bit register type used for read-write access (volatile) */
+#endif
+// Millisecond tick counter: only declared here, the definition lives outside this header
+extern volatile uint32_t systick_millis_count;
+# define MS_COUNTER systick_millis_count
+
+
+// Default to using PROGMEM, since TEENSY3 provides it
+// even though all it does is ignore it. Just being
+// conservative here in case TEENSY3 changes.
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+
+#endif