Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FastLED/FastLED.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Garcia <danielgarcia@gmail.com>2013-11-11 02:54:41 +0400
committerDaniel Garcia <danielgarcia@gmail.com>2013-11-11 02:54:41 +0400
commit6bcfa714588b12a72bdde36a1f0a43871fd5d567 (patch)
tree74c4fadde71b107f9a823928602141d673c0d9b5
parente325d5d3f934aed2b301c224352b41a1d07e3693 (diff)
parent59edcab79837185feeea2dfe6f46b2c4ad17b8d8 (diff)
Merge branch 'FastSPI_LED2'
-rw-r--r--FastLED.cpp79
-rw-r--r--FastLED.h147
-rw-r--r--chipsets.h262
-rw-r--r--clockless.h318
-rw-r--r--controller.h56
-rw-r--r--delay.h62
-rw-r--r--dmx.h115
-rw-r--r--examples/Fast2Dev/Fast2Dev.ino98
-rw-r--r--examples/FirstLight/FirstLight.ino66
-rw-r--r--examples/RGBCalibrate/RGBCalibrate.ino66
-rw-r--r--fastpin.h424
-rw-r--r--fastspi.h91
-rw-r--r--fastspi_arm.h386
-rw-r--r--fastspi_avr.h314
-rw-r--r--fastspi_bitbang.h368
-rw-r--r--fastspi_dma.h0
-rw-r--r--hsv2rgb.cpp495
-rw-r--r--hsv2rgb.h59
-rw-r--r--lib8tion.cpp242
-rw-r--r--lib8tion.h1272
-rw-r--r--pixeltypes.h659
-rw-r--r--preview_changes.txt57
22 files changed, 5636 insertions, 0 deletions
diff --git a/FastLED.cpp b/FastLED.cpp
new file mode 100644
index 00000000..85095210
--- /dev/null
+++ b/FastLED.cpp
@@ -0,0 +1,79 @@
+#include "FastSPI_LED2.h"
+
+
+CFastLED LEDS;
+CFastLED & FastSPI_LED = LEDS;
+CFastLED & FastSPI_LED2 = LEDS;
+CFastLED & FastLED = LEDS;
+
+// Initial value of CRGB::Squant, packed from three digits of the build time (__TIME__ is "hh:mm:ss").
+// Every term is widened to uint32_t before it is shifted: where int is 16 bits wide,
+// (digit * 50) << 8 can reach 64000, which overflows a signed int and corrupts the upper bytes.
+uint32_t CRGB::Squant = ((uint32_t)((__TIME__[4]-'0') * 28))<<16 | ((uint32_t)((__TIME__[6]-'0')*50))<<8 | (uint32_t)((__TIME__[7]-'0')*28);
+
+/// Set up an empty controller table: the brightness scale starts at 255 and every
+/// one of the NUM_CONTROLLERS slots is zeroed.
+CFastLED::CFastLED() {
+    m_nScale = 255;
+    m_nControllers = NUM_CONTROLLERS;
+    // zero-fill the slot table so that each pLedController reads back as NULL (slot unused)
+    memset8(m_Controllers, 0, sizeof(CControllerInfo) * m_nControllers);
+}
+
+/// Register an LED controller together with the pixel buffer it should display.
+///
+/// @param pLed          controller to register; a NULL pointer is rejected
+/// @param data          pixel buffer the controller reads from
+/// @param nLedsOrOffset the led count when nLedsIfOffset is not positive; otherwise the
+///                      index of the first entry of data that this controller displays
+/// @param nLedsIfOffset the led count when nLedsOrOffset carries an offset
+/// @return pLed after it has been stored and init() has been called on it, or NULL when
+///         pLed is NULL or all m_nControllers slots are already in use
+CLEDController *CFastLED::addLeds(CLEDController *pLed,
+                                  const struct CRGB *data,
+                                  int nLedsOrOffset, int nLedsIfOffset) {
+    // A NULL controller cannot be initialized, and storing it would leave the slot
+    // looking unused, so refuse it before touching the table.
+    if(pLed == NULL) {
+        return NULL;
+    }
+
+    int nOffset = (nLedsIfOffset > 0) ? nLedsOrOffset : 0;
+    int nLeds = (nLedsIfOffset > 0) ? nLedsIfOffset : nLedsOrOffset;
+
+    int target = -1;
+
+    // Figure out where to put the new led controller: the first unused slot wins
+    for(int i = 0; i < m_nControllers; i++) {
+        if(m_Controllers[i].pLedController == NULL) {
+            target = i;
+            break;
+        }
+    }
+
+    // if we have a spot, use it!
+    if(target != -1) {
+        m_Controllers[target].pLedController = pLed;
+        m_Controllers[target].pLedData = data;
+        m_Controllers[target].nOffset = nOffset;
+        m_Controllers[target].nLeds = nLeds;
+        pLed->init();
+        return pLed;
+    }
+
+    // table is full
+    return NULL;
+}
+
+/// Send each registered controller its slice of pixel data at the given brightness scale.
+/// The walk stops at the first slot that has no controller.
+void CFastLED::show(uint8_t scale) {
+    for(int slot = 0; slot < m_nControllers; slot++) {
+        CControllerInfo & info = m_Controllers[slot];
+        if(info.pLedController == NULL) {
+            return;
+        }
+        // the controller displays nLeds entries beginning nOffset entries into the buffer
+        info.pLedController->show(info.pLedData + info.nOffset, info.nLeds, scale);
+    }
+}
+
+/// Make each registered controller show one solid color on all of its leds at the given
+/// brightness scale. The walk stops at the first slot that has no controller.
+void CFastLED::showColor(const struct CRGB & color, uint8_t scale) {
+    for(int slot = 0; slot < m_nControllers; slot++) {
+        CControllerInfo & info = m_Controllers[slot];
+        if(info.pLedController == NULL) {
+            return;
+        }
+        info.pLedController->showColor(color, info.nLeds, scale);
+    }
+}
+
+/// Show black on every registered controller and, optionally, zero the pixel data behind them.
+///
+/// @param includeLedData when true, the CRGB entries each controller displays are zeroed as well
+void CFastLED::clear(boolean includeLedData) {
+    showColor(CRGB(0,0,0), 0);
+    if(includeLedData) {
+        for(int i = 0; i < m_nControllers; i++) {
+            if(m_Controllers[i].pLedData != NULL) {
+                // show() reads nLeds entries starting at pLedData + nOffset, so that is the range
+                // to zero - not the first nLeds entries of the buffer.
+                memset8((void*)(m_Controllers[i].pLedData + m_Controllers[i].nOffset), 0, sizeof(struct CRGB) * m_Controllers[i].nLeds);
+            } else {
+                // first slot without data ends the walk
+                return;
+            }
+        }
+    }
+}
diff --git a/FastLED.h b/FastLED.h
new file mode 100644
index 00000000..a2891fcd
--- /dev/null
+++ b/FastLED.h
@@ -0,0 +1,147 @@
+#ifndef __INC_FASTSPI_LED2_H
+#define __INC_FASTSPI_LED2_H
+
+#include "controller.h"
+#include "fastpin.h"
+#include "fastspi.h"
+#include "clockless.h"
+#include "lib8tion.h"
+#include "hsv2rgb.h"
+#include "chipsets.h"
+#include "dmx.h"
+
+/// Chipset selector for the CFastLED::addLeds overloads that take a data pin and a clock pin.
+/// Each value is dispatched to the controller class of the same name (e.g. LPD8806 -> LPD8806Controller).
+enum ESPIChipsets {
+ LPD8806,
+ WS2801,
+ SM16716
+};
+
+/// Chipset selector for the CFastLED::addLeds overloads that take only a data pin.
+/// Several values share one controller class in the addLeds dispatch below.
+enum EClocklessChipsets {
+ // only dispatched when FASTSPI_USE_DMX_SIMPLE is defined
+ DMX,
+ TM1809,
+ TM1804,
+ TM1803,
+ WS2811,
+ // WS2812, WS2812B and NEOPIXEL are dispatched to the same controller as WS2811
+ WS2812,
+ WS2812B,
+ WS2811_400,
+ NEOPIXEL,
+ UCS1903
+};
+
+#define NUM_CONTROLLERS 8
+
+/// Front-end object that owns a fixed-size table of LED controllers and fans show/showColor/clear
+/// calls out to them. The addLeds templates pick a controller class from a chipset enum value,
+/// allocate it with new, and register it through the non-template addLeds.
+class CFastLED {
+    /// One registered controller plus the slice of pixel data it displays.
+    struct CControllerInfo {
+        CLEDController *pLedController;   // NULL marks an unused slot
+        const struct CRGB *pLedData;      // start of the caller's pixel buffer
+        int nLeds;                        // number of leds shown by this controller
+        int nOffset;                      // index into pLedData of the first led shown
+    };
+
+    CControllerInfo m_Controllers[NUM_CONTROLLERS];
+    int m_nControllers;
+    uint8_t m_nScale;   // brightness scale used by show()/showColor() when none is passed
+
+public:
+    CFastLED();
+
+    /// Register an already constructed controller. Returns the controller, or NULL when no slot is free.
+    CLEDController *addLeds(CLEDController *pLed, const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0);
+
+    /// Create and register a controller for a data/clock chipset using the chipset's default
+    /// byte order and data rate. Returns NULL for a chipset value that has no controller.
+    template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        switch(CHIPSET) {
+            case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+        }
+        // never fall off the end of a non-void function for an unhandled chipset value
+        return NULL;
+    }
+
+    /// As above, with an explicit byte order.
+    template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        switch(CHIPSET) {
+            case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+        }
+        return NULL;
+    }
+
+    /// As above, with an explicit byte order and SPI data rate.
+    template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        switch(CHIPSET) {
+            case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset);
+            case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset);
+        }
+        return NULL;
+    }
+
+#ifdef SPI_DATA
+    // Convenience overloads that use the SPI_DATA / SPI_CLOCK pins when those macros are defined.
+    template<ESPIChipsets CHIPSET> CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB>(data, nLedsOrOffset, nLedsIfOffset);
+    }
+
+    template<ESPIChipsets CHIPSET, EOrder RGB_ORDER> CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER>(data, nLedsOrOffset, nLedsIfOffset);
+    }
+
+    template<ESPIChipsets CHIPSET, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE> CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER, SPI_DATA_RATE>(data, nLedsOrOffset, nLedsIfOffset);
+    }
+
+#endif
+
+    /// Create and register a controller for a single-data-pin chipset using the default byte order.
+    /// Returns NULL for a chipset value that has no controller (e.g. DMX without FASTSPI_USE_DMX_SIMPLE).
+    template<EClocklessChipsets CHIPSET, uint8_t DATA_PIN>
+    CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        switch(CHIPSET) {
+#ifdef FASTSPI_USE_DMX_SIMPLE
+            case DMX: return addLeds(new DMXController<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+#endif
+            case TM1804:
+            case TM1809: return addLeds(new TM1809Controller800Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case TM1803: return addLeds(new TM1803Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case UCS1903: return addLeds(new UCS1903Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2812:
+            case WS2812B:
+            case NEOPIXEL:
+            case WS2811: return addLeds(new WS2811Controller800Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2811_400: return addLeds(new WS2811Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset);
+        }
+        return NULL;
+    }
+
+    /// As above, with an explicit byte order.
+    template<EClocklessChipsets CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER>
+    CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+        switch(CHIPSET) {
+#ifdef FASTSPI_USE_DMX_SIMPLE
+            case DMX: return addLeds(new DMXController<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+#endif
+            // TM1804 shares the TM1809 controller, exactly as in the overload without RGB_ORDER
+            case TM1804:
+            case TM1809: return addLeds(new TM1809Controller800Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case TM1803: return addLeds(new TM1803Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case UCS1903: return addLeds(new UCS1903Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2812:
+            case WS2812B:
+            case NEOPIXEL:
+            case WS2811: return addLeds(new WS2811Controller800Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+            case WS2811_400: return addLeds(new WS2811Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset);
+        }
+        return NULL;
+    }
+
+    /// Set / read the brightness scale used by the show() and showColor() overloads that take no scale.
+    void setBrightness(uint8_t scale) { m_nScale = scale; }
+    uint8_t getBrightness() { return m_nScale; }
+
+    /// Update all our controllers with the current led colors, using the passed in brightness
+    void show(uint8_t scale);
+
+    /// Update all our controllers with the current led colors
+    void show() { show(m_nScale); }
+
+    /// Show black on all controllers; when includeLedData is true the pixel data is zeroed as well
+    void clear(boolean includeLedData = true);
+
+    /// Show a single color on all controllers, using the passed in brightness
+    void showColor(const struct CRGB & color, uint8_t scale);
+
+    /// Show a single color on all controllers, using the stored brightness
+    void showColor(const struct CRGB & color) { showColor(color, m_nScale); }
+
+};
+
+extern CFastLED & FastSPI_LED;
+extern CFastLED & FastSPI_LED2;
+extern CFastLED & FastLED;
+extern CFastLED LEDS;
+
+#endif
diff --git a/chipsets.h b/chipsets.h
new file mode 100644
index 00000000..5688ed48
--- /dev/null
+++ b/chipsets.h
@@ -0,0 +1,262 @@
+#ifndef __INC_CHIPSETS_H
+#define __INC_CHIPSETS_H
+
+#include "pixeltypes.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// LPD8806 controller class - takes data/clock/select pin values (N.B. should take an SPI definition?)
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// LED controller for LPD8806 strips, written out through SPIOutput on the given data/clock pins.
+/// Every color byte is halved and sent with its high bit set; blocks of zero bytes are written
+/// after the pixel data (the code below calls this the latch).
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(24) >
+class LPD8806Controller : public CLEDController {
+ typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
+
+ // Per-byte adjustment policy handed to SPI::writeBytes3 as a template argument.
+ class LPD8806_ADJUST {
+ public:
+ // LPD8806 spec wants the high bit of every rgb data byte sent out to be set.
+ __attribute__((always_inline)) inline static uint8_t adjust(register uint8_t data) { return (data>>1) | 0x80; }
+ __attribute__((always_inline)) inline static uint8_t adjust(register uint8_t data, register uint8_t scale) { return (scale8(data, scale)>>1) | 0x80; }
+ // writes one zero byte per 64 units of len, rounded up
+ // NOTE(review): presumably invoked by writeBytes3 once the data block is out - confirm in fastspi
+ __attribute__((always_inline)) inline static void postBlock(int len) {
+ SPI::writeBytesValueRaw(0, ((len+63)>>6));
+ }
+
+ };
+
+ SPI mSPI;
+ // largest led count passed to checkClear() so far; reset to 0 by init()
+ int mClearedLeds;
+
+ // Calls clearLine() only when nLeds exceeds every count seen before.
+ void checkClear(int nLeds) {
+ if(nLeds > mClearedLeds) {
+ clearLine(nLeds);
+ mClearedLeds = nLeds;
+ }
+ }
+
+ // Writes one zero byte per 64 leds, rounded up.
+ void clearLine(int nLeds) {
+ int n = ((nLeds + 63) >> 6);
+ mSPI.writeBytesValue(0, n);
+ }
+public:
+ LPD8806Controller() {}
+ // initialize the SPI output and forget any previously cleared length
+ virtual void init() {
+ mSPI.init();
+ mClearedLeds = 0;
+ }
+
+ // Write 0x80 (a zero color value with the high bit set) for all 3 bytes of every led,
+ // followed by the trailing zero bytes.
+ virtual void clearLeds(int nLeds) {
+ mSPI.select();
+ mSPI.writeBytesValueRaw(0x80, nLeds * 3);
+ mSPI.writeBytesValueRaw(0, ((nLeds*3+63)>>6));
+ mSPI.release();
+ }
+
+ // Show one color on nLeds leds. The three bytes are scaled and adjusted once, in
+ // RGB_ORDER, and then repeated for every led.
+ virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+ mSPI.select();
+ uint8_t a = 0x80 | (scale8(data[RGB_BYTE0(RGB_ORDER)], scale) >> 1);
+ uint8_t b = 0x80 | (scale8(data[RGB_BYTE1(RGB_ORDER)], scale) >> 1);
+ uint8_t c = 0x80 | (scale8(data[RGB_BYTE2(RGB_ORDER)], scale) >> 1);
+ int iLeds = 0;
+
+ while(iLeds++ < nLeds) {
+ mSPI.writeByte(a);
+ mSPI.writeByte(b);
+ mSPI.writeByte(c);
+ }
+
+ // latch in the world
+ mSPI.writeBytesValueRaw(0, ((nLeds*3+63)>>6));
+ mSPI.release();
+ }
+
+ // Write nLeds CRGB entries (nLeds * 3 bytes) through writeBytes3 with the LPD8806 adjustment.
+ virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
+ mSPI.template writeBytes3<LPD8806_ADJUST, RGB_ORDER>((byte*)data, nLeds * 3, scale);
+ }
+
+#ifdef SUPPORT_ARGB
+ // Variant for CARGB data, passed to writeBytes3 as nLeds * 4 bytes.
+ virtual void show(const struct CARGB *data, int nLeds, uint8_t scale) {
+ checkClear(nLeds);
+ mSPI.template writeBytes3<1, LPD8806_ADJUST, RGB_ORDER>((byte*)data, nLeds * 4, scale);
+ }
+#endif
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// WS2801 definition - takes data/clock/select pin values (N.B. should take an SPI definition?)
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// LED controller for WS2801 strips, written out through SPIOutput on the given data/clock pins.
+/// Every write waits on mWaitDelay first and marks it afterwards, which enforces a minimum
+/// gap between frames (CMinWait<500>; presumably microseconds - confirm against CMinWait).
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(1)>
+class WS2801Controller : public CLEDController {
+    typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
+    SPI mSPI;
+    CMinWait<500> mWaitDelay;
+public:
+    WS2801Controller() {}
+
+    /// Initialize the SPI output and start the inter-frame timer.
+    virtual void init() {
+        mSPI.init();
+        mWaitDelay.mark();
+    }
+
+    /// Write zeros for all 3 bytes of nLeds leds.
+    virtual void clearLeds(int nLeds) {
+        mWaitDelay.wait();
+        mSPI.writeBytesValue(0, nLeds*3);
+        mWaitDelay.mark();
+    }
+
+    /// Show one color on nLeds leds. The three bytes are scaled once, in RGB_ORDER, and
+    /// then repeated for every led.
+    virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+        mWaitDelay.wait();
+        mSPI.select();
+        uint8_t a = scale8(data[RGB_BYTE0(RGB_ORDER)], scale);
+        uint8_t b = scale8(data[RGB_BYTE1(RGB_ORDER)], scale);
+        uint8_t c = scale8(data[RGB_BYTE2(RGB_ORDER)], scale);
+
+        while(nLeds--) {
+            mSPI.writeByte(a);
+            mSPI.writeByte(b);
+            mSPI.writeByte(c);
+        }
+        mSPI.waitFully();
+        mSPI.release();
+        mWaitDelay.mark();
+    }
+
+    /// Write nLeds CRGB entries (nLeds * 3 bytes). The scale default matches the
+    /// CLEDController declaration and the other controllers.
+    virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
+        mWaitDelay.wait();
+        mSPI.template writeBytes3<0, RGB_ORDER>((byte*)data, nLeds * 3, scale);
+        mWaitDelay.mark();
+    }
+
+#ifdef SUPPORT_ARGB
+    /// Variant for CARGB data, passed to writeBytes3 as nLeds * 4 bytes. The parameter must be
+    /// CARGB: a second CRGB overload would redeclare the method above and leave the base
+    /// class's CARGB show() unimplemented.
+    virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) {
+        mWaitDelay.wait();
+        mSPI.template writeBytes3<1, RGB_ORDER>((byte*)data, nLeds * 4, scale);
+        mWaitDelay.mark();
+    }
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// SM16716 definition - takes data/clock/select pin values (N.B. should take an SPI definition?)
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// LED controller for SM16716 strips, written out through SPIOutput on the given data/clock pins.
+/// Each led receives a single 1 bit followed by its three color bytes, and a header of
+/// 50 zero bits is written after the pixel data.
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(16)>
+class SM16716Controller : public CLEDController {
+ typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
+ SPI mSPI;
+
+ // Selects the SPI output itself, writes the 50 zero bits, then releases it again.
+ void writeHeader() {
+ // Write out 50 zeros to the spi line (6 blocks of 8 followed by two single bit writes)
+ mSPI.select();
+ mSPI.writeBytesValueRaw(0, 6);
+ mSPI.waitFully();
+ mSPI.template writeBit<0>(0);
+ mSPI.template writeBit<0>(0);
+ mSPI.release();
+ }
+
+public:
+ SM16716Controller() {}
+
+ // initialize the SPI output
+ virtual void init() {
+ mSPI.init();
+ }
+
+ // Write a start bit plus three zero bytes for each led, then the header.
+ virtual void clearLeds(int nLeds) {
+ mSPI.select();
+ while(nLeds--) {
+ mSPI.template writeBit<0>(1);
+ mSPI.writeByte(0);
+ mSPI.writeByte(0);
+ mSPI.writeByte(0);
+ }
+ mSPI.waitFully();
+ mSPI.release();
+ writeHeader();
+ }
+
+ // Show one color on nLeds leds. The three bytes are scaled once, in RGB_ORDER, and
+ // then repeated (each time preceded by a start bit) for every led.
+ virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+ mSPI.select();
+ uint8_t a = scale8(data[RGB_BYTE0(RGB_ORDER)], scale);
+ uint8_t b = scale8(data[RGB_BYTE1(RGB_ORDER)], scale);
+ uint8_t c = scale8(data[RGB_BYTE2(RGB_ORDER)], scale);
+
+ while(nLeds--) {
+ mSPI.template writeBit<0>(1);
+ mSPI.writeByte(a);
+ mSPI.writeByte(b);
+ mSPI.writeByte(c);
+ }
+ // NOTE(review): unlike clearLeds(), the header is written here before release(), so
+ // writeHeader()'s own select()/release() pair runs inside this one - confirm this is intended
+ writeHeader();
+ mSPI.release();
+ }
+
+ // Write nLeds CRGB entries (nLeds * 3 bytes), then the header.
+ virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
+ // Make sure the FLAG_START_BIT flag is set to ensure that an extra 1 bit is sent at the start
+ // of each triplet of bytes for rgb data
+ // writeHeader();
+ mSPI.template writeBytes3<FLAG_START_BIT, RGB_ORDER>((byte*)data, nLeds * 3, scale);
+ writeHeader();
+ }
+
+#ifdef SUPPORT_ARGB
+ // Variant for CARGB data (nLeds * 4 bytes); here the 50 zero bits are written before the pixel data.
+ virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) {
+ mSPI.writeBytesValue(0, 6);
+ mSPI.template writeBit<0>(0);
+ mSPI.template writeBit<0>(0);
+
+ // Make sure the FLAG_START_BIT flag is set to ensure that an extra 1 bit is sent at the start
+ // of each triplet of bytes for rgb data
+ mSPI.template writeBytes3<1 | FLAG_START_BIT, RGB_ORDER>((byte*)data, nLeds * 4, scale);
+ }
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Clockless template instantiations
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Each chipset below is a ClocklessController with its three timing points given in nanoseconds
+// and converted to clock cycles by NS(). The NO_TIME check after each class emits a build warning
+// when the CPU clock leaves too few cycles for those timings.
+
+// UCS1903 - 500ns, 1500ns, 500ns
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
+class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, NS(500), NS(1500), NS(500), RGB_ORDER> {};
+#if NO_TIME(500, 1500, 500)
+#warning "No enough clock cycles available for the UCS1903"
+#endif
+
+// TM1809 - 350ns, 350ns, 550ns
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
+class TM1809Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS(350), NS(550), RGB_ORDER> {};
+#if NO_TIME(350, 350, 550)
+#warning "No enough clock cycles available for the TM1809"
+#endif
+
+// WS2811 - 400ns, 400ns, 450ns
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
+class WS2811Controller800Khz : public ClocklessController<DATA_PIN, NS(400), NS(400), NS(450), RGB_ORDER> {};
+#if NO_TIME(400, 400, 450)
+#warning "No enough clock cycles available for the WS2811 (800khz)"
+#endif
+
+// WS2811@400khz - 800ns, 800ns, 900ns
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
+class WS2811Controller400Khz : public ClocklessController<DATA_PIN, NS(800), NS(800), NS(900), RGB_ORDER> {};
+#if NO_TIME(800, 800, 900)
+#warning "No enough clock cycles available for the WS2811 (400Khz)"
+#endif
+
+// TM1803 - 750ns, 750ns, 750ns
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
+class TM1803Controller400Khz : public ClocklessController<DATA_PIN, NS(750), NS(750), NS(750), RGB_ORDER> {};
+#if NO_TIME(750, 750, 750)
+#warning "No enough clock cycles available for the TM1803"
+#endif
+
+#endif
diff --git a/clockless.h b/clockless.h
new file mode 100644
index 00000000..238276ef
--- /dev/null
+++ b/clockless.h
@@ -0,0 +1,318 @@
+#ifndef __INC_CLOCKLESS_H
+#define __INC_CLOCKLESS_H
+
+#include "controller.h"
+#include "lib8tion.h"
+#include <avr/interrupt.h> // for cli/se definitions
+
+// Macro to convert from nano-seconds to clocks and clocks to nano-seconds
+// #define NS(_NS) (_NS / (1000 / (F_CPU / 1000000L)))
+// NS(ns) converts nanoseconds to clock cycles; CLKS_TO_MICROS(clks) converts clock cycles to
+// microseconds. At 96MHz and above, half of F_CPU is used for both conversions.
+// The arguments and the complete expansions are parenthesized so that the macros keep their
+// meaning when given a compound expression or when used inside a larger expression.
+#if F_CPU < 96000000
+#define NS(_NS) ((((_NS) * (F_CPU / 1000000L))) / 1000)
+#define CLKS_TO_MICROS(_CLKS) (((long)(_CLKS)) / (F_CPU / 1000000L))
+#else
+#define NS(_NS) ((((_NS) * (F_CPU / 2000000L))) / 1000)
+#define CLKS_TO_MICROS(_CLKS) (((long)(_CLKS)) / (F_CPU / 2000000L))
+#endif
+
+// Macro for making sure there's enough time available
+#define NO_TIME(A, B, C) (NS(A) < 3 || NS(B) < 3 || NS(C) < 6)
+
+#if defined(__MK20DX128__)
+ extern volatile uint32_t systick_millis_count;
+# define MS_COUNTER systick_millis_count
+#else
+# if defined(CORE_TEENSY)
+ extern volatile unsigned long timer0_millis_count;
+# define MS_COUNTER timer0_millis_count
+# else
+ extern volatile unsigned long timer0_millis;
+# define MS_COUNTER timer0_millis
+# endif
+#endif
+
+// Scaling macro choice
+#if defined(LIB8_ATTINY)
+# define INLINE_SCALE(B, SCALE) delaycycles<3>()
+# warning "No hardware multiply, inline brightness scaling disabled"
+#else
+# define INLINE_SCALE(B, SCALE) B = scale8_LEAVING_R1_DIRTY(B, SCALE)
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Base template for clockless controllers. These controllers have 3 control points in their cycle for each bit. The first point
+// is where the line is raised hi. The second point is where the line is dropped low for a zero. The third point is where the
+// line is dropped low for a one. T1, T2, and T3 correspond to the timings for those three in clock cycles.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Bit-banged controller for single-data-pin chipsets. T1, T2 and T3 are the three per-bit timing
+/// points in clock cycles; WAIT_TIME is the minimum gap enforced between frames through CMinWait.
+/// Output runs with interrupts disabled, and the millisecond counter is advanced afterwards
+/// by the computed duration of the frame.
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int WAIT_TIME = 50>
+class ClocklessController : public CLEDController {
+ typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+ typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+ // pin mask and port, captured by init()
+ data_t mPinMask;
+ data_ptr_t mPort;
+ CMinWait<WAIT_TIME> mWait;
+public:
+ // make the data pin an output and remember its mask and port
+ virtual void init() {
+ FastPin<DATA_PIN>::setOutput();
+ mPinMask = FastPin<DATA_PIN>::mask();
+ mPort = FastPin<DATA_PIN>::port();
+ }
+
+#if defined(__MK20DX128__)
+ // We don't use the bitSetFast methods for ARM.
+#else
+ // Emit bit N of b: raise the line, wait, then drop it at the second point unless the bit is set
+ // (the sbrs skips that store), and drop it unconditionally at the third point.
+ template <int N>inline static void bitSetFast(register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t b) {
+ // First cycle
+ FastPin<DATA_PIN>::fastset(port, hi); // 1/2 clock cycle if using out
+ delaycycles<T1 - (_CYCLES(DATA_PIN) + 1)>(); // 1st cycle length minus 1/2 clock for out, 1 clock for sbrs
+ __asm__ __volatile__ ("sbrs %0, %1" :: "r" (b), "M" (N) :); // 1 clock for check (+1 if skipping, next op is also 1 clock)
+
+ // Second cycle
+ FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out
+ delaycycles<T2 - _CYCLES(DATA_PIN)>(); // 2nd cycle length minus 1/2 clock for out
+
+ // Third cycle
+ FastPin<DATA_PIN>::fastset(port, lo); // 1 clock cycle if using out
+ delaycycles<T3 - _CYCLES(DATA_PIN)>(); // 3rd cycle length minus 1 clock for out
+ }
+
+ #define END_OF_BYTE
+ #define END_OF_LOOP 6 // loop compare, jump, next uint8_t load
+ // Same as bitSetFast, except that the final delay is shortened by ADJ cycles so the caller
+ // can spend those cycles on work done between bits.
+ template <int N, int ADJ>inline static void bitSetLast(register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t b) {
+ // First cycle
+ FastPin<DATA_PIN>::fastset(port, hi); // 1 clock cycle if using out, 2 otherwise
+ delaycycles<T1 - (_CYCLES(DATA_PIN))>(); // 1st cycle length minus 1 clock for out, 1 clock for sbrs
+ __asm__ __volatile__ ("sbrs %0, %1" :: "r" (b), "M" (N) :); // 1 clock for check (+1 if skipping, next op is also 1 clock)
+
+ // Second cycle
+ FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out
+ delaycycles<T2 - (_CYCLES(DATA_PIN))>(); // 2nd cycle length minus 1/2 clock for out
+
+ // Third cycle
+ FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out
+ delaycycles<T3 - (_CYCLES(DATA_PIN) + ADJ)>(); // 3rd cycle length minus 7 clocks for out, loop compare, jump, next uint8_t load
+ }
+#endif
+
+ // clear by showing black at scale 0
+ virtual void clearLeds(int nLeds) {
+ showColor(CRGB(0, 0, 0), nLeds, 0);
+ }
+
+ // set all the leds on the controller to a given color
+ virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+ mWait.wait();
+ cli();
+
+ // ADVANCE = false: the same three bytes are re-read for every led
+ showRGBInternal<0, false>(nLeds, scale, (const byte*)&data);
+
+ // Adjust the timer
+ // (24 bits per led, T1 + T2 + T3 clocks per bit; whole milliseconds are added to the counter)
+ long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3));
+ MS_COUNTER += (microsTaken / 1000);
+ sei();
+ mWait.mark();
+ }
+
+ // write nLeds CRGB entries out to the strip
+ virtual void show(const struct CRGB *rgbdata, int nLeds, uint8_t scale = 255) {
+ mWait.wait();
+ cli();
+
+ // ADVANCE = true: the data pointer moves on after every led
+ showRGBInternal<0, true>(nLeds, scale, (const byte*)rgbdata);
+
+ // Adjust the timer
+ long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3));
+ MS_COUNTER += (microsTaken / 1000);
+ sei();
+ mWait.mark();
+ }
+
+#ifdef SUPPORT_ARGB
+ // as above for CARGB data; SKIP = 1 makes each led occupy 4 bytes, the first of which is not sent
+ virtual void show(const struct CARGB *rgbdata, int nLeds, uint8_t scale = 255) {
+ mWait.wait();
+ cli();
+
+ showRGBInternal<1, true>(nLeds, scale, (const byte*)rgbdata);
+
+ // Adjust the timer
+ long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3));
+ MS_COUNTER += (microsTaken / 1000);
+ sei();
+ mWait.mark();
+ }
+#endif
+
+#if defined(__MK20DX128__)
+ // Emit the 8 bits of b, most significant first: 7 bits in the loop and the final bit unrolled after it.
+ inline static void write8Bits(register data_ptr_t port, register data_t hi, register data_t lo, register uint32_t & b) __attribute__ ((always_inline)) {
+ // TODO: hand rig asm version of this method. The timings are based on adjusting/studying GCC compiler ouptut. This
+ // will bite me in the ass at some point, I know it.
+ for(register uint32_t i = 7; i > 0; i--) {
+ FastPin<DATA_PIN>::fastset(port, hi);
+ delaycycles<T1 - 5>(); // 5 cycles - 2 store, 1 and, 1 test, 1 if
+ if(b & 0x80) { FastPin<DATA_PIN>::fastset(port, hi); } else { FastPin<DATA_PIN>::fastset(port, lo); }
+ b <<= 1;
+ delaycycles<T2 - 2>(); // 2 cycles, 1 store/skip, 1 shift
+ FastPin<DATA_PIN>::fastset(port, lo);
+ delaycycles<T3 - 5>(); // 3 cycles, 2 store, 1 sub, 1 branch backwards
+ }
+ // delay an extra cycle because falling out of the loop takes on less cycle than looping around
+ delaycycles<1>();
+
+ FastPin<DATA_PIN>::fastset(port, hi);
+ delaycycles<T1 - 6>();
+ if(b & 0x80) { FastPin<DATA_PIN>::fastset(port, hi); } else { FastPin<DATA_PIN>::fastset(port, lo); }
+ delaycycles<T2 - 2>(); // 4 cycles, 2 store, store/skip
+ FastPin<DATA_PIN>::fastset(port, lo);
+ }
+#endif
+
+ // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+ // gcc will use register Y for the this pointer.
+ //
+ // SKIP is the number of leading bytes to skip in each led entry (each entry is 3 + SKIP bytes).
+ // ADVANCE selects whether bytes are read from the moving data pointer (true) or always from the
+ // start of rgbdata (false, used to repeat one color).
+ template<int SKIP, bool ADVANCE> static void showRGBInternal(register int nLeds, register uint8_t scale, register const byte *rgbdata) {
+ register byte *data = (byte*)rgbdata;
+ register data_t mask = FastPin<DATA_PIN>::mask();
+ register data_ptr_t port = FastPin<DATA_PIN>::port();
+ // from here on nLeds is a byte count, and end marks one past the last byte
+ nLeds *= (3 + SKIP);
+ register uint8_t *end = data + nLeds;
+ // precomputed port values with the data pin high and low; the line starts low
+ register data_t hi = *port | mask;
+ register data_t lo = *port & ~mask;
+ *port = lo;
+
+#if defined(__MK20DX128__)
+ register uint32_t b;
+ b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE0(RGB_ORDER)];
+ b = scale8(b, scale);
+ while(data < end) {
+ // Write first byte, read next byte
+ write8Bits(port, hi, lo, b);
+
+ b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE1(RGB_ORDER)];
+ INLINE_SCALE(b, scale);
+ delaycycles<T3 - 5>(); // 1 store, 2 load, 1 mul, 1 shift,
+
+ // Write second byte
+ write8Bits(port, hi, lo, b);
+
+ b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE2(RGB_ORDER)];
+ INLINE_SCALE(b, scale);
+
+ data += 3 + SKIP;
+ if((RGB_ORDER & 0070) == 0) {
+ delaycycles<T3 - 6>(); // 1 store, 2 load, 1 mul, 1 shift, 1 adds if BRG or GRB
+ } else {
+ delaycycles<T3 - 5>(); // 1 store, 2 load, 1 mul, 1 shift,
+ }
+
+ // Write third byte
+ write8Bits(port, hi, lo, b);
+
+ b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE0(RGB_ORDER)];
+ INLINE_SCALE(b, scale);
+
+ delaycycles<T3 - 11>(); // 1 store, 2 load (with increment), 1 mul, 1 shift, 1 cmp, 1 branch backwards, 1 movim
+ };
+#else
+#if 0
+ register uint8_t b = *data++;
+ while(data <= end) {
+ bitSetFast<7>(port, hi, lo, b);
+ bitSetFast<6>(port, hi, lo, b);
+ bitSetFast<5>(port, hi, lo, b);
+ bitSetFast<4>(port, hi, lo, b);
+ bitSetFast<3>(port, hi, lo, b);
+ // Leave an extra 2 clocks for the next byte load
+ bitSetLast<2, 2>(port, hi, lo, b);
+ register uint8_t next = *data++;
+ // Leave an extra 4 clocks for the scale
+ bitSetLast<1, 4>(port, hi, lo, b);
+ next = scale8(next, scale);
+ bitSetLast<0, END_OF_LOOP>(port, hi, lo, b);
+ b = next;
+ }
+#else
+ // Active AVR path. For each byte, five bits are sent by testing bit 7 and shifting left,
+ // then the remaining three are sent by testing bits 7, 6 and 5 without shifting, while the
+ // next byte is loaded and scaled in the cycles freed by the ADJ arguments.
+ register uint8_t b;
+
+ if(ADVANCE) {
+ b = data[SKIP + RGB_BYTE0(RGB_ORDER)];
+ } else {
+ b = rgbdata[SKIP + RGB_BYTE0(RGB_ORDER)];
+ }
+ b = scale8_LEAVING_R1_DIRTY(b, scale);
+
+ register uint8_t c;
+ register uint8_t d;
+ while(data < end) {
+ for(register byte x=5; x; x--) {
+ bitSetLast<7, 4>(port, hi, lo, b);
+ b <<= 1;
+ }
+ delaycycles<1>();
+ // Leave an extra 2 clocks for the next byte load
+ bitSetLast<7, 1>(port, hi, lo, b);
+ delaycycles<1>();
+
+ // Leave an extra 4 clocks for the scale
+ bitSetLast<6, 6>(port, hi, lo, b);
+ if(ADVANCE) {
+ c = data[SKIP + RGB_BYTE1(RGB_ORDER)];
+ } else {
+ c = rgbdata[SKIP + RGB_BYTE1(RGB_ORDER)];
+ delaycycles<1>();
+ }
+ INLINE_SCALE(c, scale);
+ bitSetLast<5, 1>(port, hi, lo, b);
+
+ for(register byte x=5; x; x--) {
+ bitSetLast<7, 4>(port, hi, lo, c);
+ c <<= 1;
+ }
+ delaycycles<1>();
+ // Leave an extra 2 clocks for the next byte load
+ bitSetLast<7, 1>(port, hi, lo, c);
+ delaycycles<1>();
+
+ // Leave an extra 4 clocks for the scale
+ bitSetLast<6, 6>(port, hi, lo, c);
+ if(ADVANCE) {
+ d = data[SKIP + RGB_BYTE2(RGB_ORDER)];
+ } else {
+ d = rgbdata[SKIP + RGB_BYTE2(RGB_ORDER)];
+ delaycycles<1>();
+ }
+ INLINE_SCALE(d, scale);
+ bitSetLast<5, 1>(port, hi, lo, c);
+
+ for(register byte x=5; x; x--) {
+ bitSetLast<7, 4>(port, hi, lo, d);
+ d <<= 1;
+ }
+ delaycycles<1>();
+ // Leave an extra 2 clocks for the next byte load
+ bitSetLast<7, 2>(port, hi, lo, d);
+ // step to the next led entry; data is compared against end at the top of the loop
+ data += (SKIP + 3);
+ // Leave an extra 4 clocks for the scale
+ bitSetLast<6, 6>(port, hi, lo, d);
+ if(ADVANCE) {
+ b = data[SKIP + RGB_BYTE0(RGB_ORDER)];
+ } else {
+ b = rgbdata[SKIP + RGB_BYTE0(RGB_ORDER)];
+ delaycycles<1>();
+ }
+ INLINE_SCALE(b, scale);
+ bitSetLast<5, 6>(port, hi, lo, d);
+ }
+ // pairs with the scale8_LEAVING_R1_DIRTY calls above
+ cleanup_R1();
+#endif
+#endif
+ }
+
+#ifdef SUPPORT_ARGB
+ virtual void showARGB(struct CARGB *data, int nLeds) {
+ // TODO: IMPLEMENTME
+ }
+#endif
+};
+
+#endif
diff --git a/controller.h b/controller.h
new file mode 100644
index 00000000..2c703f00
--- /dev/null
+++ b/controller.h
@@ -0,0 +1,56 @@
+#ifndef __INC_CONTROLLER_H
+#define __INC_CONTROLLER_H
+
+#include <avr/io.h>
+#include "pixeltypes.h"
+
+
+#define RGB_BYTE0(X) ((X>>6) & 0x3)
+#define RGB_BYTE1(X) ((X>>3) & 0x3)
+#define RGB_BYTE2(X) ((X) & 0x3)
+
+// operator byte *(struct CRGB[] arr) { return (byte*)arr; }
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// LED Controller interface definition
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Base definition for an LED controller. Pretty much the methods that every LED controller object will make available.
+/// Note that the showARGB method is not implemented for all controllers yet. Note also the methods for eventual checking
+/// of background writing of data (I'm looking at you, teensy 3.0 DMA controller!). If you want to pass LED controllers around
+/// to methods, make them references to this type, keeps your code saner.
+// Abstract interface: init, clearLeds, showColor and show are pure virtual and must be supplied
+// by each chipset controller; reset, ready and wait have default implementations.
+class CLEDController {
+public:
+ // initialize the LED controller
+ virtual void init() = 0;
+
+ // reset any internal state to a clean point
+ // (the default simply runs init() again)
+ virtual void reset() { init(); }
+
+ // clear out/zero out the given number of leds.
+ virtual void clearLeds(int nLeds) = 0;
+
+ // set all the leds on the controller to a given color
+ // scale is a brightness scale; 255 is the default
+ virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) = 0;
+
+ // note that the uint8_ts will be in the order that you want them sent out to the device.
+ // nLeds is the number of RGB leds being written to
+ virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) = 0;
+
+#ifdef SUPPORT_ARGB
+ // as above, but every 4th uint8_t is assumed to be alpha channel data, and will be skipped
+ virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0;
+#endif
+
+ // is the controller ready to write data out
+ // (the default always reports ready)
+ virtual bool ready() { return true; }
+
+ // wait until the controller is ready to write data out
+ // (the default returns immediately)
+ virtual void wait() { return; }
+
+};
+
+#endif \ No newline at end of file
diff --git a/delay.h b/delay.h
new file mode 100644
index 00000000..c29de694
--- /dev/null
+++ b/delay.h
@@ -0,0 +1,62 @@
+#ifndef __INC_DELAY_H
+#define __INC_DELAY_H
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Clock cycle counted delay loop
+//
+////////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__arm__)
+# define NOP __asm__ __volatile__ ("nop\n");
+#else
+# define NOP __asm__ __volatile__ ("cp r0,r0\n");
+#endif
+
+// predeclaration to not upset the compiler
+template<int CYCLES> inline void delaycycles();
+
+// TODO: ARM version of _delaycycles_
+// worker template - this will nop for LOOP * 3 + PAD cycles total
+//
+// LOOP is the iteration count loaded into R16, PAD is the number of extra cycles burned up front
+// via delaycycles<PAD>(). LOOP is passed with the "M" constraint, so it has to be a compile time
+// constant that the assembler accepts as an LDI immediate. LOOP is expected to be at least 1:
+// with 0 the DEC would wrap R16 around before the first BRNE test. The explicit delaycycles
+// specializations for small values keep CYCLES / 3 == 0 from being routed here.
+template<int LOOP, int PAD> inline void _delaycycles_AVR() {
+ delaycycles<PAD>();
+ // the loop below is 3 cycles * LOOP. the LDI is one cycle,
+ // the DEC is 1 cycle, the BRNE is 2 cycles if looping back and
+ // 1 if not (the LDI balances out the BRNE being 1 cycle on exit)
+ //
+ // R16 is used as a hard coded scratch register and is listed as a clobber so the
+ // compiler does not keep a live value in it across the asm block. L_%= gives each
+ // expansion of this asm its own label.
+ __asm__ __volatile__ (
+ " LDI R16, %0\n"
+ "L_%=: DEC R16\n"
+ " BRNE L_%=\n"
+ : /* no outputs */
+ : "M" (LOOP)
+ : "r16"
+ );
+}
+
+// usable definition
+// delaycycles<CYCLES>() burns CYCLES cycles' worth of instructions inline.
+#if !defined(__MK20DX128__)
+// Everything except the MK20DX128: split the request into CYCLES / 3 iterations of the
+// 3 cycle counted loop plus CYCLES % 3 cycles of padding.
+template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() {
+ _delaycycles_AVR<CYCLES / 3, CYCLES % 3>();
+}
+#else
+// MK20DX128: no counted loop yet, so emit one NOP and recurse on CYCLES-1; the recursion
+// ends at one of the explicit specializations defined for small values.
+template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() {
+ NOP; delaycycles<CYCLES-1>();
+}
+#endif
+
+// pre-instantiations for values small enough to not need the loop, as well as sanity holders
+// for some negative values.
+// Values -6 through 0 expand to nothing at all; values 1 through 5 expand to that many NOPs.
+// These also act as the termination cases for the generic delaycycles template.
+template<> __attribute__((always_inline)) inline void delaycycles<-6>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<-5>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<-4>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<-3>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<-2>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<-1>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<0>() {}
+template<> __attribute__((always_inline)) inline void delaycycles<1>() {NOP;}
+template<> __attribute__((always_inline)) inline void delaycycles<2>() {NOP;NOP;}
+template<> __attribute__((always_inline)) inline void delaycycles<3>() {NOP;NOP;NOP;}
+template<> __attribute__((always_inline)) inline void delaycycles<4>() {NOP;NOP;NOP;NOP;}
+template<> __attribute__((always_inline)) inline void delaycycles<5>() {NOP;NOP;NOP;NOP;NOP;}
+
+#endif \ No newline at end of file
diff --git a/dmx.h b/dmx.h
new file mode 100644
index 00000000..a7c1c1f0
--- /dev/null
+++ b/dmx.h
@@ -0,0 +1,115 @@
+#ifndef __INC_DMX_H
+#define __INC_DMX_H
+
+//#ifdef DmxSimple_H
+//#if USE_DMX_SIMPLE
+#ifdef FASTSPI_USE_DMX_SIMPLE
+#include<DmxSimple.h>
+// note - dmx simple must be included before FastSPI for this code to be enabled
+// LED controller that pushes pixel data out through the DmxSimple library. Each led takes three
+// consecutive DMX channels, starting at channel 1, in the byte order selected by RGB_ORDER.
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class DMXController : public CLEDController {
+public:
+    // point DmxSimple at the configured data pin
+    virtual void init() { DmxSimple.usePin(DATA_PIN); }
+
+    // resetting is the same as initializing again
+    virtual void reset() { init(); }
+
+    // zero the channels backing the first nLeds leds (never more than DMX_SIZE channels)
+    virtual void clearLeds(int nLeds) {
+        const int lastChannel = min(nLeds * 3, DMX_SIZE);
+        int channel = 1;
+        while(channel <= lastChannel) {
+            DmxSimple.write(channel, 0);
+            channel++;
+        }
+    }
+
+    // write the same scaled color to the first nLeds leds (capped at DMX_SIZE / 3 leds)
+    virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+        const int nPixels = min(nLeds, DMX_SIZE / 3);
+        for(int pixel = 0, channel = 1; pixel < nPixels; pixel++, channel += 3) {
+            DmxSimple.write(channel,     scale8(data[RGB_BYTE0(RGB_ORDER)], scale));
+            DmxSimple.write(channel + 1, scale8(data[RGB_BYTE1(RGB_ORDER)], scale));
+            DmxSimple.write(channel + 2, scale8(data[RGB_BYTE2(RGB_ORDER)], scale));
+        }
+    }
+
+    // write nLeds leds from the data array, scaled, three channels per led (capped at DMX_SIZE / 3 leds)
+    virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
+        const int nPixels = min(nLeds, DMX_SIZE / 3);
+        for(int pixel = 0, channel = 1; pixel < nPixels; pixel++, channel += 3) {
+            const struct CRGB & led = data[pixel];
+            DmxSimple.write(channel,     scale8(led[RGB_BYTE0(RGB_ORDER)], scale));
+            DmxSimple.write(channel + 1, scale8(led[RGB_BYTE1(RGB_ORDER)], scale));
+            DmxSimple.write(channel + 2, scale8(led[RGB_BYTE2(RGB_ORDER)], scale));
+        }
+    }
+
+#ifdef SUPPORT_ARGB
+    // ARGB variant is left pure virtual here
+    virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0;
+#endif
+
+    // always ready to write
+    virtual bool ready() { return true; }
+
+    // nothing to wait for
+    virtual void wait() { return; }
+
+};
+
+#elif defined(DmxSerial_h)
+
+// LED controller that pushes pixel data out through the DMXSerial library. Each led takes three
+// consecutive DMX channels, in the byte order selected by RGB_ORDER. DMX channels are numbered
+// from 1, so the first led occupies channels 1-3.
+template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class DMXController : public CLEDController {
+public:
+    // initialize the LED controller
+    // NOTE(review): inside this class the name DMXController refers to the class itself, not to
+    // the DMXSerial mode constant of the same name - confirm this branch builds as intended.
+    virtual void init() { DMXSerial.init(DMXController); }
+
+    // reset any internal state to a clean point
+    virtual void reset() { init(); }
+
+    // clear out/zero out the given number of leds (never more than DMX_SIZE channels).
+    virtual void clearLeds(int nLeds) {
+        int count = min(nLeds * 3, DMX_SIZE);
+        // write through DMXSerial (DmxSimple is not part of this branch), channels 1..count
+        for(int iChannel = 1; iChannel <= count; iChannel++) { DMXSerial.write(iChannel, 0); }
+    }
+
+    // set all the leds on the controller to a given color (capped at DMX_SIZE / 3 leds)
+    virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
+        int count = min(nLeds, DMX_SIZE / 3);
+        int iChannel = 1; // DMX channel numbering starts at 1
+        for(int i = 0; i < count; i++) {
+            DMXSerial.write(iChannel++, scale8(data[RGB_BYTE0(RGB_ORDER)], scale));
+            DMXSerial.write(iChannel++, scale8(data[RGB_BYTE1(RGB_ORDER)], scale));
+            DMXSerial.write(iChannel++, scale8(data[RGB_BYTE2(RGB_ORDER)], scale));
+        }
+    }
+
+    // note that the uint8_ts will be in the order that you want them sent out to the device.
+    // nLeds is the number of RGB leds being written to (capped at DMX_SIZE / 3 leds)
+    virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
+        int count = min(nLeds, DMX_SIZE / 3);
+        int iChannel = 1; // DMX channel numbering starts at 1
+        for(int i = 0; i < count; i++) {
+            DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE0(RGB_ORDER)], scale));
+            DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE1(RGB_ORDER)], scale));
+            DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE2(RGB_ORDER)], scale));
+        }
+
+    }
+
+#ifdef SUPPORT_ARGB
+    // as above, but every 4th uint8_t is assumed to be alpha channel data, and will be skipped
+    virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0;
+#endif
+
+    // is the controller ready to write data out
+    virtual bool ready() { return true; }
+
+    // wait until the controller is ready to write data out
+    virtual void wait() { return; }
+
+};
+
+#endif
+
+#endif \ No newline at end of file
diff --git a/examples/Fast2Dev/Fast2Dev.ino b/examples/Fast2Dev/Fast2Dev.ino
new file mode 100644
index 00000000..5090aaad
--- /dev/null
+++ b/examples/Fast2Dev/Fast2Dev.ino
@@ -0,0 +1,98 @@
+// Uncomment this line if you have any interrupts that are changing pins - this causes the library to be a little bit more cautious
+// #define FAST_SPI_INTERRUPTS_WRITE_PINS 1
+
+// Uncomment this line to force always using software, instead of hardware, SPI (why?)
+// #define FORCE_SOFTWARE_SPI 1
+
+// Uncomment this line if you want to talk to DMX controllers
+// #define FASTSPI_USE_DMX_SIMPLE 1
+
+#include "FastLED.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// test code
+//
+//////////////////////////////////////////////////
+
+#define NUM_LEDS 150
+
+CRGB leds[NUM_LEDS];
+
+void setup() {
+ // sanity check delay - allows reprogramming if accidentally blowing power w/leds
+ delay(2000);
+
+ // For safety (to prevent too high of a power draw), the test case defaults to
+ // a brightness setting of 64 out of 255 (roughly 25%)
+ LEDS.setBrightness(64);
+
+ // Exactly one addLeds line should be active; the commented out lines below show the
+ // equivalent call for other chipsets and pin/ordering combinations.
+
+ // LEDS.addLeds<WS2811, 13>(leds, NUM_LEDS);
+ // LEDS.addLeds<TM1809, 13>(leds, NUM_LEDS);
+ // LEDS.addLeds<UCS1903, 13>(leds, NUM_LEDS);
+ // LEDS.addLeds<TM1803, 13>(leds, NUM_LEDS);
+
+ // LEDS.addLeds<P9813>(leds, NUM_LEDS);
+
+ LEDS.addLeds<LPD8806>(leds, NUM_LEDS);
+ // LEDS.addLeds<WS2801>(leds, NUM_LEDS);
+ // LEDS.addLeds<SM16716>(leds, NUM_LEDS);
+
+ // LEDS.addLeds<WS2811, 11>(leds, NUM_LEDS);
+
+ // Put ws2801 strip on the hardware SPI pins with a BGR ordering of rgb and limited to a 1Mhz data rate
+ // LEDS.addLeds<WS2801, 11, 13, BGR, DATA_RATE_MHZ(1)>(leds, NUM_LEDS);
+
+ // LEDS.addLeds<LPD8806, 10, 11>(leds, NUM_LEDS);
+ // LEDS.addLeds<WS2811, 13, BRG>(leds, NUM_LEDS);
+ // LEDS.addLeds<LPD8806, BGR>(leds, NUM_LEDS);
+}
+
+// Test pattern, repeated for each of the three color channels: walk a single lit pixel down
+// the strip, then fade the whole strip up and down - first by changing the color value, then
+// by changing the brightness scale.
+void loop() {
+    for(int channel = 0; channel < 3; channel++) {
+        for(int pos = 0; pos < NUM_LEDS; pos++) {
+            // start each frame from an all-black strip
+            memset(leds, 0, NUM_LEDS * sizeof(struct CRGB));
+
+            if(channel == 0) {
+                // You can access the rgb values by field r, g, b
+                leds[pos].r = 128;
+            } else if(channel == 1) {
+                // or by indexing into the led (r==0, g==1, b==2)
+                leds[pos][channel] = 128;
+            } else if(channel == 2) {
+                // or by setting the rgb values for the pixel all at once
+                leds[pos] = CRGB(0, 0, 128);
+            }
+
+            // and now, show your led array!
+            LEDS.show();
+            delay(10);
+        }
+
+        int level;
+
+        // fade up (0 through 127)
+        // The showColor method sets all the leds in the strip to the same color
+        level = 0;
+        while(level < 128) {
+            LEDS.showColor(CRGB(level, 0, 0));
+            delay(10);
+            level++;
+        }
+
+        // fade down (128 through 0)
+        level = 128;
+        while(level >= 0) {
+            LEDS.showColor(CRGB(level, 0, 0));
+            delay(10);
+            level--;
+        }
+
+        // let's fade up by scaling the brightness (0 through 127)
+        level = 0;
+        while(level < 128) {
+            LEDS.showColor(CRGB(0, 128, 0), level);
+            delay(10);
+            level++;
+        }
+
+        // let's fade down by scaling the brightness (128 through 1)
+        level = 128;
+        while(level > 0) {
+            LEDS.showColor(CRGB(0, 128, 0), level);
+            delay(10);
+            level--;
+        }
+    }
+}
diff --git a/examples/FirstLight/FirstLight.ino b/examples/FirstLight/FirstLight.ino
new file mode 100644
index 00000000..fcfbacbd
--- /dev/null
+++ b/examples/FirstLight/FirstLight.ino
@@ -0,0 +1,66 @@
+#define FORCE_SOFTWARE_SPI
+#define FORCE_SOFTWARE_PINS
+#include "FastLED.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////
+//
+// Move a white dot along the strip of leds. This program simply shows how to configure the leds,
+// and then how to turn a single pixel white and then off, moving down the line of pixels.
+//
+
+// How many leds are in the strip?
+#define NUM_LEDS 60
+
+// Data pin that led data will be written out over
+#define DATA_PIN 6
+
+// Clock pin only needed for SPI based chipsets when not using hardware SPI
+//#define CLOCK_PIN 8
+
+// This is an array of leds. One item for each led in your strip.
+CRGB leds[NUM_LEDS];
+
+// This function sets up the leds and tells the controller about them
+void setup() {
+ // sanity check delay - allows reprogramming if accidentally blowing power w/leds
+ delay(2000);
+
+ // Uncomment one of the following lines for your leds arrangement.
+ // (Lines with DATA_PIN are for the single-wire chipsets. The "leds + offset" examples
+ // register only part of the array; the leds + 225 one needs an array larger than the
+ // NUM_LEDS defined in this sketch.)
+ // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2811, DATA_PIN, GRB>(leds+18, NUM_LEDS/3);
+ // FastLED.addLeds<WS2811, 8, RGB>(leds + 225, NUM_LEDS/4);
+ // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<NEOPIXEL, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+
+ // SPI based chipsets without explicit pins
+ // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS);
+ FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS);
+
+ // SPI based chipsets on explicitly chosen data/clock pins (requires CLOCK_PIN to be defined)
+ // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+}
+
+// Called repeatedly. Each pass sweeps a single white pixel from the first led
+// to the last, leaving every other led black.
+void loop() {
+    int pos = 0;
+    while(pos < NUM_LEDS) {
+        // light the current led white and push the frame out
+        leds[pos] = CRGB::White;
+        FastLED.show();
+
+        // hold it for a moment
+        delay(100);
+
+        // back to black so only the next led is lit on the following frame
+        leds[pos] = CRGB::Black;
+
+        pos++;
+    }
+}
diff --git a/examples/RGBCalibrate/RGBCalibrate.ino b/examples/RGBCalibrate/RGBCalibrate.ino
new file mode 100644
index 00000000..55661052
--- /dev/null
+++ b/examples/RGBCalibrate/RGBCalibrate.ino
@@ -0,0 +1,66 @@
+#include "FastLED.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RGB Calibration code
+//
+// Use this sketch to determine what the RGB ordering for your chipset should be. Steps for setting up to use:
+
+// * Uncomment the line in setup that corresponds to the LED chipset that you are using. (Note that they
+// all explicitly specify the RGB order as RGB)
+// * Define DATA_PIN to the pin that data is connected to.
+// * (Optional) if using software SPI for chipsets that are SPI based, define CLOCK_PIN to the clock pin
+// * Compile/upload/run the sketch
+
+// You should see six leds on. If the RGB ordering is correct, you should see 1 red led, 2 green
+// leds, and 3 blue leds. If you see different colors, the count of each color tells you what the
+// position for that color in the rgb ordering should be. So, for example, if you see 1 Blue, and 2
+// Red, and 3 Green leds then the rgb ordering should be BRG (Blue, Red, Green).
+
+// You can then test this ordering by setting the RGB ordering in the addLeds line below to the new ordering
+// and it should come out correctly, 1 red, 2 green, and 3 blue.
+//
+//////////////////////////////////////////////////
+
+#define NUM_LEDS 6
+
+// Data pin that led data will be written out over
+#define DATA_PIN 7
+// Clock pin only needed for SPI based chipsets when not using hardware SPI
+//#define CLOCK_PIN 8
+
+CRGB leds[NUM_LEDS];
+
+void setup() {
+ // sanity check delay - allows reprogramming if accidentally blowing power w/leds
+ delay(2000);
+
+ // Uncomment one of the following lines for your leds arrangement.
+ // (As shipped, every line is commented out, so no controller is registered until you pick one.)
+ // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+
+ // SPI based chipsets without explicit pins
+ // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS);
+
+ // SPI based chipsets on explicitly chosen data/clock pins (requires CLOCK_PIN to be defined)
+ // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+ // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+}
+
+// Paint the calibration pattern - 1 red, 2 green, 3 blue on leds 0 through 5 - show it,
+// and pause for a second before doing it again.
+void loop() {
+    int i = 0;
+
+    // led 0: red
+    leds[i++] = CRGB::Red;
+
+    // leds 1-2: green
+    while(i < 3) { leds[i++] = CRGB::Green; }
+
+    // leds 3-5: blue
+    while(i < 6) { leds[i++] = CRGB::Blue; }
+
+    FastLED.show();
+    delay(1000);
+}
diff --git a/fastpin.h b/fastpin.h
new file mode 100644
index 00000000..b6355fce
--- /dev/null
+++ b/fastpin.h
@@ -0,0 +1,424 @@
+#ifndef __INC_FASTPIN_H
+#define __INC_FASTPIN_H
+
+#include<avr/io.h>
+
+// Arduino.h needed for convenience functions digitalPinToPort/BitMask/portOutputRegister and the pinMode methods.
+#include<Arduino.h>
+
+#define NO_PIN 255
+
+// Class to ensure that a minimum amount of time has passed since the last mark() - wait() delays for
+// the remainder if not enough time has passed yet. WAIT is the minimum gap, in microseconds.
+template<int WAIT> class CMinWait {
+    // micros() timestamp captured by the most recent mark(); kept unsigned so that elapsed time
+    // arithmetic stays well defined when micros() rolls over
+    unsigned long mLastMicros;
+public:
+    CMinWait() { mLastMicros = 0; }
+
+    // Delay until at least WAIT microseconds have gone by since the last mark().
+    // Returns immediately if that much time has already passed.
+    void wait() {
+        if(WAIT <= 0) { return; }
+
+        // Unsigned subtraction yields the true elapsed time modulo the counter width. Holding it
+        // in a signed value instead would turn a long idle period into a negative number and
+        // trigger a bogus delay.
+        unsigned long elapsed = micros() - mLastMicros;
+        if(elapsed < static_cast<unsigned long>(WAIT)) {
+            delayMicroseconds(static_cast<unsigned long>(WAIT) - elapsed);
+        }
+    }
+
+    // Record "now" as the reference point for the next wait().
+    void mark() { mLastMicros = micros(); }
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Pin access class - needs to tune for various platforms (naive fallback solution?)
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// _CYCLES(_PIN) evaluates to 2 for the pin numbers listed below and to 1 for every other pin.
+// NOTE(review): presumably the cycle cost of a single write to that pin's port - confirm against the users of this macro.
+#if defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__)
+#define _CYCLES(_PIN) (((_PIN >= 62 ) || (_PIN>=42 && _PIN<=49) || (_PIN>=14 && _PIN <=17) || (_PIN>=6 && _PIN <=9)) ? 2 : 1)
+#else
+#define _CYCLES(_PIN) ((_PIN >= 24) ? 2 : 1)
+#endif
+
+// Abstract interface for something that can be selected and released, and asked which of
+// the two states it is currently in.
+class Selectable {
+public:
+ // put the object into its selected state
+ virtual void select() = 0;
+ // take the object out of its selected state
+ virtual void release() = 0;
+ // report whether the object is currently selected
+ virtual bool isSelected() = 0;
+};
+
+// Runtime pin object: the pin number is supplied to the constructor, and the port register and
+// bit mask are looked up once with the Arduino digitalPinToBitMask/portOutputRegister helpers.
+// As a Selectable, select() drives the pin high and release() drives it low.
+class Pin : public Selectable {
+ uint8_t mPinMask; // bit mask of this pin within its port
+ uint8_t mPin; // arduino pin number
+ volatile uint8_t *mPort; // output register of the port this pin lives on
+
+ // resolve the mask and port register for mPin
+ void _init() {
+ mPinMask = digitalPinToBitMask(mPin);
+ mPort = portOutputRegister(digitalPinToPort(mPin));
+ }
+public:
+ Pin(int pin) : mPin(pin) { _init(); }
+
+ typedef volatile uint8_t * port_ptr_t;
+ typedef uint8_t port_t;
+
+ inline void setOutput() { pinMode(mPin, OUTPUT); }
+ inline void setInput() { pinMode(mPin, INPUT); }
+
+ // read-modify-write of the port register to set / clear just this pin's bit
+ inline void hi() __attribute__ ((always_inline)) { *mPort |= mPinMask; }
+ inline void lo() __attribute__ ((always_inline)) { *mPort &= ~mPinMask; }
+
+ // pulse the pin: high, then immediately low
+ inline void strobe() __attribute__ ((always_inline)) { hi(); lo(); }
+
+ // same as hi()/lo(), but through a port pointer the caller already holds
+ inline void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port |= mPinMask; }
+ inline void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port &= ~mPinMask; }
+ // overwrite the whole port register with val (affects every pin on the port)
+ inline void set(register port_t val) __attribute__ ((always_inline)) { *mPort = val; }
+
+ // overwrite the register behind the given port pointer with val
+ inline void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+ // current port value with this pin's bit forced on / off - meant to be cached and handed to set()/fastset()
+ port_t hival() __attribute__ ((always_inline)) { return *mPort | mPinMask; }
+ port_t loval() __attribute__ ((always_inline)) { return *mPort & ~mPinMask; }
+ port_ptr_t port() __attribute__ ((always_inline)) { return mPort; }
+ port_t mask() __attribute__ ((always_inline)) { return mPinMask; }
+
+ // Selectable implementation: selected means the pin's bit is set in the port register
+ virtual void select() { hi(); }
+ virtual void release() { lo(); }
+ virtual bool isSelected() { return (*mPort & mPinMask) == mPinMask; }
+};
+
+// A Pin that is switched to OUTPUT mode as soon as it is constructed.
+class OutputPin : public Pin {
+public:
+ OutputPin(int pin) : Pin(pin) { setOutput(); }
+};
+
+// A Pin that is switched to INPUT mode as soon as it is constructed.
+class InputPin : public Pin {
+public:
+ InputPin(int pin) : Pin(pin) { setInput(); }
+};
+
+/// The simplest level of Pin class. This relies on runtime functions during initialization to get the port/pin mask for the pin. Most
+/// of the accesses involve references to these static globals that get set up. This won't be the fastest set of pin operations, but it
+/// will provide pin level access on pretty much all arduino environments. In addition, it includes some methods to help optimize access in
+/// various ways. Namely, the versions of hi, lo, and fastset that take the port register as a passed in register variable (saving a global
+/// dereference), since these functions are aggressively inlined, that can help collapse out a lot of extraneous memory loads/dereferences.
+///
+/// In addition, if, while writing a bunch of data to a pin, you know no other pins will be getting written to, you can get/cache a value of
+/// the pin's port register and use that to do a full set to the register. This results in one being able to simply do a store to the register,
+/// vs. the load, and/or, and store that would be done normally.
+///
+/// There are platform specific instantiations of this class that provide direct i/o register access to pins for much higher speed pin twiddling.
+///
+/// Note that these classes are all static functions. So the proper usage is FastPin<13>::hi(); or such. Instantiating objects is not recommended,
+/// as passing FastPin objects around will likely -not- have the effect you're expecting.
+// Generic (fallback) implementation: the port register and bit mask for PIN are stored in static
+// members that are filled in by _init(), which runs from setOutput()/setInput(). hi(), lo() and
+// friends dereference sPort, so one of setOutput()/setInput() has to be called first.
+template<uint8_t PIN> class FastPin {
+ static uint8_t sPinMask; // bit mask of PIN within its port
+ static volatile uint8_t *sPort; // output register of the port PIN lives on
+ // look up the mask and port register for PIN using the arduino helpers
+ static void _init() {
+ sPinMask = digitalPinToBitMask(PIN);
+ sPort = portOutputRegister(digitalPinToPort(PIN));
+ }
+public:
+ typedef volatile uint8_t * port_ptr_t;
+ typedef uint8_t port_t;
+
+ inline static void setOutput() { _init(); pinMode(PIN, OUTPUT); }
+ inline static void setInput() { _init(); pinMode(PIN, INPUT); }
+
+ // read-modify-write of the port register to set / clear just this pin's bit
+ inline static void hi() __attribute__ ((always_inline)) { *sPort |= sPinMask; }
+ inline static void lo() __attribute__ ((always_inline)) { *sPort &= ~sPinMask; }
+
+ // pulse the pin: high, then immediately low
+ inline static void strobe() __attribute__ ((always_inline)) { hi(); lo(); }
+
+ // same as hi()/lo(), but through a port pointer the caller already holds
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port |= sPinMask; }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port &= ~sPinMask; }
+ // overwrite the whole port register with val (affects every pin on the port)
+ inline static void set(register port_t val) __attribute__ ((always_inline)) { *sPort = val; }
+
+ // overwrite the register behind the given port pointer with val
+ inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+ // current port value with this pin's bit forced on / off - meant to be cached and handed to set()/fastset()
+ static port_t hival() __attribute__ ((always_inline)) { return *sPort | sPinMask; }
+ static port_t loval() __attribute__ ((always_inline)) { return *sPort & ~sPinMask; }
+ static port_ptr_t port() __attribute__ ((always_inline)) { return sPort; }
+ static port_t mask() __attribute__ ((always_inline)) { return sPinMask; }
+};
+
+// storage for the per-PIN static members declared above
+template<uint8_t PIN> uint8_t FastPin<PIN>::sPinMask;
+template<uint8_t PIN> volatile uint8_t *FastPin<PIN>::sPort;
+
+/// Class definition for a Pin where we know the port registers at compile time for said pin. This allows us to make
+/// a lot of optimizations, as the inlined hi/lo methods will devolve to a single io register write/bitset.
+///
+/// _MASK is the pin's bit within its port; _PORT, _DDR and _PIN are wrapper types whose static r() returns
+/// a reference to the corresponding register. The _PIN type is accepted but not used by any method here.
+template<uint8_t PIN, uint8_t _MASK, typename _PORT, typename _DDR, typename _PIN> class _AVRPIN {
+public:
+ typedef volatile uint8_t * port_ptr_t;
+ typedef uint8_t port_t;
+
+ // direction is switched by setting / clearing the pin's bit in the _DDR register
+ inline static void setOutput() { _DDR::r() |= _MASK; }
+ inline static void setInput() { _DDR::r() &= ~_MASK; }
+
+ inline static void hi() __attribute__ ((always_inline)) { _PORT::r() |= _MASK; }
+ inline static void lo() __attribute__ ((always_inline)) { _PORT::r() &= ~_MASK; }
+ // overwrite the whole port register with val (affects every pin on the port)
+ inline static void set(register uint8_t val) __attribute__ ((always_inline)) { _PORT::r() = val; }
+
+ // pulse the pin: high, then immediately low
+ inline static void strobe() __attribute__ ((always_inline)) { hi(); lo(); }
+
+ // the port argument is ignored in these three - the register is already known at compile time
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+ inline static void fastset(register port_ptr_t port, register uint8_t val) __attribute__ ((always_inline)) { set(val); }
+
+ // current port value with this pin's bit forced on / off - meant to be cached and handed to set()/fastset()
+ inline static port_t hival() __attribute__ ((always_inline)) { return _PORT::r() | _MASK; }
+ inline static port_t loval() __attribute__ ((always_inline)) { return _PORT::r() & ~_MASK; }
+ inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PORT::r(); }
+ inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+/// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers. Note that this
+/// uses the full port GPIO registers. In theory, in some way, bit-band register access -should- be faster, however I have found
+/// that something about the way gcc does register allocation results in the bit-band code being slower. It will need more fine tuning.
+///
+/// _MASK is the pin's bit within its 32 bit port. Each register type parameter is a wrapper whose static r() returns a
+/// reference to that register. _PDIR and _PDDR are accepted but not used by any method here.
+template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
+public:
+ typedef volatile uint32_t * port_ptr_t;
+ typedef uint32_t port_t;
+
+ // direction changes go through the stock pinMode for now
+ inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+ inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+ // hi/lo/toggle are plain stores of _MASK to the _PSOR/_PCOR/_PTOR registers - no read-modify-write
+ inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; }
+ inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; }
+ // overwrite the whole _PDOR register with val (affects every pin on the port)
+ inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; }
+
+ // pulse the pin by toggling it twice
+ inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+ inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; }
+
+ // the port argument is ignored by hi/lo; fastset stores val through the pointer it is given
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+ inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+ // current _PDOR value with this pin's bit forced on / off - meant to be cached and handed to set()/fastset()
+ inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; }
+ inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; }
+ inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); }
+ inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+/// Template definition for teensy 3.0 style ARM pins using bit banding, providing direct access to the various GPIO registers. GCC
+/// does a poor job of optimizing around these accesses so they are not being used just yet.
+///
+/// _BIT is the pin's bit index within its port. Each register type parameter is a wrapper whose static rx<_BIT>() returns
+/// a pointer to the bit-band word for that bit, so every access below reads or writes a single pin.
+template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN_BITBAND {
+public:
+ typedef volatile uint32_t * port_ptr_t;
+ typedef uint32_t port_t;
+
+ // direction changes go through the stock pinMode for now
+ inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+ inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+ // all three store to the bit-band word for this pin's bit of _PDOR
+ inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; }
+ inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; }
+ inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }
+
+ // pulse the pin by toggling it twice
+ inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+ inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; }
+
+ // variants that store through a pointer the caller already holds (expected to be the value returned by port())
+ inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 1; }
+ inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 0; }
+ inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+ // with bit banding the "port" is a single bit, so the high/low values and the mask are just 1, 0 and 1
+ inline static port_t hival() __attribute__ ((always_inline)) { return 1; }
+ inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
+ inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); }
+ inline static port_t mask() __attribute__ ((always_inline)) { return 1; }
+};
+
+/// AVR definitions for pins. Getting around the fact that I can't pass GPIO register addresses in as template arguments by instead creating
+/// a custom type for each GPIO register with a single, static, aggressively inlined function that returns that specific GPIO register. A similar
+/// trick is used a bit further below for the ARM GPIO registers (of which there are far more than on AVR!)
+typedef volatile uint8_t & reg8_t;
+// _R(T): names the generated wrapper struct for register T
+#define _R(T) struct __gen_struct_ ## T
+// _RD8(T): defines the wrapper struct for 8 bit register T; its static r() returns a reference to T
+#define _RD8(T) struct __gen_struct_ ## T { static inline reg8_t r() { return T; }};
+// _IO(L): defines the wrappers for the DDR, PORT and PIN registers of port letter L
+#define _IO(L) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L);
+// _DEFPIN_AVR(PIN, MASK, L): specializes FastPin<PIN> as an _AVRPIN on port letter L with bit mask MASK
+#define _DEFPIN_AVR(PIN, MASK, L) template<> class FastPin<PIN> : public _AVRPIN<PIN, MASK, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};
+
+// ARM definitions
+// GPIO_BITBAND_ADDR(reg, bit): address of the bit-band alias word for the given bit of reg
+// (offset of reg from 0x40000000, times 32, plus 4 bytes per bit, based at 0x42000000)
+#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
+// GPIO_BITBAND_PTR(reg, bit): the same address, as a pointer
+#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))
+
+typedef volatile uint32_t & reg32_t;
+typedef volatile uint32_t * ptr_reg32_t;
+
+// _RD32(T): defines the wrapper struct for 32 bit register T; r() returns a reference to T and
+// rx<BIT>() returns a pointer to the bit-band word for bit BIT of T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
+ template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
+// _IO32(L): defines the wrappers for the PDOR/PSOR/PCOR/PTOR/PDIR/PDDR registers of GPIO port letter L
+#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+
+// _DEFPIN_ARM(PIN, BIT, L): specializes FastPin<PIN> as an _ARMPIN on GPIO port letter L with mask 1 << BIT
+#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+ _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
+
+// Don't use bit band'd pins for now, the compiler generates far less efficient code around them
+// #define _DEFPIN_ARM(PIN, BIT, L) template<> class Pin<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR),
+// _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Pin definitions for AVR and ARM. If there are pin definitions supplied below for the platform being
+// built on, then much higher speed access will be possible, namely with direct GPIO register accesses.
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(FORCE_SOFTWARE_PINS)
+#warning "Softwrae pin support forced pin access will be slightly slower. See fastpin.h for info."
+#define NO_HARDWARE_PIN_SUPPORT
+
+#elif defined(__AVR_ATtiny85__)
+_IO(B);
+
+_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x08, B);
+_DEFPIN_AVR(4, 0x10, B); _DEFPIN_AVR(5, 0x20, B);
+
+#elif defined(__AVR_ATmega328P__) || defined(__AVR_ATmega168__)
+// Accelerated port definitions for arduino avrs
+_IO(D); _IO(B); _IO(C);
+_DEFPIN_AVR( 0, 0x01, D); _DEFPIN_AVR( 1, 0x02, D); _DEFPIN_AVR( 2, 0x04, D); _DEFPIN_AVR( 3, 0x08, D);
+_DEFPIN_AVR( 4, 0x10, D); _DEFPIN_AVR( 5, 0x20, D); _DEFPIN_AVR( 6, 0x40, D); _DEFPIN_AVR( 7, 0x80, D);
+_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, B); _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B);
+_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C);
+_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C);
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+#define SPI_SELECT 10
+#define AVR_HARDWARE_SPI
+
+#elif defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__)
+// megas
+
+_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F); _IO(G); _IO(H); _IO(J); _IO(K); _IO(L);
+
+_DEFPIN_AVR(0, 1, E); _DEFPIN_AVR(1, 2, E); _DEFPIN_AVR(2, 16, E); _DEFPIN_AVR(3, 32, E);
+_DEFPIN_AVR(4, 32, G); _DEFPIN_AVR(5, 8, E); _DEFPIN_AVR(6, 8, H); _DEFPIN_AVR(7, 16, H);
+_DEFPIN_AVR(8, 32, H); _DEFPIN_AVR(9, 64, H); _DEFPIN_AVR(10, 16, B); _DEFPIN_AVR(11, 32, B);
+_DEFPIN_AVR(12, 64, B); _DEFPIN_AVR(13, 128, B); _DEFPIN_AVR(14, 2, J); _DEFPIN_AVR(15, 1, J);
+_DEFPIN_AVR(16, 2, H); _DEFPIN_AVR(17, 1, H); _DEFPIN_AVR(18, 8, D); _DEFPIN_AVR(19, 4, D);
+_DEFPIN_AVR(20, 2, D); _DEFPIN_AVR(21, 1, D); _DEFPIN_AVR(22, 1, A); _DEFPIN_AVR(23, 2, A);
+_DEFPIN_AVR(24, 4, A); _DEFPIN_AVR(25, 8, A); _DEFPIN_AVR(26, 16, A); _DEFPIN_AVR(27, 32, A);
+_DEFPIN_AVR(28, 64, A); _DEFPIN_AVR(29, 128, A); _DEFPIN_AVR(30, 128, C); _DEFPIN_AVR(31, 64, C);
+_DEFPIN_AVR(32, 32, C); _DEFPIN_AVR(33, 16, C); _DEFPIN_AVR(34, 8, C); _DEFPIN_AVR(35, 4, C);
+_DEFPIN_AVR(36, 2, C); _DEFPIN_AVR(37, 1, C); _DEFPIN_AVR(38, 128, D); _DEFPIN_AVR(39, 4, G);
+_DEFPIN_AVR(40, 2, G); _DEFPIN_AVR(41, 1, G); _DEFPIN_AVR(42, 128, L); _DEFPIN_AVR(43, 64, L);
+_DEFPIN_AVR(44, 32, L); _DEFPIN_AVR(45, 16, L); _DEFPIN_AVR(46, 8, L); _DEFPIN_AVR(47, 4, L);
+_DEFPIN_AVR(48, 2, L); _DEFPIN_AVR(49, 1, L); _DEFPIN_AVR(50, 8, B); _DEFPIN_AVR(51, 4, B);
+_DEFPIN_AVR(52, 2, B); _DEFPIN_AVR(53, 1, B); _DEFPIN_AVR(54, 1, F); _DEFPIN_AVR(55, 2, F);
+_DEFPIN_AVR(56, 4, F); _DEFPIN_AVR(57, 8, F); _DEFPIN_AVR(58, 16, F); _DEFPIN_AVR(59, 32, F);
+_DEFPIN_AVR(60, 64, F); _DEFPIN_AVR(61, 128, F); _DEFPIN_AVR(62, 1, K); _DEFPIN_AVR(63, 2, K);
+_DEFPIN_AVR(64, 4, K); _DEFPIN_AVR(65, 8, K); _DEFPIN_AVR(66, 16, K); _DEFPIN_AVR(67, 32, K);
+_DEFPIN_AVR(68, 64, K); _DEFPIN_AVR(69, 128, K);
+
+#define SPI_DATA 51
+#define SPI_CLOCK 52
+#define SPI_SELECT 53
+#define AVR_HARDWARE_SPI
+
+// Leonardo, teensy, blinkm
+#elif defined(__AVR_ATmega32U4__) && defined(CORE_TEENSY)
+
+// teensy defs
+_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+_DEFPIN_AVR(0, 1, B); _DEFPIN_AVR(1, 2, B); _DEFPIN_AVR(2, 4, B); _DEFPIN_AVR(3, 8, B);
+_DEFPIN_AVR(4, 128, B); _DEFPIN_AVR(5, 1, D); _DEFPIN_AVR(6, 2, D); _DEFPIN_AVR(7, 4, D);
+_DEFPIN_AVR(8, 8, D); _DEFPIN_AVR(9, 64, C); _DEFPIN_AVR(10, 128, C); _DEFPIN_AVR(11, 64, D);
+_DEFPIN_AVR(12, 128, D); _DEFPIN_AVR(13, 16, B); _DEFPIN_AVR(14, 32, B); _DEFPIN_AVR(15, 64, B);
+_DEFPIN_AVR(16, 128, F); _DEFPIN_AVR(17, 64, F); _DEFPIN_AVR(18, 32, F); _DEFPIN_AVR(19, 16, F);
+_DEFPIN_AVR(20, 2, F); _DEFPIN_AVR(21, 1, F); _DEFPIN_AVR(22, 16, D); _DEFPIN_AVR(23, 32, D);
+
+#define SPI_DATA 2 // pin 2 is mask 4 on port B (PB2) in the table above
+#define SPI_CLOCK 1 // pin 1 is mask 2 on port B (PB1)
+#define SPI_SELECT 0 // the ATmega32U4 SS line is PB0, i.e. pin 0 here; pin 3 is PB3 and must not be driven as the select output
+#define AVR_HARDWARE_SPI
+
+#elif defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB1286__)
+// teensy++ 2 defs
+
+_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+_DEFPIN_AVR(0, 1, D); _DEFPIN_AVR(1, 2, D); _DEFPIN_AVR(2, 4, D); _DEFPIN_AVR(3, 8, D);
+_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 32, D); _DEFPIN_AVR(6, 64, D); _DEFPIN_AVR(7, 128, D);
+_DEFPIN_AVR(8, 1, E); _DEFPIN_AVR(9, 2, E); _DEFPIN_AVR(10, 1, C); _DEFPIN_AVR(11, 2, C);
+_DEFPIN_AVR(12, 4, C); _DEFPIN_AVR(13, 8, C); _DEFPIN_AVR(14, 16, C); _DEFPIN_AVR(15, 32, C);
+_DEFPIN_AVR(16, 64, C); _DEFPIN_AVR(17, 128, C); _DEFPIN_AVR(18, 64, E); _DEFPIN_AVR(19, 128, E);
+_DEFPIN_AVR(20, 1, B); _DEFPIN_AVR(21, 2, B); _DEFPIN_AVR(22, 4, B); _DEFPIN_AVR(23, 8, B);
+_DEFPIN_AVR(24, 16, B); _DEFPIN_AVR(25, 32, B); _DEFPIN_AVR(26, 64, B); _DEFPIN_AVR(27, 128, B);
+_DEFPIN_AVR(28, 1, A); _DEFPIN_AVR(29, 2, A); _DEFPIN_AVR(30, 4, A); _DEFPIN_AVR(31, 8, A);
+_DEFPIN_AVR(32, 16, A); _DEFPIN_AVR(33, 32, A); _DEFPIN_AVR(34, 64, A); _DEFPIN_AVR(35, 128, A);
+_DEFPIN_AVR(36, 16, E); _DEFPIN_AVR(37, 32, E); _DEFPIN_AVR(38, 1, F); _DEFPIN_AVR(39, 2, F);
+_DEFPIN_AVR(40, 4, F); _DEFPIN_AVR(41, 8, F); _DEFPIN_AVR(42, 16, F); _DEFPIN_AVR(43, 32, F);
+_DEFPIN_AVR(44, 64, F); _DEFPIN_AVR(45, 128, F);
+
+#define SPI_DATA 22
+#define SPI_CLOCK 21
+#define SPI_SELECT 20
+#define AVR_HARDWARE_SPI
+
+#elif defined(__AVR_ATmega32U4__)
+
+// Leonardo defs - every entry in this table is a single-bit mask within the port (1, 2, 4 ... 128), not a bit number
+_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
+
+_DEFPIN_AVR(0, 4, D); _DEFPIN_AVR(1, 8, D); _DEFPIN_AVR(2, 2, D); _DEFPIN_AVR(3, 1, D);
+_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 64, C); _DEFPIN_AVR(6, 128, D); _DEFPIN_AVR(7, 64, E);
+_DEFPIN_AVR(8, 16, B); _DEFPIN_AVR(9, 32, B); _DEFPIN_AVR(10, 64, B); _DEFPIN_AVR(11, 128, B);
+_DEFPIN_AVR(12, 64, D); _DEFPIN_AVR(13, 128, C); _DEFPIN_AVR(14, 8, B); _DEFPIN_AVR(15, 2, B);
+_DEFPIN_AVR(16, 4, B); _DEFPIN_AVR(17, 1, B); _DEFPIN_AVR(18, 128, F); _DEFPIN_AVR(19, 64, F);
+_DEFPIN_AVR(20, 32, F); _DEFPIN_AVR(21, 16, F); _DEFPIN_AVR(22, 2, F); _DEFPIN_AVR(23, 1, F);
+
+#define SPI_DATA 16
+#define SPI_CLOCK 15
+#define AVR_HARDWARE_SPI
+
+#elif defined(__MK20DX128__) && defined(CORE_TEENSY)
+
+_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+
+_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 12, A);
+_DEFPIN_ARM(4, 13, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D);
+_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
+_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
+_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
+_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
+_DEFPIN_ARM(24, 5, A); _DEFPIN_ARM(25, 19, B); _DEFPIN_ARM(26, 1, E); _DEFPIN_ARM(27, 9, C);
+_DEFPIN_ARM(28, 8, C); _DEFPIN_ARM(29, 10, C); _DEFPIN_ARM(30, 11, C); _DEFPIN_ARM(31, 0, E);
+_DEFPIN_ARM(32, 18, B); _DEFPIN_ARM(33, 4, A);
+
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+#define ARM_HARDWARE_SPI
+
+#elif defined(__SAM3X8E__)
+
+DUE_IO32(A);
+DUE_IO32(B);
+DUE_IO32(C);
+DUE_IO32(D);
+
+_DEFPIN_DUE(0, 8, A); _DEFPIN_DUE(1, 9, A); _DEFPIN_DUE(2, 25, B); _DEFPIN_DUE(3, 28, C);
+_DEFPIN_DUE(4, 26, C); _DEFPIN_DUE(5, 25, C); _DEFPIN_DUE(6, 24, C); _DEFPIN_DUE(7, 23, C);
+_DEFPIN_DUE(8, 22, C); _DEFPIN_DUE(9, 21, C); _DEFPIN_DUE(10, 29, C); _DEFPIN_DUE(11, 7, D);
+_DEFPIN_DUE(12, 8, D); _DEFPIN_DUE(13, 27, B); _DEFPIN_DUE(14, 4, D); _DEFPIN_DUE(15, 5, D);
+_DEFPIN_DUE(16, 13, A); _DEFPIN_DUE(17, 12, A); _DEFPIN_DUE(18, 11, A); _DEFPIN_DUE(19, 10, A);
+_DEFPIN_DUE(20, 12, B); _DEFPIN_DUE(21, 13, B); _DEFPIN_DUE(22, 26, B); _DEFPIN_DUE(23, 14, A);
+_DEFPIN_DUE(24, 15, A); _DEFPIN_DUE(25, 0, D); _DEFPIN_DUE(26, 1, D); _DEFPIN_DUE(27, 2, D);
+_DEFPIN_DUE(28, 3, D); _DEFPIN_DUE(29, 6, D); _DEFPIN_DUE(30, 9, D); _DEFPIN_DUE(31, 7, A);
+_DEFPIN_DUE(32, 10, D); _DEFPIN_DUE(33, 1, C); _DEFPIN_DUE(34, 2, C); _DEFPIN_DUE(35, 3, C);
+_DEFPIN_DUE(36, 4, C); _DEFPIN_DUE(37, 5, C); _DEFPIN_DUE(38, 6, C); _DEFPIN_DUE(39, 7, C);
+_DEFPIN_DUE(40, 8, C); _DEFPIN_DUE(41, 9, C); _DEFPIN_DUE(42, 19, A); _DEFPIN_DUE(43, 20, A);
+_DEFPIN_DUE(44, 19, C); _DEFPIN_DUE(45, 18, C); _DEFPIN_DUE(46, 17, C); _DEFPIN_DUE(47, 16, C);
+_DEFPIN_DUE(48, 15, C); _DEFPIN_DUE(49, 14, C); _DEFPIN_DUE(50, 13, C); _DEFPIN_DUE(51, 12, C);
+_DEFPIN_DUE(52, 21, B); _DEFPIN_DUE(53, 14, B); _DEFPIN_DUE(54, 16, A); _DEFPIN_DUE(55, 24, A);
+_DEFPIN_DUE(56, 23, A); _DEFPIN_DUE(57, 22, A); _DEFPIN_DUE(58, 6, A); _DEFPIN_DUE(59, 4, A);
+_DEFPIN_DUE(60, 3, A); _DEFPIN_DUE(61, 2, A); _DEFPIN_DUE(62, 17, B); _DEFPIN_DUE(63, 18, B);
+_DEFPIN_DUE(64, 19, B); _DEFPIN_DUE(65, 20, B); _DEFPIN_DUE(66, 15, B); _DEFPIN_DUE(67, 16, B);
+_DEFPIN_DUE(68, 1, A); _DEFPIN_DUE(69, 0, A); _DEFPIN_DUE(70, 17, A); _DEFPIN_DUE(71, 18, A);
+_DEFPIN_DUE(72, 30, C); _DEFPIN_DUE(73, 21, A); _DEFPIN_DUE(74, 25, A); _DEFPIN_DUE(75, 26, A);
+_DEFPIN_DUE(76, 27, A); _DEFPIN_DUE(77, 28, A); _DEFPIN_DUE(78, 23, B);
+
+#else
+
+#warning "No pin/port mappings found, pin access will be slightly slower. See fastpin.h for info."
+#define NO_HARDWARE_PIN_SUPPORT
+
+#endif
+
+#endif
diff --git a/fastspi.h b/fastspi.h
new file mode 100644
index 00000000..00747137
--- /dev/null
+++ b/fastspi.h
@@ -0,0 +1,91 @@
+#ifndef __INC_FASTSPI_H
+#define __INC_FASTSPI_H
+
+#include "controller.h"
+#include "lib8tion.h"
+#include "delay.h"
+
+// Some helper macros for getting at mis-ordered byte values; they expand in terms of the SKIP and RGB_ORDER names in scope where they are used
+#define SPI_B0 (RGB_BYTE0(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) // index of the first byte to send within a pixel group
+#define SPI_B1 (RGB_BYTE1(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) // index of the second byte to send
+#define SPI_B2 (RGB_BYTE2(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) // index of the third byte to send
+#define SPI_ADVANCE (3 + (MASK_SKIP_BITS & SKIP)) // bytes to step per pixel group: three data bytes plus the skip count
+
+/// Some of the SPI controllers will need to perform a transform on each byte before doing
+/// anything with it. Creating a class of this form and passing it in as a template parameter to
+/// writeBytes/writeBytes3 ensures that adjust() is applied to every byte written and that
+/// postBlock() is called once the block is done. Recommendation: make adjust aggressively inlined.
+/// DATA_NOP is the default: bytes pass through unchanged, or are scaled with scale8 when a scale is given.
+/// TODO: Convenience macro for building these
+class DATA_NOP {
+public:
+ static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data) { return data; }
+ static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data, register uint8_t scale) { return scale8(data, scale); }
+ static __attribute__((always_inline)) inline void postBlock(int len) {}
+};
+
+#define FLAG_START_BIT 0x80
+#define MASK_SKIP_BITS 0x3F
+
+// Clock speed dividers
+#define SPEED_DIV_2 2
+#define SPEED_DIV_4 4
+#define SPEED_DIV_8 8
+#define SPEED_DIV_16 16
+#define SPEED_DIV_32 32
+#define SPEED_DIV_64 64
+#define SPEED_DIV_128 128
+
+#define MAX_DATA_RATE 0
+#define DATA_RATE_MHZ(X) ((F_CPU / 1000000L) / (X)) // clock divider for X MHz; X is parenthesized so expression arguments divide correctly
+#define DATA_RATE_KHZ(X) ((F_CPU / 1000L) / (X)) // clock divider for X kHz; X is parenthesized so expression arguments divide correctly
+
+// Include the various specific SPI implementations
+#include "fastspi_bitbang.h"
+#include "fastspi_arm.h"
+#include "fastspi_avr.h"
+#include "fastspi_dma.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// External SPI template definition with partial instantiation(s) to map to hardware SPI ports on platforms/builds where the pin
+// mappings are known at compile time.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class SPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {}; // generic case: bit-banged output; specialized further down for the SPI_DATA/SPI_CLOCK pin pair
+
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class SoftwareSPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {}; // always bit-banged; this header never specializes it
+
+#ifndef FORCE_SOFTWARE_SPI
+#if defined(SPI_DATA) && defined(SPI_CLOCK)
+
+#if defined(__MK20DX128__) && defined(CORE_TEENSY)
+
+template<uint8_t SPI_SPEED>
+class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
+
+#else
+
+template<uint8_t SPI_SPEED>
+class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public AVRHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
+
+#endif
+
+#else
+#warning "No hardware SPI pins defined. All SPI access will default to bitbanged output"
+
+#endif
+
+// #if defined(USART_DATA) && defined(USART_CLOCK)
+// template<uint8_t SPI_SPEED>
+// class AVRSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> : public AVRUSARTSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> {};
+// #endif
+
+#else
+#warning "Forcing software SPI - no hardware SPI for you!"
+#endif
+
+#endif
diff --git a/fastspi_arm.h b/fastspi_arm.h
new file mode 100644
index 00000000..e9c38343
--- /dev/null
+++ b/fastspi_arm.h
@@ -0,0 +1,386 @@
+#ifndef __INC_FASTSPI_ARM_H
+#define __INC_FASTSPI_ARM_H
+
+
+#if defined(__MK20DX128__) && defined(CORE_TEENSY)
+
+#ifndef SPI_PUSHR_CONT
+#define SPI_PUSHR_CONT SPI0_PUSHR_CONT
+#define SPI_PUSHR_CTAS(X) SPI0_PUSHR_CTAS(X)
+#define SPI_PUSHR_EOQ SPI0_PUSHR_EOQ
+#define SPI_PUSHR_CTCNT SPI0_PUSHR_CTCNT
+#define SPI_PUSHR_PCS(X) SPI0_PUSHR_PCS(X)
+#endif
+
+// Compile-time helper: highestBit() yields the index of the most significant set bit of VAL at or below position
+// BIT, found by recursing down through the BIT parameter. When no bit is set (VAL is 0) the result is 0, which is
+// the same answer as for "only bit 0 is set" - callers have to treat that ambiguity themselves.
+template<int VAL, int BIT> class BitWork {
+ public:
+ static int highestBit() __attribute__((always_inline)) { if((VAL & (1 << BIT)) != 0) { return BIT; } return BitWork<VAL, BIT-1>::highestBit(); }
+};
+template<int VAL> class BitWork<VAL, 0> {
+ public:
+ static int highestBit() __attribute__((always_inline)) { return 0; }
+};
+
+#define MAX(A, B) (( (A) > (B) ) ? (A) : (B))
+
+#define USE_CONT 0
+
+// Translate the clock divider VAL into the three outputs preScalar, scalar and dbl (clock doubling); setSPIRate() feeds these into the PBR, BR and DBR fields of CTAR.
+template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint32_t & dbl) {
+ switch(VAL) {
+ // Handle the dbl clock cases
+ case 0: case 1:
+ case 2: preScalar = 0; scalar = 0; dbl = 1; break;
+ case 3: preScalar = 1; scalar = 0; dbl = 1; break;
+ case 5: preScalar = 2; scalar = 0; dbl = 1; break;
+ case 7: preScalar = 3; scalar = 0; dbl = 1; break;
+
+ // Handle the scalar value 6 cases (since it's not a power of two, it won't get caught
+ // below)
+ case 9: preScalar = 1; scalar = 2; dbl = 1; break;
+ case 18: case 19: preScalar = 1; scalar = 2; dbl = 0; break;
+
+ case 15: preScalar = 2; scalar = 2; dbl = 1; break;
+ case 30: case 31: preScalar = 2; scalar = 2; dbl = 0; break;
+
+ case 21: case 22: case 23: preScalar = 3; scalar = 2; dbl = 1; break;
+ case 42: case 43: case 44: case 45: case 46: case 47: preScalar = 3; scalar = 2; dbl = 0; break;
+ default: {
+ int p2 = BitWork<VAL/2, 15>::highestBit(); // pN: highest set bit of VAL/N, i.e. the power-of-two exponent that goes with base N
+ int p3 = BitWork<VAL/3, 15>::highestBit();
+ int p5 = BitWork<VAL/5, 15>::highestBit();
+ int p7 = BitWork<VAL/7, 15>::highestBit();
+ // wN: the largest divider of the form N * 2^k that does not exceed VAL (0 when VAL is smaller than N)
+ int w2 = 2 * (1 << p2);
+ int w3 = (VAL/3) > 0 ? 3 * (1 << p3) : 0;
+ int w5 = (VAL/5) > 0 ? 5 * (1 << p5) : 0;
+ int w7 = (VAL/7) > 0 ? 7 * (1 << p7) : 0;
+
+ int maxval = MAX(MAX(w2, w3), MAX(w5, w7));
+ // pick the base whose candidate is largest; ties go to the smaller base because of the test order
+ if(w2 == maxval) { preScalar = 0; scalar = p2; }
+ else if(w3 == maxval) { preScalar = 1; scalar = p3; }
+ else if(w5 == maxval) { preScalar = 2; scalar = p5; }
+ else if(w7 == maxval) { preScalar = 3; scalar = p7; }
+ // exponent 0 turns on doubling, exponents 1 and 2 drop by one, 3 and up are used as-is (presumably matching the BR field encoding - confirm against the K20 manual)
+ dbl = 0;
+ if(scalar == 0) { dbl = 1; }
+ else if(scalar < 3) { scalar--; }
+ }
+ }
+ return;
+}
+
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class ARMHardwareSPIOutput {
+ Selectable *m_pSelect;
+
+ // Borrowed from the teensy3 SPSR emulation code. Switches pins 11, 12 and 13 to pin-mux alternative 2 (presumably the SPI0 signals - confirm); the pins are hard-coded, not taken from _DATA_PIN/_CLOCK_PIN
+ static inline void enable_pins(void) __attribute__((always_inline)) {
+ //serial_print("enable_pins\n");
+ CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ CORE_PIN12_CONFIG = PORT_PCR_MUX(2);
+ CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
+ }
+
+ // Borrowed from the teensy3 SPSR emulation code. Switches pins 11, 12 and 13 to pin-mux alternative 1 (presumably plain GPIO - confirm); the visible part of this class never calls it
+ static inline void disable_pins(void) __attribute__((always_inline)) {
+ //serial_print("disable_pins\n");
+ CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ CORE_PIN12_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1);
+ }
+
+public:
+ ARMHardwareSPIOutput() { m_pSelect = NULL; }
+ ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+ void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+ static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) { // store ctar into SPI0_CTAR0, skipping the write when it already holds that value
+ if (SPI0_CTAR0 == ctar) return;
+ uint32_t mcr = SPI0_MCR;
+ if (mcr & SPI_MCR_MDIS) {
+ SPI0_CTAR0 = ctar;
+ } else {
+ SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+ SPI0_CTAR0 = ctar;
+ // module was running: it was disabled/halted for the write above, now put MCR back as it was
+ SPI0_MCR = mcr;
+ }
+ }
+
+ static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) { // same as update_ctar0, but for SPI0_CTAR1
+ if (SPI0_CTAR1 == ctar) return;
+ uint32_t mcr = SPI0_MCR;
+ if (mcr & SPI_MCR_MDIS) {
+ SPI0_CTAR1 = ctar;
+ } else {
+ SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT;
+ SPI0_CTAR1 = ctar;
+ SPI0_MCR = mcr;
+ // MCR is back to its saved value at this point
+ }
+ }
+
+ static inline void set_ctar1_bits(int bits) {
+ // Set the CTAR1 frame size to `bits` bits per transfer (FMSZ holds bits-1), keeping every other CTAR1 field
+ uint32_t ctar = SPI0_CTAR1;
+
+ // clear the FMSZ bits - the mask has to be inverted, and-ing with the bare mask would wipe every field except FMSZ
+ ctar &= ~SPI_CTAR_FMSZ(0x0F);
+ ctar |= SPI_CTAR_FMSZ((bits-1) & 0x0F);
+
+ update_ctar1(ctar);
+ }
+
+ static inline void set_ctar0_bits(int bits) {
+ // Set the CTAR0 frame size to `bits` bits per transfer (FMSZ holds bits-1), keeping every other CTAR0 field
+ uint32_t ctar = SPI0_CTAR0;
+
+ // clear the FMSZ bits - the mask has to be inverted, and-ing with the bare mask would wipe every field except FMSZ
+ ctar &= ~SPI_CTAR_FMSZ(0x0F);
+ ctar |= SPI_CTAR_FMSZ((bits-1) & 0x0F);
+
+ update_ctar0(ctar);
+ }
+
+
+ void setSPIRate() {
+ // Configure CTAR0, defaulting to 8 bits and CTAR1, defaulting to 16 bits, both at the rate given by _SPI_CLOCK_DIVIDER
+ uint32_t _PBR = 0;
+ uint32_t _BR = 0;
+ uint32_t _CSSCK = 0;
+ uint32_t _DBR = 0;
+ // Earlier hand-written divider table, superseded by getScalars() below and kept for reference:
+ // if(_SPI_CLOCK_DIVIDER >= 256) { _PBR = 0; _BR = _CSSCK = 7; _DBR = 0; } // osc/256
+ // else if(_SPI_CLOCK_DIVIDER >= 128) { _PBR = 0; _BR = _CSSCK = 6; _DBR = 0; } // osc/128
+ // else if(_SPI_CLOCK_DIVIDER >= 64) { _PBR = 0; _BR = _CSSCK = 5; _DBR = 0; } // osc/64
+ // else if(_SPI_CLOCK_DIVIDER >= 32) { _PBR = 0; _BR = _CSSCK = 4; _DBR = 0; } // osc/32
+ // else if(_SPI_CLOCK_DIVIDER >= 16) { _PBR = 0; _BR = _CSSCK = 3; _DBR = 0; } // osc/16
+ // else if(_SPI_CLOCK_DIVIDER >= 8) { _PBR = 0; _BR = _CSSCK = 1; _DBR = 0; } // osc/8
+ // else if(_SPI_CLOCK_DIVIDER >= 7) { _PBR = 3; _BR = _CSSCK = 0; _DBR = 1; } // osc/7
+ // else if(_SPI_CLOCK_DIVIDER >= 5) { _PBR = 2; _BR = _CSSCK = 0; _DBR = 1; } // osc/5
+ // else if(_SPI_CLOCK_DIVIDER >= 4) { _PBR = 0; _BR = _CSSCK = 0; _DBR = 0; } // osc/4
+ // else if(_SPI_CLOCK_DIVIDER >= 3) { _PBR = 1; _BR = _CSSCK = 0; _DBR = 1; } // osc/3
+ // else { _PBR = 0; _BR = _CSSCK = 0; _DBR = 1; } // osc/2
+
+ getScalars<_SPI_CLOCK_DIVIDER>(_PBR, _BR, _DBR);
+ _CSSCK = _BR; // the CSSCK field simply reuses the BR value
+
+ uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK);
+ uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK);
+
+#if USE_CONT == 1
+ ctar0 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+ ctar1 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL;
+#endif
+
+ if(_DBR) {
+ ctar0 |= SPI_CTAR_DBR;
+ ctar1 |= SPI_CTAR_DBR;
+ }
+ // update_ctarN only touches the hardware when the value actually changes
+ update_ctar0(ctar0);
+ update_ctar1(ctar1);
+
+ }
+
+ void init() {
+ // set the pins to output
+ FastPin<_DATA_PIN>::setOutput();
+ FastPin<_CLOCK_PIN>::setOutput();
+ release();
+
+ // Enable SPI0 clock
+ uint32_t sim6 = SIM_SCGC6;
+ if (!(sim6 & SIM_SCGC6_SPI0)) {
+ //serial_print("init1\n");
+ SIM_SCGC6 = sim6 | SIM_SCGC6_SPI0;
+ SPI0_CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1);
+ }
+ // program both CTAR registers for the templated clock divider
+ setSPIRate();
+
+ // Configure SPI as the master and enable
+ SPI0_MCR |= SPI_MCR_MSTR; // | SPI_MCR_CONT_SCKE);
+ SPI0_MCR &= ~(SPI_MCR_MDIS | SPI_MCR_HALT);
+ // finally hand pins 11/12/13 over to the SPI module
+ enable_pins();
+ }
+
+ static void waitFully() __attribute__((always_inline)) { // block until everything queued has gone out
+ while( (SPI0_SR & 0xF000) > 0); // spin while SR bits 15..12 are non-zero (presumably the TX FIFO fill count - confirm against the K20 manual)
+ while (!(SPI0_SR & SPI_SR_TCF)); // then spin until the TCF flag is raised
+ SPI0_SR |= (SPI_SR_TCF | SPI_SR_EOQF); // write the two flags back (presumably write-1-to-clear - confirm)
+ }
+
+ static bool needwait() __attribute__((always_inline)) { return (SPI0_SR & 0x4000); } // true while SR bit 14 is set
+ static void wait() __attribute__((always_inline)) { while( (SPI0_SR & 0x4000) ); } // spin while SR bit 14 is set (presumably "TX FIFO full" - confirm)
+ static void wait1() __attribute__((always_inline)) { while( (SPI0_SR & 0xF000) >= 0x2000); } // spin while SR bits 15..12 hold a value of 2 or more
+
+ enum ECont { CONT, NOCONT };
+ enum EWait { PRE, POST, NONE };
+ enum ELast { NOTLAST, LAST };
+
+#if USE_CONT == 1
+ #define CM CONT
+#else
+ #define CM NOCONT
+#endif
+ #define WM PRE
+
+ template<ECont CONT_STATE, EWait WAIT_STATE, ELast LAST_STATE> class Write { // push helpers whose wait placement, CONT flag and EOQ flag are chosen at compile time
+ public:
+ static void writeWord(uint16_t w) __attribute__((always_inline)) { // queue w as one frame using CTAR1
+ if(WAIT_STATE == PRE) { wait(); }
+ SPI0_PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+ ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+ SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+ if(WAIT_STATE == POST) { wait(); }
+ }
+
+ static void writeByte(uint8_t b) __attribute__((always_inline)) { // queue b as one frame using CTAR0
+ if(WAIT_STATE == PRE) { wait(); }
+ SPI0_PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
+ ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+ SPI_PUSHR_CTAS(0) | (b & 0xFF);
+ if(WAIT_STATE == POST) { wait(); }
+ }
+ };
+
+ static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); }
+ static void writeWordNoWait(uint16_t w) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); }
+ // byte variants select CTAR0 via CTAS(0); the word variants above select CTAR1 via CTAS(1)
+ static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); }
+ static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); wait(); }
+ static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); }
+ // the *Cont variants below do the same pushes with SPI_PUSHR_CONT or-ed in
+ static void writeWordCont(uint16_t w) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); }
+ static void writeWordContNoWait(uint16_t w) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); }
+
+ static void writeByteCont(uint8_t b) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); }
+ static void writeByteContPostWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); wait(); }
+ static void writeByteContNoWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); }
+
+ // Send bit number BIT of b as a frame of its own. Not the most efficient mechanism in the world - but should be enough for sm16716 and friends
+ template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+ uint32_t ctar1_save = SPI0_CTAR1;
+
+ // Clear out the FMSZ bits and set FMSZ to 0, i.e. a one-bit frame. NOTE(review): confirm the hardware accepts a frame size this small
+ uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(0);
+ update_ctar1(ctar1);
+ // the pushed value is 1 when the selected bit of b is set and 0 otherwise
+ writeWord( (b & (1 << BIT)) != 0);
+ // put the previous frame size back
+ update_ctar1(ctar1_save);
+ }
+
+ void inline select() __attribute__((always_inline)) { if(m_pSelect != NULL) { m_pSelect->select(); } } // no-op when no Selectable was supplied
+ void inline release() __attribute__((always_inline)) { if(m_pSelect != NULL) { m_pSelect->release(); } } // no-op when no Selectable was supplied
+ // Queue `value` len times; does not select, set the rate, or wait for completion
+ static void writeBytesValueRaw(uint8_t value, int len) {
+ while(len--) { Write<CM, WM, NOTLAST>::writeByte(value); }
+ }
+ // Full transaction: set the rate, select, send `value` len times, wait for the output to finish, release
+ void writeBytesValue(uint8_t value, int len) {
+ setSPIRate();
+ select();
+ while(len--) {
+ writeByte(value);
+ }
+ waitFully();
+ release();
+ }
+
+ // Write a block of len uint8_ts out, passing each byte through D::adjust and calling D::postBlock(len) afterwards
+ template <class D> void writeBytes(register uint8_t *data, int len) {
+ setSPIRate();
+ uint8_t *end = data + len;
+ select();
+ while(data != end) {
+ writeByte(D::adjust(*data++));
+ }
+ D::postBlock(len);
+ waitFully();
+ release();
+ }
+ // Convenience overload: same as above with the pass-through DATA_NOP adjuster
+ void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+ // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out and must be a whole number of
+ // SPI_ADVANCE-sized groups (the loops compare pointers with !=). The low bits of SKIP give the bytes skipped per group; FLAG_START_BIT asks for a leading 1 bit
+ template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ // setSPIRate();
+ uint8_t *end = data + len;
+ select();
+ if((SKIP & FLAG_START_BIT) == 0) {
+ // No start bit needed: write out as many 16-bit blocks as we can, two groups (three words) at a time
+ uint8_t *first_end = end - (len % (SPI_ADVANCE * 2));
+
+ while(data != first_end) {
+ if(WM == NONE) { wait1(); }
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_B0], scale) << 8 | D::adjust(data[SPI_B1], scale));
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_B2], scale) << 8 | D::adjust(data[SPI_ADVANCE + SPI_B0], scale));
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_ADVANCE + SPI_B1], scale) << 8 | D::adjust(data[SPI_ADVANCE + SPI_B2], scale));
+ data += (SPI_ADVANCE + SPI_ADVANCE);
+ }
+
+ if(data != end) {
+ if(WM == NONE) { wait1(); }
+ // one odd group is left over: write it as a 16-bit word followed by an 8-bit byte
+ Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_B0], scale) << 8 | D::adjust(data[SPI_B1], scale));
+ Write<CM, WM, NOTLAST>::writeByte(D::adjust(data[SPI_B2], scale));
+ }
+
+ D::postBlock(len);
+ waitFully();
+ } else if(SKIP & FLAG_START_BIT) {
+ uint32_t ctar1_save = SPI0_CTAR1;
+
+ // Clear out the FMSZ bits, reset them for 9 bits transferred: the start bit plus the first data byte
+ uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8);
+ update_ctar1(ctar1);
+
+ while(data != end) {
+ writeWord( 0x100 | D::adjust(data[SPI_B0], scale));
+ writeByte(D::adjust(data[SPI_B1], scale));
+ writeByte(D::adjust(data[SPI_B2], scale));
+ data += SPI_ADVANCE;
+ }
+ D::postBlock(len);
+ waitFully();
+
+ // restore ctar1
+ update_ctar1(ctar1_save);
+ // } else {
+ // while(data != end) {
+ // writeByte(D::adjust(data[SPI_B0], scale);
+ // writeWord(D::adjust(data[SPI_B1], scale) << 8 | D::adjust(data[SPI_B2], scale));
+ // data += SPI_ADVANCE;
+ // }
+ // waitFully();
+ }
+ release();
+ }
+
+
+ template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // adjuster defaults to DATA_NOP
+ writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // SKIP defaults to 0
+ writeBytes3<0, D, RGB_ORDER>(data, len, scale);
+ }
+ template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // SKIP 0 and DATA_NOP
+ writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // SKIP 0, DATA_NOP and RGB byte order
+ writeBytes3<0, DATA_NOP, RGB>(data, len, scale);
+ }
+};
+#endif
+
+#endif
diff --git a/fastspi_avr.h b/fastspi_avr.h
new file mode 100644
index 00000000..af116cab
--- /dev/null
+++ b/fastspi_avr.h
@@ -0,0 +1,314 @@
+#ifndef __INC_FASTSPI_AVR_H
+#define __INC_FASTSPI_AVR_H
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Hardware SPI support using USART registers and friends
+//
+// TODO: Complete/test implementation - right now this doesn't work
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// uno/mini/duemilanove
+#if defined(AVR_HARDWARE_SPI)
+#if defined(UBRR0)
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class AVRUSARTSPIOutput {
+ Selectable *m_pSelect;
+
+public:
+ AVRUSARTSPIOutput() { m_pSelect = NULL; }
+ AVRUSARTSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+ void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+ void init() { // bring USART0 up for SPI output; note that the rate is fixed here and _SPI_CLOCK_DIVIDER is not consulted
+ UBRR0 = 0;
+ UCSR0A = 1<<TXC0;
+ // both lines are driven by us
+ FastPin<_CLOCK_PIN>::setOutput();
+ FastPin<_DATA_PIN>::setOutput();
+
+ UCSR0C = _BV (UMSEL00) | _BV (UMSEL01); // Master SPI mode
+ UCSR0B = _BV (TXEN0) | _BV (RXEN0); // transmit enable and receive enable
+
+ // must be done last, see page 206
+ UBRR0 = 3; // 2 Mhz clock rate
+ }
+
+ static void stop() {
+ // TODO: stop the uart spi output
+ }
+
+ static void wait() __attribute__((always_inline)) { while(!(UCSR0A & (1<<UDRE0))); } // spin until the UDRE0 flag is set
+ static void waitFully() __attribute__((always_inline)) { wait(); } // identical to wait(); release() is what waits for TXC0
+ // the three writeByte variants differ only in where the wait() goes: none, after, or before the UDR0 store
+ static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { UDR0 = b;}
+ static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { UDR0 = b; wait(); }
+ static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); UDR0 = b; }
+ // a word goes out as two bytes, high byte first
+ static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
+
+ template <uint8_t BIT> inline static void writeBit(uint8_t b) { // bit-bang bit number BIT of b: set the data line, then pulse the clock once
+ if(b & (1 << BIT)) {
+ FastPin<_DATA_PIN>::hi();
+ } else {
+ FastPin<_DATA_PIN>::lo();
+ }
+ // bitwise & above: a logical && would send a 1 for every non-zero b, whatever BIT is
+ FastPin<_CLOCK_PIN>::hi();
+ FastPin<_CLOCK_PIN>::lo();
+ }
+
+ void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // no-op when no Selectable was supplied
+ void release() {
+ // wait for all transmissions to finish
+ while ((UCSR0A & (1 <<TXC0)) == 0) {}
+ if(m_pSelect != NULL) { m_pSelect->release(); } // no-op when no Selectable was supplied
+ }
+ // Send `value` len times without touching the select line
+ static void writeBytesValueRaw(uint8_t value, int len) {
+ while(len--) { writeByte(value); }
+ }
+ // Full transaction: select, send `value` len times, then release (which waits for the output to finish)
+ void writeBytesValue(uint8_t value, int len) {
+ select();
+ while(len--) {
+ writeByte(value);
+ }
+ release();
+ }
+
+ // Write a block of len uint8_ts out, passing each byte through D::adjust and calling D::postBlock(len) afterwards
+ template <class D> void writeBytes(register uint8_t *data, int len) {
+ uint8_t *end = data + len;
+ select();
+ while(data != end) {
+#if defined(__MK20DX128__)
+ writeByte(D::adjust(*data++));
+#else
+ // a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM)
+ writeByte(D::adjust(*data++)); delaycycles<3>();
+#endif
+ }
+ D::postBlock(len);
+ release();
+ }
+ // Convenience overload: same as above with the pass-through DATA_NOP adjuster
+ void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+ // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out and must be a whole number of
+ // SPI_ADVANCE-sized groups (the loop compares pointers with !=). The low bits of SKIP give the bytes skipped per group; FLAG_START_BIT is not handled here
+ template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ uint8_t *end = data + len;
+ select();
+ while(data != end) {
+ writeByte(D::adjust(data[SPI_B0], scale));
+ writeByte(D::adjust(data[SPI_B1], scale));
+ writeByte(D::adjust(data[SPI_B2], scale));
+ data += SPI_ADVANCE;
+ }
+ D::postBlock(len);
+ release();
+ }
+ // The overloads below fill in defaults: SKIP 0, the DATA_NOP adjuster and RGB byte order
+ template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, D, RGB_ORDER>(data, len, scale);
+ }
+ template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, DATA_NOP, RGB>(data, len, scale);
+ }
+
+};
+
+#endif
+
+#if defined(SPSR)
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Hardware SPI support using SPDR registers and friends
+//
+// Technically speaking, this uses the AVR SPI registers. This will work on the Teensy 3.0 because Paul made a set of compatibility
+// classes that map the AVR SPI registers to ARM's; however, this caps the performance of output.
+//
+// TODO: implement ARMHardwareSPIOutput
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class AVRHardwareSPIOutput {
+ Selectable *m_pSelect;
+ bool mWait;
+public:
+ AVRHardwareSPIOutput() { m_pSelect = NULL; mWait = false;} // no select line: select()/release() become no-ops
+ AVRHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; mWait = false; } // mWait is initialized here too, so both constructors leave the object fully defined
+ void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } // swap the select line after construction
+
+ void setSPIRate() { // re-apply the _SPI_CLOCK_DIVIDER rate bits; every call to this in the visible part of the class is commented out
+ SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); // clear out the prescalar bits
+
+ bool b2x = false;
+ // the divider is matched against descending thresholds; each branch picks an SPR1/SPR0 combination and whether SPI2X is wanted
+ if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); }
+ else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);}
+ else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true; }
+ else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); }
+ else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; }
+ else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ }
+ else { b2x = true; }
+ // SPI2X is set or cleared explicitly so that a previous setting never lingers
+ if(b2x) { SPSR |= (1<<SPI2X); }
+ else { SPSR &= ~ (1<<SPI2X); }
+ }
+
+ void init() { // configure the pins, enable the SPI unit as master and program the clock rate
+ volatile uint8_t clr;
+
+ // set the pins to output
+ FastPin<_DATA_PIN>::setOutput();
+ FastPin<_CLOCK_PIN>::setOutput();
+#ifdef SPI_SELECT
+ // Make sure the slave select line is set to output, or arduino will block us
+ FastPin<SPI_SELECT>::setOutput();
+ FastPin<SPI_SELECT>::lo();
+#endif
+ release();
+
+ SPCR |= ((1<<SPE) | (1<<MSTR) ); // enable SPI as master
+ SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); // clear out the prescalar bits
+
+ clr = SPSR; // clear SPI status register
+ clr = SPDR; // clear SPI data register
+ clr;
+ // from here on this is the same rate selection that setSPIRate() performs
+ bool b2x = false;
+
+ if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); }
+ else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);}
+ else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true; }
+ else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); }
+ else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; }
+ else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ }
+ else { b2x = true; }
+
+ if(b2x) { SPSR |= (1<<SPI2X); }
+ else { SPSR &= ~ (1<<SPI2X); }
+ // write one zero byte, then clear the pending-wait flag so the next writeByte() does not wait on it
+ SPDR=0;
+ shouldWait(false);
+ }
+
+ static bool shouldWait(bool wait = false) __attribute__((always_inline)) { // returns the stored "a byte is in flight" flag and replaces it with `wait`
+ static bool sWait=false;
+ if(sWait) { sWait = wait; return true; } else { sWait = wait; return false; }
+ // return true;
+ }
+ static void wait() __attribute__((always_inline)) { if(shouldWait()) { while(!(SPSR & (1<<SPIF))); } } // spins on SPIF only if a write set the flag; the flag is cleared either way
+ static void waitFully() __attribute__((always_inline)) { wait(); }
+ // each write stores to SPDR and sets the flag; the variants differ only in where the wait() goes
+ static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPDR=b; shouldWait(true); }
+ static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); wait(); }
+ static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); }
+
+ template <uint8_t BIT> inline static void writeBit(uint8_t b) { // bit-bang bit number BIT of b with the SPI unit switched off for the duration
+ SPCR &= ~(1 << SPE);
+ if(b & (1 << BIT)) {
+ FastPin<_DATA_PIN>::hi();
+ } else {
+ FastPin<_DATA_PIN>::lo();
+ }
+ // one clock pulse for the bit just placed on the data line
+ FastPin<_CLOCK_PIN>::hi();
+ FastPin<_CLOCK_PIN>::lo();
+ SPCR |= 1 << SPE;
+ shouldWait(false);
+ }
+
+ void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // no-op when no Selectable was supplied
+ void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } // no-op when no Selectable was supplied
+ // Send `value` len times without touching the select line
+ static void writeBytesValueRaw(uint8_t value, int len) {
+ while(len--) { writeByte(value); }
+ }
+ // Full transaction: select, send `value` len times, release; the last byte is not waited for before release()
+ void writeBytesValue(uint8_t value, int len) {
+ //setSPIRate();
+ select();
+ while(len--) {
+ writeByte(value);
+ }
+ release();
+ }
+
+ // Write a block of len uint8_ts out through D::adjust. NOTE(review): unlike writeBytes3 below, this never calls D::postBlock(len) - confirm that is intended
+ template <class D> void writeBytes(register uint8_t *data, int len) {
+ //setSPIRate();
+ uint8_t *end = data + len;
+ select();
+ while(data != end) {
+ // a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM)
+ writeByte(D::adjust(*data++)); delaycycles<3>();
+ }
+ release();
+ }
+ // Convenience overload: same as above with the pass-through DATA_NOP adjuster
+ void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+ // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out and must be a whole number of
+ // SPI_ADVANCE-sized groups (the loop compares pointers with !=). The low bits of SKIP give the bytes skipped per group; FLAG_START_BIT asks for a leading 1 bit
+ template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ //setSPIRate();
+ uint8_t *end = data + len;
+ select();
+ while(data != end) {
+ if(SKIP & FLAG_START_BIT) {
+ writeBit<0>(1);
+ }
+ // the first branch is disabled by its leading `false &&`; only the two branches after it can ever run
+ if(false && _SPI_CLOCK_DIVIDER == 0) {
+ writeByteNoWait(D::adjust(data[SPI_B0], scale)); delaycycles<13>();
+ writeByteNoWait(D::adjust(data[SPI_B1], scale)); delaycycles<13>();
+ writeByteNoWait(D::adjust(data[SPI_B2], scale)); delaycycles<9>();
+ } else if(SKIP & FLAG_START_BIT) {
+ writeBytePostWait(D::adjust(data[SPI_B0], scale));
+ writeBytePostWait(D::adjust(data[SPI_B1], scale));
+ writeBytePostWait(D::adjust(data[SPI_B2], scale));
+ } else {
+ writeByte(D::adjust(data[SPI_B0], scale));
+ writeByte(D::adjust(data[SPI_B1], scale));
+ writeByte(D::adjust(data[SPI_B2], scale));
+ }
+
+ data += SPI_ADVANCE;
+ }
+ D::postBlock(len);
+ release();
+ }
+ // The overloads below fill in defaults: SKIP 0, the DATA_NOP adjuster and RGB byte order
+ template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, D, RGB_ORDER>(data, len, scale);
+ }
+ template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ writeBytes3<0, DATA_NOP, RGB>(data, len, scale);
+ }
+
+};
+#endif
+
+#else
+// #define FORCE_SOFTWARE_SPI
+#endif
+
+#endif \ No newline at end of file
diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
new file mode 100644
index 00000000..f9c1a218
--- /dev/null
+++ b/fastspi_bitbang.h
@@ -0,0 +1,368 @@
+#ifndef __INC_FASTSPI_BITBANG_H
+#define __INC_FASTSPI_BITBANG_H
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Software SPI (aka bit-banging) support - with aggressive optimizations for when the clock and data pin are on the same port
+//
+// TODO: Replace the select pin definition with a set of pins, to allow using mux hardware for routing in the future
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SPI_SPEED>
+class AVRSoftwareSPIOutput {
+ // The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these
+ // are pointers to 8 bit values, while on arm they are 32 bit
+ typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+ typedef typename FastPin<CLOCK_PIN>::port_ptr_t clock_ptr_t;
+
+ // The data type for what's at a pin's port - typedef'd here from the Pin definition because on avr the ports
+ // are 8 bits wide while on arm they are 32.
+ typedef typename FastPin<DATA_PIN>::port_t data_t;
+ typedef typename FastPin<CLOCK_PIN>::port_t clock_t;
+ Selectable *m_pSelect;
+
+public:
+ AVRSoftwareSPIOutput() { m_pSelect = NULL; } // no select object: select() and release() then do nothing
+ AVRSoftwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } // only the pointer is stored; the Selectable itself is not copied
+ void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } // swap in a different select object; NULL disables select/release
+
+ void init() {
+ // Configure the data and clock pins as outputs, then release the select line. Which electrical level
+ // "released" means is decided by the Selectable object, not here; with no Selectable set this is a no-op.
+ FastPin<DATA_PIN>::setOutput();
+ FastPin<CLOCK_PIN>::setOutput();
+ release();
+ }
+
+ // stop the SPI output. Pretty much a NOP with software, as there are no registers to kick
+ static void stop() { }
+
+ // wait until the SPI subsystem is ready for more data to write. A NOP when bitbanging
+ static void wait() __attribute__((always_inline)) { }
+ static void waitFully() __attribute__((always_inline)) { wait(); } // same no-op as wait()
+ // The two variants below have the same effect as writeByte(b) in this class because wait() is empty.
+ static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); }
+ static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); wait(); }
+
+ static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); } // high byte first, then low byte
+
+ // naive writeByte implementation, simply calls writeBit on the 8 bits in the byte, most significant bit first.
+ static void writeByte(uint8_t b) __attribute__((always_inline)) {
+ writeBit<7>(b);
+ writeBit<6>(b);
+ writeBit<5>(b);
+ writeBit<4>(b);
+ writeBit<3>(b);
+ writeBit<2>(b);
+ writeBit<1>(b);
+ writeBit<0>(b);
+ }
+
+private:
+ // writeByte implementation with data/clock registers passed in. Sends b most significant bit first by calling the three-argument writeBit once per bit.
+ static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) __attribute__((always_inline)) {
+ writeBit<7>(b, clockpin, datapin);
+ writeBit<6>(b, clockpin, datapin);
+ writeBit<5>(b, clockpin, datapin);
+ writeBit<4>(b, clockpin, datapin);
+ writeBit<3>(b, clockpin, datapin);
+ writeBit<2>(b, clockpin, datapin);
+ writeBit<1>(b, clockpin, datapin);
+ writeBit<0>(b, clockpin, datapin);
+ }
+
+ // writeByte implementation with the data register passed in and prebaked values for data hi w/clock hi and
+ // low and data lo w/clock hi and lo. This is to be used when clock and data are on the same GPIO register,
+ // can get close to getting a bit out the door in 2 clock cycles! Note that the parameter names are misleading: the four values are forwarded positionally to the same-port writeBit, which reads them as (data hi + clock hi, data lo + clock hi, data hi + clock lo, data lo + clock lo).
+ static void writeByte(uint8_t b, data_ptr_t datapin,
+ data_t hival, data_t loval,
+ clock_t hiclock, clock_t loclock) __attribute__((always_inline, hot)) {
+ writeBit<7>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<6>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<5>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<4>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<3>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<2>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<1>(b, datapin, hival, loval, hiclock, loclock);
+ writeBit<0>(b, datapin, hival, loval, hiclock, loclock);
+ }
+
+ // writeByte implementation with not just registers passed in, but pre-baked values for said registers for
+ // data hi/lo and clock hi/lo values. Note: weird things will happen if this method is called in cases where
+ // the data and clock pins are on the same port! Don't do that! Bits go out most significant first.
+ static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
+ data_t hival, data_t loval,
+ clock_t hiclock, clock_t loclock) __attribute__((always_inline)) {
+ writeBit<7>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<6>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<5>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<4>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<3>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<2>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<1>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ writeBit<0>(b, clockpin, datapin, hival, loval, hiclock, loclock);
+ }
+
+public:
+ #define SPI_DELAY delaycycles< (SPI_SPEED-2) / 2>();
+
+ // Clock out bit number BIT of b: drive the data pin to the bit's level, then pulse the clock pin once.
+ // For SPI_SPEED below 3 the pulse is a single strobe(); otherwise the clock is raised and lowered
+ // explicitly, with an SPI_DELAY after each edge.
+ template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) {
+ // data level goes out first so it is already on the pin when the clock rises
+ const bool bit_is_set = (b & (1 << BIT)) != 0;
+ if(bit_is_set) {
+ FastPin<DATA_PIN>::hi();
+ } else {
+ FastPin<DATA_PIN>::lo();
+ }
+
+ // the clock pulse is the same for a 1 bit and a 0 bit
+ if(SPI_SPEED >= 3) {
+ FastPin<CLOCK_PIN>::hi(); SPI_DELAY;
+ FastPin<CLOCK_PIN>::lo(); SPI_DELAY;
+ } else {
+ FastPin<CLOCK_PIN>::strobe();
+ }
+ }
+
+private:
+ // Clock out bit number BIT of b through caller-supplied port pointers: the data pin is set to the bit's
+ // level via datapin, then the clock pin is raised and lowered via clockpin with an SPI_DELAY after each edge.
+ template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) {
+ const bool bit_is_set = (b & (1 << BIT)) != 0;
+ if(!bit_is_set) {
+ FastPin<DATA_PIN>::lo(datapin);
+ } else {
+ FastPin<DATA_PIN>::hi(datapin);
+ }
+
+ // one clock pulse, identical for both bit values
+ FastPin<CLOCK_PIN>::hi(clockpin); SPI_DELAY;
+ FastPin<CLOCK_PIN>::lo(clockpin); SPI_DELAY;
+ }
+
+ // Variant for clock and data on separate ports, using values the caller precomputed: hival/loval is
+ // written to the data port for a 1/0 bit, then hiclock and loclock are written to the clock port in turn
+ // (each followed by SPI_DELAY) to produce one clock pulse.
+ template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
+ data_t hival, data_t loval, clock_t hiclock, clock_t loclock) {
+ // put the bit's level on the data port first
+ FastPin<DATA_PIN>::fastset(datapin, (b & (1 << BIT)) ? hival : loval);
+
+ // then pulse the clock; this part does not depend on the bit value
+ FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); SPI_DELAY;
+ FastPin<CLOCK_PIN>::fastset(clockpin, loclock); SPI_DELAY;
+ }
+
+ // the version of write to use when clock and data are on the same port with precomputed values for the various
+ // combinations. Each bit takes three fastset calls on the shared port: data level with clock low, same data level with clock high, then clock low again.
+ template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, data_ptr_t clockdatapin,
+ data_t datahiclockhi, data_t dataloclockhi,
+ data_t datahiclocklo, data_t dataloclocklo) {
+#if 0 // disabled alternative: fall back to the plain pin-by-pin writeBit
+ writeBit<BIT>(b);
+#else
+ if(b & (1 << BIT)) {
+ FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); SPI_DELAY; // data high, clock low
+ FastPin<DATA_PIN>::fastset(clockdatapin, datahiclockhi); SPI_DELAY; // clock rises with data held high
+ FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); SPI_DELAY; // clock falls
+ } else {
+ // NOP;
+ FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); SPI_DELAY; // data low, clock low
+ FastPin<DATA_PIN>::fastset(clockdatapin, dataloclockhi); SPI_DELAY; // clock rises with data held low
+ FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); SPI_DELAY; // clock falls
+ }
+#endif
+ }
+public:
+
+ // select the SPI output by delegating to the Selectable, if one was supplied (TODO: research whether this really means hi or lo. Alt TODO: move select responsibility out of the SPI classes
+ // entirely, make it up to the caller to remember to lock/select the line?)
+ void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<SELECT_PIN>::hi(); }
+
+ // release the SPI line; does nothing when no Selectable is set
+ void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } // FastPin<SELECT_PIN>::lo(); }
+
+ // Write out len bytes of the given value out over SPI. Useful for quickly flushing, say, a line of 0's down the line. Wraps writeBytesValueRaw in select()/release().
+ void writeBytesValue(uint8_t value, int len) {
+ select();
+ writeBytesValueRaw(value, len);
+ release();
+ }
+
+ static void writeBytesValueRaw(uint8_t value, int len) { // Clocks out `value` len times without touching the select line. The loops count len down to zero, so len is expected to be >= 0.
+#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
+ // TODO: Weird things may happen if software bitbanging SPI output and other pins on the output registers are being twiddled. Need
+ // to allow specifying whether or not exclusive i/o access is allowed during this process, and if i/o access is not allowed fall
+ // back to the degenerative code below
+ while(len--) {
+ writeByte(value);
+ }
+#else
+ register data_ptr_t datapin = FastPin<DATA_PIN>::port();
+
+ if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
+ // If data and clock are on different ports, then writing a bit will consist of writing the value for
+ // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
+ register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
+ register data_t datahi = FastPin<DATA_PIN>::hival();
+ register data_t datalo = FastPin<DATA_PIN>::loval();
+ register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
+ register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
+ while(len--) {
+ writeByte(value, clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ }
+
+ } else {
+ // If data and clock are on the same port then we can combine setting the data and clock pins
+ register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask(); // the four port values are computed once, before the loop
+ register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
+ register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
+ register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
+
+ while(len--) {
+ writeByte(value, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ }
+ }
+#endif
+ }
+
+ // write a block of len uint8_ts out. Need to type this better so that explicit casts into the call aren't required.
+ // note that this template version takes a class parameter for a per-byte modifier to the data: each byte is passed through D::adjust before it is sent, and D::postBlock(len) is called once at the end.
+ template <class D> void writeBytes(register uint8_t *data, int len) {
+ select();
+#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
+ uint8_t *end = data + len;
+ while(data != end) {
+ writeByte(D::adjust(*data++));
+ }
+#else
+ register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port(); // NOTE(review): only the different-ports branch below uses clockpin
+ register data_ptr_t datapin = FastPin<DATA_PIN>::port();
+
+ if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
+ // If data and clock are on different ports, then writing a bit will consist of writing the value for
+ // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
+ register data_t datahi = FastPin<DATA_PIN>::hival();
+ register data_t datalo = FastPin<DATA_PIN>::loval();
+ register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
+ register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
+ uint8_t *end = data + len;
+
+ while(data != end) {
+ writeByte(D::adjust(*data++), clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ }
+
+ } else {
+ // FastPin<CLOCK_PIN>::hi();
+ // If data and clock are on the same port then we can combine setting the data and clock pins
+ register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
+ register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
+ register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
+ register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
+
+ uint8_t *end = data + len;
+
+ while(data != end) {
+ writeByte(D::adjust(*data++), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ }
+ // FastPin<CLOCK_PIN>::lo();
+ }
+#endif
+ D::postBlock(len);
+ release();
+ }
+
+ // default version of writing a block of data out to the SPI port, with no data modifications being made (forwards to the template version with DATA_NOP)
+ void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+
+ // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template
+ // parameter SKIP is tested here only for FLAG_START_BIT (send a single 1 bit ahead of each group); D is a class specifying a per
+ // byte of data modification to be made (D::adjust(byte, scale) per byte, D::postBlock(len) at the end). (See DATA_NOP above)
+ template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) {
+ select();
+ // All three loops below stop only when data == end exactly, so len must be a whole number of SPI_ADVANCE steps.
+#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
+ // If interrupts or other things may be generating output while we're working on things, then we need
+ // to use this block
+ uint8_t *end = data + len;
+ while(data != end) {
+ if(SKIP & FLAG_START_BIT) {
+ writeBit<0>(1);
+ }
+ writeByte(D::adjust(data[SPI_B0], scale));
+ writeByte(D::adjust(data[SPI_B1], scale));
+ writeByte(D::adjust(data[SPI_B2], scale));
+ data += SPI_ADVANCE;
+ }
+#else
+ // If we can guarantee that no one else will be writing data while we are running (namely, changing the values of the PORT/PDOR pins)
+ // then we can use a bunch of optimizations in here
+ register data_ptr_t datapin = FastPin<DATA_PIN>::port();
+
+ if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
+ register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
+ // If data and clock are on different ports, then writing a bit will consist of writing the value for
+ // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
+ register data_t datahi = FastPin<DATA_PIN>::hival();
+ register data_t datalo = FastPin<DATA_PIN>::loval();
+ register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
+ register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
+ uint8_t *end = data + len;
+
+ while(data != end) {
+ if(SKIP & FLAG_START_BIT) {
+ writeBit<0>(1, clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ }
+ writeByte(D::adjust(data[SPI_B0], scale), clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ writeByte(D::adjust(data[SPI_B1], scale), clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ writeByte(D::adjust(data[SPI_B2], scale), clockpin, datapin, datahi, datalo, clockhi, clocklo);
+ data += SPI_ADVANCE;
+ }
+
+ } else {
+ // If data and clock are on the same port then we can combine setting the data and clock pins
+ register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
+ register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
+ register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
+ register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
+
+ uint8_t *end = data + len;
+
+ while(data != end) {
+ if(SKIP & FLAG_START_BIT) {
+ writeBit<0>(1, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ }
+ writeByte(D::adjust(data[SPI_B0], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ writeByte(D::adjust(data[SPI_B1], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ writeByte(D::adjust(data[SPI_B2], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
+ data += SPI_ADVANCE;
+ }
+ }
+#endif
+ D::postBlock(len);
+ release();
+ }
+
+ template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // overload: caller picks SKIP and RGB_ORDER, the per-byte modifier is DATA_NOP
+ writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // overload: caller picks the modifier D and RGB_ORDER, SKIP is 0
+ writeBytes3<0, D, RGB_ORDER>(data, len, scale);
+ }
+ template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // overload: caller picks only RGB_ORDER
+ writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale);
+ }
+ void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { // non-template form: SKIP 0, DATA_NOP, RGB byte order
+ writeBytes3<0, DATA_NOP, RGB>(data, len, scale);
+ }
+};
+
+#endif
diff --git a/fastspi_dma.h b/fastspi_dma.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/fastspi_dma.h
diff --git a/hsv2rgb.cpp b/hsv2rgb.cpp
new file mode 100644
index 00000000..b0abdab9
--- /dev/null
+++ b/hsv2rgb.cpp
@@ -0,0 +1,495 @@
+#include <stdint.h>
+
+#include "lib8tion.h"
+#include "hsv2rgb.h"
+
+// Functions to convert HSV colors to RGB colors.
+//
+// They basically fall into two groups: spectra, and rainbows.
+// Spectra and rainbows are not the same thing. Wikipedia has a good
+// illustration here
+// http://upload.wikimedia.org/wikipedia/commons/f/f6/Prism_compare_rainbow_01.png
+// from this article
+// http://en.wikipedia.org/wiki/Rainbow#Number_of_colours_in_spectrum_or_rainbow
+// that shows a 'spectrum' and a 'rainbow' side by side. Among other
+// differences, you'll see that a 'rainbow' has much more yellow than
+// a plain spectrum. "Classic" LED color washes are spectrum based, and
+// usually show very little yellow.
+//
+// Wikipedia's page on HSV color space, with pseudocode for conversion
+// to RGB color space
+// http://en.wikipedia.org/wiki/HSL_and_HSV
+// Note that their conversion algorithm, which is (naturally) very popular
+// is in the "maximum brightness at any given hue" style, vs the "uniform
+// brightness for all hues" style.
+//
+// You can't have both; either purple is the same brightness as red, e.g
+// red = #FF0000 and purple = #800080 -> same "total light" output
+// OR purple is 'as bright as it can be', e.g.
+// red = #FF0000 and purple = #FF00FF -> purple is much brighter than red.
+// The colorspace conversions here try to keep the apparent brightness
+// constant even as the hue varies.
+//
+// Adafruit's "Wheel" function, discussed here
+// http://forums.adafruit.com/viewtopic.php?f=47&t=22483
+// is also of the "constant apparent brightness" variety.
+//
+// TODO: provide the 'maximum brightness no matter what' variation.
+//
+// See also some good, clear Arduino C code from Kasper Kamperman
+// http://www.kasperkamperman.com/blog/arduino/arduino-programming-hsb-to-rgb/
+// which in turn was based on Windows C code from "nico80"
+// http://www.codeproject.com/Articles/9207/An-HSB-RGBA-colour-picker
+
+
+
+
+
+void hsv2rgb_raw_C (const struct CHSV & hsv, struct CRGB & rgb);
+void hsv2rgb_raw_avr(const struct CHSV & hsv, struct CRGB & rgb);
+
+// Single-pixel entry point for the "raw" conversion. AVR builds (unless LIB8_ATTINY is
+// defined) forward to the hand-written assembly routine; every other build forwards to
+// the portable C routine.
+void hsv2rgb_raw(const struct CHSV & hsv, struct CRGB & rgb)
+{
+#if defined(__AVR__) && !defined( LIB8_ATTINY )
+ hsv2rgb_raw_avr( hsv, rgb);
+#else
+ hsv2rgb_raw_C( hsv, rgb);
+#endif
+}
+
+
+
+#define APPLY_DIMMING(X) (X)
+#define HSV_SECTION_6 (0x20)
+#define HSV_SECTION_3 (0x40)
+
+// Portable HSV -> RGB conversion for a hue that runs over three 64-step sections
+// (0x00..0x3F, 0x40..0x7F, 0x80..0xBF). In each section one channel ramps down, one ramps
+// up, and the third sits at the brightness floor. Hues of 0xC0 and above are not a
+// separate case; they are handled like the third section.
+void hsv2rgb_raw_C (const struct CHSV & hsv, struct CRGB & rgb)
+{
+ // The dimming hooks stay in place, but APPLY_DIMMING is currently the identity macro.
+ const uint8_t val = APPLY_DIMMING( hsv.val);
+ const uint8_t inverse_sat = APPLY_DIMMING( 255 - hsv.sat);
+
+ // base: the level every channel receives regardless of hue.
+ const uint8_t base = (val * inverse_sat) / 256;
+
+ // span: the most that a hue-dependent channel can rise above the base.
+ const uint8_t span = val - base;
+
+ // Position inside the current section of the hue wheel.
+ const uint8_t pos = hsv.hue % HSV_SECTION_3; // 0..63
+
+ // In principle the 0..63 ramps would be scaled up to 0..252 (times four) and the
+ // product with span divided by 256. Whether that or a direct divide by 64 is cheaper
+ // depends on the processor: on 8-bit AVR "/ 256" is a register move while "/ 64" may
+ // be a multi-cycle shift; on ARM both shifts cost about the same. AVR has its own
+ // assembly version elsewhere, so here the redundant "* 4" is dropped and the product
+ // is divided by (256 / 4) instead. Trust your profiler, not your instincts.
+ const uint8_t rising = ((pos * span) / (256 / 4)) + base;
+ const uint8_t falling = ((((HSV_SECTION_3 - 1) - pos) * span) / (256 / 4)) + base;
+
+ switch( hsv.hue / HSV_SECTION_3 ) {
+ case 0:
+ // section 0: 0x00..0x3F
+ rgb.r = falling;
+ rgb.g = rising;
+ rgb.b = base;
+ break;
+ case 1:
+ // section 1: 0x40..0x7F
+ rgb.r = base;
+ rgb.g = falling;
+ rgb.b = rising;
+ break;
+ default:
+ // section 2: 0x80..0xBF (and anything above)
+ rgb.r = rising;
+ rgb.g = base;
+ rgb.b = falling;
+ break;
+ }
+}
+
+
+
+#if defined(__AVR__) && !defined( LIB8_ATTINY )
+void hsv2rgb_raw_avr(const struct CHSV & hsv, struct CRGB & rgb) // AVR version of the raw conversion; the two 8x8 multiplies are done with inline "mul" and only the high byte of each product is kept
+{
+ uint8_t hue, saturation, value;
+
+ hue = hsv.hue;
+ saturation = hsv.sat;
+ value = hsv.val;
+
+ // Saturation more useful the other way around
+ saturation = 255 - saturation;
+ uint8_t invsat = APPLY_DIMMING( saturation );
+
+ // Apply dimming curves (APPLY_DIMMING is currently the identity macro)
+ value = APPLY_DIMMING( value );
+
+ // The brightness floor is minimum number that all of
+ // R, G, and B will be set to, which is the high byte of value * invsat
+ uint8_t brightness_floor;
+
+ asm volatile(
+ "mul %[value], %[invsat] \n" // AVR mul leaves the 16-bit product in r1:r0
+ "mov %[brightness_floor], r1 \n" // keep the high byte, i.e. product / 256
+ : [brightness_floor] "=r" (brightness_floor)
+ : [value] "r" (value),
+ [invsat] "r" (invsat)
+ : "r0", "r1"
+ );
+
+ // The color amplitude is the maximum amount of R, G, and B
+ // that will be added on top of the brightness_floor to
+ // create the specific hue desired.
+ uint8_t color_amplitude = value - brightness_floor;
+
+ // Figure how far we are offset into the section of the
+ // color wheel that we're in
+ uint8_t offset = hsv.hue & (HSV_SECTION_3 - 1); // 0..63
+ uint8_t rampup = offset * 4; // 0..252
+
+
+ // compute color-amplitude-scaled-down versions of rampup and rampdown
+ uint8_t rampup_amp_adj;
+ uint8_t rampdown_amp_adj;
+
+ asm volatile(
+ "mul %[rampup], %[color_amplitude] \n"
+ "mov %[rampup_amp_adj], r1 \n" // high byte of rampup * color_amplitude
+ "com %[rampup] \n" // one's complement turns rampup into 255 - rampup, which serves as the ramp-down value
+ "mul %[rampup], %[color_amplitude] \n"
+ "mov %[rampdown_amp_adj], r1 \n" // high byte of (255 - rampup) * color_amplitude
+ : [rampup_amp_adj] "=&r" (rampup_amp_adj),
+ [rampdown_amp_adj] "=&r" (rampdown_amp_adj),
+ [rampup] "+r" (rampup)
+ : [color_amplitude] "r" (color_amplitude)
+ : "r0", "r1"
+ );
+
+
+ // add brightness_floor offset to everything
+ uint8_t rampup_adj_with_floor = rampup_amp_adj + brightness_floor;
+ uint8_t rampdown_adj_with_floor = rampdown_amp_adj + brightness_floor;
+
+
+ // keep gcc from using "X" as the index register for storing
+ // results back in the return structure. AVR's X register can't
+ // do "std X+q, rnn", but the Y and Z registers can.
+ // if the pointer to 'rgb' is in X, gcc will add all kinds of crazy
+ // extra instructions. Simply killing X here seems to help it
+ // try Y or Z first.
+ asm volatile( "" : : : "r26", "r27" );
+
+
+ if( hue & 0x80 ) {
+ // section 2: 0x80..0xBF (hues 0xC0..0xFF also take this branch)
+ rgb.r = rampup_adj_with_floor;
+ rgb.g = brightness_floor;
+ rgb.b = rampdown_adj_with_floor;
+ } else {
+ if( hue & 0x40) {
+ // section 1: 0x40..0x7F
+ rgb.r = brightness_floor;
+ rgb.g = rampdown_adj_with_floor;
+ rgb.b = rampup_adj_with_floor;
+ } else {
+ // section 0: 0x00..0x3F
+ rgb.r = rampdown_adj_with_floor;
+ rgb.g = rampup_adj_with_floor;
+ rgb.b = brightness_floor;
+ }
+ }
+ // cleanup_R1 is defined outside this file section; presumably it clears r1 again after the mul instructions above -- TODO confirm
+ cleanup_R1();
+}
+// End of AVR asm implementation
+
+#endif
+
+// Spectrum conversion for a full-range hue: a copy of the input gets its hue passed
+// through scale8( hue, 192) and is then handed to the single-pixel hsv2rgb_raw.
+// The caller's CHSV is left untouched.
+void hsv2rgb_spectrum( const CHSV& hsv, CRGB& rgb)
+{
+ CHSV narrowed(hsv);
+ narrowed.hue = scale8( hsv.hue, 192);
+
+ hsv2rgb_raw(narrowed, rgb);
+}
+
+
+// Sometimes the compiler will do clever things to reduce
+// code size that result in a net slowdown, if it thinks that
+// a variable is not used in a certain location.
+// This macro does its best to convince the compiler that
+// the variable is used in this location, to help control
+// code motion and de-duplication that would result in a slowdown.
+#define FORCE_REFERENCE(var) asm volatile( "" : : "r" (var) )
+
+
+#define K255 255
+#define K171 171
+#define K85 85
+
+void hsv2rgb_rainbow( const CHSV& hsv, CRGB& rgb) // Converts one CHSV to CRGB over eight 32-step hue sections, selected by the top three bits of the hue
+{
+ // Yellow has a higher inherent brightness than
+ // any other color; 'pure' yellow is perceived to
+ // be 93% as bright as white. In order to make
+ // yellow appear the correct relative brightness,
+ // it has to be rendered brighter than all other
+ // colors.
+ // Level Y1 is a moderate boost, the default.
+ // Level Y2 is a strong boost.
+ const uint8_t Y1 = 1;
+ const uint8_t Y2 = 0; // NOTE(review): if Y1 and Y2 were both 0, sections 1 and 2 below would leave r, g and b unset
+
+ // G2: Whether to divide all greens by two.
+ // Depends GREATLY on your particular LEDs
+ const uint8_t G2 = 0;
+
+ // Gscale: what to scale green down by.
+ // Depends GREATLY on your particular LEDs
+ const uint8_t Gscale = 0;
+
+
+ uint8_t hue = hsv.hue;
+ uint8_t sat = hsv.sat;
+ uint8_t val = hsv.val;
+
+ uint8_t offset = hue & 0x1F; // 0..31
+
+ // offset8 = offset * 8, done as three single shifts separated by empty asm statements
+ uint8_t offset8 = offset;
+ {
+ offset8 <<= 1;
+ asm volatile("");
+ offset8 <<= 1;
+ asm volatile("");
+ offset8 <<= 1;
+ }
+
+ uint8_t third = scale8( offset8, (256 / 3)); // scale8 is defined elsewhere; presumably (offset8 * 85) / 256, roughly a third of offset8 -- TODO confirm
+
+ uint8_t r, g, b;
+
+ if( ! (hue & 0x80) ) {
+ // 0XX
+ if( ! (hue & 0x40) ) {
+ // 00X
+ //section 0-1
+ if( ! (hue & 0x20) ) {
+ // 000
+ //case 0: // R -> O
+ r = K255 - third;
+ g = third;
+ b = 0;
+ FORCE_REFERENCE(b);
+ } else {
+ // 001
+ //case 1: // O -> Y
+ if( Y1 ) {
+ r = K171;
+ g = K85 + third ;
+ b = 0;
+ FORCE_REFERENCE(b);
+ }
+ if( Y2 ) {
+ r = K171 + third;
+ uint8_t twothirds = (third << 1);
+ g = K85 + twothirds;
+ b = 0;
+ FORCE_REFERENCE(b);
+ }
+ }
+ } else {
+ //01X
+ // section 2-3
+ if( ! (hue & 0x20) ) {
+ // 010
+ //case 2: // Y -> G
+ if( Y1 ) {
+ uint8_t twothirds = (third << 1);
+ r = K171 - twothirds;
+ g = K171 + third;
+ b = 0;
+ FORCE_REFERENCE(b);
+ }
+ if( Y2 ) {
+ r = K255 - offset8;
+ g = K255;
+ b = 0;
+ FORCE_REFERENCE(b);
+ }
+ } else {
+ // 011
+ // case 3: // G -> A
+ r = 0;
+ FORCE_REFERENCE(r);
+ g = K255 - third;
+ b = third;
+ }
+ }
+ } else {
+ // section 4-7
+ // 1XX
+ if( ! (hue & 0x40) ) {
+ // 10X
+ if( ! ( hue & 0x20) ) {
+ // 100
+ //case 4: // A -> B
+ r = 0;
+ FORCE_REFERENCE(r);
+ uint8_t twothirds = (third << 1);
+ g = K171 - twothirds;
+ b = K85 + twothirds;
+
+ } else {
+ // 101
+ //case 5: // B -> P
+ r = third;
+ g = 0;
+ FORCE_REFERENCE(g);
+ b = K255 - third;
+
+ }
+ } else {
+ if( ! (hue & 0x20) ) {
+ // 110
+ //case 6: // P -- K
+ r = K85 + third;
+ g = 0;
+ FORCE_REFERENCE(g);
+ b = K171 - third;
+
+ } else {
+ // 111
+ //case 7: // K -> R
+ r = K171 + third;
+ g = 0;
+ FORCE_REFERENCE(g);
+ b = K85 - third;
+
+ }
+ }
+ }
+
+ // This is one of the good places to scale the green down,
+ // although the client can scale green down as well.
+ if( G2 ) g = g >> 1;
+ if( Gscale ) g = scale8_video_LEAVING_R1_DIRTY( g, Gscale); // NOTE(review): dead while Gscale is 0; judging by its name this helper leaves r1 dirty, so confirm a cleanup follows on every path before enabling it
+
+ // Scale down colors if we're desaturated at all
+ // and add the brightness_floor to r, g, and b.
+ if( sat != 255 ) {
+
+ nscale8x3_video( r, g, b, sat);
+
+ uint8_t desat = 255 - sat;
+ desat = scale8( desat, desat); // the inverse saturation scaled by itself becomes the floor
+
+ uint8_t brightness_floor = desat;
+ r += brightness_floor;
+ g += brightness_floor;
+ b += brightness_floor;
+ }
+
+ // Now scale everything down if we're at value < 255.
+ if( val != 255 ) {
+
+ val = scale8_video_LEAVING_R1_DIRTY( val, val); // val is scaled by itself before it is applied to the three channels
+ nscale8x3_video( r, g, b, val);
+ }
+
+ // Here we have the old AVR "missing std X+n" problem again
+ // It turns out that fixing it winds up costing more than
+ // not fixing it.
+ // To paraphrase Dr Bronner, profile! profile! profile!
+ //asm volatile( "" : : : "r26", "r27" );
+ //asm volatile (" movw r30, r26 \n" : : : "r30", "r31");
+ rgb.r = r;
+ rgb.g = g;
+ rgb.b = b;
+}
+
+
+// Array form: converts numLeds consecutive CHSV entries from phsv into prgb with the
+// single-pixel hsv2rgb_raw, in ascending index order. A zero or negative count converts nothing.
+void hsv2rgb_raw(const struct CHSV * phsv, struct CRGB * prgb, int numLeds) {
+ int idx = 0;
+ while(idx < numLeds) {
+ hsv2rgb_raw(phsv[idx], prgb[idx]);
+ ++idx;
+ }
+}
+
+// Array form: converts numLeds consecutive CHSV entries from phsv into prgb with the
+// single-pixel hsv2rgb_rainbow, in ascending index order. A zero or negative count converts nothing.
+void hsv2rgb_rainbow( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) {
+ int idx = 0;
+ while(idx < numLeds) {
+ hsv2rgb_rainbow(phsv[idx], prgb[idx]);
+ ++idx;
+ }
+}
+
+// Array form: converts numLeds consecutive CHSV entries from phsv into prgb with the
+// single-pixel hsv2rgb_spectrum, in ascending index order. A zero or negative count converts nothing.
+void hsv2rgb_spectrum( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) {
+ int idx = 0;
+ while(idx < numLeds) {
+ hsv2rgb_spectrum(phsv[idx], prgb[idx]);
+ ++idx;
+ }
+}
+
+// Assigns `color` to each of the numToFill entries starting at pFirstLED, in ascending
+// order. A zero or negative count writes nothing.
+void fill_solid( struct CRGB * pFirstLED, int numToFill,
+ const struct CRGB& color)
+{
+ int idx = 0;
+ while( idx < numToFill) {
+ pFirstLED[idx] = color;
+ ++idx;
+ }
+}
+
+// Fills numToFill entries starting at pFirstLED with rainbow colors at saturation 255
+// and value 255. The first entry uses initialhue; deltahue is added to the hue after
+// every entry.
+void fill_rainbow( struct CRGB * pFirstLED, int numToFill,
+ uint8_t initialhue,
+ uint8_t deltahue )
+{
+ CHSV cursor;
+ cursor.sat = 255;
+ cursor.val = 255;
+ cursor.hue = initialhue;
+
+ int idx = 0;
+ while( idx < numToFill) {
+ hsv2rgb_rainbow( cursor, pFirstLED[idx]);
+ cursor.hue += deltahue;
+ ++idx;
+ }
+}
diff --git a/hsv2rgb.h b/hsv2rgb.h
new file mode 100644
index 00000000..d30e9aef
--- /dev/null
+++ b/hsv2rgb.h
@@ -0,0 +1,59 @@
+#ifndef __INC_HSV2RGB_H
+#define __INC_HSV2RGB_H
+
+#include "pixeltypes.h"
+
+
+// hsv2rgb_rainbow - convert a hue, saturation, and value to RGB
+// using a visually balanced rainbow (vs a straight
+// mathematical spectrum).
+// This 'rainbow' yields better yellow and orange
+// than a straight 'spectrum'.
+//
+// NOTE: here hue is 0-255, not just 0-191
+
+void hsv2rgb_rainbow( const struct CHSV& hsv, struct CRGB& rgb);
+void hsv2rgb_rainbow( const struct CHSV* phsv, struct CRGB * prgb, int numLeds);
+#define HUE_MAX_RAINBOW 255
+
+
+// hsv2rgb_spectrum - convert a hue, saturation, and value to RGB
+// using a mathematically straight spectrum (vs
+// a visually balanced rainbow).
+// This 'spectrum' will have more green & blue
+// than a 'rainbow', and less yellow and orange.
+//
+// NOTE: here hue is 0-255, not just 0-191
+
+void hsv2rgb_spectrum( const struct CHSV& hsv, struct CRGB& rgb);
+void hsv2rgb_spectrum( const struct CHSV* phsv, struct CRGB * prgb, int numLeds);
+#define HUE_MAX_SPECTRUM 255
+
+
+// hsv2rgb_raw - convert hue, saturation, and value to RGB.
+// This 'spectrum' conversion will be more green & blue
+// than a real 'rainbow', and the hue is specified just
+// in the range 0-191. Together, these result in a
+// slightly faster conversion speed, at the expense of
+// color balance.
+//
+// NOTE: Hue is 0-191 only!
+// Saturation & value are 0-255 each.
+//
+
+void hsv2rgb_raw(const struct CHSV& hsv, struct CRGB & rgb);
+void hsv2rgb_raw(const struct CHSV* phsv, struct CRGB * prgb, int numLeds);
+#define HUE_MAX 191
+
+
+// fill_solid - fill a range of LEDs with a solid color
+void fill_solid( struct CRGB * pFirstLED, int numToFill,
+ const struct CRGB& color);
+
+// fill_rainbow - fill a range of LEDs with a rainbow of colors, at
+// full saturation and full value (brightness)
+void fill_rainbow( struct CRGB * pFirstLED, int numToFill,
+ uint8_t initialhue,
+ uint8_t deltahue = 5);
+
+#endif
diff --git a/lib8tion.cpp b/lib8tion.cpp
new file mode 100644
index 00000000..224d44f2
--- /dev/null
+++ b/lib8tion.cpp
@@ -0,0 +1,242 @@
+#include <stdint.h>
+
+#define RAND16_SEED 1337
+uint16_t rand16seed = RAND16_SEED;
+
+
+// memset8, memcpy8, memmove8:
+// optimized avr replacements for the standard "C" library
+// routines memset, memcpy, and memmove.
+//
+// There are two techniques that make these routines
+// faster than the standard avr-libc routines.
+// First, the loops are unrolled 2X, meaning that
+// the average loop overhead is cut in half.
+// And second, the compare-and-branch at the bottom
+// of each loop decrements the low byte of the
+// counter, and if the carry is clear, it branches
+// back up immediately. Only if the low byte math
+// causes carry do we bother to decrement the high
+// byte and check that result for carry as well.
+// Results for a 100-byte buffer are 20-40% faster
+// than standard avr-libc, at a cost of a few extra
+// bytes of code.
+
+#if defined(__AVR__)
+extern "C" {
+//__attribute__ ((noinline))
+// memset8: store the byte 'val' into 'num' consecutive bytes
+// starting at 'ptr', and return 'ptr'.
+// The store loop is unrolled 2X; an odd count enters the loop at
+// its second store so that exactly 'num' bytes are written.
+// The asm loads the X pointer (r26:r27) itself, so those registers
+// are listed as clobbered, and 'num' uses the "d" constraint because
+// subi/sbci only accept registers r16-r31.
+void * memset8 ( void * ptr, uint8_t val, uint16_t num )
+{
+ asm volatile(
+ " movw r26, %[ptr] \n\t"
+ " sbrs %A[num], 0 \n\t"
+ " rjmp Lseteven_%= \n\t"
+ " rjmp Lsetodd_%= \n\t"
+ "Lsetloop_%=: \n\t"
+ " st X+, %[val] \n\t"
+ "Lsetodd_%=: \n\t"
+ " st X+, %[val] \n\t"
+ "Lseteven_%=: \n\t"
+ " subi %A[num], 2 \n\t"
+ " brcc Lsetloop_%= \n\t"
+ " sbci %B[num], 0 \n\t"
+ " brcc Lsetloop_%= \n\t"
+ : [num] "+d" (num)
+ : [ptr] "r" (ptr),
+ [val] "r" (val)
+ : "r26", "r27", "memory"
+ );
+ return ptr;
+}
+
+
+
+//__attribute__ ((noinline))
+// memcpy8: copy 'num' bytes from 'src' to 'dst', stepping forward
+// (lowest address first), and return 'dst'.
+// The asm loads Z (r30:r31) with 'src' and X (r26:r27) with 'dst'
+// itself, so those registers are listed as clobbered; 'num' uses
+// the "d" constraint because subi/sbci only accept r16-r31.
+void * memcpy8 ( void * dst, const void* src, uint16_t num )
+{
+ asm volatile(
+ " movw r30, %[src] \n\t"
+ " movw r26, %[dst] \n\t"
+ " sbrs %A[num], 0 \n\t"
+ " rjmp Lcpyeven_%= \n\t"
+ " rjmp Lcpyodd_%= \n\t"
+ "Lcpyloop_%=: \n\t"
+ " ld __tmp_reg__, Z+ \n\t"
+ " st X+, __tmp_reg__ \n\t"
+ "Lcpyodd_%=: \n\t"
+ " ld __tmp_reg__, Z+ \n\t"
+ " st X+, __tmp_reg__ \n\t"
+ "Lcpyeven_%=: \n\t"
+ " subi %A[num], 2 \n\t"
+ " brcc Lcpyloop_%= \n\t"
+ " sbci %B[num], 0 \n\t"
+ " brcc Lcpyloop_%= \n\t"
+ : [num] "+d" (num)
+ : [src] "r" (src),
+ [dst] "r" (dst)
+ : "r26", "r27", "r30", "r31", "memory"
+ );
+ return dst;
+}
+
+//__attribute__ ((noinline))
+// memmove8: copy 'num' bytes from 'src' to 'dst', where the two
+// regions may overlap, and return 'dst'.
+void * memmove8 ( void * dst, const void* src, uint16_t num )
+{
+ if( dst < src) {
+ // dst is below src: a forward copy never overwrites source
+ // bytes that have not been read yet, so memcpy8 is safe.
+ return memcpy8( dst, src, num);
+ } else {
+ // dst is at or above src: step backward from the end of both
+ // regions, so the tail of the source is read before the copy
+ // can overwrite it. Separate end pointers are used so that
+ // the original 'dst' is still available as the return value.
+ void * dstend = (char*)dst + num;
+ const void * srcend = (const char*)src + num;
+ asm volatile(
+ " movw r30, %[src] \n\t"
+ " movw r26, %[dst] \n\t"
+ " sbrs %A[num], 0 \n\t"
+ " rjmp Lmoveven_%= \n\t"
+ " rjmp Lmovodd_%= \n\t"
+ "Lmovloop_%=: \n\t"
+ " ld __tmp_reg__, -Z \n\t"
+ " st -X, __tmp_reg__ \n\t"
+ "Lmovodd_%=: \n\t"
+ " ld __tmp_reg__, -Z \n\t"
+ " st -X, __tmp_reg__ \n\t"
+ "Lmoveven_%=: \n\t"
+ " subi %A[num], 2 \n\t"
+ " brcc Lmovloop_%= \n\t"
+ " sbci %B[num], 0 \n\t"
+ " brcc Lmovloop_%= \n\t"
+ : [num] "+d" (num)
+ : [src] "r" (srcend),
+ [dst] "r" (dstend)
+ : "r26", "r27", "r30", "r31", "memory"
+ );
+ return dst;
+ }
+}
+
+
+} /* end extern "C" */
+
+#endif /* AVR */
+
+#if 0
+// TEST / VERIFICATION CODE ONLY BELOW THIS POINT
+#include <Arduino.h>
+#include "lib8tion.h"
+
+// test1abs: print one line of the form "abs(<i>) = <abs8(i)>"
+// over Serial for a single input value.
+void test1abs( int8_t i)
+{
+ Serial.print("abs("); Serial.print(i); Serial.print(") = ");
+ int8_t j = abs8(i);
+ Serial.print(j); Serial.println(" ");
+}
+
+// testabs: after a 5 second delay, print abs8() for the inputs
+// -128 through 126, then spin forever.
+// NOTE(review): the loop condition 'q != 127' ends the loop before
+// the value 127 itself is printed.
+void testabs()
+{
+ delay(5000);
+ for( int8_t q = -128; q != 127; q++) {
+ test1abs(q);
+ }
+ for(;;){};
+}
+
+
+// testmul8: after a 5 second delay, print a table of mul8( r, c)
+// for r and c from 0 through 20, one row per value of r, then
+// print "done." and spin forever.
+void testmul8()
+{
+ delay(5000);
+ byte r, c;
+
+ Serial.println("mul8:");
+ for( r = 0; r <= 20; r += 1) {
+ Serial.print(r); Serial.print(" : ");
+ for( c = 0; c <= 20; c += 1) {
+ byte t;
+ t = mul8( r, c);
+ Serial.print(t); Serial.print(' ');
+ }
+ Serial.println(' ');
+ }
+ Serial.println("done.");
+ for(;;){};
+}
+
+
+// testscale8: after a 5 second delay, print two tables over Serial:
+// scale8( r, c) for r and c from 0 to 240 in steps of 10, and
+// scale8_video( r, c) for r and c from 0 to 100 in steps of 4.
+// Then print "done." and spin forever.
+void testscale8()
+{
+ delay(5000);
+ byte r, c;
+
+ Serial.println("scale8:");
+ for( r = 0; r <= 240; r += 10) {
+ Serial.print(r); Serial.print(" : ");
+ for( c = 0; c <= 240; c += 10) {
+ byte t;
+ t = scale8( r, c);
+ Serial.print(t); Serial.print(' ');
+ }
+ Serial.println(' ');
+ }
+
+ Serial.println(' ');
+ Serial.println("scale8_video:");
+
+ for( r = 0; r <= 100; r += 4) {
+ Serial.print(r); Serial.print(" : ");
+ for( c = 0; c <= 100; c += 4) {
+ byte t;
+ t = scale8_video( r, c);
+ Serial.print(t); Serial.print(' ');
+ }
+ Serial.println(' ');
+ }
+
+ Serial.println("done.");
+ for(;;){};
+}
+
+
+
+// testqadd8: after a 5 second delay, print a table of qadd8( r, c)
+// for r and c from 0 to 240 in steps of 10, then print "done."
+// and spin forever.
+void testqadd8()
+{
+ delay(5000);
+ byte r, c;
+ for( r = 0; r <= 240; r += 10) {
+ Serial.print(r); Serial.print(" : ");
+ for( c = 0; c <= 240; c += 10) {
+ byte t;
+ t = qadd8( r, c);
+ Serial.print(t); Serial.print(' ');
+ }
+ Serial.println(' ');
+ }
+ Serial.println("done.");
+ for(;;){};
+}
+
+// testnscale8x3: after a 5 second delay, run ten trials of
+// nscale8x3_video() on random r, g, b and scale values, printing
+// the inputs and the in-place results for each trial. Then print
+// "done." and spin forever.
+void testnscale8x3()
+{
+ delay(5000);
+ byte r, g, b, sc;
+ for( byte z = 0; z < 10; z++) {
+ r = random8(); g = random8(); b = random8(); sc = random8();
+
+ Serial.print("nscale8x3_video( ");
+ Serial.print(r); Serial.print(", ");
+ Serial.print(g); Serial.print(", ");
+ Serial.print(b); Serial.print(", ");
+ Serial.print(sc); Serial.print(") = [ ");
+
+ // r, g and b are passed by reference and scaled in place
+ nscale8x3_video( r, g, b, sc);
+
+ Serial.print(r); Serial.print(", ");
+ Serial.print(g); Serial.print(", ");
+ Serial.print(b); Serial.print("]");
+
+ Serial.println(' ');
+ }
+ Serial.println("done.");
+ for(;;){};
+}
+
+#endif
diff --git a/lib8tion.h b/lib8tion.h
new file mode 100644
index 00000000..5fb812cb
--- /dev/null
+++ b/lib8tion.h
@@ -0,0 +1,1272 @@
+#ifndef __INC_LIB8TION_H
+#define __INC_LIB8TION_H
+
+/*
+
+ Fast, efficient 8-bit math functions specifically
+ designed for high-performance LED programming.
+
+ Because of the AVR(Arduino) and ARM assembly language
+ implementations provided, using these functions often
+ results in smaller and faster code than the equivalent
+ program using plain "C" arithmetic and logic.
+
+
+ Included are:
+
+
+ - Saturating unsigned 8-bit add and subtract.
+ Instead of wrapping around if an overflow occurs,
+ these routines just 'clamp' the output at a maximum
+ of 255, or a minimum of 0. Useful for adding pixel
+ values. E.g., qadd8( 200, 100) = 255.
+
+ qadd8( i, j) == MIN( (i + j), 0xFF )
+ qsub8( i, j) == MAX( (i - j), 0 )
+
+ - Saturating signed 8-bit ("7-bit") add.
+ qadd7( i, j) == MIN( (i + j), 0x7F)
+
+
+ - Scaling (down) of unsigned 8- and 16- bit values.
+ Scaledown value is specified in 1/256ths.
+ scale8( i, sc) == (i * sc) / 256
+ scale16by8( i, sc) == (i * sc) / 256
+
+ Example: scaling a 0-255 value down into a
+ range from 0-99:
+ downscaled = scale8( originalnumber, 100);
+
+ A special version of scale8 is provided for scaling
+ LED brightness values, to make sure that they don't
+ accidentally scale down to total black at low
+ dimming levels, since that would look wrong:
+ scale8_video( i, sc) = ((i * sc) / 256) +? 1
+
+ Example: reducing an LED brightness by a
+ dimming factor:
+ new_bright = scale8_video( orig_bright, dimming);
+
+
+ - Fast 8- and 16- bit unsigned random numbers.
+ Significantly faster than Arduino random(), but
+ also somewhat less random. You can add entropy.
+ random8() == random from 0..255
+ random8( n) == random from 0..(N-1)
+ random8( n, m) == random from N..(M-1)
+
+ random16() == random from 0..65535
+ random16( n) == random from 0..(N-1)
+ random16( n, m) == random from N..(M-1)
+
+ random16_set_seed( k) == seed = k
+ random16_add_entropy( k) == seed += k
+
+
+ - Absolute value of a signed 8-bit value.
+ abs8( i) == abs( i)
+
+
+ - 8-bit math operations which return 8-bit values.
+ These are provided mostly for completeness,
+ not particularly for performance.
+ mul8( i, j) == (i * j) & 0xFF
+ add8( i, j) == (i + j) & 0xFF
+ sub8( i, j) == (i - j) & 0xFF
+
+
+ - Fast 16-bit approximations of sin and cos.
+ Input angle is a uint16_t from 0-65535.
+ Output is a signed int16_t from -32767 to 32767.
+ sin16( x) == sin( (x/32768.0) * pi) * 32767
+ cos16( x) == cos( (x/32768.0) * pi) * 32767
+ Accurate to more than 99% in all cases.
+
+
+ - Dimming and brightening functions for 8-bit
+ light values.
+ dim8_video( x) == scale8_video( x, x)
+ dim8_raw( x) == scale8( x, x)
+ brighten8_video( x) == 255 - dim8_video( 255 - x)
+ brighten8_raw( x) == 255 - dim8_raw( 255 - x)
+ The dimming functions in particular are suitable
+ for making LED light output appear more 'linear'.
+
+
+ - Fast 8-bit "easing in/out" function.
+ ease8InOutCubic(x) == 3(x^2) - 2(x^3)
+ ease8InOutApprox(x) ==
+ faster, rougher, approximation of cubic easing
+
+
+ - Linear interpolation between two values, with the
+ fraction between them expressed as an 8- or 16-bit
+ fixed point fraction (fract8 or fract16).
+ lerp8by8( fromU8, toU8, fract8 )
+ lerp16by8( fromU16, toU16, fract8 )
+ lerp15by8( fromS16, toS16, fract8 )
+ == from + (( to - from ) * fract8) / 256)
+ lerp16by16( fromU16, toU16, fract16 )
+ == from + (( to - from ) * fract16) / 65536)
+
+ - Optimized memmove, memcpy, and memset, that are
+ faster than standard avr-libc 1.8.
+ memmove8( dest, src, bytecount)
+ memcpy8( dest, src, bytecount)
+ memset8( buf, value, bytecount)
+
+
+Lib8tion is pronounced like 'libation': lie-BAY-shun
+
+*/
+
+
+
+#include <stdint.h>
+
+#define LIB8STATIC __attribute__ ((unused)) static
+
+
+#if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__)
+#define LIB8_ATTINY 1
+#endif
+
+
+#if defined(__arm__)
+
+#if defined(__MK20DX128__)
+// Can use Cortex M4 DSP instructions
+#define QADD8_C 0
+#define QADD7_C 0
+#define QADD8_ARM_DSP_ASM 1
+#define QADD7_ARM_DSP_ASM 1
+#else
+// Generic ARM
+#define QADD8_C 1
+#define QADD7_C 1
+#endif
+
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+
+
+#elif defined(__AVR__)
+
+// AVR ATmega and friends Arduino
+
+#define QADD8_C 0
+#define QADD7_C 0
+#define QSUB8_C 0
+#define ABS8_C 0
+#define ADD8_C 0
+#define SUB8_C 0
+
+#define QADD8_AVRASM 1
+#define QADD7_AVRASM 1
+#define QSUB8_AVRASM 1
+#define ABS8_AVRASM 1
+#define ADD8_AVRASM 1
+#define SUB8_AVRASM 1
+
+// Note: these require hardware MUL instruction
+// -- sorry, ATtiny!
+#if !defined(LIB8_ATTINY)
+#define SCALE8_C 0
+#define SCALE16BY8_C 0
+#define SCALE16_C 0
+#define MUL8_C 0
+#define QMUL8_C 0
+#define EASE8_C 0
+#define SCALE8_AVRASM 1
+#define SCALE16BY8_AVRASM 1
+#define SCALE16_AVRASM 1
+#define MUL8_AVRASM 1
+#define QMUL8_AVRASM 1
+#define EASE8_AVRASM 1
+#define CLEANUP_R1_AVRASM 1
+#else
+// On ATtiny, we just use C implementations
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define EASE8_C 1
+#define SCALE8_AVRASM 0
+#define SCALE16BY8_AVRASM 0
+#define SCALE16_AVRASM 0
+#define MUL8_AVRASM 0
+#define QMUL8_AVRASM 0
+#define EASE8_AVRASM 0
+#endif
+
+#else
+
+// unspecified architecture, so
+// no ASM, everything in C
+// (every selector tested by a function below must be defined here,
+// including QMUL8_C; otherwise that function hits its #error)
+#define QADD8_C 1
+#define QADD7_C 1
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+
+#endif
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// typedefs for fixed-point fractional types.
+//
+// sfract7 should be interpreted as signed 128ths.
+// fract8 should be interpreted as unsigned 256ths.
+// sfract15 should be interpreted as signed 32768ths.
+// fract16 should be interpreted as unsigned 65536ths.
+//
+// Example: if a fract8 has the value "64", that should be interpreted
+// as 64/256ths, or one-quarter.
+//
+//
+// fract8 range is 0 to 0.99609375
+// in steps of 0.00390625
+//
+// sfract7 range is -0.9921875 to 0.9921875
+// in steps of 0.0078125
+//
+// fract16 range is 0 to 0.99998474121
+// in steps of 0.00001525878
+//
+// sfract15 range is -0.99996948242 to 0.99996948242
+// in steps of 0.00003051757
+//
+
+typedef uint8_t fract8; // ANSI: unsigned short _Fract
+typedef int8_t sfract7; // ANSI: signed short _Fract
+typedef uint16_t fract16; // ANSI: unsigned _Fract
+typedef int16_t sfract15; // ANSI: signed _Fract
+
+
+// accumXY types should be interpreted as X bits of integer,
+// and Y bits of fraction.
+// E.g., accum88 has 8 bits of int, 8 bits of fraction
+// For the signed 'saccum' types the X and Y digits add up to one
+// less than the storage width (e.g. saccum78 is 7 + 8 bits in an
+// int16_t).
+
+typedef uint16_t accum88; // ANSI: unsigned short _Accum
+typedef int16_t saccum78; // ANSI: signed short _Accum
+typedef uint32_t accum1616;// ANSI: unsigned _Accum
+typedef int32_t saccum1516;//ANSI: signed _Accum
+typedef uint16_t accum124; // no direct ANSI counterpart
+// NOTE(review): 11 + 4 bits would fit in an int16_t, but this
+// type is declared as int32_t -- confirm that is intended.
+typedef int32_t saccum114;// no direct ANSI counterpart
+
+
+// typedef for IEEE754 "binary32" float type internals
+//
+// A union that overlays a float with a raw 32-bit integer and with
+// three alternative bit-field splits of the same 32 bits.
+// NOTE(review): the comments below assume bit-fields are allocated
+// starting from the least significant bit, which is compiler- and
+// target-dependent -- confirm for each toolchain.
+
+typedef union {
+ uint32_t i; // all 32 bits as an integer
+ float f; // the same bits as a float
+ // Split 1: 23-bit mantissa, 8-bit exponent, 1 sign bit
+ struct {
+ uint32_t mantissa: 23;
+ uint32_t exponent: 8;
+ uint32_t signbit: 1;
+ };
+ // Split 2: as split 1, with the mantissa divided into 7 + 16 bits
+ struct {
+ uint32_t mant7 : 7;
+ uint32_t mant16: 16;
+ uint32_t exp_ : 8;
+ uint32_t sb_ : 1;
+ };
+ // Split 3: 8 + 16 + 8 bits; the middle field holds the upper
+ // mantissa bits plus one exponent bit, the last field holds the
+ // remaining exponent bits plus the sign bit (per the field names)
+ struct {
+ uint32_t mant_lo8 : 8;
+ uint32_t mant_hi16_exp_lo1 : 16;
+ uint32_t sb_exphi7 : 8;
+ };
+} IEEE754binary32_t;
+
+
+
+///////////////////////////////////////////////////////////////////////
+
+// qadd8: add one byte to another, saturating at 0xFF
+// i, j: the two unsigned bytes to add
+// returns: i + j, or 255 if the sum does not fit in 8 bits
+LIB8STATIC uint8_t qadd8( uint8_t i, uint8_t j)
+{
+#if QADD8_C == 1
+ // Sum in a wider int so that overflow is visible, then clamp.
+ int t = i + j;
+ if( t > 255) t = 255;
+ return t;
+#elif QADD8_AVRASM == 1
+ asm volatile(
+ /* First, add j to i, conditioning the C flag */
+ "add %0, %1 \n\t"
+
+ /* Now test the C flag.
+ If C is clear, we branch around a load of 0xFF into i.
+ If C is set, we go ahead and load 0xFF into i.
+ */
+ "brcc L_%= \n\t"
+ "ldi %0, 0xFF \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+ return i;
+#elif QADD8_ARM_DSP_ASM == 1
+ // Single DSP instruction; selected only for __MK20DX128__ above.
+ asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
+ return i;
+#else
+#error "No implementation for qadd8 available."
+#endif
+}
+
+
+// qadd7: add one signed byte to another, saturating at 0x7F (127)
+// on positive overflow and at 0x80 (-128) on negative overflow.
+// i, j: the two signed bytes to add
+// returns: the clamped sum
+LIB8STATIC int8_t qadd7( int8_t i, int8_t j)
+{
+#if QADD7_C == 1
+ // Sum in 16 bits so that overflow in either direction is visible.
+ int16_t t = i + j;
+ if( t > 127) t = 127;
+ else if( t < -128) t = -128;
+ return t;
+#elif QADD7_AVRASM == 1
+ asm volatile(
+ /* First, add j to i, conditioning the V and C flags */
+ "add %0, %1 \n\t"
+
+ /* Now test the V flag.
+ If V is clear, we branch around a load of 0x7F into i.
+ If V is set, we go ahead and load 0x7F into i.
+ */
+ "brvc L_%= \n\t"
+ "ldi %0, 0x7F \n\t"
+
+ /* If the overflow came from adding two negative numbers, the
+ add left C set (ldi does not change flags). Adding C turns
+ 0x7F into 0x80, i.e. saturates at -128 instead of +127.
+ */
+ "adc %0, __zero_reg__ \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+
+ return i;
+#elif QADD7_ARM_DSP_ASM == 1
+ asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
+ return i;
+#else
+#error "No implementation for qadd7 available."
+#endif
+}
+
+// qsub8: subtract one byte from another, saturating at 0x00
+// i: the value to subtract from
+// j: the value to subtract
+// returns: i - j, or 0 if j is larger than i
+LIB8STATIC uint8_t qsub8( uint8_t i, uint8_t j)
+{
+#if QSUB8_C == 1
+ // Subtract in a signed int so that underflow is visible, then clamp.
+ int t = i - j;
+ if( t < 0) t = 0;
+ return t;
+#elif QSUB8_AVRASM == 1
+
+ asm volatile(
+ /* First, subtract j from i, conditioning the C flag */
+ "sub %0, %1 \n\t"
+
+ /* Now test the C flag.
+ If C is clear, we branch around a load of 0x00 into i.
+ If C is set, we go ahead and load 0x00 into i.
+ */
+ "brcc L_%= \n\t"
+ "ldi %0, 0x00 \n\t"
+ "L_%=: "
+ : "+a" (i)
+ : "a" (j) );
+
+ return i;
+#else
+#error "No implementation for qsub8 available."
+#endif
+}
+
+// add8: add one byte to another, with one byte result
+// The result is not saturated: the sum is truncated to 8 bits,
+// i.e. (i + j) & 0xFF.
+LIB8STATIC uint8_t add8( uint8_t i, uint8_t j)
+{
+#if ADD8_C == 1
+ // The truncation happens when 't' is converted to the uint8_t return type.
+ int t = i + j;
+ return t;
+#elif ADD8_AVRASM == 1
+ // Add j to i, period.
+ asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
+ return i;
+#else
+#error "No implementation for add8 available."
+#endif
+}
+
+
+// sub8: subtract one byte from another, 8-bit result
+// The result is not saturated: the difference is truncated to
+// 8 bits, i.e. (i - j) & 0xFF.
+LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j)
+{
+#if SUB8_C == 1
+ // The truncation happens when 't' is converted to the uint8_t return type.
+ int t = i - j;
+ return t;
+#elif SUB8_AVRASM == 1
+ // Subtract j from i, period.
+ asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
+ return i;
+#else
+#error "No implementation for sub8 available."
+#endif
+}
+
+
+// scale8: scale one byte by a second one, which is treated as
+// the numerator of a fraction whose denominator is 256
+// In other words, it computes i * (scale / 256)
+// 4 clocks AVR, 2 clocks ARM
+// i: the value to scale
+// scale: the scale factor, in 256ths
+// returns: the high byte of the 16-bit product i * scale
+LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+ return ((int)i * (int)(scale) ) >> 8;
+#elif SCALE8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Move the high 8-bits of the product (r1) back to i */
+ "mov %0, r1 \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+ "clr __zero_reg__ \n\t"
+
+ : "+a" (i) /* writes to i */
+ : "a" (scale) /* uses scale */
+ : "r0", "r1" /* clobbers r0, r1 */ );
+
+ /* Return the result */
+ return i;
+#else
+#error "No implementation for scale8 available."
+#endif
+}
+
+
+// The "video" version of scale8 guarantees that the output will
+// only be zero if one or both of the inputs are zero. If both
+// inputs are non-zero, the output is guaranteed to be non-zero.
+// This makes for better 'video'/LED dimming, at the cost of
+// several additional cycles.
+// i: the value to scale
+// scale: the scale factor, in 256ths
+// returns: 0 if i is 0; otherwise ((i * scale) >> 8), plus 1
+// when scale is non-zero
+LIB8STATIC uint8_t scale8_video( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+ return j;
+#elif SCALE8_AVRASM == 1
+
+ // %0 = i, %1 = scale, %2 = nonzeroscale.
+ // When i is zero the branch skips the multiply entirely, so
+ // r1 has not been modified and needs no clearing on that path.
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ asm volatile(
+ " tst %0 \n"
+ " breq L_%= \n"
+ " mul %0, %1 \n"
+ " mov %0, r1 \n"
+ " add %0, %2 \n"
+ " clr __zero_reg__ \n"
+ "L_%=: \n"
+
+ : "+a" (i)
+ : "a" (scale), "a" (nonzeroscale)
+ : "r0", "r1");
+
+ // Return the result
+ return i;
+#else
+#error "No implementation for scale8_video available."
+#endif
+}
+
+
+// This version of scale8 does not clean up the R1 register on AVR
+// If you are doing several 'scale8's in a row, use this, and
+// then explicitly call cleanup_R1.
+// i: the value to scale
+// scale: the scale factor, in 256ths
+// returns: the high byte of the 16-bit product i * scale
+// In the C implementation this is the same computation as scale8.
+LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+ return ((int)i * (int)(scale) ) >> 8;
+#elif SCALE8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Move the high 8-bits of the product (r1) back to i */
+ "mov %0, r1 \n\t"
+ /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
+ /* "clr __zero_reg__ \n\t" */
+
+ : "+a" (i) /* writes to i */
+ : "a" (scale) /* uses scale */
+ : "r0", "r1" /* clobbers r0, r1 */ );
+
+ // Return the result
+ return i;
+#else
+#error "No implementation for scale8_LEAVING_R1_DIRTY available."
+#endif
+}
+
+// nscale8_LEAVING_R1_DIRTY: in-place variant of
+// scale8_LEAVING_R1_DIRTY. Replaces i with the high byte of
+// i * scale. On AVR, R1 is left dirty; call cleanup_R1 afterward.
+//
+// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENT DIRECTLY IN PLACE
+
+LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale)
+{
+#if SCALE8_C == 1
+ i = ((int)i * (int)(scale) ) >> 8;
+#elif SCALE8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Move the high 8-bits of the product (r1) back to i */
+ "mov %0, r1 \n\t"
+ /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
+ /* "clr __zero_reg__ \n\t" */
+
+ : "+a" (i) /* writes to i */
+ : "a" (scale) /* uses scale */
+ : "r0", "r1" /* clobbers r0, r1 */ );
+#else
+#error "No implementation for nscale8_LEAVING_R1_DIRTY available."
+#endif
+}
+
+
+
+// scale8_video_LEAVING_R1_DIRTY: same result as scale8_video, but
+// the AVR version does not clear R1 after the multiply. Use this
+// for several scalings in a row, then call cleanup_R1.
+// i: the value to scale
+// scale: the scale factor, in 256ths
+// returns: 0 if i is 0; otherwise ((i * scale) >> 8), plus 1
+// when scale is non-zero
+LIB8STATIC uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
+{
+#if SCALE8_C == 1
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+ return j;
+#elif SCALE8_AVRASM == 1
+
+ // %0 = i, %1 = scale, %2 = nonzeroscale
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ asm volatile(
+ " tst %0 \n"
+ " breq L_%= \n"
+ " mul %0, %1 \n"
+ " mov %0, r1 \n"
+ " add %0, %2 \n"
+ /* R1 IS LEFT DIRTY, YOU MUST ZERO IT OUT YOURSELF */
+ "L_%=: \n"
+
+ : "+a" (i)
+ : "a" (scale), "a" (nonzeroscale)
+ : "r0", "r1");
+
+ // Return the result
+ return i;
+#else
+#error "No implementation for scale8_video available."
+#endif
+}
+
+
+
+// cleanup_R1: zero the R1 register again after one or more of the
+// *_LEAVING_R1_DIRTY functions. Does nothing unless
+// CLEANUP_R1_AVRASM is defined as 1, which the configuration above
+// does only for AVR parts other than the listed ATtiny models.
+LIB8STATIC void cleanup_R1()
+{
+#if CLEANUP_R1_AVRASM == 1
+ // Restore r1 to "0"; it's expected to always be that
+ asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
+#endif
+}
+
+
+// nscale8x3: scale three one byte values by a fourth one, which is treated as
+// the numerator of a fraction whose denominator is 256
+// In other words, it computes r,g,b * (scale / 256)
+//
+// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
+
+LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale)
+{
+#if SCALE8_C == 1
+ r = ((int)r * (int)(scale) ) >> 8;
+ g = ((int)g * (int)(scale) ) >> 8;
+ b = ((int)b * (int)(scale) ) >> 8;
+#elif SCALE8_AVRASM == 1
+ // Three multiplies in a row, with R1 zeroed once at the end
+ r = scale8_LEAVING_R1_DIRTY(r, scale);
+ g = scale8_LEAVING_R1_DIRTY(g, scale);
+ b = scale8_LEAVING_R1_DIRTY(b, scale);
+ cleanup_R1();
+#else
+#error "No implementation for nscale8x3 available."
+#endif
+}
+
+
+// nscale8x3_video: scale three one byte values in place by
+// 'scale' (in 256ths), using the "video" rule of scale8_video:
+// a non-zero value scaled by a non-zero scale never becomes zero.
+//
+// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
+LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale)
+{
+#if SCALE8_C == 1
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ r = (r == 0) ? 0 : (((int)r * (int)(scale) ) >> 8) + nonzeroscale;
+ g = (g == 0) ? 0 : (((int)g * (int)(scale) ) >> 8) + nonzeroscale;
+ b = (b == 0) ? 0 : (((int)b * (int)(scale) ) >> 8) + nonzeroscale;
+#elif SCALE8_AVRASM == 1
+ // Three multiplies in a row, with R1 zeroed once at the end
+ r = scale8_video_LEAVING_R1_DIRTY( r, scale);
+ g = scale8_video_LEAVING_R1_DIRTY( g, scale);
+ b = scale8_video_LEAVING_R1_DIRTY( b, scale);
+ cleanup_R1();
+#else
+#error "No implementation for nscale8x3 available."
+#endif
+}
+
+// nscale8x2: scale two one byte values by a third one, which is treated as
+// the numerator of a fraction whose denominator is 256
+// In other words, it computes i,j * (scale / 256)
+//
+// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
+
+LIB8STATIC void nscale8x2( uint8_t& i, uint8_t& j, fract8 scale)
+{
+#if SCALE8_C == 1
+ i = ((int)i * (int)(scale) ) >> 8;
+ j = ((int)j * (int)(scale) ) >> 8;
+#elif SCALE8_AVRASM == 1
+ // Two multiplies in a row, with R1 zeroed once at the end
+ i = scale8_LEAVING_R1_DIRTY(i, scale);
+ j = scale8_LEAVING_R1_DIRTY(j, scale);
+ cleanup_R1();
+#else
+#error "No implementation for nscale8x2 available."
+#endif
+}
+
+
+// nscale8x2_video: scale two one byte values in place by 'scale'
+// (in 256ths), using the "video" rule of scale8_video: a non-zero
+// value scaled by a non-zero scale never becomes zero.
+//
+// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
+LIB8STATIC void nscale8x2_video( uint8_t& i, uint8_t& j, fract8 scale)
+{
+#if SCALE8_C == 1
+ uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+ i = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+ j = (j == 0) ? 0 : (((int)j * (int)(scale) ) >> 8) + nonzeroscale;
+#elif SCALE8_AVRASM == 1
+ // Two multiplies in a row, with R1 zeroed once at the end
+ i = scale8_video_LEAVING_R1_DIRTY( i, scale);
+ j = scale8_video_LEAVING_R1_DIRTY( j, scale);
+ cleanup_R1();
+#else
+#error "No implementation for nscale8x2 available."
+#endif
+}
+
+
+// scale16by8: scale a 16-bit unsigned value by an 8-bit value,
+// considered as numerator of a fraction whose denominator
+// is 256. In other words, it computes i * (scale / 256)
+// i: the 16-bit value to scale
+// scale: the scale factor, in 256ths
+// returns: (i * scale) / 256, computed from the full 24-bit product
+
+#if SCALE16BY8_C == 1
+LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale )
+{
+ uint16_t result;
+ // Multiply in 32 bits: the product needs up to 24 bits, and where
+ // 'int' is only 16 bits wide (e.g. the ATtiny C path) a plain
+ // 'i * scale' would be truncated before the divide.
+ result = ((uint32_t)(i) * (uint32_t)(scale)) / 256;
+ return result;
+}
+#elif SCALE16BY8_AVRASM == 1
+LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale )
+{
+ uint16_t result;
+ asm volatile(
+ // result.A = HighByte(i.A x j )
+ " mul %A[i], %[scale] \n\t"
+ " mov %A[result], r1 \n\t"
+ " clr %B[result] \n\t"
+
+ // result.A-B += i.B x j
+ " mul %B[i], %[scale] \n\t"
+ " add %A[result], r0 \n\t"
+ " adc %B[result], r1 \n\t"
+
+ // cleanup r1
+ " clr __zero_reg__ \n\t"
+
+ : [result] "=r" (result)
+ : [i] "r" (i), [scale] "r" (scale)
+ : "r0", "r1"
+ );
+ return result;
+}
+#else
+#error "No implementation for scale16by8 available."
+#endif
+
+// scale16: scale a 16-bit unsigned value by a 16-bit value,
+// considered as numerator of a fraction whose denominator
+// is 65536. In other words, it computes i * (scale / 65536)
+// i: the 16-bit value to scale
+// scale: the scale factor, in 65536ths
+// returns: the upper 16 bits of the 32-bit product i * scale
+
+#if SCALE16_C == 1
+LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
+{
+ uint16_t result;
+ result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
+ return result;
+}
+#elif SCALE16_AVRASM == 1
+LIB8STATIC
+uint16_t scale16( uint16_t i, fract16 scale )
+{
+ // The 32-bit product is built from four 8x8 partial products;
+ // 'zero' supplies a register holding 0 for the carry-only adds,
+ // since r1 holds multiply results while the asm is running.
+ uint32_t result = 0;
+ const uint8_t zero = 0;
+ asm volatile(
+ // result.A-B = i.A x scale.A
+ " mul %A[i], %A[scale] \n\t"
+ // save results...
+ // basic idea:
+ //" mov %A[result], r0 \n\t"
+ //" mov %B[result], r1 \n\t"
+ // which can be written as...
+ " movw %A[result], r0 \n\t"
+ // We don't actually need to do anything with r0,
+ // as result.A is never used again here, so we
+ // could just move the high byte, but movw is
+ // one clock cycle, just like mov, so might as
+ // well, in case we want to use this code for
+ // a generic 16x16 multiply somewhere.
+
+ // result.C-D = i.B x scale.B
+ " mul %B[i], %B[scale] \n\t"
+ //" mov %C[result], r0 \n\t"
+ //" mov %D[result], r1 \n\t"
+ " movw %C[result], r0 \n\t"
+
+ // result.B-D += i.B x scale.A
+ " mul %B[i], %A[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // result.B-D += i.A x scale.B
+ " mul %A[i], %B[scale] \n\t"
+
+ " add %B[result], r0 \n\t"
+ " adc %C[result], r1 \n\t"
+ " adc %D[result], %[zero] \n\t"
+
+ // cleanup r1
+ " clr r1 \n\t"
+
+ : [result] "+r" (result)
+ : [i] "r" (i),
+ [scale] "r" (scale),
+ [zero] "r" (zero)
+ : "r0", "r1"
+ );
+ // Keep only the upper 16 bits of the 32-bit product
+ result = result >> 16;
+ return result;
+}
+#else
+#error "No implementation for scale16 available."
+#endif
+
+
+
+// mul8: 8x8 bit multiplication, with 8 bit result
+// i, j: the two bytes to multiply
+// returns: the low 8 bits of the product, (i * j) & 0xFF
+LIB8STATIC uint8_t mul8( uint8_t i, uint8_t j)
+{
+#if MUL8_C == 1
+ return ((int)i * (int)(j) ) & 0xFF;
+#elif MUL8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+ "mul %0, %1 \n\t"
+ /* Extract the LOW 8-bits (r0) */
+ "mov %0, r0 \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+ "clr __zero_reg__ \n\t"
+ : "+a" (i)
+ : "a" (j)
+ : "r0", "r1");
+
+ return i;
+#else
+#error "No implementation for mul8 available."
+#endif
+}
+
+
+// qmul8: saturating 8x8 bit multiplication, with 8 bit result
+// i, j: the two bytes to multiply
+// returns: i * j, or 255 if the product does not fit in 8 bits
+LIB8STATIC uint8_t qmul8( uint8_t i, uint8_t j)
+{
+#if QMUL8_C == 1
+ int p = ((int)i * (int)(j) );
+ if( p > 255) p = 255;
+ return p;
+#elif QMUL8_AVRASM == 1
+ asm volatile(
+ /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+ " mul %0, %1 \n\t"
+ /* If high byte of result is zero, all is well. */
+ " tst r1 \n\t"
+ " breq Lnospill_%= \n\t"
+ /* If high byte of result > 0, saturate low byte to 0xFF */
+ " ldi %0,0xFF \n\t"
+ " rjmp Ldone_%= \n\t"
+ "Lnospill_%=: \n\t"
+ /* Extract the LOW 8-bits (r0) */
+ " mov %0, r0 \n\t"
+ "Ldone_%=: \n\t"
+ /* Restore r1 to "0"; it's expected to always be that */
+ " clr __zero_reg__ \n\t"
+ : "+a" (i)
+ : "a" (j)
+ : "r0", "r1");
+
+ return i;
+#else
+#error "No implementation for qmul8 available."
+#endif
+}
+
+
+// abs8: take abs() of a signed 8-bit int8_t
+// i: the signed value
+// returns: i if i is non-negative, otherwise -i
+// NOTE(review): +128 does not fit in an int8_t, so abs8(-128)
+// presumably comes back as -128 -- confirm if callers depend on it.
+LIB8STATIC int8_t abs8( int8_t i)
+{
+#if ABS8_C == 1
+ if( i < 0) i = -i;
+ return i;
+#elif ABS8_AVRASM == 1
+
+
+ asm volatile(
+ /* First, check the high bit, and prepare to skip if it's clear */
+ "sbrc %0, 7 \n"
+
+ /* Negate the value */
+ "neg %0 \n"
+
+ : "+r" (i) : "r" (i) );
+ return i;
+#else
+#error "No implementation for abs8 available."
+#endif
+}
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// float-to-fixed and fixed-to-float conversions
+//
+// Note that anything involving a 'float' on AVR will be slower.
+
+// floatToSfract15: conversion from IEEE754 float in the range (-1,1)
+// to 16-bit fixed point. Note that the extremes of
+// one and negative one are NOT representable. The
+// representable range is basically -0.99996948242 to
+// 0.99996948242, in steps of 0.00003051757.
+//
+// sfract15ToFloat: conversion from sfract15 fixed point to
+// IEEE754 32-bit float.
+
+// sfract15ToFloat: interpret 'y' as signed 32768ths and return
+// the corresponding floating point value.
+LIB8STATIC
+float sfract15ToFloat( sfract15 y)
+{
+ const double asFraction = y / 32768.0;
+ return asFraction;
+}
+
+// floatToSfract15: express 'f' in signed 32768ths and return the
+// result as a 16-bit fixed point value.
+LIB8STATIC
+sfract15 floatToSfract15( float f)
+{
+ const double in32768ths = f * 32768.0;
+ return in32768ths;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////
+
+// Dimming and brightening functions
+//
+// The eye does not respond in a linear way to light.
+// High speed PWM'd LEDs at 50% duty cycle appear far
+// brighter than the 'half as bright' you might expect.
+//
+// If you want your midpoint brightness level (128) to
+// appear half as bright as 'full' brightness (255), you
+// have to apply a 'dimming function'.
+//
+//
+
+// dim8_raw: dimming curve; scales x by itself, i.e. scale8( x, x).
+LIB8STATIC uint8_t dim8_raw( uint8_t x)
+{
+ const uint8_t dimmed = scale8( x, x);
+ return dimmed;
+}
+
+// dim8_video: dimming curve using the "video" scaling rule,
+// i.e. scale8_video( x, x).
+LIB8STATIC uint8_t dim8_video( uint8_t x)
+{
+ const uint8_t dimmed = scale8_video( x, x);
+ return dimmed;
+}
+
+// brighten8_raw: brightening curve; the mirror image of dim8_raw,
+// computed as 255 - scale8( 255 - x, 255 - x).
+LIB8STATIC uint8_t brighten8_raw( uint8_t x)
+{
+ const uint8_t inverted = 255 - x;
+ const uint8_t dimmed = scale8( inverted, inverted);
+ return 255 - dimmed;
+}
+
+// brighten8_video: brightening curve; the mirror image of
+// dim8_video, computed as 255 - scale8_video( 255 - x, 255 - x).
+LIB8STATIC uint8_t brighten8_video( uint8_t x)
+{
+ const uint8_t inverted = 255 - x;
+ const uint8_t dimmed = scale8_video( inverted, inverted);
+ return 255 - dimmed;
+}
+
+///////////////////////////////////////////////////////////////////////
+
+// A 16-bit PRNG good enough for LED animations
+
+// X(n+1) = (2053 * X(n)) + 13849
+#define RAND16_2053 2053
+#define RAND16_13849 13849
+
+extern uint16_t rand16seed;// = RAND16_SEED;
+
+
+// random8: advance the shared 16-bit generator one step and
+// return the low 8 bits of the new state.
+LIB8STATIC uint8_t random8()
+{
+ const uint16_t next = (rand16seed * RAND16_2053) + RAND16_13849;
+ rand16seed = next;
+ return (uint8_t)next;
+}
+
+// random16: advance the shared 16-bit generator one step and
+// return the whole new state.
+LIB8STATIC uint16_t random16()
+{
+ const uint16_t next = (rand16seed * RAND16_2053) + RAND16_13849;
+ rand16seed = next;
+ return next;
+}
+
+
+// random8( lim): random byte scaled down with scale8 into the
+// range 0..(lim-1).
+LIB8STATIC uint8_t random8(uint8_t lim)
+{
+ const uint8_t raw = random8();
+ return scale8( raw, lim);
+}
+
+// random8( min, lim): random byte in the range min..(lim-1),
+// built as min plus a random value below (lim - min).
+LIB8STATIC uint8_t random8(uint8_t min, uint8_t lim)
+{
+ const uint8_t span = lim - min;
+ const uint8_t picked = random8(span) + min;
+ return picked;
+}
+
+// random16( lim): random 16-bit value scaled down into the range
+// 0..(lim-1) by taking the upper 16 bits of lim * random16().
+LIB8STATIC uint16_t random16( uint16_t lim)
+{
+ const uint16_t raw = random16();
+ const uint32_t product = (uint32_t)lim * (uint32_t)raw;
+ return (uint16_t)(product >> 16);
+}
+
+// random16( min, lim): random 16-bit value in the range
+// min..(lim-1), built as min plus a random value below (lim - min).
+LIB8STATIC uint16_t random16( uint16_t min, uint16_t lim)
+{
+ const uint16_t span = lim - min;
+ const uint16_t picked = random16( span) + min;
+ return picked;
+}
+
+// random16_set_seed: replace the generator state with 'seed'.
+LIB8STATIC void random16_set_seed( uint16_t seed)
+{
+ const uint16_t newstate = seed;
+ rand16seed = newstate;
+}
+
+// random16_get_seed: return the current generator state.
+LIB8STATIC uint16_t random16_get_seed()
+{
+ const uint16_t current = rand16seed;
+ return current;
+}
+
+// random16_add_entropy: add 'entropy' into the generator state
+// (the 16-bit sum wraps around).
+LIB8STATIC void random16_add_entropy( uint16_t entropy)
+{
+ rand16seed = rand16seed + entropy;
+}
+
+
+///////////////////////////////////////////////////////////////////////
+
+// sin16 & cos16:
+// Fast 16-bit approximations of sin(x) & cos(x).
+// Input angle is an unsigned int from 0-65535.
+// Output is signed int from -32767 to 32767.
+//
+// This approximation never varies more than 0.69%
+// from the floating point value you'd get by doing
+// float s = sin( x ) * 32767.0;
+//
+// Don't use this approximation for calculating the
+// trajectory of a rocket to Mars, but it's great
+// for art projects and LED displays.
+//
+// On Arduino/AVR, this approximation is more than
+// 10X faster than floating point sin(x) and cos(x)
+
+#if defined(__AVR__)
+#define sin16 sin16_avr
+#else
+#define sin16 sin16_C
+#endif
+
+// sin16_avr: AVR-oriented version of the 16-bit sine approximation.
+// theta: angle, where 0-65535 covers one full turn
+// returns: piecewise-linear approximation of sine
+// The quarter wave is split into 8 sections, each with a base
+// value and a slope; the other three quarters are produced by
+// mirroring (bit 14 of theta) and negating (bit 15 of theta).
+LIB8STATIC int16_t sin16_avr( uint16_t theta )
+{
+ // Four bytes per section: base low byte, base high byte, slope,
+ // and an unused pad byte (omitted after the last entry).
+ static const uint8_t data[] =
+ { 0, 0, 49, 0, 6393%256, 6393/256, 48, 0,
+ 12539%256, 12539/256, 44, 0, 18204%256, 18204/256, 38, 0,
+ 23170%256, 23170/256, 31, 0, 27245%256, 27245/256, 23, 0,
+ 30273%256, 30273/256, 14, 0, 32137%256, 32137/256, 4 /*,0*/ };
+
+ // Position within the current quarter wave
+ uint16_t offset = (theta & 0x3FFF);
+
+ // AVR doesn't have a multi-bit shift instruction,
+ // so if we say "offset >>= 3", gcc makes a tiny loop.
+ // Inserting empty volatile statements between each
+ // bit shift forces gcc to unroll the loop.
+ offset >>= 1; // 0..8191
+ asm volatile("");
+ offset >>= 1; // 0..4095
+ asm volatile("");
+ offset >>= 1; // 0..2047
+
+ // In the second and fourth quarters, run backward through the table
+ if( theta & 0x4000 ) offset = 2047 - offset;
+
+ // Byte index of this section's entry in data[] (4 bytes per section)
+ uint8_t sectionX4;
+ sectionX4 = offset / 256;
+ sectionX4 *= 4;
+
+ uint8_t m;
+
+ // Assembles the 16-bit base value from its two table bytes
+ union {
+ uint16_t b;
+ struct {
+ uint8_t blo;
+ uint8_t bhi;
+ };
+ } u;
+
+ //in effect u.b = blo + (256 * bhi);
+ u.blo = data[ sectionX4 ];
+ u.bhi = data[ sectionX4 + 1];
+ m = data[ sectionX4 + 2];
+
+ // Position within the section, halved: 0..127
+ uint8_t secoffset8 = (uint8_t)(offset) / 2;
+
+ uint16_t mx = m * secoffset8;
+
+ int16_t y = mx + u.b;
+ // The second half of the turn is the negative of the first half
+ if( theta & 0x8000 ) y = -y;
+
+ return y;
+}
+
+// sin16_C: portable C version of the 16-bit sine approximation.
+// theta: angle, where 0-65535 covers one full turn
+// returns: piecewise-linear approximation of sine
+// The quarter wave is split into 8 sections, each described by a
+// starting value (base) and a slope.
+LIB8STATIC int16_t sin16_C( uint16_t theta )
+{
+ static const uint16_t base[] =
+ { 0, 6393, 12539, 18204, 23170, 27245, 30273, 32137 };
+ static const uint8_t slope[] =
+ { 49, 48, 44, 38, 31, 23, 14, 4 };
+
+ // Bit 14 selects a mirrored quarter, bit 15 the negative half
+ const bool mirrored = (theta & 0x4000) != 0;
+ const bool negative = (theta & 0x8000) != 0;
+
+ // Position within the quarter wave, 0..2047
+ uint16_t pos = (theta & 0x3FFF) >> 3;
+ if( mirrored ) pos = 2047 - pos;
+
+ // Section number 0..7, and halved position inside it 0..127
+ const uint8_t sec = pos >> 8;
+ const uint8_t within = (uint8_t)(pos) / 2;
+
+ const uint16_t rise = slope[sec] * within;
+ int16_t y = rise + base[sec];
+
+ if( negative ) y = -y;
+ return y;
+}
+
+// cos16: cosine via sin16 with the angle advanced by 16384
+// (a quarter of the 16-bit angle range); the sum wraps at 16 bits.
+LIB8STATIC int16_t cos16( uint16_t theta)
+{
+    const uint16_t shifted = theta + 16384;
+    return sin16( shifted);
+}
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// memmove8, memcpy8, and memset8:
+// alternatives to memmove, memcpy, and memset that are
+// faster on AVR than standard avr-libc 1.8
+
+#if defined(__AVR__)
+// Declared with C linkage; the definitions are not in this header.
+// memcpy8 and memset8 are additionally marked noinline.
+// All three take the byte count as a 16-bit value.
+extern "C" {
+void * memmove8( void * dst, const void * src, uint16_t num );
+void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline));
+void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ;
+}
+#else
+// on non-AVR platforms, these names just call standard libc.
+// (plain macro aliases, so the libc prototypes and size types apply)
+#define memmove8 memmove
+#define memcpy8 memcpy
+#define memset8 memset
+#endif
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// linear interpolation, such as could be used for Perlin noise, etc.
+//
+
+// linear interpolation between two unsigned 8-bit values,
+// with 8-bit fraction.
+// 'frac' is the fraction of the distance from a towards b, applied
+// with scale8(). The b > a and b <= a cases are handled separately so
+// the distance always fits in an unsigned 8-bit value: computing
+// (b - a) unconditionally wraps around when b < a and produces wrong
+// results for intermediate fractions.
+LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac)
+{
+    uint8_t result;
+    if( b > a) {
+        // rising: move up from a by the scaled distance
+        uint8_t delta = b - a;
+        uint8_t scaled = scale8( delta, frac);
+        result = a + scaled;
+    } else {
+        // falling (or equal): move down from a by the scaled distance
+        uint8_t delta = a - b;
+        uint8_t scaled = scale8( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+// linear interpolation between two unsigned 16-bit values,
+// with 16-bit fraction.
+// 'frac' is the fraction (in 65536ths) of the distance from a towards b.
+// The b > a and b <= a cases are handled separately so the distance
+// always fits in an unsigned 16-bit value: computing (b - a)
+// unconditionally wraps around when b < a and produces wrong results
+// for intermediate fractions.
+LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac)
+{
+    uint16_t result;
+    if( b > a) {
+        // rising: move up from a by the scaled distance
+        uint16_t delta = b - a;
+        uint32_t prod = (uint32_t)delta * (uint32_t)frac;
+        uint16_t scaled = prod >> 16;
+        result = a + scaled;
+    } else {
+        // falling (or equal): move down from a by the scaled distance
+        uint16_t delta = a - b;
+        uint32_t prod = (uint32_t)delta * (uint32_t)frac;
+        uint16_t scaled = prod >> 16;
+        result = a - scaled;
+    }
+    return result;
+}
+
+
+// A note on the structure of lerp16by8 (and lerp15by8) :
+// The cases for b>a and b<=a are handled separately for
+// speed: without knowing the relative order of a and b,
+// the value (a-b) might be a signed 17-bit value, which
+// would have to be stored in a 32-bit signed int and
+// processed as such. To avoid that, we separate the
+// two cases, and are able to do all the math with 16-bit
+// unsigned values, which is much faster and smaller on AVR.
+
+// lerp16by8: interpolate between two unsigned 16-bit values using an
+// 8-bit fraction. The direction is decided first so that the distance
+// between the endpoints is always a non-negative 16-bit quantity.
+LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac)
+{
+    const bool rising = ( b > a);
+
+    // distance between the endpoints, then the part of it selected by frac
+    const uint16_t span = rising ? (b - a) : (a - b);
+    const uint16_t step = scale16by8( span, frac);
+
+    // move away from a in the direction of b
+    const uint16_t result = rising ? (a + step) : (a - step);
+    return result;
+}
+
+// lerp15by8: interpolate between two signed 15-bit values using an
+// 8-bit fraction. The direction is decided first so that the distance
+// between the endpoints can be held in an unsigned 16-bit variable.
+LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac)
+{
+    const bool rising = ( b > a);
+
+    // distance between the endpoints, then the part of it selected by frac
+    const uint16_t span = rising ? (b - a) : (a - b);
+    const uint16_t step = scale16by8( span, frac);
+
+    // move away from a in the direction of b
+    const int16_t result = rising ? (a + step) : (a - step);
+    return result;
+}
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// easing functions; see http://easings.net
+//
+
+// ease8InOutCubic: 8-bit cubic ease-in / ease-out function
+// Computes 3*i^2 - 2*i^3 with i treated as a fract8, and clamps
+// the result to 255.
+// Takes around 18 cycles on AVR
+LIB8STATIC fract8 ease8InOutCubic( fract8 i)
+{
+ // ii = i squared, iii = i cubed (both as 8-bit fractions)
+ uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i);
+ uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i);
+
+ // done in 16 bits so an intermediate result of 256 can be detected
+ uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii));
+
+ /* the code generated for the above *'s automatically
+ cleans up R1, so there's no need to explicitly call
+ cleanup_R1(); */
+
+ // low byte of the 16-bit result
+ uint8_t result = r1;
+
+ // if we got "256", return 255:
+ if( r1 & 0x100 ) {
+ result = 255;
+ }
+ return result;
+}
+
+// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function
+// shaped approximately like 'ease8InOutCubic',
+// it's never off by more than a couple of percent
+// from the actual cubic S-curve, and it executes
+// more than twice as fast. Use when the cycles
+// are more important than visual smoothness.
+// Asm version takes around 7 cycles on AVR.
+
+#if EASE8_C == 1
+// C implementation: three straight segments, with slope 0.5 in the first
+// and last quarter and slope 1.5 in the middle half.
+LIB8STATIC fract8 ease8InOutApprox( fract8 i)
+{
+    // first quarter: start with slope 0.5
+    if( i < 64) {
+        return i / 2;
+    }
+
+    // last quarter: end with slope 0.5, measured down from 255
+    if( i > (255 - 64)) {
+        const uint8_t fromTop = 255 - i;
+        return 255 - (fromTop / 2);
+    }
+
+    // middle half: slope 192/128 = 1.5, starting at output 32
+    const uint8_t mid = i - 64;
+    return mid + (mid / 2) + 32;
+}
+
+#elif EASE8_AVRASM == 1
+// AVR assembly implementation of ease8InOutApprox.
+// After subtracting 64, inputs from the middle half land in 0..127 and
+// inputs from the first or last quarter land in 128..255, so a single
+// unsigned compare separates the two code paths.
+LIB8STATIC uint8_t ease8InOutApprox( fract8 i)
+{
+ // takes around 7 cycles on AVR
+ asm volatile (
+ // i -= 64; branch to the start/end path when the result is >= 128
+ " subi %[i], 64 \n\t"
+ " cpi %[i], 128 \n\t"
+ " brcc Lshift_%= \n\t"
+
+ // middle case
+ // i += i/2, then subtract 224 (same as adding 32 modulo 256)
+ " mov __tmp_reg__, %[i] \n\t"
+ " lsr __tmp_reg__ \n\t"
+ " add %[i], __tmp_reg__ \n\t"
+ " subi %[i], 224 \n\t"
+ " rjmp Ldone_%= \n\t"
+
+ // start or end case
+ // halve, then subtract 96: 192..255 -> 0..31 and 128..191 -> 224..255
+ "Lshift_%=: \n\t"
+ " lsr %[i] \n\t"
+ " subi %[i], 96 \n\t"
+
+ "Ldone_%=: \n\t"
+
+ // i is both input and output.
+ // NOTE(review): "a" is presumably the AVR upper-register constraint
+ // needed by subi/cpi -- confirm against the GCC AVR constraint list.
+ : [i] "+a" (i)
+ :
+ : "r0", "r1"
+ );
+ return i;
+}
+#else
+#error "No implementation for ease8 available."
+#endif
+
+
+
+
+
+#endif
diff --git a/pixeltypes.h b/pixeltypes.h
new file mode 100644
index 00000000..6d3f67f2
--- /dev/null
+++ b/pixeltypes.h
@@ -0,0 +1,659 @@
+#ifndef __INC_PIXELS_H
+#define __INC_PIXELS_H
+
+#include <stdint.h>
+#include "lib8tion.h"
+
+struct CRGB;
+struct CHSV;
+
+// Forward declaration of hsv2rgb_rainbow here,
+// to avoid circular dependencies.
+extern void hsv2rgb_rainbow( const CHSV& hsv, CRGB& rgb);
+
+
+// CHSV: a hue / saturation / value color, one byte per component.
+// Each component can be reached under several names, and all three
+// bytes are also available as raw[0..2].
+struct CHSV {
+    union {
+        struct {
+            union {
+                uint8_t hue;
+                uint8_t h; };
+            union {
+                uint8_t saturation;
+                uint8_t sat;
+                uint8_t s; };
+            union {
+                uint8_t value;
+                uint8_t val;
+                uint8_t v; };
+        };
+        uint8_t raw[3];
+    };
+
+    // default construction leaves all components UNINITIALIZED
+    inline CHSV() __attribute__((always_inline))
+    {
+    }
+
+    // build from explicit hue, saturation and value
+    inline CHSV( uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline))
+        : h(ih), s(is), v(iv)
+    {
+    }
+
+    // copy construction: component-wise copy
+    inline CHSV(const CHSV& rhs) __attribute__((always_inline))
+        : h(rhs.h), s(rhs.s), v(rhs.v)
+    {
+    }
+
+    // assignment: component-wise copy, returns *this for chaining
+    inline CHSV& operator= (const CHSV& rhs) __attribute__((always_inline))
+    {
+        h = rhs.h;
+        s = rhs.s;
+        v = rhs.v;
+        return *this;
+    }
+
+    // set all three components at once, returns *this for chaining
+    inline CHSV& setHSV(uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline))
+    {
+        h = ih;
+        s = is;
+        v = iv;
+        return *this;
+    }
+};
+
+
+// CRGB: a red / green / blue color, one byte per channel.
+// Channels are reachable as r/g/b, red/green/blue, or raw[0..2].
+// Arithmetic on channels saturates (at 0xFF or 0x00) rather than wrapping.
+struct CRGB {
+    union {
+        struct {
+            union {
+                uint8_t r;
+                uint8_t red;
+            };
+            union {
+                uint8_t g;
+                uint8_t green;
+            };
+            union {
+                uint8_t b;
+                uint8_t blue;
+            };
+        };
+        uint8_t raw[3];
+    };
+
+    // channel access by index (0 = red, 1 = green, 2 = blue); not range-checked
+    inline uint8_t& operator[] (uint8_t x) __attribute__((always_inline))
+    {
+        return raw[x];
+    }
+
+    inline const uint8_t& operator[] (uint8_t x) const __attribute__((always_inline))
+    {
+        return raw[x];
+    }
+
+    // default values are UNINITIALIZED
+    inline CRGB() __attribute__((always_inline))
+    {
+    }
+
+    // allow construction from R, G, B
+    inline CRGB( uint8_t ir, uint8_t ig, uint8_t ib) __attribute__((always_inline))
+        : r(ir), g(ig), b(ib)
+    {
+    }
+
+    // allow construction from 32-bit (really 24-bit) 0xRRGGBB color code
+    inline CRGB( uint32_t colorcode) __attribute__((always_inline))
+        : r((colorcode >> 16) & 0xFF), g((colorcode >> 8) & 0xFF), b((colorcode >> 0) & 0xFF)
+    {
+    }
+
+    // allow copy construction
+    inline CRGB(const CRGB& rhs) __attribute__((always_inline))
+    {
+        r = rhs.r;
+        g = rhs.g;
+        b = rhs.b;
+    }
+
+    // allow construction from HSV color
+    inline CRGB(const CHSV& rhs) __attribute__((always_inline))
+    {
+        hsv2rgb_rainbow( rhs, *this);
+    }
+
+    // allow assignment from one RGB struct to another
+    inline CRGB& operator= (const CRGB& rhs) __attribute__((always_inline))
+    {
+        r = rhs.r;
+        g = rhs.g;
+        b = rhs.b;
+        return *this;
+    }
+
+    // allow assignment from 32-bit (really 24-bit) 0xRRGGBB color code
+    inline CRGB& operator= (const uint32_t colorcode) __attribute__((always_inline))
+    {
+        r = (colorcode >> 16) & 0xFF;
+        g = (colorcode >>  8) & 0xFF;
+        b = (colorcode >>  0) & 0xFF;
+        return *this;
+    }
+
+    // allow assignment from R, G, and B
+    inline CRGB& setRGB (uint8_t nr, uint8_t ng, uint8_t nb) __attribute__((always_inline))
+    {
+        r = nr;
+        g = ng;
+        b = nb;
+        return *this;
+    }
+
+    // allow assignment from H, S, and V
+    inline CRGB& setHSV (uint8_t hue, uint8_t sat, uint8_t val) __attribute__((always_inline))
+    {
+        hsv2rgb_rainbow( CHSV(hue, sat, val), *this);
+        return *this;
+    }
+
+    // allow assignment from just a Hue, saturation and value automatically at max.
+    inline CRGB& setHue (uint8_t hue) __attribute__((always_inline))
+    {
+        hsv2rgb_rainbow( CHSV(hue, 255, 255), *this);
+        return *this;
+    }
+
+    // allow assignment from HSV color
+    inline CRGB& operator= (const CHSV& rhs) __attribute__((always_inline))
+    {
+        hsv2rgb_rainbow( rhs, *this);
+        return *this;
+    }
+
+    // allow assignment from 32-bit (really 24-bit) 0xRRGGBB color code
+    inline CRGB& setColorCode (uint32_t colorcode) __attribute__((always_inline))
+    {
+        r = (colorcode >> 16) & 0xFF;
+        g = (colorcode >>  8) & 0xFF;
+        b = (colorcode >>  0) & 0xFF;
+        return *this;
+    }
+
+
+    // add one RGB to another, saturating at 0xFF for each channel
+    inline CRGB& operator+= (const CRGB& rhs )
+    {
+        r = qadd8( r, rhs.r);
+        g = qadd8( g, rhs.g);
+        b = qadd8( b, rhs.b);
+        return *this;
+    }
+
+    // add a constant to each channel, saturating at 0xFF
+    // this is NOT an operator+= overload because the compiler
+    // can't usefully decide when it's being passed a 32-bit
+    // constant (e.g. CRGB::Red) and an 8-bit one (CRGB::Blue)
+    inline CRGB& addToRGB (uint8_t d )
+    {
+        r = qadd8( r, d);
+        g = qadd8( g, d);
+        b = qadd8( b, d);
+        return *this;
+    }
+
+    // subtract one RGB from another, saturating at 0x00 for each channel
+    inline CRGB& operator-= (const CRGB& rhs )
+    {
+        r = qsub8( r, rhs.r);
+        g = qsub8( g, rhs.g);
+        b = qsub8( b, rhs.b);
+        return *this;
+    }
+
+    // subtract a constant from each channel, saturating at 0x00
+    // this is NOT an operator-= overload because the compiler
+    // can't usefully decide when it's being passed a 32-bit
+    // constant (e.g. CRGB::Red) and an 8-bit one (CRGB::Blue)
+    inline CRGB& subtractFromRGB(uint8_t d )
+    {
+        r = qsub8( r, d);
+        g = qsub8( g, d);
+        b = qsub8( b, d);
+        return *this;
+    }
+
+    // subtract a constant of '1' from each channel, saturating at 0x00
+    inline CRGB& operator-- ()  __attribute__((always_inline))
+    {
+        subtractFromRGB(1);
+        return *this;
+    }
+
+    // postfix form: subtract '1' from each channel, saturating at 0x00,
+    // and return the value the pixel had before the change
+    inline CRGB operator-- (int DUMMY_ARG)  __attribute__((always_inline))
+    {
+        CRGB retval(*this);
+        --(*this);
+        return retval;
+    }
+
+    // add a constant of '1' to each channel, saturating at 0xFF
+    inline CRGB& operator++ ()  __attribute__((always_inline))
+    {
+        addToRGB(1);
+        return *this;
+    }
+
+    // postfix form: add '1' to each channel, saturating at 0xFF,
+    // and return the value the pixel had before the change
+    inline CRGB operator++ (int DUMMY_ARG)  __attribute__((always_inline))
+    {
+        CRGB retval(*this);
+        ++(*this);
+        return retval;
+    }
+
+    // divide each of the channels by a constant
+    // (d must be non-zero; no check is made here)
+    inline CRGB& operator/= (uint8_t d )
+    {
+        r /= d;
+        g /= d;
+        b /= d;
+        return *this;
+    }
+
+    // multiply each of the channels by a constant,
+    // saturating each channel at 0xFF
+    inline CRGB& operator*= (uint8_t d )
+    {
+        r = qmul8( r, d);
+        g = qmul8( g, d);
+        b = qmul8( b, d);
+        return *this;
+    }
+
+    // scale down a RGB to N 256ths of its current brightness, using
+    // 'video' dimming rules, which means that unless the scale factor is ZERO
+    // each channel is guaranteed NOT to dim down to zero.  If it's already
+    // nonzero, it'll stay nonzero, even if that means the hue shifts a little
+    // at low brightness levels.
+    inline CRGB& nscale8_video (uint8_t scaledown )
+    {
+        nscale8x3_video( r, g, b, scaledown);
+        return *this;
+    }
+
+    // %= is a synonym for nscale8_video.  Think of it as scaling down
+    // by "a percentage"
+    inline CRGB& operator%= (uint8_t scaledown )
+    {
+        nscale8x3_video( r, g, b, scaledown);
+        return *this;
+    }
+
+    // fadeLightBy is a synonym for nscale8_video( ..., 255-fadefactor)
+    inline CRGB& fadeLightBy (uint8_t fadefactor )
+    {
+        nscale8x3_video( r, g, b, 255 - fadefactor);
+        return *this;
+    }
+
+    // scale down a RGB to N 256ths of its current brightness, using
+    // 'plain math' dimming rules, which means that the low light levels
+    // may dim all the way to 100% black.
+    inline CRGB& nscale8 (uint8_t scaledown )
+    {
+        nscale8x3( r, g, b, scaledown);
+        return *this;
+    }
+
+    // fadeToBlackBy is a synonym for nscale8( ..., 255-fadefactor)
+    inline CRGB& fadeToBlackBy (uint8_t fadefactor )
+    {
+        nscale8x3( r, g, b, 255 - fadefactor);
+        return *this;
+    }
+
+    // "or" operator brings each channel up to the higher of the two values
+    inline CRGB& operator|= (const CRGB& rhs )
+    {
+        if( rhs.r > r) r = rhs.r;
+        if( rhs.g > g) g = rhs.g;
+        if( rhs.b > b) b = rhs.b;
+        return *this;
+    }
+    inline CRGB& operator|= (uint8_t d )
+    {
+        if( d > r) r = d;
+        if( d > g) g = d;
+        if( d > b) b = d;
+        return *this;
+    }
+
+    // "and" operator brings each channel down to the lower of the two values
+    inline CRGB& operator&= (const CRGB& rhs )
+    {
+        if( rhs.r < r) r = rhs.r;
+        if( rhs.g < g) g = rhs.g;
+        if( rhs.b < b) b = rhs.b;
+        return *this;
+    }
+    inline CRGB& operator&= (uint8_t d )
+    {
+        if( d < r) r = d;
+        if( d < g) g = d;
+        if( d < b) b = d;
+        return *this;
+    }
+
+    // this allows testing a CRGB for zero-ness
+    inline operator bool() const __attribute__((always_inline))
+    {
+        return r || g || b;
+    }
+
+    // invert each channel; the pixel itself is not modified
+    inline CRGB operator- () const
+    {
+        CRGB retval;
+        retval.r = 255 - r;
+        retval.g = 255 - g;
+        retval.b = 255 - b;
+        return retval;
+    }
+
+
+    // weighted brightness of the pixel; the pixel itself is not modified
+    inline uint8_t getLuma ( ) const {
+        //Y' = 0.2126 R' + 0.7152 G' + 0.0722 B'
+        //     54            183       18 (!)
+
+        uint8_t luma = scale8_LEAVING_R1_DIRTY( r, 54) +
+                       scale8_LEAVING_R1_DIRTY( g, 183) +
+                       scale8_LEAVING_R1_DIRTY( b, 18);
+        cleanup_R1();
+        return luma;
+    }
+
+    // unweighted brightness: each channel scaled by 86/256 (roughly one
+    // third) and summed; the pixel itself is not modified
+    inline uint8_t getAverageLight( ) const {
+        const uint8_t eightysix = 86;
+        uint8_t avg = scale8_LEAVING_R1_DIRTY( r, eightysix) +
+                      scale8_LEAVING_R1_DIRTY( g, eightysix) +
+                      scale8_LEAVING_R1_DIRTY( b, eightysix);
+        cleanup_R1();
+        return avg;
+    }
+
+    // scale all channels up (or down) by a common factor so that the
+    // brightest channel ends up at about 'limit', keeping the ratio
+    // between the channels. A black pixel is left unchanged.
+    inline void maximizeBrightness( uint8_t limit = 255 )  {
+        uint8_t max = red;
+        if( green > max) max = green;
+        if( blue > max) max = blue;
+
+        // all channels are zero: nothing to scale, and dividing by
+        // 'max' below would be a division by zero
+        if( max == 0) return;
+
+        // 8.8 fixed-point factor; channel * factor never exceeds
+        // limit * 256, so the products fit in 16 bits
+        uint16_t factor = ((uint16_t)(limit) * 256) / max;
+        red =   (red   * factor) / 256;
+        green = (green * factor) / 256;
+        blue =  (blue  * factor) / 256;
+    }
+
+    // named 0xRRGGBB color codes
+    typedef enum {
+        AliceBlue=0xF0F8FF,
+        Amethyst=0x9966CC,
+        AntiqueWhite=0xFAEBD7,
+        Aqua=0x00FFFF,
+        Aquamarine=0x7FFFD4,
+        Azure=0xF0FFFF,
+        Beige=0xF5F5DC,
+        Bisque=0xFFE4C4,
+        Black=0x000000,
+        BlanchedAlmond=0xFFEBCD,
+        Blue=0x0000FF,
+        BlueViolet=0x8A2BE2,
+        Brown=0xA52A2A,
+        BurlyWood=0xDEB887,
+        CadetBlue=0x5F9EA0,
+        Chartreuse=0x7FFF00,
+        Chocolate=0xD2691E,
+        Coral=0xFF7F50,
+        CornflowerBlue=0x6495ED,
+        Cornsilk=0xFFF8DC,
+        Crimson=0xDC143C,
+        Cyan=0x00FFFF,
+        DarkBlue=0x00008B,
+        DarkCyan=0x008B8B,
+        DarkGoldenrod=0xB8860B,
+        DarkGray=0xA9A9A9,
+        DarkGreen=0x006400,
+        DarkKhaki=0xBDB76B,
+        DarkMagenta=0x8B008B,
+        DarkOliveGreen=0x556B2F,
+        DarkOrange=0xFF8C00,
+        DarkOrchid=0x9932CC,
+        DarkRed=0x8B0000,
+        DarkSalmon=0xE9967A,
+        DarkSeaGreen=0x8FBC8F,
+        DarkSlateBlue=0x483D8B,
+        DarkSlateGray=0x2F4F4F,
+        DarkTurquoise=0x00CED1,
+        DarkViolet=0x9400D3,
+        DeepPink=0xFF1493,
+        DeepSkyBlue=0x00BFFF,
+        DimGray=0x696969,
+        DodgerBlue=0x1E90FF,
+        FireBrick=0xB22222,
+        FloralWhite=0xFFFAF0,
+        ForestGreen=0x228B22,
+        Fuchsia=0xFF00FF,
+        Gainsboro=0xDCDCDC,
+        GhostWhite=0xF8F8FF,
+        Gold=0xFFD700,
+        Goldenrod=0xDAA520,
+        Gray=0x808080,
+        Green=0x008000,
+        GreenYellow=0xADFF2F,
+        Honeydew=0xF0FFF0,
+        HotPink=0xFF69B4,
+        IndianRed=0xCD5C5C,
+        Indigo=0x4B0082,
+        Ivory=0xFFFFF0,
+        Khaki=0xF0E68C,
+        Lavender=0xE6E6FA,
+        LavenderBlush=0xFFF0F5,
+        LawnGreen=0x7CFC00,
+        LemonChiffon=0xFFFACD,
+        LightBlue=0xADD8E6,
+        LightCoral=0xF08080,
+        LightCyan=0xE0FFFF,
+        LightGoldenrodYellow=0xFAFAD2,
+        LightGreen=0x90EE90,
+        LightGrey=0xD3D3D3,
+        LightPink=0xFFB6C1,
+        LightSalmon=0xFFA07A,
+        LightSeaGreen=0x20B2AA,
+        LightSkyBlue=0x87CEFA,
+        LightSlateGray=0x778899,
+        LightSteelBlue=0xB0C4DE,
+        LightYellow=0xFFFFE0,
+        Lime=0x00FF00,
+        LimeGreen=0x32CD32,
+        Linen=0xFAF0E6,
+        Magenta=0xFF00FF,
+        Maroon=0x800000,
+        MediumAquamarine=0x66CDAA,
+        MediumBlue=0x0000CD,
+        MediumOrchid=0xBA55D3,
+        MediumPurple=0x9370DB,
+        MediumSeaGreen=0x3CB371,
+        MediumSlateBlue=0x7B68EE,
+        MediumSpringGreen=0x00FA9A,
+        MediumTurquoise=0x48D1CC,
+        MediumVioletRed=0xC71585,
+        MidnightBlue=0x191970,
+        MintCream=0xF5FFFA,
+        MistyRose=0xFFE4E1,
+        Moccasin=0xFFE4B5,
+        NavajoWhite=0xFFDEAD,
+        Navy=0x000080,
+        OldLace=0xFDF5E6,
+        Olive=0x808000,
+        OliveDrab=0x6B8E23,
+        Orange=0xFFA500,
+        OrangeRed=0xFF4500,
+        Orchid=0xDA70D6,
+        PaleGoldenrod=0xEEE8AA,
+        PaleGreen=0x98FB98,
+        PaleTurquoise=0xAFEEEE,
+        PaleVioletRed=0xDB7093,
+        PapayaWhip=0xFFEFD5,
+        PeachPuff=0xFFDAB9,
+        Peru=0xCD853F,
+        Pink=0xFFC0CB,
+        Plaid=0xCC5533,
+        Plum=0xDDA0DD,
+        PowderBlue=0xB0E0E6,
+        Purple=0x800080,
+        Red=0xFF0000,
+        RosyBrown=0xBC8F8F,
+        RoyalBlue=0x4169E1,
+        SaddleBrown=0x8B4513,
+        Salmon=0xFA8072,
+        SandyBrown=0xF4A460,
+        SeaGreen=0x2E8B57,
+        Seashell=0xFFF5EE,
+        Sienna=0xA0522D,
+        Silver=0xC0C0C0,
+        SkyBlue=0x87CEEB,
+        SlateBlue=0x6A5ACD,
+        SlateGray=0x708090,
+        Snow=0xFFFAFA,
+        SpringGreen=0x00FF7F,
+        SteelBlue=0x4682B4,
+        Tan=0xD2B48C,
+        Teal=0x008080,
+        Thistle=0xD8BFD8,
+        Tomato=0xFF6347,
+        Turquoise=0x40E0D0,
+        Violet=0xEE82EE,
+        Wheat=0xF5DEB3,
+        White=0xFFFFFF,
+        WhiteSmoke=0xF5F5F5,
+        Yellow=0xFFFF00,
+        YellowGreen=0x9ACD32
+    } HTMLColorCode;
+    static uint32_t Squant;
+};
+
+
+// two pixels are equal when all three channels match
+inline __attribute__((always_inline)) bool operator== (const CRGB& lhs, const CRGB& rhs)
+{
+    if( lhs.r != rhs.r) return false;
+    if( lhs.g != rhs.g) return false;
+    return lhs.b == rhs.b;
+}
+
+// exact negation of operator==
+inline __attribute__((always_inline)) bool operator!= (const CRGB& lhs, const CRGB& rhs)
+{
+    const bool same = (lhs == rhs);
+    return !same;
+}
+
+// ordering compares the sums of the three channels
+inline __attribute__((always_inline)) bool operator< (const CRGB& lhs, const CRGB& rhs)
+{
+    const uint16_t lhsTotal = lhs.r + lhs.g + lhs.b;
+    const uint16_t rhsTotal = rhs.r + rhs.g + rhs.b;
+    return lhsTotal < rhsTotal;
+}
+
+// ordering compares the sums of the three channels
+inline __attribute__((always_inline)) bool operator> (const CRGB& lhs, const CRGB& rhs)
+{
+    const uint16_t lhsTotal = lhs.r + lhs.g + lhs.b;
+    const uint16_t rhsTotal = rhs.r + rhs.g + rhs.b;
+    return lhsTotal > rhsTotal;
+}
+
+// ordering compares the sums of the three channels
+inline __attribute__((always_inline)) bool operator>= (const CRGB& lhs, const CRGB& rhs)
+{
+    const uint16_t lhsTotal = lhs.r + lhs.g + lhs.b;
+    const uint16_t rhsTotal = rhs.r + rhs.g + rhs.b;
+    return lhsTotal >= rhsTotal;
+}
+
+// ordering compares the sums of the three channels
+inline __attribute__((always_inline)) bool operator<= (const CRGB& lhs, const CRGB& rhs)
+{
+    const uint16_t lhsTotal = lhs.r + lhs.g + lhs.b;
+    const uint16_t rhsTotal = rhs.r + rhs.g + rhs.b;
+    return lhsTotal <= rhsTotal;
+}
+
+
+// channel-wise sum of two pixels, each channel combined with qadd8
+__attribute__((always_inline))
+inline CRGB operator+( const CRGB& p1, const CRGB& p2)
+{
+    const uint8_t sumR = qadd8( p1.r, p2.r);
+    const uint8_t sumG = qadd8( p1.g, p2.g);
+    const uint8_t sumB = qadd8( p1.b, p2.b);
+    return CRGB( sumR, sumG, sumB);
+}
+
+// channel-wise difference of two pixels, each channel combined with qsub8
+__attribute__((always_inline))
+inline CRGB operator-( const CRGB& p1, const CRGB& p2)
+{
+    const uint8_t diffR = qsub8( p1.r, p2.r);
+    const uint8_t diffG = qsub8( p1.g, p2.g);
+    const uint8_t diffB = qsub8( p1.b, p2.b);
+    return CRGB( diffR, diffG, diffB);
+}
+
+// multiply every channel by d, each channel combined with qmul8
+__attribute__((always_inline))
+inline CRGB operator*( const CRGB& p1, uint8_t d)
+{
+    const uint8_t prodR = qmul8( p1.r, d);
+    const uint8_t prodG = qmul8( p1.g, d);
+    const uint8_t prodB = qmul8( p1.b, d);
+    return CRGB( prodR, prodG, prodB);
+}
+
+// divide every channel by d (integer division; d is not checked for zero)
+__attribute__((always_inline))
+inline CRGB operator/( const CRGB& p1, uint8_t d)
+{
+    const uint8_t quotR = p1.r / d;
+    const uint8_t quotG = p1.g / d;
+    const uint8_t quotB = p1.b / d;
+    return CRGB( quotR, quotG, quotB);
+}
+
+
+// channel-wise minimum of two pixels
+__attribute__((always_inline))
+inline CRGB operator&( const CRGB& p1, const CRGB& p2)
+{
+    uint8_t lowR = p2.r;
+    uint8_t lowG = p2.g;
+    uint8_t lowB = p2.b;
+    if( p1.r < p2.r) lowR = p1.r;
+    if( p1.g < p2.g) lowG = p1.g;
+    if( p1.b < p2.b) lowB = p1.b;
+    return CRGB( lowR, lowG, lowB);
+}
+
+// channel-wise maximum of two pixels
+__attribute__((always_inline))
+inline CRGB operator|( const CRGB& p1, const CRGB& p2)
+{
+    uint8_t highR = p2.r;
+    uint8_t highG = p2.g;
+    uint8_t highB = p2.b;
+    if( p1.r > p2.r) highR = p1.r;
+    if( p1.g > p2.g) highG = p1.g;
+    if( p1.b > p2.b) highB = p1.b;
+    return CRGB( highR, highG, highB);
+}
+
+// copy of p1 dimmed with nscale8_video( d); p1 itself is untouched
+__attribute__((always_inline))
+inline CRGB operator%( const CRGB& p1, uint8_t d)
+{
+    CRGB dimmed( p1);
+    return dimmed.nscale8_video( d);
+}
+
+
+
+// Define RGB orderings
+// The values are OCTAL literals (note the leading 0), so each name maps
+// to three 3-bit digits, each of which is 0, 1 or 2.
+// NOTE(review): presumably each digit is the channel index (0 = R,
+// 1 = G, 2 = B) to send in that position -- confirm against the
+// controller code that consumes EOrder.
+enum EOrder {
+ RGB=0012,
+ RBG=0021,
+ GRB=0102,
+ GBR=0120,
+ BRG=0201,
+ BGR=0210
+};
+
+
+#endif
diff --git a/preview_changes.txt b/preview_changes.txt
new file mode 100644
index 00000000..9b1a8831
--- /dev/null
+++ b/preview_changes.txt
@@ -0,0 +1,57 @@
+Release Candidate 5
+* Gemma and Trinket: supported except for global "setBrightness"
+
+Release Candidate 4
+* Added NEOPIXEL as a synonym for WS2811
+* Fix WS2811/WS2812B timings, bring it in line to exactly 1.25us/bit.
+* Fix handling of constant color definitions (damn you, gcc!)
+
+Release Candidate 3
+* Fixed bug when Clock and Data were on the same port
+* Added ability to set pixel color directly from HSV
+* Added ability to retrieve current random16 seed
+
+Release Candidate 2
+* mostly bug fixes
+* Fix SPI macro definitions for latest teensy3 software update
+* Teensy 2 compilation fix
+* hsv2rgb_rainbow performance fix
+
+Release Candidate 1
+* New unified/simplified API for adding/using controllers
+* flesh out clockless chip support
+* add hsv (spectrum and rainbow style colors)
+* high speed memory management operations
+* library for interpolation/easing functions
+* various api changes, addition of clear and showColor functions
+* scale value applied to all show methods
+* bug fixes for SM16716
+* performance improvements, lpd8806 exceeds 22Mbit now
+* hardware def fixes
+* allow alternate rgb color orderings
+* high speed math methods
+* rich CRGB structure
+
+Preview 3
+* True hardware SPI support for teensy (up to 20Mbit output!)
+* Minor bug fixes/tweaks
+
+Preview 2
+* Rename pin class to FastPin
+* Replace latch with select, more accurate description of what it does
+* Enforce intra-frame timing for ws2801s
+* SM16716 support
+* Add #define FAST_SPI_INTERRUPTS_WRITE_PINS to make sure world is ok w/interrupts and SPI
+* Add #define FORCE_SOFTWARE_SPI for those times when you absolutely don't want to use hardware SPI, even if you're using the hardware SPI pins
+* Add pin definitions for the arduino megas - should fix ws2811 support
+* Add pin definitions for the leonardo - should fix spi support and pin mappings
+* Add warnings when pin definitions are missing
+* Added google+ community for fastspi users - https://plus.google.com/communities/109127054924227823508
+* Add pin definitions for Teensy++ 2.0
+
+
+Preview 1
+* Initial release
+
+