diff options
author | Daniel Garcia <danielgarcia@gmail.com> | 2013-11-11 02:54:41 +0400 |
---|---|---|
committer | Daniel Garcia <danielgarcia@gmail.com> | 2013-11-11 02:54:41 +0400 |
commit | 6bcfa714588b12a72bdde36a1f0a43871fd5d567 (patch) | |
tree | 74c4fadde71b107f9a823928602141d673c0d9b5 | |
parent | e325d5d3f934aed2b301c224352b41a1d07e3693 (diff) | |
parent | 59edcab79837185feeea2dfe6f46b2c4ad17b8d8 (diff) |
Merge branch 'FastSPI_LED2'
-rw-r--r-- | FastLED.cpp | 79 | ||||
-rw-r--r-- | FastLED.h | 147 | ||||
-rw-r--r-- | chipsets.h | 262 | ||||
-rw-r--r-- | clockless.h | 318 | ||||
-rw-r--r-- | controller.h | 56 | ||||
-rw-r--r-- | delay.h | 62 | ||||
-rw-r--r-- | dmx.h | 115 | ||||
-rw-r--r-- | examples/Fast2Dev/Fast2Dev.ino | 98 | ||||
-rw-r--r-- | examples/FirstLight/FirstLight.ino | 66 | ||||
-rw-r--r-- | examples/RGBCalibrate/RGBCalibrate.ino | 66 | ||||
-rw-r--r-- | fastpin.h | 424 | ||||
-rw-r--r-- | fastspi.h | 91 | ||||
-rw-r--r-- | fastspi_arm.h | 386 | ||||
-rw-r--r-- | fastspi_avr.h | 314 | ||||
-rw-r--r-- | fastspi_bitbang.h | 368 | ||||
-rw-r--r-- | fastspi_dma.h | 0 | ||||
-rw-r--r-- | hsv2rgb.cpp | 495 | ||||
-rw-r--r-- | hsv2rgb.h | 59 | ||||
-rw-r--r-- | lib8tion.cpp | 242 | ||||
-rw-r--r-- | lib8tion.h | 1272 | ||||
-rw-r--r-- | pixeltypes.h | 659 | ||||
-rw-r--r-- | preview_changes.txt | 57 |
22 files changed, 5636 insertions, 0 deletions
diff --git a/FastLED.cpp b/FastLED.cpp new file mode 100644 index 00000000..85095210 --- /dev/null +++ b/FastLED.cpp @@ -0,0 +1,79 @@ +#include "FastSPI_LED2.h" + + +CFastLED LEDS; +CFastLED & FastSPI_LED = LEDS; +CFastLED & FastSPI_LED2 = LEDS; +CFastLED & FastLED = LEDS; + +uint32_t CRGB::Squant = ((uint32_t)((__TIME__[4]-'0') * 28))<<16 | ((__TIME__[6]-'0')*50)<<8 | ((__TIME__[7]-'0')*28); + +CFastLED::CFastLED() { + // clear out the array of led controllers + m_nControllers = NUM_CONTROLLERS; + m_nScale = 255; + memset8(m_Controllers, 0, m_nControllers * sizeof(CControllerInfo)); +} + +CLEDController *CFastLED::addLeds(CLEDController *pLed, + const struct CRGB *data, + int nLedsOrOffset, int nLedsIfOffset) { + int nOffset = (nLedsIfOffset > 0) ? nLedsOrOffset : 0; + int nLeds = (nLedsIfOffset > 0) ? nLedsIfOffset : nLedsOrOffset; + + int target = -1; + + // Figure out where to put the new led controller + for(int i = 0; i < m_nControllers; i++) { + if(m_Controllers[i].pLedController == NULL) { + target = i; + break; + } + } + + // if we have a spot, use it! 
+ if(target != -1) { + m_Controllers[target].pLedController = pLed; + m_Controllers[target].pLedData = data; + m_Controllers[target].nOffset = nOffset; + m_Controllers[target].nLeds = nLeds; + pLed->init(); + return pLed; + } + + return NULL; +} + +void CFastLED::show(uint8_t scale) { + for(int i = 0; i < m_nControllers; i++) { + if(m_Controllers[i].pLedController != NULL) { + m_Controllers[i].pLedController->show(m_Controllers[i].pLedData + m_Controllers[i].nOffset, + m_Controllers[i].nLeds, scale); + } else { + return; + } + } +} + +void CFastLED::showColor(const struct CRGB & color, uint8_t scale) { + for(int i = 0; i < m_nControllers; i++) { + if(m_Controllers[i].pLedController != NULL) { + m_Controllers[i].pLedController->showColor(color, m_Controllers[i].nLeds, scale); + } else { + return; + } + } +} + +void CFastLED::clear(boolean includeLedData) { + showColor(CRGB(0,0,0), 0); + if(includeLedData) { + for(int i = 0; i < m_nControllers; i++) { + if(m_Controllers[i].pLedData != NULL) { + memset8((void*)m_Controllers[i].pLedData, 0, sizeof(struct CRGB) * m_Controllers[i].nLeds); + } else { + return; + } + } + } +} diff --git a/FastLED.h b/FastLED.h new file mode 100644 index 00000000..a2891fcd --- /dev/null +++ b/FastLED.h @@ -0,0 +1,147 @@ +#ifndef __INC_FASTSPI_LED2_H +#define __INC_FASTSPI_LED2_H + +#include "controller.h" +#include "fastpin.h" +#include "fastspi.h" +#include "clockless.h" +#include "lib8tion.h" +#include "hsv2rgb.h" +#include "chipsets.h" +#include "dmx.h" + +enum ESPIChipsets { + LPD8806, + WS2801, + SM16716 +}; + +enum EClocklessChipsets { + DMX, + TM1809, + TM1804, + TM1803, + WS2811, + WS2812, + WS2812B, + WS2811_400, + NEOPIXEL, + UCS1903 +}; + +#define NUM_CONTROLLERS 8 + +class CFastLED { + struct CControllerInfo { + CLEDController *pLedController; + const struct CRGB *pLedData; + int nLeds; + int nOffset; + }; + + CControllerInfo m_Controllers[NUM_CONTROLLERS]; + int m_nControllers; + uint8_t m_nScale; + +public: + CFastLED(); + + 
CLEDController *addLeds(CLEDController *pLed, const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0); + + template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + switch(CHIPSET) { + case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + } + } + + template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + switch(CHIPSET) { + case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + } + } + + template<ESPIChipsets CHIPSET, uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE > CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + switch(CHIPSET) { + case LPD8806: return addLeds(new LPD8806Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2801: return addLeds(new WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset); + case SM16716: return addLeds(new SM16716Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE>(), data, nLedsOrOffset, nLedsIfOffset); + } + } + +#ifdef SPI_DATA + template<ESPIChipsets CHIPSET> CLEDController *addLeds(const struct CRGB *data, int 
nLedsOrOffset, int nLedsIfOffset = 0) { + return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB>(data, nLedsOrOffset, nLedsIfOffset); + } + + template<ESPIChipsets CHIPSET, EOrder RGB_ORDER> CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER>(data, nLedsOrOffset, nLedsIfOffset); + } + + template<ESPIChipsets CHIPSET, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE> CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER, SPI_DATA_RATE>(data, nLedsOrOffset, nLedsIfOffset); + } + +#endif + + template<EClocklessChipsets CHIPSET, uint8_t DATA_PIN> + CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + switch(CHIPSET) { +#ifdef FASTSPI_USE_DMX_SIMPLE + case DMX: return addLeds(new DMXController<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); +#endif + case TM1804: + case TM1809: return addLeds(new TM1809Controller800Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case TM1803: return addLeds(new TM1803Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case UCS1903: return addLeds(new UCS1903Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2812: + case WS2812B: + case NEOPIXEL: + case WS2811: return addLeds(new WS2811Controller800Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2811_400: return addLeds(new WS2811Controller400Khz<DATA_PIN>(), data, nLedsOrOffset, nLedsIfOffset); + } + } + + template<EClocklessChipsets CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER> + CLEDController *addLeds(const struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) { + switch(CHIPSET) { +#ifdef FASTSPI_USE_DMX_SIMPLE + case DMX: return addLeds(new DMXController<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); +#endif + case TM1809: return addLeds(new TM1809Controller800Khz<DATA_PIN, 
RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case TM1803: return addLeds(new TM1803Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case UCS1903: return addLeds(new UCS1903Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2812: + case WS2812B: + case NEOPIXEL: + case WS2811: return addLeds(new WS2811Controller800Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + case WS2811_400: return addLeds(new WS2811Controller400Khz<DATA_PIN, RGB_ORDER>(), data, nLedsOrOffset, nLedsIfOffset); + } + } + + void setBrightness(uint8_t scale) { m_nScale = scale; } + uint8_t getBrightness() { return m_nScale; } + + /// Update all our controllers with the current led colors, using the passed in brightness + void show(uint8_t scale); + + /// Update all our controllers with the current led colors + void show() { show(m_nScale); } + + void clear(boolean includeLedData = true); + + void showColor(const struct CRGB & color, uint8_t scale); + + void showColor(const struct CRGB & color) { showColor(color, m_nScale); } + +}; + +extern CFastLED & FastSPI_LED; +extern CFastLED & FastSPI_LED2; +extern CFastLED & FastLED; +extern CFastLED LEDS; + +#endif diff --git a/chipsets.h b/chipsets.h new file mode 100644 index 00000000..5688ed48 --- /dev/null +++ b/chipsets.h @@ -0,0 +1,262 @@ +#ifndef __INC_CHIPSETS_H +#define __INC_CHIPSETS_H + +#include "pixeltypes.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// LPD8806 controller class - takes data/clock/select pin values (N.B. should take an SPI definition?) 
+// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(24) > +class LPD8806Controller : public CLEDController { + typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; + + class LPD8806_ADJUST { + public: + // LPD8806 spec wants the high bit of every rgb data byte sent out to be set. + __attribute__((always_inline)) inline static uint8_t adjust(register uint8_t data) { return (data>>1) | 0x80; } + __attribute__((always_inline)) inline static uint8_t adjust(register uint8_t data, register uint8_t scale) { return (scale8(data, scale)>>1) | 0x80; } + __attribute__((always_inline)) inline static void postBlock(int len) { + SPI::writeBytesValueRaw(0, ((len+63)>>6)); + } + + }; + + SPI mSPI; + int mClearedLeds; + + void checkClear(int nLeds) { + if(nLeds > mClearedLeds) { + clearLine(nLeds); + mClearedLeds = nLeds; + } + } + + void clearLine(int nLeds) { + int n = ((nLeds + 63) >> 6); + mSPI.writeBytesValue(0, n); + } +public: + LPD8806Controller() {} + virtual void init() { + mSPI.init(); + mClearedLeds = 0; + } + + virtual void clearLeds(int nLeds) { + mSPI.select(); + mSPI.writeBytesValueRaw(0x80, nLeds * 3); + mSPI.writeBytesValueRaw(0, ((nLeds*3+63)>>6)); + mSPI.release(); + } + + virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) { + mSPI.select(); + uint8_t a = 0x80 | (scale8(data[RGB_BYTE0(RGB_ORDER)], scale) >> 1); + uint8_t b = 0x80 | (scale8(data[RGB_BYTE1(RGB_ORDER)], scale) >> 1); + uint8_t c = 0x80 | (scale8(data[RGB_BYTE2(RGB_ORDER)], scale) >> 1); + int iLeds = 0; + + while(iLeds++ < nLeds) { + mSPI.writeByte(a); + mSPI.writeByte(b); + mSPI.writeByte(c); + } + + // latch in the world + mSPI.writeBytesValueRaw(0, ((nLeds*3+63)>>6)); + mSPI.release(); + } + + virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) { + 
mSPI.template writeBytes3<LPD8806_ADJUST, RGB_ORDER>((byte*)data, nLeds * 3, scale); + } + +#ifdef SUPPORT_ARGB + virtual void show(const struct CARGB *data, int nLeds, uint8_t scale) { + checkClear(nLeds); + mSPI.template writeBytes3<1, LPD8806_ADJUST, RGB_ORDER>((byte*)data, nLeds * 4, scale); + } +#endif +}; + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// WS2801 definition - takes data/clock/select pin values (N.B. should take an SPI definition?) +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(1)> +class WS2801Controller : public CLEDController { + typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; + SPI mSPI; + CMinWait<500> mWaitDelay; +public: + WS2801Controller() {} + + virtual void init() { + mSPI.init(); + mWaitDelay.mark(); + } + + virtual void clearLeds(int nLeds) { + mWaitDelay.wait(); + mSPI.writeBytesValue(0, nLeds*3); + mWaitDelay.mark(); + } + + virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) { + mWaitDelay.wait(); + mSPI.select(); + uint8_t a = scale8(data[RGB_BYTE0(RGB_ORDER)], scale); + uint8_t b = scale8(data[RGB_BYTE1(RGB_ORDER)], scale); + uint8_t c = scale8(data[RGB_BYTE2(RGB_ORDER)], scale); + + while(nLeds--) { + mSPI.writeByte(a); + mSPI.writeByte(b); + mSPI.writeByte(c); + } + mSPI.waitFully(); + mSPI.release(); + mWaitDelay.mark(); + } + + virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { + mWaitDelay.wait(); + mSPI.template writeBytes3<0, RGB_ORDER>((byte*)data, nLeds * 3, scale); + mWaitDelay.mark(); + } + +#ifdef SUPPORT_ARGB + virtual void show(const struct CRGB *data, int nLeds, uint8_t scale) { + mWaitDelay.wait(); + mSPI.template writeBytes3<1, RGB_ORDER>((byte*)data, nLeds * 4, scale); 
+ mWaitDelay.mark(); + } +#endif +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// SM16716 definition - takes data/clock/select pin values (N.B. should take an SPI definition?) +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(16)> +class SM16716Controller : public CLEDController { + typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI; + SPI mSPI; + + void writeHeader() { + // Write out 50 zeros to the spi line (6 blocks of 8 followed by two single bit writes) + mSPI.select(); + mSPI.writeBytesValueRaw(0, 6); + mSPI.waitFully(); + mSPI.template writeBit<0>(0); + mSPI.template writeBit<0>(0); + mSPI.release(); + } + +public: + SM16716Controller() {} + + virtual void init() { + mSPI.init(); + } + + virtual void clearLeds(int nLeds) { + mSPI.select(); + while(nLeds--) { + mSPI.template writeBit<0>(1); + mSPI.writeByte(0); + mSPI.writeByte(0); + mSPI.writeByte(0); + } + mSPI.waitFully(); + mSPI.release(); + writeHeader(); + } + + virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) { + mSPI.select(); + uint8_t a = scale8(data[RGB_BYTE0(RGB_ORDER)], scale); + uint8_t b = scale8(data[RGB_BYTE1(RGB_ORDER)], scale); + uint8_t c = scale8(data[RGB_BYTE2(RGB_ORDER)], scale); + + while(nLeds--) { + mSPI.template writeBit<0>(1); + mSPI.writeByte(a); + mSPI.writeByte(b); + mSPI.writeByte(c); + } + writeHeader(); + mSPI.release(); + } + + virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) { + // Make sure the FLAG_START_BIT flag is set to ensure that an extra 1 bit is sent at the start + // of each triplet of bytes for rgb data + // writeHeader(); + mSPI.template writeBytes3<FLAG_START_BIT, RGB_ORDER>((byte*)data, nLeds * 3, scale); + 
writeHeader(); + } + +#ifdef SUPPORT_ARGB + virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) { + mSPI.writeBytesValue(0, 6); + mSPI.template writeBit<0>(0); + mSPI.template writeBit<0>(0); + + // Make sure the FLAG_START_BIT flag is set to ensure that an extra 1 bit is sent at the start + // of each triplet of bytes for rgb data + mSPI.template writeBytes3<1 | FLAG_START_BIT, RGB_ORDER>((byte*)data, nLeds * 4, scale); + } +#endif +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Clockless template instantiations +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// UCS1903 - 500ns, 1500ns, 500ns +template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> +class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, NS(500), NS(1500), NS(500), RGB_ORDER> {}; +#if NO_TIME(500, 1500, 500) +#warning "No enough clock cycles available for the UCS103" +#endif + +// TM1809 - 312.5ns, 312.5ns, 325ns +template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> +class TM1809Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS(350), NS(550), RGB_ORDER> {}; +#if NO_TIME(350, 350, 550) +#warning "No enough clock cycles available for the TM1809" +#endif + +// WS2811 - 400ns, 400ns, 450ns +template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> +class WS2811Controller800Khz : public ClocklessController<DATA_PIN, NS(400), NS(400), NS(450), RGB_ORDER> {}; +#if NO_TIME(400, 400, 450) +#warning "No enough clock cycles available for the WS2811 (800khz)" +#endif + +// WS2811@400khz - 800ns, 800ns, 900ns +template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> +class WS2811Controller400Khz : public ClocklessController<DATA_PIN, NS(800), NS(800), NS(900), RGB_ORDER> {}; +#if NO_TIME(800, 800, 900) +#warning "No enough clock cycles available for the WS2811 (400Khz)" +#endif + +// 
750NS, 750NS, 750NS +template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> +class TM1803Controller400Khz : public ClocklessController<DATA_PIN, NS(750), NS(750), NS(750), RGB_ORDER> {}; +#if NO_TIME(750, 750, 750) +#warning "No enough clock cycles available for the UCS103" +#endif + +#endif diff --git a/clockless.h b/clockless.h new file mode 100644 index 00000000..238276ef --- /dev/null +++ b/clockless.h @@ -0,0 +1,318 @@ +#ifndef __INC_CLOCKLESS_H +#define __INC_CLOCKLESS_H + +#include "controller.h" +#include "lib8tion.h" +#include <avr/interrupt.h> // for cli/se definitions + +// Macro to convert from nano-seconds to clocks and clocks to nano-seconds +// #define NS(_NS) (_NS / (1000 / (F_CPU / 1000000L))) +#if F_CPU < 96000000 +#define NS(_NS) ( (_NS * (F_CPU / 1000000L))) / 1000 +#define CLKS_TO_MICROS(_CLKS) ((long)(_CLKS)) / (F_CPU / 1000000L) +#else +#define NS(_NS) ( (_NS * (F_CPU / 2000000L))) / 1000 +#define CLKS_TO_MICROS(_CLKS) ((long)(_CLKS)) / (F_CPU / 2000000L) +#endif + +// Macro for making sure there's enough time available +#define NO_TIME(A, B, C) (NS(A) < 3 || NS(B) < 3 || NS(C) < 6) + +#if defined(__MK20DX128__) + extern volatile uint32_t systick_millis_count; +# define MS_COUNTER systick_millis_count +#else +# if defined(CORE_TEENSY) + extern volatile unsigned long timer0_millis_count; +# define MS_COUNTER timer0_millis_count +# else + extern volatile unsigned long timer0_millis; +# define MS_COUNTER timer0_millis +# endif +#endif + +// Scaling macro choice +#if defined(LIB8_ATTINY) +# define INLINE_SCALE(B, SCALE) delaycycles<3>() +# warning "No hardware multiply, inline brightness scaling disabled" +#else +# define INLINE_SCALE(B, SCALE) B = scale8_LEAVING_R1_DIRTY(B, SCALE) +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Base template for clockless controllers. These controllers have 3 control points in their cycle for each bit. 
The first point +// is where the line is raised hi. The second pointsnt is where the line is dropped low for a zero. The third point is where the +// line is dropped low for a one. T1, T2, and T3 correspond to the timings for those three in clock cycles. +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int WAIT_TIME = 50> +class ClocklessController : public CLEDController { + typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t; + typedef typename FastPin<DATA_PIN>::port_t data_t; + + data_t mPinMask; + data_ptr_t mPort; + CMinWait<WAIT_TIME> mWait; +public: + virtual void init() { + FastPin<DATA_PIN>::setOutput(); + mPinMask = FastPin<DATA_PIN>::mask(); + mPort = FastPin<DATA_PIN>::port(); + } + +#if defined(__MK20DX128__) + // We don't use the bitSetFast methods for ARM. +#else + template <int N>inline static void bitSetFast(register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t b) { + // First cycle + FastPin<DATA_PIN>::fastset(port, hi); // 1/2 clock cycle if using out + delaycycles<T1 - (_CYCLES(DATA_PIN) + 1)>(); // 1st cycle length minus 1/2 clock for out, 1 clock for sbrs + __asm__ __volatile__ ("sbrs %0, %1" :: "r" (b), "M" (N) :); // 1 clock for check (+1 if skipping, next op is also 1 clock) + + // Second cycle + FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out + delaycycles<T2 - _CYCLES(DATA_PIN)>(); // 2nd cycle length minus 1/2 clock for out + + // Third cycle + FastPin<DATA_PIN>::fastset(port, lo); // 1 clock cycle if using out + delaycycles<T3 - _CYCLES(DATA_PIN)>(); // 3rd cycle length minus 1 clock for out + } + + #define END_OF_BYTE + #define END_OF_LOOP 6 // loop compare, jump, next uint8_t load + template <int N, int ADJ>inline static void bitSetLast(register data_ptr_t port, register data_t hi, register data_t lo, register 
uint8_t b) { + // First cycle + FastPin<DATA_PIN>::fastset(port, hi); // 1 clock cycle if using out, 2 otherwise + delaycycles<T1 - (_CYCLES(DATA_PIN))>(); // 1st cycle length minus 1 clock for out, 1 clock for sbrs + __asm__ __volatile__ ("sbrs %0, %1" :: "r" (b), "M" (N) :); // 1 clock for check (+1 if skipping, next op is also 1 clock) + + // Second cycle + FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out + delaycycles<T2 - (_CYCLES(DATA_PIN))>(); // 2nd cycle length minus 1/2 clock for out + + // Third cycle + FastPin<DATA_PIN>::fastset(port, lo); // 1/2 clock cycle if using out + delaycycles<T3 - (_CYCLES(DATA_PIN) + ADJ)>(); // 3rd cycle length minus 7 clocks for out, loop compare, jump, next uint8_t load + } +#endif + + virtual void clearLeds(int nLeds) { + showColor(CRGB(0, 0, 0), nLeds, 0); + } + + // set all the leds on the controller to a given color + virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) { + mWait.wait(); + cli(); + + showRGBInternal<0, false>(nLeds, scale, (const byte*)&data); + + // Adjust the timer + long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3)); + MS_COUNTER += (microsTaken / 1000); + sei(); + mWait.mark(); + } + + virtual void show(const struct CRGB *rgbdata, int nLeds, uint8_t scale = 255) { + mWait.wait(); + cli(); + + showRGBInternal<0, true>(nLeds, scale, (const byte*)rgbdata); + + // Adjust the timer + long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3)); + MS_COUNTER += (microsTaken / 1000); + sei(); + mWait.mark(); + } + +#ifdef SUPPORT_ARGB + virtual void show(const struct CARGB *rgbdata, int nLeds, uint8_t scale = 255) { + mWait.wait(); + cli(); + + showRGBInternal<1, true>(nLeds, scale, (const byte*)rgbdata); + + // Adjust the timer + long microsTaken = CLKS_TO_MICROS((long)nLeds * 24 * (T1 + T2 + T3)); + MS_COUNTER += (microsTaken / 1000); + sei(); + mWait.mark(); + } +#endif + +#if defined(__MK20DX128__) + inline static void 
write8Bits(register data_ptr_t port, register data_t hi, register data_t lo, register uint32_t & b) __attribute__ ((always_inline)) { + // TODO: hand rig asm version of this method. The timings are based on adjusting/studying GCC compiler ouptut. This + // will bite me in the ass at some point, I know it. + for(register uint32_t i = 7; i > 0; i--) { + FastPin<DATA_PIN>::fastset(port, hi); + delaycycles<T1 - 5>(); // 5 cycles - 2 store, 1 and, 1 test, 1 if + if(b & 0x80) { FastPin<DATA_PIN>::fastset(port, hi); } else { FastPin<DATA_PIN>::fastset(port, lo); } + b <<= 1; + delaycycles<T2 - 2>(); // 2 cycles, 1 store/skip, 1 shift + FastPin<DATA_PIN>::fastset(port, lo); + delaycycles<T3 - 5>(); // 3 cycles, 2 store, 1 sub, 1 branch backwards + } + // delay an extra cycle because falling out of the loop takes on less cycle than looping around + delaycycles<1>(); + + FastPin<DATA_PIN>::fastset(port, hi); + delaycycles<T1 - 6>(); + if(b & 0x80) { FastPin<DATA_PIN>::fastset(port, hi); } else { FastPin<DATA_PIN>::fastset(port, lo); } + delaycycles<T2 - 2>(); // 4 cycles, 2 store, store/skip + FastPin<DATA_PIN>::fastset(port, lo); + } +#endif + + // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then + // gcc will use register Y for the this pointer. 
+ template<int SKIP, bool ADVANCE> static void showRGBInternal(register int nLeds, register uint8_t scale, register const byte *rgbdata) { + register byte *data = (byte*)rgbdata; + register data_t mask = FastPin<DATA_PIN>::mask(); + register data_ptr_t port = FastPin<DATA_PIN>::port(); + nLeds *= (3 + SKIP); + register uint8_t *end = data + nLeds; + register data_t hi = *port | mask; + register data_t lo = *port & ~mask; + *port = lo; + +#if defined(__MK20DX128__) + register uint32_t b; + b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE0(RGB_ORDER)]; + b = scale8(b, scale); + while(data < end) { + // Write first byte, read next byte + write8Bits(port, hi, lo, b); + + b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE1(RGB_ORDER)]; + INLINE_SCALE(b, scale); + delaycycles<T3 - 5>(); // 1 store, 2 load, 1 mul, 1 shift, + + // Write second byte + write8Bits(port, hi, lo, b); + + b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE2(RGB_ORDER)]; + INLINE_SCALE(b, scale); + + data += 3 + SKIP; + if((RGB_ORDER & 0070) == 0) { + delaycycles<T3 - 6>(); // 1 store, 2 load, 1 mul, 1 shift, 1 adds if BRG or GRB + } else { + delaycycles<T3 - 5>(); // 1 store, 2 load, 1 mul, 1 shift, + } + + // Write third byte + write8Bits(port, hi, lo, b); + + b = ((ADVANCE)?data:rgbdata)[SKIP + RGB_BYTE0(RGB_ORDER)]; + INLINE_SCALE(b, scale); + + delaycycles<T3 - 11>(); // 1 store, 2 load (with increment), 1 mul, 1 shift, 1 cmp, 1 branch backwards, 1 movim + }; +#else +#if 0 + register uint8_t b = *data++; + while(data <= end) { + bitSetFast<7>(port, hi, lo, b); + bitSetFast<6>(port, hi, lo, b); + bitSetFast<5>(port, hi, lo, b); + bitSetFast<4>(port, hi, lo, b); + bitSetFast<3>(port, hi, lo, b); + // Leave an extra 2 clocks for the next byte load + bitSetLast<2, 2>(port, hi, lo, b); + register uint8_t next = *data++; + // Leave an extra 4 clocks for the scale + bitSetLast<1, 4>(port, hi, lo, b); + next = scale8(next, scale); + bitSetLast<0, END_OF_LOOP>(port, hi, lo, b); + b = next; + } +#else + register 
uint8_t b; + + if(ADVANCE) { + b = data[SKIP + RGB_BYTE0(RGB_ORDER)]; + } else { + b = rgbdata[SKIP + RGB_BYTE0(RGB_ORDER)]; + } + b = scale8_LEAVING_R1_DIRTY(b, scale); + + register uint8_t c; + register uint8_t d; + while(data < end) { + for(register byte x=5; x; x--) { + bitSetLast<7, 4>(port, hi, lo, b); + b <<= 1; + } + delaycycles<1>(); + // Leave an extra 2 clocks for the next byte load + bitSetLast<7, 1>(port, hi, lo, b); + delaycycles<1>(); + + // Leave an extra 4 clocks for the scale + bitSetLast<6, 6>(port, hi, lo, b); + if(ADVANCE) { + c = data[SKIP + RGB_BYTE1(RGB_ORDER)]; + } else { + c = rgbdata[SKIP + RGB_BYTE1(RGB_ORDER)]; + delaycycles<1>(); + } + INLINE_SCALE(c, scale); + bitSetLast<5, 1>(port, hi, lo, b); + + for(register byte x=5; x; x--) { + bitSetLast<7, 4>(port, hi, lo, c); + c <<= 1; + } + delaycycles<1>(); + // Leave an extra 2 clocks for the next byte load + bitSetLast<7, 1>(port, hi, lo, c); + delaycycles<1>(); + + // Leave an extra 4 clocks for the scale + bitSetLast<6, 6>(port, hi, lo, c); + if(ADVANCE) { + d = data[SKIP + RGB_BYTE2(RGB_ORDER)]; + } else { + d = rgbdata[SKIP + RGB_BYTE2(RGB_ORDER)]; + delaycycles<1>(); + } + INLINE_SCALE(d, scale); + bitSetLast<5, 1>(port, hi, lo, c); + + for(register byte x=5; x; x--) { + bitSetLast<7, 4>(port, hi, lo, d); + d <<= 1; + } + delaycycles<1>(); + // Leave an extra 2 clocks for the next byte load + bitSetLast<7, 2>(port, hi, lo, d); + data += (SKIP + 3); + // Leave an extra 4 clocks for the scale + bitSetLast<6, 6>(port, hi, lo, d); + if(ADVANCE) { + b = data[SKIP + RGB_BYTE0(RGB_ORDER)]; + } else { + b = rgbdata[SKIP + RGB_BYTE0(RGB_ORDER)]; + delaycycles<1>(); + } + INLINE_SCALE(b, scale); + bitSetLast<5, 6>(port, hi, lo, d); + } + cleanup_R1(); +#endif +#endif + } + +#ifdef SUPPORT_ARGB + virtual void showARGB(struct CARGB *data, int nLeds) { + // TODO: IMPLEMENTME + } +#endif +}; + +#endif diff --git a/controller.h b/controller.h new file mode 100644 index 00000000..2c703f00 --- 
/dev/null +++ b/controller.h @@ -0,0 +1,56 @@ +#ifndef __INC_CONTROLLER_H +#define __INC_CONTROLLER_H + +#include <avr/io.h> +#include "pixeltypes.h" + + +#define RGB_BYTE0(X) ((X>>6) & 0x3) +#define RGB_BYTE1(X) ((X>>3) & 0x3) +#define RGB_BYTE2(X) ((X) & 0x3) + +// operator byte *(struct CRGB[] arr) { return (byte*)arr; } + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// LED Controller interface definition +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Base definition for an LED controller. Pretty much the methods that every LED controller object will make available. +/// Note that the showARGB method is not impelemented for all controllers yet. Note also the methods for eventual checking +/// of background writing of data (I'm looking at you, teensy 3.0 DMA controller!). If you want to pass LED controllers around +/// to methods, make them references to this type, keeps your code saner. +class CLEDController { +public: + // initialize the LED controller + virtual void init() = 0; + + // reset any internal state to a clean point + virtual void reset() { init(); } + + // clear out/zero out the given number of leds. + virtual void clearLeds(int nLeds) = 0; + + // set all the leds on the controller to a given color + virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) = 0; + + // note that the uint8_ts will be in the order that you want them sent out to the device. 
+ // nLeds is the number of RGB leds being written to + virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) = 0; + +#ifdef SUPPORT_ARGB + // as above, but every 4th uint8_t is assumed to be alpha channel data, and will be skipped + virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0; +#endif + + // is the controller ready to write data out + virtual bool ready() { return true; } + + // wait until the controller is ready to write data out + virtual void wait() { return; } + +}; + +#endif
// \ No newline at end of file
// diff --git a/delay.h b/delay.h
// new file mode 100644
// index 00000000..c29de694
// --- /dev/null
// +++ b/delay.h
// @@ -0,0 +1,62 @@
#ifndef __INC_DELAY_H
#define __INC_DELAY_H

////////////////////////////////////////////////////////////////////////////////////////////
//
// Clock cycle counted delay loop
//
////////////////////////////////////////////////////////////////////////////////////////////

// One "do nothing" instruction. AVR keeps the original `cp r0,r0`; every other target
// (ARM included, as before) uses a plain `nop`.
#if defined(__AVR__)
#  define NOP __asm__ __volatile__ ("cp r0,r0\n");
#else
#  define NOP __asm__ __volatile__ ("nop\n");
#endif

// predeclaration to not upset the compiler
template<int CYCLES> inline void delaycycles();

#if defined(__AVR__)
// TODO: ARM version of _delaycycles_
// worker template - this will nop for LOOP * 3 + PAD cycles total
//
// LOOP is loaded into an 8 bit register through the "M" (8 bit constant) constraint, so it has
// to fit in 0..255. LOOP == 0 would wrap around on the first DEC and spin 256 times; the small
// CYCLES values that would produce it are caught by the explicit specializations below.
template<int LOOP, int PAD> inline void _delaycycles_AVR() {
	delaycycles<PAD>();
	// the loop below is 3 cycles * LOOP. the LDI is one cycle,
	// the DEC is 1 cycle, the BRNE is 2 cycles if looping back and
	// 1 if not (the LDI balances out the BRNE being 1 cycle on exit)
	__asm__ __volatile__ (
		"    LDI R16, %0\n"
		"L_%=: DEC R16\n"
		"    BRNE L_%=\n"
		: /* no outputs */
		: "M" (LOOP)
		: "r16"
		);
}

// usable definition (AVR): split the request into a counted 3-cycle loop plus 0..2 padding nops
template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() {
	_delaycycles_AVR<CYCLES / 3, CYCLES % 3>();
}
#else
// usable definition (everything that is not AVR): one nop per cycle, unrolled at compile time.
// The recursion ends at the explicit specializations below, so CYCLES must not be less than -6.
template<int CYCLES> __attribute__((always_inline)) inline void delaycycles() {
	NOP; delaycycles<CYCLES-1>();
}
#endif

// pre-instantiations for values small enough to not need the loop, as well as sanity holders
// for some negative values.
template<> __attribute__((always_inline)) inline void delaycycles<-6>() {}
template<> __attribute__((always_inline)) inline void delaycycles<-5>() {}
template<> __attribute__((always_inline)) inline void delaycycles<-4>() {}
template<> __attribute__((always_inline)) inline void delaycycles<-3>() {}
template<> __attribute__((always_inline)) inline void delaycycles<-2>() {}
template<> __attribute__((always_inline)) inline void delaycycles<-1>() {}
template<> __attribute__((always_inline)) inline void delaycycles<0>() {}
template<> __attribute__((always_inline)) inline void delaycycles<1>() {NOP;}
template<> __attribute__((always_inline)) inline void delaycycles<2>() {NOP;NOP;}
template<> __attribute__((always_inline)) inline void delaycycles<3>() {NOP;NOP;NOP;}
template<> __attribute__((always_inline)) inline void delaycycles<4>() {NOP;NOP;NOP;NOP;}
template<> __attribute__((always_inline)) inline void delaycycles<5>() {NOP;NOP;NOP;NOP;NOP;}

#endif
// \ No newline at end of file
// @@ -0,0 +1,115 @@
#ifndef __INC_DMX_H
#define __INC_DMX_H

//#ifdef DmxSimple_H
//#if USE_DMX_SIMPLE
#ifdef FASTSPI_USE_DMX_SIMPLE
#include<DmxSimple.h>
// note - dmx simple must be included before FastSPI for this code to be enabled

/// LED controller that pushes pixel data out as DMX channels through the DmxSimple library.
/// Each led occupies three consecutive channels, starting at channel 1, in RGB_ORDER.
/// DATA_PIN is the pin handed to DmxSimple.usePin().
template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class DMXController : public CLEDController {
public:
	// initialize the LED controller
	virtual void init() { DmxSimple.usePin(DATA_PIN); }

	// reset any internal state to a clean point
	virtual void reset() { init(); }

	// clear out/zero out the given number of leds.
	virtual void clearLeds(int nLeds) {
		int count = min(nLeds * 3, DMX_SIZE);
		for(int iChannel = 1; iChannel <= count; iChannel++) { DmxSimple.write(iChannel, 0); }
	}

	// set all the leds on the controller to a given color
	virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
		int count = min(nLeds, DMX_SIZE / 3);
		int iChannel = 1;
		for(int i = 0; i < count; i++) {
			DmxSimple.write(iChannel++, scale8(data[RGB_BYTE0(RGB_ORDER)], scale));
			DmxSimple.write(iChannel++, scale8(data[RGB_BYTE1(RGB_ORDER)], scale));
			DmxSimple.write(iChannel++, scale8(data[RGB_BYTE2(RGB_ORDER)], scale));
		}
	}

	// note that the uint8_ts will be in the order that you want them sent out to the device.
	// nLeds is the number of RGB leds being written to
	virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
		int count = min(nLeds, DMX_SIZE / 3);
		int iChannel = 1;
		for(int i = 0; i < count; i++) {
			DmxSimple.write(iChannel++, scale8(data[i][RGB_BYTE0(RGB_ORDER)], scale));
			DmxSimple.write(iChannel++, scale8(data[i][RGB_BYTE1(RGB_ORDER)], scale));
			DmxSimple.write(iChannel++, scale8(data[i][RGB_BYTE2(RGB_ORDER)], scale));
		}

	}

#ifdef SUPPORT_ARGB
	// as above, but every 4th uint8_t is assumed to be alpha channel data, and will be skipped
	// NOTE(review): left pure virtual here, so this class stays abstract whenever SUPPORT_ARGB is defined.
	virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0;
#endif

	// is the controller ready to write data out
	virtual bool ready() { return true; }

	// wait until the controller is ready to write data out
	virtual void wait() { return; }

};

#elif defined(DmxSerial_h)

/// LED controller that pushes pixel data out as DMX channels through the DMXSerial library.
/// Each led occupies three consecutive channels, starting at channel 1, in RGB_ORDER.
/// DATA_PIN is not used by this variant.
template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB> class DMXController : public CLEDController {
public:
	// initialize the LED controller
	// NOTE(review): inside this class the name DMXController refers to the class itself, not to
	// DMXSerial's controller-mode constant of the same name - confirm this builds against DMXSerial.
	virtual void init() { DMXSerial.init(DMXController); }

	// reset any internal state to a clean point
	virtual void reset() { init(); }

	// clear out/zero out the given number of leds.
	// NOTE(review): DMX_SIZE is presumably supplied by DmxSimple.h - confirm it exists in a DMXSerial-only build.
	virtual void clearLeds(int nLeds) {
		int count = min(nLeds * 3, DMX_SIZE);
		// DMX channels are 1-based, and the writes have to go through DMXSerial in this variant
		for(int iChannel = 1; iChannel <= count; iChannel++) { DMXSerial.write(iChannel, 0); }
	}

	// set all the leds on the controller to a given color
	virtual void showColor(const struct CRGB & data, int nLeds, uint8_t scale = 255) {
		int count = min(nLeds, DMX_SIZE / 3);
		int iChannel = 1;	// first DMX channel; starting at 0 would make the first two writes collide
		for(int i = 0; i < count; i++) {
			DMXSerial.write(iChannel++, scale8(data[RGB_BYTE0(RGB_ORDER)], scale));
			DMXSerial.write(iChannel++, scale8(data[RGB_BYTE1(RGB_ORDER)], scale));
			DMXSerial.write(iChannel++, scale8(data[RGB_BYTE2(RGB_ORDER)], scale));
		}
	}

	// note that the uint8_ts will be in the order that you want them sent out to the device.
	// nLeds is the number of RGB leds being written to
	virtual void show(const struct CRGB *data, int nLeds, uint8_t scale = 255) {
		int count = min(nLeds, DMX_SIZE / 3);
		int iChannel = 1;	// first DMX channel; starting at 0 would make the first two writes collide
		for(int i = 0; i < count; i++) {
			DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE0(RGB_ORDER)], scale));
			DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE1(RGB_ORDER)], scale));
			DMXSerial.write(iChannel++, scale8(data[i][RGB_BYTE2(RGB_ORDER)], scale));
		}

	}

#ifdef SUPPORT_ARGB
	// as above, but every 4th uint8_t is assumed to be alpha channel data, and will be skipped
	// NOTE(review): left pure virtual here, so this class stays abstract whenever SUPPORT_ARGB is defined.
	virtual void show(const struct CARGB *data, int nLeds, uint8_t scale = 255) = 0;
#endif

	// is the controller ready to write data out
	virtual bool ready() { return true; }

	// wait until the controller is ready to write data out
	virtual void wait() { return; }

};

#endif

#endif
\ No newline at end of file diff --git a/examples/Fast2Dev/Fast2Dev.ino b/examples/Fast2Dev/Fast2Dev.ino new file mode 100644 index 00000000..5090aaad --- /dev/null +++ b/examples/Fast2Dev/Fast2Dev.ino @@ -0,0 +1,98 @@ +// Uncomment this line if you have any interrupts that are changing pins - this causes the library to be a little bit more cautious +// #define FAST_SPI_INTERRUPTS_WRITE_PINS 1 + +// Uncomment this line to force always using software, instead of hardware, SPI (why?) +// #define FORCE_SOFTWARE_SPI 1 + +// Uncomment this line if you want to talk to DMX controllers +// #define FASTSPI_USE_DMX_SIMPLE 1 + +#include "FastLED.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// test code +// +////////////////////////////////////////////////// + +#define NUM_LEDS 150 + +CRGB leds[NUM_LEDS]; + +void setup() { + // sanity check delay - allows reprogramming if accidently blowing power w/leds + delay(2000); + + // For safety (to prevent too high of a power draw), the test case defaults to + // setting brightness to 25% brightness + LEDS.setBrightness(64); + + // LEDS.addLeds<WS2811, 13>(leds, NUM_LEDS); + // LEDS.addLeds<TM1809, 13>(leds, NUM_LEDS); + // LEDS.addLeds<UCS1903, 13>(leds, NUM_LEDS); + // LEDS.addLeds<TM1803, 13>(leds, NUM_LEDS); + + // LEDS.addLeds<P9813>(leds, NUM_LEDS); + + LEDS.addLeds<LPD8806>(leds, NUM_LEDS); + // LEDS.addLeds<WS2801>(leds, NUM_LEDS); + // LEDS.addLeds<SM16716>(leds, NUM_LEDS); + + // LEDS.addLeds<WS2811, 11>(leds, NUM_LEDS); + + // Put ws2801 strip on the hardware SPI pins with a BGR ordering of rgb and limited to a 1Mhz data rate + // LEDS.addLeds<WS2801, 11, 13, BGR, DATA_RATE_MHZ(1)>(leds, NUM_LEDS); + + // LEDS.addLeds<LPD8806, 10, 11>(leds, NUM_LEDS); + // LEDS.addLeds<WS2811, 13, BRG>(leds, NUM_LEDS); + // LEDS.addLeds<LPD8806, BGR>(leds, NUM_LEDS); +} + +void loop() { + for(int i = 0; i < 3; i++) { + for(int iLed = 0; iLed < NUM_LEDS; iLed++) { + 
memset(leds, 0, NUM_LEDS * sizeof(struct CRGB)); + + switch(i) { + // You can access the rgb values by field r, g, b + case 0: leds[iLed].r = 128; break; + + // or by indexing into the led (r==0, g==1, b==2) + case 1: leds[iLed][i] = 128; break; + + // or by setting the rgb values for the pixel all at once + case 2: leds[iLed] = CRGB(0, 0, 128); break; + } + + // and now, show your led array! + LEDS.show(); + delay(10); + } + + // fade up + for(int x = 0; x < 128; x++) { + // The showColor method sets all the leds in the strip to the same color + LEDS.showColor(CRGB(x, 0, 0)); + delay(10); + } + + // fade down + for(int x = 128; x >= 0; x--) { + LEDS.showColor(CRGB(x, 0, 0)); + delay(10); + } + + // let's fade up by scaling the brightness + for(int scale = 0; scale < 128; scale++) { + LEDS.showColor(CRGB(0, 128, 0), scale); + delay(10); + } + + // let's fade down by scaling the brightness + for(int scale = 128; scale > 0; scale--) { + LEDS.showColor(CRGB(0, 128, 0), scale); + delay(10); + } + } +} diff --git a/examples/FirstLight/FirstLight.ino b/examples/FirstLight/FirstLight.ino new file mode 100644 index 00000000..fcfbacbd --- /dev/null +++ b/examples/FirstLight/FirstLight.ino @@ -0,0 +1,66 @@ +#define FORCE_SOFTWARE_SPI +#define FORCE_SOFTWARE_PINS +#include "FastLED.h" + +/////////////////////////////////////////////////////////////////////////////////////////// +// +// Move a white dot along the strip of leds. This program simply shows how to configure the leds, +// and then how to turn a single pixel white and then off, moving down the line of pixels. +// + +// How many leds are in the strip? +#define NUM_LEDS 60 + +// Data pin that led data will be written out over +#define DATA_PIN 6 + +// Clock pin only needed for SPI based chipsets when not using hardware SPI +//#define CLOCK_PIN 8 + +// This is an array of leds. One item for each led in your strip. 
+CRGB leds[NUM_LEDS]; + +// This function sets up the ledsand tells the controller about them +void setup() { + // sanity check delay - allows reprogramming if accidently blowing power w/leds + delay(2000); + + // Uncomment one of the following lines for your leds arrangement. + // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2811, DATA_PIN, GRB>(leds+18, NUM_LEDS/3); + // FastLED.addLeds<WS2811, 8, RGB>(leds + 225, NUM_LEDS/4); + // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<NEOPIXEL, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS); + + // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS); + FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS); + + // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); +} + +// This function runs over and over, and is where you do the magic to light +// your leds. 
+void loop() { + // Move a single white led + for(int whiteLed = 0; whiteLed < NUM_LEDS; whiteLed = whiteLed + 1) { + // Turn our current led on to white, then show the leds + leds[whiteLed] = CRGB::White; + + // Show the leds (only one of which is set to white, from above) + FastLED.show(); + + // Wait a little bit + delay(100); + + // Turn our current led back to black for the next loop around + leds[whiteLed] = CRGB::Black; + } +} diff --git a/examples/RGBCalibrate/RGBCalibrate.ino b/examples/RGBCalibrate/RGBCalibrate.ino new file mode 100644 index 00000000..55661052 --- /dev/null +++ b/examples/RGBCalibrate/RGBCalibrate.ino @@ -0,0 +1,66 @@ +#include "FastLED.h" + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RGB Calibration code +// +// Use this sketch to determine what the RGB ordering for your chipset should be. Steps for setting up to use: + +// * Uncomment the line in setup that corresponds to the LED chipset that you are using. (Note that they +// all explicitly specify the RGB order as RGB) +// * Define DATA_PIN to the pin that data is connected to. +// * (Optional) if using software SPI for chipsets that are SPI based, define CLOCK_PIN to the clock pin +// * Compile/upload/run the sketch + +// You should see six leds on. If the RGB ordering is correct, you should see 1 red led, 2 green +// leds, and 3 blue leds. If you see different colors, the count of each color tells you what the +// position for that color in the rgb orering should be. So, for example, if you see 1 Blue, and 2 +// Red, and 3 Green leds then the rgb ordering should be BRG (Blue, Red, Green). + +// You can then test this ordering by setting the RGB ordering in the addLeds line below to the new ordering +// and it should come out correctly, 1 red, 2 green, and 3 blue. 
+// +////////////////////////////////////////////////// + +#define NUM_LEDS 6 + +// Data pin that led data will be written out over +#define DATA_PIN 7 +// Clock pin only needed for SPI based chipsets when not using hardware SPI +//#define CLOCK_PIN 8 + +CRGB leds[NUM_LEDS]; + +void setup() { + // sanity check delay - allows reprogramming if accidently blowing power w/leds + delay(2000); + + // Uncomment one of the following lines for your leds arrangement. + // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS); + + // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS); + + // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); + // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS); +} + +void loop() { + leds[0] = CRGB::Red; + leds[1] = CRGB::Green; + leds[2] = CRGB::Green; + leds[3] = CRGB::Blue; + leds[4] = CRGB::Blue; + leds[5] = CRGB::Blue; + FastLED.show(); + delay(1000); +} diff --git a/fastpin.h b/fastpin.h new file mode 100644 index 00000000..b6355fce --- /dev/null +++ b/fastpin.h @@ -0,0 +1,424 @@ +#ifndef __INC_FASTPIN_H +#define __INC_FASTPIN_H + +#include<avr/io.h> + +// Arduino.h needed for convinience functions digitalPinToPort/BitMask/portOutputRegister and the pinMode methods. 
+#include<Arduino.h> + +#define NO_PIN 255 + +// Class to ensure that a minimum amount of time has kicked since the last time run - and delay if not enough time has passed yet +// this should make sure that chipsets that have +template<int WAIT> class CMinWait { + long mLastMicros; +public: + CMinWait() { mLastMicros = 0; } + + void wait() { + long diff = micros() - mLastMicros; + if(diff < WAIT) { + delayMicroseconds(WAIT - diff); + } + } + + void mark() { mLastMicros = micros(); } +}; + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Pin access class - needs to tune for various platforms (naive fallback solution?) +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#if defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__) +#define _CYCLES(_PIN) (((_PIN >= 62 ) || (_PIN>=42 && _PIN<=49) || (_PIN>=14 && _PIN <=17) || (_PIN>=6 && _PIN <=9)) ? 2 : 1) +#else +#define _CYCLES(_PIN) ((_PIN >= 24) ? 
2 : 1) +#endif + +class Selectable { +public: + virtual void select() = 0; + virtual void release() = 0; + virtual bool isSelected() = 0; +}; + +class Pin : public Selectable { + uint8_t mPinMask; + uint8_t mPin; + volatile uint8_t *mPort; + + void _init() { + mPinMask = digitalPinToBitMask(mPin); + mPort = portOutputRegister(digitalPinToPort(mPin)); + } +public: + Pin(int pin) : mPin(pin) { _init(); } + + typedef volatile uint8_t * port_ptr_t; + typedef uint8_t port_t; + + inline void setOutput() { pinMode(mPin, OUTPUT); } + inline void setInput() { pinMode(mPin, INPUT); } + + inline void hi() __attribute__ ((always_inline)) { *mPort |= mPinMask; } + inline void lo() __attribute__ ((always_inline)) { *mPort &= ~mPinMask; } + + inline void strobe() __attribute__ ((always_inline)) { hi(); lo(); } + + inline void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port |= mPinMask; } + inline void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port &= ~mPinMask; } + inline void set(register port_t val) __attribute__ ((always_inline)) { *mPort = val; } + + inline void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; } + + port_t hival() __attribute__ ((always_inline)) { return *mPort | mPinMask; } + port_t loval() __attribute__ ((always_inline)) { return *mPort & ~mPinMask; } + port_ptr_t port() __attribute__ ((always_inline)) { return mPort; } + port_t mask() __attribute__ ((always_inline)) { return mPinMask; } + + virtual void select() { hi(); } + virtual void release() { lo(); } + virtual bool isSelected() { return (*mPort & mPinMask) == mPinMask; } +}; + +class OutputPin : public Pin { +public: + OutputPin(int pin) : Pin(pin) { setOutput(); } +}; + +class InputPin : public Pin { +public: + InputPin(int pin) : Pin(pin) { setInput(); } +}; + +/// The simplest level of Pin class. This relies on runtime functions durinig initialization to get the port/pin mask for the pin. 
Most +/// of the accesses involve references to these static globals that get set up. This won't be the fastest set of pin operations, but it +/// will provide pin level access on pretty much all arduino environments. In addition, it includes some methods to help optimize access in +/// various ways. Namely, the versions of hi, lo, and fastset that take the port register as a passed in register variable (saving a global +/// dereference), since these functions are aggressively inlined, that can help collapse out a lot of extraneous memory loads/dereferences. +/// +/// In addition, if, while writing a bunch of data to a pin, you know no other pins will be getting written to, you can get/cache a value of +/// the pin's port register and use that to do a full set to the register. This results in one being able to simply do a store to the register, +/// vs. the load, and/or, and store that would be done normally. +/// +/// There are platform specific instantiations of this class that provide direct i/o register access to pins for much higher speed pin twiddling. +/// +/// Note that these classes are all static functions. So the proper usage is Pin<13>::hi(); or such. Instantiating objects is not recommended, +/// as passing Pin objects around will likely -not- have the effect you're expecting. 
// Generic fallback used when no platform specific specialization exists for PIN.
// sPort and sPinMask are only filled in by _init(), which runs from setOutput()/setInput(),
// so one of those has to be called before any of the accessors below touches *sPort.
template<uint8_t PIN> class FastPin {
	static uint8_t sPinMask;
	static volatile uint8_t *sPort;
	// look up the port output register and bit mask for PIN via the Arduino helpers
	static void _init() {
		sPinMask = digitalPinToBitMask(PIN);
		sPort = portOutputRegister(digitalPinToPort(PIN));
	}
public:
	typedef volatile uint8_t * port_ptr_t;
	typedef uint8_t port_t;

	inline static void setOutput() { _init(); pinMode(PIN, OUTPUT); }
	inline static void setInput() { _init(); pinMode(PIN, INPUT); }

	// read-modify-write of the cached port register
	inline static void hi() __attribute__ ((always_inline)) { *sPort |= sPinMask; }
	inline static void lo() __attribute__ ((always_inline)) { *sPort &= ~sPinMask; }

	inline static void strobe() __attribute__ ((always_inline)) { hi(); lo(); }

	// same as above, but through a port pointer supplied by the caller
	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port |= sPinMask; }
	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port &= ~sPinMask; }
	// overwrite the whole port register with val
	inline static void set(register port_t val) __attribute__ ((always_inline)) { *sPort = val; }

	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }

	// current port value with this pin's bit set / cleared (nothing is written)
	static port_t hival() __attribute__ ((always_inline)) { return *sPort | sPinMask; }
	static port_t loval() __attribute__ ((always_inline)) { return *sPort & ~sPinMask; }
	static port_ptr_t port() __attribute__ ((always_inline)) { return sPort; }
	static port_t mask() __attribute__ ((always_inline)) { return sPinMask; }
};

// storage for the per-PIN static members
template<uint8_t PIN> uint8_t FastPin<PIN>::sPinMask;
template<uint8_t PIN> volatile uint8_t *FastPin<PIN>::sPort;

/// Class definition for a Pin where we know the port registers at compile time for said pin.  This allows us to make
/// a lot of optimizations, as the inlined hi/lo methods will devolve to a single io register write/bitset.
// _MASK is the bit mask of the pin inside its port. _PORT/_DDR/_PIN are wrapper types whose
// static r() returns the matching io register by reference.
template<uint8_t PIN, uint8_t _MASK, typename _PORT, typename _DDR, typename _PIN> class _AVRPIN {
public:
	typedef volatile uint8_t * port_ptr_t;
	typedef uint8_t port_t;

	// direction is selected through the data direction register
	inline static void setOutput() { _DDR::r() |= _MASK; }
	inline static void setInput() { _DDR::r() &= ~_MASK; }

	inline static void hi() __attribute__ ((always_inline)) { _PORT::r() |= _MASK; }
	inline static void lo() __attribute__ ((always_inline)) { _PORT::r() &= ~_MASK; }
	// overwrite the whole port register with val
	inline static void set(register uint8_t val) __attribute__ ((always_inline)) { _PORT::r() = val; }

	inline static void strobe() __attribute__ ((always_inline)) { hi(); lo(); }

	// the port argument is ignored by these three; they always go to the compile time register
	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
	inline static void fastset(register port_ptr_t port, register uint8_t val) __attribute__ ((always_inline)) { set(val); }

	// current port value with this pin's bit set / cleared (nothing is written)
	inline static port_t hival() __attribute__ ((always_inline)) { return _PORT::r() | _MASK; }
	inline static port_t loval() __attribute__ ((always_inline)) { return _PORT::r() & ~_MASK; }
	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PORT::r(); }
	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
};

/// Template definition for teensy 3.0 style ARM pins, providing direct access to the various GPIO registers.  Note that this
/// uses the full port GPIO registers.  In theory, in some way, bit-band register access -should- be faster, however I have found
/// that something about the way gcc does register allocation results in the bit-band code being slower.  It will need more fine tuning.
// _MASK is the bit mask of the pin inside its 32 bit port. Each _Pxxx parameter is a wrapper
// type whose static r() returns the matching GPIO register by reference.
template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN {
public:
	typedef volatile uint32_t * port_ptr_t;
	typedef uint32_t port_t;

	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }

	// hi/lo/toggle are plain stores of the mask to the _PSOR/_PCOR/_PTOR registers; set() overwrites _PDOR
	inline static void hi() __attribute__ ((always_inline)) { _PSOR::r() = _MASK; }
	inline static void lo() __attribute__ ((always_inline)) { _PCOR::r() = _MASK; }
	inline static void set(register port_t val) __attribute__ ((always_inline)) { _PDOR::r() = val; }

	// two toggles in a row
	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }

	inline static void toggle() __attribute__ ((always_inline)) { _PTOR::r() = _MASK; }

	// hi(port)/lo(port) ignore the port argument; fastset does write through the pointer it is given
	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }

	// current _PDOR value with this pin's bit set / cleared (nothing is written)
	inline static port_t hival() __attribute__ ((always_inline)) { return _PDOR::r() | _MASK; }
	inline static port_t loval() __attribute__ ((always_inline)) { return _PDOR::r() & ~_MASK; }
	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_PDOR::r(); }
	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
};

/// Template definition for teensy 3.0 style ARM pins using bit banding, providing direct access to the various GPIO registers.  GCC
/// does a poor job of optimizing around these accesses so they are not being used just yet.
// _BIT is the bit number of the pin inside its port. _PDOR::rx<_BIT>() / _PTOR::rx<_BIT>() return
// the address of the bit-band alias word for that single bit of the register.
template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR, typename _PTOR, typename _PDIR, typename _PDDR> class _ARMPIN_BITBAND {
public:
	typedef volatile uint32_t * port_ptr_t;
	typedef uint32_t port_t;

	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }

	// every write here goes to the alias word of this one bit, not to the whole port
	inline static void hi() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 1; }
	inline static void lo() __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = 0; }
	inline static void set(register port_t val) __attribute__ ((always_inline)) { *_PDOR::template rx<_BIT>() = val; }

	// two toggles in a row
	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }

	inline static void toggle() __attribute__ ((always_inline)) { *_PTOR::template rx<_BIT>() = 1; }

	// these write through the pointer they are given (port() below hands out the alias word)
	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 1; }
	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { *port = 0; }
	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }

	// constants: the values to store to the alias word for high / low
	inline static port_t hival() __attribute__ ((always_inline)) { return 1; }
	inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
	inline static port_ptr_t port() __attribute__ ((always_inline)) { return _PDOR::template rx<_BIT>(); }
	inline static port_t mask() __attribute__ ((always_inline)) { return 1; }
};

/// AVR definitions for pins.  Getting around the fact that I can't pass GPIO register addresses in as template arguments by instead creating
/// a custom type for each GPIO register with a single, static, aggressively inlined function that returns that specific GPIO register.  A similar
/// trick is used a bit further below for the ARM GPIO registers (of which there are far more than on AVR!)
typedef volatile uint8_t & reg8_t;
// _R(T): name of the wrapper struct generated for register T
#define _R(T) struct __gen_struct_ ## T
// _RD8(T): define the wrapper struct for 8 bit register T; r() returns the register by reference
#define _RD8(T) struct __gen_struct_ ## T { static inline reg8_t r() { return T; }};
// _IO(L): define the wrappers for DDR<L>, PORT<L> and PIN<L>
#define _IO(L) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L);
// _DEFPIN_AVR(PIN, MASK, L): specialize FastPin<PIN> as the pin with bit mask MASK on port L
#define _DEFPIN_AVR(PIN, MASK, L) template<> class FastPin<PIN> : public _AVRPIN<PIN, MASK, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};

// ARM definitions
// address of the alias word for bit `bit` of `reg`: (offset of reg from 0x40000000) * 32 + bit * 4, rebased to 0x42000000
#define GPIO_BITBAND_ADDR(reg, bit) (((uint32_t)&(reg) - 0x40000000) * 32 + (bit) * 4 + 0x42000000)
#define GPIO_BITBAND_PTR(reg, bit) ((uint32_t *)GPIO_BITBAND_ADDR((reg), (bit)))

typedef volatile uint32_t & reg32_t;
typedef volatile uint32_t * ptr_reg32_t;

// _RD32(T): wrapper struct for 32 bit register T; r() returns the register, rx<BIT>() the alias word pointer for one bit
#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
// _IO32(L): define the wrappers for the six GPIO<L>_* registers used by the pin templates
#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);

// _DEFPIN_ARM(PIN, BIT, L): specialize FastPin<PIN> as bit BIT of GPIO port L (note: takes a bit number, the mask is derived as 1 << BIT)
#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};

// Don't use bit band'd pins for now, the compiler generates far less efficient code around them
// #define _DEFPIN_ARM(PIN, BIT, L) template<> class Pin<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR),
// 																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};


///////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Pin definitions for AVR and ARM.
If there are pin definitions supplied below for the platform being +// built on, then much higher speed access will be possible, namely with direct GPIO register accesses. +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(FORCE_SOFTWARE_PINS) +#warning "Softwrae pin support forced pin access will be slightly slower. See fastpin.h for info." +#define NO_HARDWARE_PIN_SUPPORT + +#elif defined(__AVR_ATtiny85__) +_IO(B); + +_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x08, B); +_DEFPIN_AVR(4, 0x10, B); _DEFPIN_AVR(5, 0x20, B); + +#elif defined(__AVR_ATmega328P__) || defined(__AVR_ATmega168__) +// Accelerated port definitions for arduino avrs +_IO(D); _IO(B); _IO(C); +_DEFPIN_AVR( 0, 0x01, D); _DEFPIN_AVR( 1, 0x02, D); _DEFPIN_AVR( 2, 0x04, D); _DEFPIN_AVR( 3, 0x08, D); +_DEFPIN_AVR( 4, 0x10, D); _DEFPIN_AVR( 5, 0x20, D); _DEFPIN_AVR( 6, 0x40, D); _DEFPIN_AVR( 7, 0x80, D); +_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, B); _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B); +_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C); +_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C); + +#define SPI_DATA 11 +#define SPI_CLOCK 13 +#define SPI_SELECT 10 +#define AVR_HARDWARE_SPI + +#elif defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__) +// megas + +_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F); _IO(G); _IO(H); _IO(J); _IO(K); _IO(L); + +_DEFPIN_AVR(0, 1, E); _DEFPIN_AVR(1, 2, E); _DEFPIN_AVR(2, 16, E); _DEFPIN_AVR(3, 32, E); +_DEFPIN_AVR(4, 32, G); _DEFPIN_AVR(5, 8, E); _DEFPIN_AVR(6, 8, H); _DEFPIN_AVR(7, 16, H); +_DEFPIN_AVR(8, 32, H); _DEFPIN_AVR(9, 64, H); _DEFPIN_AVR(10, 16, B); _DEFPIN_AVR(11, 32, B); +_DEFPIN_AVR(12, 64, B); _DEFPIN_AVR(13, 128, B); _DEFPIN_AVR(14, 2, J); _DEFPIN_AVR(15, 1, J); +_DEFPIN_AVR(16, 2, H); _DEFPIN_AVR(17, 1, 
H); _DEFPIN_AVR(18, 8, D); _DEFPIN_AVR(19, 4, D); +_DEFPIN_AVR(20, 2, D); _DEFPIN_AVR(21, 1, D); _DEFPIN_AVR(22, 1, A); _DEFPIN_AVR(23, 2, A); +_DEFPIN_AVR(24, 4, A); _DEFPIN_AVR(25, 8, A); _DEFPIN_AVR(26, 16, A); _DEFPIN_AVR(27, 32, A); +_DEFPIN_AVR(28, 64, A); _DEFPIN_AVR(29, 128, A); _DEFPIN_AVR(30, 128, C); _DEFPIN_AVR(31, 64, C); +_DEFPIN_AVR(32, 32, C); _DEFPIN_AVR(33, 16, C); _DEFPIN_AVR(34, 8, C); _DEFPIN_AVR(35, 4, C); +_DEFPIN_AVR(36, 2, C); _DEFPIN_AVR(37, 1, C); _DEFPIN_AVR(38, 128, D); _DEFPIN_AVR(39, 4, G); +_DEFPIN_AVR(40, 2, G); _DEFPIN_AVR(41, 1, G); _DEFPIN_AVR(42, 128, L); _DEFPIN_AVR(43, 64, L); +_DEFPIN_AVR(44, 32, L); _DEFPIN_AVR(45, 16, L); _DEFPIN_AVR(46, 8, L); _DEFPIN_AVR(47, 4, L); +_DEFPIN_AVR(48, 2, L); _DEFPIN_AVR(49, 1, L); _DEFPIN_AVR(50, 8, B); _DEFPIN_AVR(51, 4, B); +_DEFPIN_AVR(52, 2, B); _DEFPIN_AVR(53, 1, B); _DEFPIN_AVR(54, 1, F); _DEFPIN_AVR(55, 2, F); +_DEFPIN_AVR(56, 4, F); _DEFPIN_AVR(57, 8, F); _DEFPIN_AVR(58, 16, F); _DEFPIN_AVR(59, 32, F); +_DEFPIN_AVR(60, 64, F); _DEFPIN_AVR(61, 128, F); _DEFPIN_AVR(62, 1, K); _DEFPIN_AVR(63, 2, K); +_DEFPIN_AVR(64, 4, K); _DEFPIN_AVR(65, 8, K); _DEFPIN_AVR(66, 16, K); _DEFPIN_AVR(67, 32, K); +_DEFPIN_AVR(68, 64, K); _DEFPIN_AVR(69, 128, K); + +#define SPI_DATA 51 +#define SPI_CLOCK 52 +#define SPI_SELECT 53 +#define AVR_HARDWARE_SPI + +// Leonardo, teensy, blinkm +#elif defined(__AVR_ATmega32U4__) && defined(CORE_TEENSY) + +// teensy defs +_IO(B); _IO(C); _IO(D); _IO(E); _IO(F); + +_DEFPIN_AVR(0, 1, B); _DEFPIN_AVR(1, 2, B); _DEFPIN_AVR(2, 4, B); _DEFPIN_AVR(3, 8, B); +_DEFPIN_AVR(4, 128, B); _DEFPIN_AVR(5, 1, D); _DEFPIN_AVR(6, 2, D); _DEFPIN_AVR(7, 4, D); +_DEFPIN_AVR(8, 8, D); _DEFPIN_AVR(9, 64, C); _DEFPIN_AVR(10, 128, C); _DEFPIN_AVR(11, 64, D); +_DEFPIN_AVR(12, 128, D); _DEFPIN_AVR(13, 16, B); _DEFPIN_AVR(14, 32, B); _DEFPIN_AVR(15, 64, B); +_DEFPIN_AVR(16, 128, F); _DEFPIN_AVR(17, 64, F); _DEFPIN_AVR(18, 32, F); _DEFPIN_AVR(19, 16, F); +_DEFPIN_AVR(20, 2, F); _DEFPIN_AVR(21, 1, 
F); _DEFPIN_AVR(22, 16, D); _DEFPIN_AVR(23, 32, D); + +#define SPI_DATA 2 +#define SPI_CLOCK 1 +#define SPI_SELECT 3 +#define AVR_HARDWARE_SPI + +#elif defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB1286__) +// teensy++ 2 defs + +_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F); + +_DEFPIN_AVR(0, 1, D); _DEFPIN_AVR(1, 2, D); _DEFPIN_AVR(2, 4, D); _DEFPIN_AVR(3, 8, D); +_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 32, D); _DEFPIN_AVR(6, 64, D); _DEFPIN_AVR(7, 128, D); +_DEFPIN_AVR(8, 1, E); _DEFPIN_AVR(9, 2, E); _DEFPIN_AVR(10, 1, C); _DEFPIN_AVR(11, 2, C); +_DEFPIN_AVR(12, 4, C); _DEFPIN_AVR(13, 8, C); _DEFPIN_AVR(14, 16, C); _DEFPIN_AVR(15, 32, C); +_DEFPIN_AVR(16, 64, C); _DEFPIN_AVR(17, 128, C); _DEFPIN_AVR(18, 64, E); _DEFPIN_AVR(19, 128, E); +_DEFPIN_AVR(20, 1, B); _DEFPIN_AVR(21, 2, B); _DEFPIN_AVR(22, 4, B); _DEFPIN_AVR(23, 8, B); +_DEFPIN_AVR(24, 16, B); _DEFPIN_AVR(25, 32, B); _DEFPIN_AVR(26, 64, B); _DEFPIN_AVR(27, 128, B); +_DEFPIN_AVR(28, 1, A); _DEFPIN_AVR(29, 2, A); _DEFPIN_AVR(30, 4, A); _DEFPIN_AVR(31, 8, A); +_DEFPIN_AVR(32, 16, A); _DEFPIN_AVR(33, 32, A); _DEFPIN_AVR(34, 64, A); _DEFPIN_AVR(35, 128, A); +_DEFPIN_AVR(36, 16, E); _DEFPIN_AVR(37, 32, E); _DEFPIN_AVR(38, 1, F); _DEFPIN_AVR(39, 2, F); +_DEFPIN_AVR(40, 4, F); _DEFPIN_AVR(41, 8, F); _DEFPIN_AVR(42, 16, F); _DEFPIN_AVR(43, 32, F); +_DEFPIN_AVR(44, 64, F); _DEFPIN_AVR(45, 128, F); + +#define SPI_DATA 22 +#define SPI_CLOCK 21 +#define SPI_SELECT 20 +#define AVR_HARDWARE_SPI + +#elif defined(__AVR_ATmega32U4__) + +// leonard defs +_IO(B); _IO(C); _IO(D); _IO(E); _IO(F); + +_DEFPIN_AVR(0, 4, D); _DEFPIN_AVR(1, 8, D); _DEFPIN_AVR(2, 2, D); _DEFPIN_AVR(3, 1, D); +_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 64, C); _DEFPIN_AVR(6, 128, D); _DEFPIN_AVR(7, 64, E); +_DEFPIN_AVR(8, 16, B); _DEFPIN_AVR(9, 32, B); _DEFPIN_AVR(10, 64, B); _DEFPIN_AVR(11, 128, B); +_DEFPIN_AVR(12, 64, D); _DEFPIN_AVR(13, 128, C); _DEFPIN_AVR(14, 8, B); _DEFPIN_AVR(15, 2, B); +_DEFPIN_AVR(16, 4, B); _DEFPIN_AVR(17, 1, B); 
_DEFPIN_AVR(18, 128, F); _DEFPIN_AVR(19, 64, F); +_DEFPIN_AVR(20, 32, F); _DEFPIN_AVR(21, 16, F); _DEFPIN_AVR(22, 2, F); _DEFPIN_AVR(23, 0, F); + +#define SPI_DATA 16 +#define SPI_CLOCK 15 +#define AVR_HARDWARE_SPI + +#elif defined(__MK20DX128__) && defined(CORE_TEENSY) + +_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E); + +_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 12, A); +_DEFPIN_ARM(4, 13, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D); +_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C); +_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C); +_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B); +_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C); +_DEFPIN_ARM(24, 5, A); _DEFPIN_ARM(25, 19, B); _DEFPIN_ARM(26, 1, E); _DEFPIN_ARM(27, 9, C); +_DEFPIN_ARM(28, 8, C); _DEFPIN_ARM(29, 10, C); _DEFPIN_ARM(30, 11, C); _DEFPIN_ARM(31, 0, E); +_DEFPIN_ARM(32, 18, B); _DEFPIN_ARM(33, 4, A); + +#define SPI_DATA 11 +#define SPI_CLOCK 13 +#define ARM_HARDWARE_SPI + +#elif defined(__SAM3X8E__) + +DUE_IO32(A); +DUE_IO32(B); +DUE_IO32(C); +DUE_IO32(D); + +_DEFPIN_DUE(0, 8, A); _DEFPIN_DUE(1, 9, A); _DEFPIN_DUE(2, 25, B); _DEFPIN_DUE(3, 28, C); +_DEFPIN_DUE(4, 26, C); _DEFPIN_DUE(5, 25, C); _DEFPIN_DUE(6, 24, C); _DEFPIN_DUE(7, 23, C); +_DEFPIN_DUE(8, 22, C); _DEFPIN_DUE(9, 21, C); _DEFPIN_DUE(10, 29, C); _DEFPIN_DUE(11, 7, D); +_DEFPIN_DUE(12, 8, D); _DEFPIN_DUE(13, 27, B); _DEFPIN_DUE(14, 4, D); _DEFPIN_DUE(15, 5, D); +_DEFPIN_DUE(16, 13, A); _DEFPIN_DUE(17, 12, A); _DEFPIN_DUE(18, 11, A); _DEFPIN_DUE(19, 10, A); +_DEFPIN_DUE(20, 12, B); _DEFPIN_DUE(21, 13, B); _DEFPIN_DUE(22, 26, B); _DEFPIN_DUE(23, 14, A); +_DEFPIN_DUE(24, 15, A); _DEFPIN_DUE(25, 0, D); _DEFPIN_DUE(26, 1, D); _DEFPIN_DUE(27, 2, D); +_DEFPIN_DUE(28, 3, D); _DEFPIN_DUE(29, 6, D); _DEFPIN_DUE(30, 
9, D); _DEFPIN_DUE(31, 7, A); +_DEFPIN_DUE(32, 10, D); _DEFPIN_DUE(33, 1, C); _DEFPIN_DUE(34, 2, C); _DEFPIN_DUE(35, 3, C); +_DEFPIN_DUE(36, 4, C); _DEFPIN_DUE(37, 5, C); _DEFPIN_DUE(38, 6, C); _DEFPIN_DUE(39, 7, C); +_DEFPIN_DUE(40, 8, C); _DEFPIN_DUE(41, 9, C); _DEFPIN_DUE(42, 19, A); _DEFPIN_DUE(43, 20, A); +_DEFPIN_DUE(44, 19, C); _DEFPIN_DUE(45, 18, C); _DEFPIN_DUE(46, 17, C); _DEFPIN_DUE(47, 16, C); +_DEFPIN_DUE(48, 15, C); _DEFPIN_DUE(49, 14, C); _DEFPIN_DUE(50, 13, C); _DEFPIN_DUE(51, 12, C); +_DEFPIN_DUE(52, 21, B); _DEFPIN_DUE(53, 14, B); _DEFPIN_DUE(54, 16, A); _DEFPIN_DUE(55, 24, A); +_DEFPIN_DUE(56, 23, A); _DEFPIN_DUE(57, 22, A); _DEFPIN_DUE(58, 6, A); _DEFPIN_DUE(59, 4, A); +_DEFPIN_DUE(60, 3, A); _DEFPIN_DUE(61, 2, A); _DEFPIN_DUE(62, 17, B); _DEFPIN_DUE(63, 18, B); +_DEFPIN_DUE(64, 19, B); _DEFPIN_DUE(65, 20, B); _DEFPIN_DUE(66, 15, B); _DEFPIN_DUE(67, 16, B); +_DEFPIN_DUE(68, 1, A); _DEFPIN_DUE(69, 0, A); _DEFPIN_DUE(70, 17, A); _DEFPIN_DUE(71, 18, A); +_DEFPIN_DUE(72, 30, C); _DEFPIN_DUE(73, 21, A); _DEFPIN_DUE(74, 25, A); _DEFPIN_DUE(75, 26, A); +_DEFPIN_DUE(76, 27, A); _DEFPIN_DUE(77, 28, A); _DEFPIN_DUE(78, 23, B); + +#else + +#warning "No pin/port mappings found, pin access will be slightly slower. See fastpin.h for info." +#define NO_HARDWARE_PIN_SUPPORT + +#endif + +#endif diff --git a/fastspi.h b/fastspi.h new file mode 100644 index 00000000..00747137 --- /dev/null +++ b/fastspi.h @@ -0,0 +1,91 @@ +#ifndef __INC_FASTSPI_H +#define __INC_FASTSPI_H + +#include "controller.h" +#include "lib8tion.h" +#include "delay.h" + +// Some helper macros for getting at mis-ordered byte values +#define SPI_B0 (RGB_BYTE0(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) +#define SPI_B1 (RGB_BYTE1(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) +#define SPI_B2 (RGB_BYTE2(RGB_ORDER) + (MASK_SKIP_BITS & SKIP)) +#define SPI_ADVANCE (3 + (MASK_SKIP_BITS & SKIP)) + +/// Some of the SPI controllers will need to perform a transform on each byte before doing +/// anyting with it. 
Creating a class of this form and passing it in as a template parameter to +/// writeBytes/writeBytes3 below will ensure that the body of this method will get called on every +/// byte worked on. Recommendation, make the adjust method aggressively inlined. +/// +/// TODO: Convinience macro for building these +class DATA_NOP { +public: + static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data) { return data; } + static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data, register uint8_t scale) { return scale8(data, scale); } + static __attribute__((always_inline)) inline void postBlock(int len) {} +}; + +#define FLAG_START_BIT 0x80 +#define MASK_SKIP_BITS 0x3F + +// Clock speed dividers +#define SPEED_DIV_2 2 +#define SPEED_DIV_4 4 +#define SPEED_DIV_8 8 +#define SPEED_DIV_16 16 +#define SPEED_DIV_32 32 +#define SPEED_DIV_64 64 +#define SPEED_DIV_128 128 + +#define MAX_DATA_RATE 0 +#define DATA_RATE_MHZ(X) ((F_CPU / 1000000L) / X) +#define DATA_RATE_KHZ(X) ((F_CPU / 1000L) / X) + +// Include the various specific SPI implementations +#include "fastspi_bitbang.h" +#include "fastspi_arm.h" +#include "fastspi_avr.h" +#include "fastspi_dma.h" + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// External SPI template definition with partial instantiation(s) to map to hardware SPI ports on platforms/builds where the pin +// mappings are known at compile time. 
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Primary template: any data/clock pin pair is served by the bit-banged AVRSoftwareSPIOutput unless
// one of the partial specializations below matches.
template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
class SPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};

// Always the bit-banged implementation; it has no hardware specialization in this file.
template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
class SoftwareSPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};

// Hardware mappings are only compiled in when software SPI has not been forced and the pin map
// supplied both SPI_DATA and SPI_CLOCK.
#ifndef FORCE_SOFTWARE_SPI
#if defined(SPI_DATA) && defined(SPI_CLOCK)

#if defined(__MK20DX128__) && defined(CORE_TEENSY)

// The SPI_DATA/SPI_CLOCK pin pair is routed to ARMHardwareSPIOutput on this target.
template<uint8_t SPI_SPEED>
class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};

#else

// Every other target that defines the hardware pins is routed to AVRHardwareSPIOutput.
template<uint8_t SPI_SPEED>
class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public AVRHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};

#endif

#else
#warning "No hardware SPI pins defined. All SPI access will default to bitbanged output"

#endif

// #if defined(USART_DATA) && defined(USART_CLOCK)
// template<uint8_t SPI_SPEED>
// class AVRSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> : public AVRUSARTSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> {};
// #endif

#else
#warning "Forcing software SPI - no hardware SPI for you!"
#endif

#endif
// (scraped diff header) diff --git a/fastspi_arm.h b/fastspi_arm.h new file mode 100644 index 00000000..e9c38343 --- /dev/null +++ b/fastspi_arm.h @@ -0,0 +1,386 @@
#ifndef __INC_FASTSPI_ARM_H
#define __INC_FASTSPI_ARM_H


#if defined(__MK20DX128__) && defined(CORE_TEENSY)

// Provide the unnumbered SPI_PUSHR_* names as aliases of the SPI0_PUSHR_* ones when they are missing.
#ifndef SPI_PUSHR_CONT
#define SPI_PUSHR_CONT SPI0_PUSHR_CONT
#define SPI_PUSHR_CTAS(X) SPI0_PUSHR_CTAS(X)
#define SPI_PUSHR_EOQ SPI0_PUSHR_EOQ
#define SPI_PUSHR_CTCNT SPI0_PUSHR_CTCNT
#define SPI_PUSHR_PCS(X) SPI0_PUSHR_PCS(X)
#endif

// Template function that, on compilation, expands to a constant representing the highest bit set in a byte.
Right now, +// if no bits are set (value is 0), it returns 0, which is also the value returned if the lowest bit is the only bit +// set (the zero-th bit). Unclear if I will want this to change at some point. +template<int VAL, int BIT> class BitWork { + public: + static int highestBit() __attribute__((always_inline)) { return (VAL & 1 << BIT) ? BIT : BitWork<VAL, BIT-1>::highestBit(); } +}; +template<int VAL> class BitWork<VAL, 0> { + public: + static int highestBit() __attribute__((always_inline)) { return 0; } +}; + +#define MAX(A, B) (( (A) > (B) ) ? (A) : (B)) + +#define USE_CONT 0 + +// Templated function to translate a clock divider value into the prescalar, scalar, and clock doubling setting for the world. +template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint32_t & dbl) { + switch(VAL) { + // Handle the dbl clock cases + case 0: case 1: + case 2: preScalar = 0; scalar = 0; dbl = 1; break; + case 3: preScalar = 1; scalar = 0; dbl = 1; break; + case 5: preScalar = 2; scalar = 0; dbl = 1; break; + case 7: preScalar = 3; scalar = 0; dbl = 1; break; + + // Handle the scalar value 6 cases (since it's not a power of two, it won't get caught + // below) + case 9: preScalar = 1; scalar = 2; dbl = 1; break; + case 18: case 19: preScalar = 1; scalar = 2; dbl = 0; break; + + case 15: preScalar = 2; scalar = 2; dbl = 1; break; + case 30: case 31: preScalar = 2; scalar = 2; dbl = 0; break; + + case 21: case 22: case 23: preScalar = 3; scalar = 2; dbl = 1; break; + case 42: case 43: case 44: case 45: case 46: case 47: preScalar = 3; scalar = 2; dbl = 0; break; + default: { + int p2 = BitWork<VAL/2, 15>::highestBit(); + int p3 = BitWork<VAL/3, 15>::highestBit(); + int p5 = BitWork<VAL/5, 15>::highestBit(); + int p7 = BitWork<VAL/7, 15>::highestBit(); + + int w2 = 2 * (1 << p2); + int w3 = (VAL/3) > 0 ? 3 * (1 << p3) : 0; + int w5 = (VAL/5) > 0 ? 5 * (1 << p5) : 0; + int w7 = (VAL/7) > 0 ? 
7 * (1 << p7) : 0; + + int maxval = MAX(MAX(w2, w3), MAX(w5, w7)); + + if(w2 == maxval) { preScalar = 0; scalar = p2; } + else if(w3 == maxval) { preScalar = 1; scalar = p3; } + else if(w5 == maxval) { preScalar = 2; scalar = p5; } + else if(w7 == maxval) { preScalar = 3; scalar = p7; } + + dbl = 0; + if(scalar == 0) { dbl = 1; } + else if(scalar < 3) { scalar--; } + } + } + return; +} + + +template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER> +class ARMHardwareSPIOutput { + Selectable *m_pSelect; + + // Borrowed from the teensy3 SPSR emulation code + static inline void enable_pins(void) __attribute__((always_inline)) { + //serial_print("enable_pins\n"); + CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2); + CORE_PIN12_CONFIG = PORT_PCR_MUX(2); + CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2); + } + + // Borrowed from the teensy3 SPSR emulation code + static inline void disable_pins(void) __attribute__((always_inline)) { + //serial_print("disable_pins\n"); + CORE_PIN11_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); + CORE_PIN12_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); + CORE_PIN13_CONFIG = PORT_PCR_SRE | PORT_PCR_DSE | PORT_PCR_MUX(1); + } + +public: + ARMHardwareSPIOutput() { m_pSelect = NULL; } + ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } + void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } + + static inline void update_ctar0(uint32_t ctar) __attribute__((always_inline)) { + if (SPI0_CTAR0 == ctar) return; + uint32_t mcr = SPI0_MCR; + if (mcr & SPI_MCR_MDIS) { + SPI0_CTAR0 = ctar; + } else { + SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT; + SPI0_CTAR0 = ctar; + + SPI0_MCR = mcr; + } + } + + static inline void update_ctar1(uint32_t ctar) __attribute__((always_inline)) { + if (SPI0_CTAR1 == ctar) return; + uint32_t mcr = SPI0_MCR; + if (mcr & SPI_MCR_MDIS) { + SPI0_CTAR1 = ctar; + } else { + SPI0_MCR = mcr | SPI_MCR_MDIS | SPI_MCR_HALT; + SPI0_CTAR1 = ctar; + SPI0_MCR = mcr; + + } 
+ } + + static inline void set_ctar1_bits(int bits) { + // Set ctar1 to 16 bits + int ctar = SPI0_CTAR1; + + // clear the FMSZ bits + ctar &= SPI_CTAR_FMSZ(0x0F); + ctar |= SPI_CTAR_FMSZ((bits-1) & 0x0F); + + update_ctar1(ctar); + } + + static inline void set_ctar0_bits(int bits) { + // Set ctar1 to 16 bits + int ctar = SPI0_CTAR1; + + // clear the FMSZ bits + ctar &= SPI_CTAR_FMSZ(0x0F); + ctar |= SPI_CTAR_FMSZ((bits-1) & 0x0F); + + update_ctar1(ctar); + } + + + void setSPIRate() { + // Configure CTAR0, defaulting to 8 bits and CTAR1, defaulting to 16 bits + uint32_t _PBR = 0; + uint32_t _BR = 0; + uint32_t _CSSCK = 0; + uint32_t _DBR = 0; + + // if(_SPI_CLOCK_DIVIDER >= 256) { _PBR = 0; _BR = _CSSCK = 7; _DBR = 0; } // osc/256 + // else if(_SPI_CLOCK_DIVIDER >= 128) { _PBR = 0; _BR = _CSSCK = 6; _DBR = 0; } // osc/128 + // else if(_SPI_CLOCK_DIVIDER >= 64) { _PBR = 0; _BR = _CSSCK = 5; _DBR = 0; } // osc/64 + // else if(_SPI_CLOCK_DIVIDER >= 32) { _PBR = 0; _BR = _CSSCK = 4; _DBR = 0; } // osc/32 + // else if(_SPI_CLOCK_DIVIDER >= 16) { _PBR = 0; _BR = _CSSCK = 3; _DBR = 0; } // osc/16 + // else if(_SPI_CLOCK_DIVIDER >= 8) { _PBR = 0; _BR = _CSSCK = 1; _DBR = 0; } // osc/8 + // else if(_SPI_CLOCK_DIVIDER >= 7) { _PBR = 3; _BR = _CSSCK = 0; _DBR = 1; } // osc/7 + // else if(_SPI_CLOCK_DIVIDER >= 5) { _PBR = 2; _BR = _CSSCK = 0; _DBR = 1; } // osc/5 + // else if(_SPI_CLOCK_DIVIDER >= 4) { _PBR = 0; _BR = _CSSCK = 0; _DBR = 0; } // osc/4 + // else if(_SPI_CLOCK_DIVIDER >= 3) { _PBR = 1; _BR = _CSSCK = 0; _DBR = 1; } // osc/3 + // else { _PBR = 0; _BR = _CSSCK = 0; _DBR = 1; } // osc/2 + + getScalars<_SPI_CLOCK_DIVIDER>(_PBR, _BR, _DBR); + _CSSCK = _BR; + + uint32_t ctar0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK); + uint32_t ctar1 = SPI_CTAR_FMSZ(15) | SPI_CTAR_PBR(_PBR) | SPI_CTAR_BR(_BR) | SPI_CTAR_CSSCK(_CSSCK); + +#if USE_CONT == 1 + ctar0 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL; + ctar1 |= SPI_CTAR_CPHA | SPI_CTAR_CPOL; +#endif 
+ + if(_DBR) { + ctar0 |= SPI_CTAR_DBR; + ctar1 |= SPI_CTAR_DBR; + } + + update_ctar0(ctar0); + update_ctar1(ctar1); + + } + + void init() { + // set the pins to output + FastPin<_DATA_PIN>::setOutput(); + FastPin<_CLOCK_PIN>::setOutput(); + release(); + + // Enable SPI0 clock + uint32_t sim6 = SIM_SCGC6; + if (!(sim6 & SIM_SCGC6_SPI0)) { + //serial_print("init1\n"); + SIM_SCGC6 = sim6 | SIM_SCGC6_SPI0; + SPI0_CTAR0 = SPI_CTAR_FMSZ(7) | SPI_CTAR_PBR(1) | SPI_CTAR_BR(1); + } + + setSPIRate(); + + // Configure SPI as the master and enable + SPI0_MCR |= SPI_MCR_MSTR; // | SPI_MCR_CONT_SCKE); + SPI0_MCR &= ~(SPI_MCR_MDIS | SPI_MCR_HALT); + + enable_pins(); + } + + static void waitFully() __attribute__((always_inline)) { + while( (SPI0_SR & 0xF000) > 0); + while (!(SPI0_SR & SPI_SR_TCF)); + SPI0_SR |= (SPI_SR_TCF | SPI_SR_EOQF); + } + + static bool needwait() __attribute__((always_inline)) { return (SPI0_SR & 0x4000); } + static void wait() __attribute__((always_inline)) { while( (SPI0_SR & 0x4000) ); } + static void wait1() __attribute__((always_inline)) { while( (SPI0_SR & 0xF000) >= 0x2000); } + + enum ECont { CONT, NOCONT }; + enum EWait { PRE, POST, NONE }; + enum ELast { NOTLAST, LAST }; + +#if USE_CONT == 1 + #define CM CONT +#else + #define CM NOCONT +#endif + #define WM PRE + + template<ECont CONT_STATE, EWait WAIT_STATE, ELast LAST_STATE> class Write { + public: + static void writeWord(uint16_t w) __attribute__((always_inline)) { + if(WAIT_STATE == PRE) { wait(); } + SPI0_PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) | + ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) | + SPI_PUSHR_CTAS(1) | (w & 0xFFFF); + if(WAIT_STATE == POST) { wait(); } + } + + static void writeByte(uint8_t b) __attribute__((always_inline)) { + if(WAIT_STATE == PRE) { wait(); } + SPI0_PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) | + ((CONT_STATE == CONT) ? 
SPI_PUSHR_CONT : 0) | + SPI_PUSHR_CTAS(0) | (b & 0xFF); + if(WAIT_STATE == POST) { wait(); } + } + }; + + static void writeWord(uint16_t w) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); } + static void writeWordNoWait(uint16_t w) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(1) | (w & 0xFFFF); } + + static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); } + static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); wait(); } + static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CTAS(0) | (b & 0xFF); } + + static void writeWordCont(uint16_t w) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); } + static void writeWordContNoWait(uint16_t w) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(1) | (w & 0xFFFF); } + + static void writeByteCont(uint8_t b) __attribute__((always_inline)) { wait(); SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); } + static void writeByteContPostWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); wait(); } + static void writeByteContNoWait(uint8_t b) __attribute__((always_inline)) { SPI0_PUSHR = SPI_PUSHR_CONT | SPI_PUSHR_CTAS(0) | (b & 0xFF); } + + // not the most efficient mechanism in the world - but should be enough for sm16716 and friends + template <uint8_t BIT> inline static void writeBit(uint8_t b) { + uint32_t ctar1_save = SPI0_CTAR1; + + // Clear out the FMSZ bits, reset them for 9 bits transferd for the start bit + uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(0); + update_ctar1(ctar1); + + writeWord( (b & (1 << BIT)) != 0); + + update_ctar1(ctar1_save); + } + + void inline select() __attribute__((always_inline)) { if(m_pSelect != NULL) 
{ m_pSelect->select(); } } + void inline release() __attribute__((always_inline)) { if(m_pSelect != NULL) { m_pSelect->release(); } } + + static void writeBytesValueRaw(uint8_t value, int len) { + while(len--) { Write<CM, WM, NOTLAST>::writeByte(value); } + } + + void writeBytesValue(uint8_t value, int len) { + setSPIRate(); + select(); + while(len--) { + writeByte(value); + } + waitFully(); + release(); + } + + // Write a block of n uint8_ts out + template <class D> void writeBytes(register uint8_t *data, int len) { + setSPIRate(); + uint8_t *end = data + len; + select(); + while(data != end) { + writeByte(D::adjust(*data++)); + } + D::postBlock(len); + waitFully(); + release(); + } + + void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); } + + // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template + // parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping + template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + // setSPIRate(); + uint8_t *end = data + len; + select(); + if((SKIP & FLAG_START_BIT) == 0) { + //If no start bit stupiditiy, write out as many 16-bit blocks as we can + uint8_t *first_end = end - (len % (SPI_ADVANCE * 2)); + + while(data != first_end) { + if(WM == NONE) { wait1(); } + Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_B0], scale) << 8 | D::adjust(data[SPI_B1], scale)); + Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_B2], scale) << 8 | D::adjust(data[SPI_ADVANCE + SPI_B0], scale)); + Write<CM, WM, NOTLAST>::writeWord(D::adjust(data[SPI_ADVANCE + SPI_B1], scale) << 8 | D::adjust(data[SPI_ADVANCE + SPI_B2], scale)); + data += (SPI_ADVANCE + SPI_ADVANCE); + } + + if(data != end) { + if(WM == NONE) { wait1(); } + // write out the rest as alternating 16/8-bit blocks (likely to be just one) + Write<CM, WM, 
NOTLAST>::writeWord(D::adjust(data[SPI_B0], scale) << 8 | D::adjust(data[SPI_B1], scale)); + Write<CM, WM, NOTLAST>::writeByte(D::adjust(data[SPI_B2], scale)); + } + + D::postBlock(len); + waitFully(); + } else if(SKIP & FLAG_START_BIT) { + uint32_t ctar1_save = SPI0_CTAR1; + + // Clear out the FMSZ bits, reset them for 9 bits transferd for the start bit + uint32_t ctar1 = (ctar1_save & (~SPI_CTAR_FMSZ(15))) | SPI_CTAR_FMSZ(8); + update_ctar1(ctar1); + + while(data != end) { + writeWord( 0x100 | D::adjust(data[SPI_B0], scale)); + writeByte(D::adjust(data[SPI_B1], scale)); + writeByte(D::adjust(data[SPI_B2], scale)); + data += SPI_ADVANCE; + } + D::postBlock(len); + waitFully(); + + // restore ctar1 + update_ctar1(ctar1_save); + // } else { + // while(data != end) { + // writeByte(D::adjust(data[SPI_B0], scale); + // writeWord(D::adjust(data[SPI_B1], scale) << 8 | D::adjust(data[SPI_B2], scale)); + // data += SPI_ADVANCE; + // } + // waitFully(); + } + release(); + } + + + template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale); + } + template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, D, RGB_ORDER>(data, len, scale); + } + template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale); + } + void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB>(data, len, scale); + } +}; +#endif + +#endif diff --git a/fastspi_avr.h b/fastspi_avr.h new file mode 100644 index 00000000..af116cab --- /dev/null +++ b/fastspi_avr.h @@ -0,0 +1,314 @@ +#ifndef __INC_FASTSPI_AVR_H +#define __INC_FASTSPI_AVR_H + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// 
+// Hardware SPI support using USART registers and friends +// +// TODO: Complete/test implementation - right now this doesn't work +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// uno/mini/duemilanove +#if defined(AVR_HARDWARE_SPI) +#if defined(UBRR0) +template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER> +class AVRUSARTSPIOutput { + Selectable *m_pSelect; + +public: + AVRUSARTSPIOutput() { m_pSelect = NULL; } + AVRUSARTSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } + void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } + + void init() { + UBRR0 = 0; + UCSR0A = 1<<TXC0; + + FastPin<_CLOCK_PIN>::setOutput(); + FastPin<_DATA_PIN>::setOutput(); + + UCSR0C = _BV (UMSEL00) | _BV (UMSEL01); // Master SPI mode + UCSR0B = _BV (TXEN0) | _BV (RXEN0); // transmit enable and receive enable + + // must be done last, see page 206 + UBRR0 = 3; // 2 Mhz clock rate + } + + static void stop() { + // TODO: stop the uart spi output + } + + static void wait() __attribute__((always_inline)) { while(!(UCSR0A & (1<<UDRE0))); } + static void waitFully() __attribute__((always_inline)) { wait(); } + + static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { UDR0 = b;} + static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { UDR0 = b; wait(); } + static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); UDR0 = b; } + + static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); } + + template <uint8_t BIT> inline static void writeBit(uint8_t b) { + if(b && (1 << BIT)) { + FastPin<_DATA_PIN>::hi(); + } else { + FastPin<_DATA_PIN>::lo(); + } + + FastPin<_CLOCK_PIN>::hi(); + FastPin<_CLOCK_PIN>::lo(); + } + + void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<_SELECT_PIN>::hi(); } + void release() { + // wait for all transmissions to finish + while 
((UCSR0A & (1 <<TXC0)) == 0) {} + if(m_pSelect != NULL) { m_pSelect->release(); } // FastPin<_SELECT_PIN>::hi(); + } + + static void writeBytesValueRaw(uint8_t value, int len) { + while(len--) { writeByte(value); } + } + + void writeBytesValue(uint8_t value, int len) { + select(); + while(len--) { + writeByte(value); + } + release(); + } + + // Write a block of n uint8_ts out + template <class D> void writeBytes(register uint8_t *data, int len) { + uint8_t *end = data + len; + select(); + while(data != end) { +#if defined(__MK20DX128__) + writeByte(D::adjust(*data++)); +#else + // a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM) + writeByte(D::adjust(*data++)); delaycycles<3>(); +#endif + } + D::postBlock(len); + release(); + } + + void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); } + + // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template + // parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping + template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + uint8_t *end = data + len; + select(); + while(data != end) { + writeByte(D::adjust(data[SPI_B0], scale)); + writeByte(D::adjust(data[SPI_B1], scale)); + writeByte(D::adjust(data[SPI_B2], scale)); + data += SPI_ADVANCE; + } + D::postBlock(len); + release(); + } + + template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale); + } + template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, D, RGB_ORDER>(data, len, scale); + } + template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB_ORDER>(data, 
len, scale); + } + void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB>(data, len, scale); + } + +}; + +#endif + +#if defined(SPSR) + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Hardware SPI support using SPDR registers and friends +// +// Technically speaking, this uses the AVR SPI registers. This will work on the Teensy 3.0 because Paul made a set of compatability +// classes that map the AVR SPI registers to ARM's, however this caps the performance of output. +// +// TODO: implement ARMHardwareSPIOutput +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER> +class AVRHardwareSPIOutput { + Selectable *m_pSelect; + bool mWait; +public: + AVRHardwareSPIOutput() { m_pSelect = NULL; mWait = false;} + AVRHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } + void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } + + void setSPIRate() { + SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); // clear out the prescalar bits + + bool b2x = false; + + if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); } + else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);} + else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true; } + else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); } + else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; } + else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ } + else { b2x = true; } + + if(b2x) { SPSR |= (1<<SPI2X); } + else { SPSR &= ~ (1<<SPI2X); } + } + + void init() { + volatile uint8_t clr; + + // set the pins to output + FastPin<_DATA_PIN>::setOutput(); + FastPin<_CLOCK_PIN>::setOutput(); +#ifdef SPI_SELECT + // Make sure the slave select line is set to output, or arduino 
will block us + FastPin<SPI_SELECT>::setOutput(); + FastPin<SPI_SELECT>::lo(); +#endif + release(); + + SPCR |= ((1<<SPE) | (1<<MSTR) ); // enable SPI as master + SPCR &= ~ ( (1<<SPR1) | (1<<SPR0) ); // clear out the prescalar bits + + clr = SPSR; // clear SPI status register + clr = SPDR; // clear SPI data register + clr; + + bool b2x = false; + + if(_SPI_CLOCK_DIVIDER >= 128) { SPCR |= (1<<SPR1); SPCR |= (1<<SPR0); } + else if(_SPI_CLOCK_DIVIDER >= 64) { SPCR |= (1<<SPR1);} + else if(_SPI_CLOCK_DIVIDER >= 32) { SPCR |= (1<<SPR1); b2x = true; } + else if(_SPI_CLOCK_DIVIDER >= 16) { SPCR |= (1<<SPR0); } + else if(_SPI_CLOCK_DIVIDER >= 8) { SPCR |= (1<<SPR0); b2x = true; } + else if(_SPI_CLOCK_DIVIDER >= 4) { /* do nothing - default rate */ } + else { b2x = true; } + + if(b2x) { SPSR |= (1<<SPI2X); } + else { SPSR &= ~ (1<<SPI2X); } + + SPDR=0; + shouldWait(false); + } + + static bool shouldWait(bool wait = false) __attribute__((always_inline)) { + static bool sWait=false; + if(sWait) { sWait = wait; return true; } else { sWait = wait; return false; } + // return true; + } + static void wait() __attribute__((always_inline)) { if(shouldWait()) { while(!(SPSR & (1<<SPIF))); } } + static void waitFully() __attribute__((always_inline)) { wait(); } + + static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); SPDR=b; shouldWait(true); } + static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); wait(); } + static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { SPDR=b; shouldWait(true); } + + template <uint8_t BIT> inline static void writeBit(uint8_t b) { + SPCR &= ~(1 << SPE); + if(b & (1 << BIT)) { + FastPin<_DATA_PIN>::hi(); + } else { + FastPin<_DATA_PIN>::lo(); + } + + FastPin<_CLOCK_PIN>::hi(); + FastPin<_CLOCK_PIN>::lo(); + SPCR |= 1 << SPE; + shouldWait(false); + } + + void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<_SELECT_PIN>::hi(); } + void release() { 
if(m_pSelect != NULL) { m_pSelect->release(); } } // FastPin<_SELECT_PIN>::lo(); } + + static void writeBytesValueRaw(uint8_t value, int len) { + while(len--) { writeByte(value); } + } + + void writeBytesValue(uint8_t value, int len) { + //setSPIRate(); + select(); + while(len--) { + writeByte(value); + } + release(); + } + + // Write a block of n uint8_ts out + template <class D> void writeBytes(register uint8_t *data, int len) { + //setSPIRate(); + uint8_t *end = data + len; + select(); + while(data != end) { + // a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM) + writeByte(D::adjust(*data++)); delaycycles<3>(); + } + release(); + } + + void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); } + + // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template + // parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping + template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + //setSPIRate(); + uint8_t *end = data + len; + select(); + while(data != end) { + if(SKIP & FLAG_START_BIT) { + writeBit<0>(1); + } + // a slight touch of delay here helps optimize the timing of the status register check loop (not used on ARM) + if(false && _SPI_CLOCK_DIVIDER == 0) { + writeByteNoWait(D::adjust(data[SPI_B0], scale)); delaycycles<13>(); + writeByteNoWait(D::adjust(data[SPI_B1], scale)); delaycycles<13>(); + writeByteNoWait(D::adjust(data[SPI_B2], scale)); delaycycles<9>(); + } else if(SKIP & FLAG_START_BIT) { + writeBytePostWait(D::adjust(data[SPI_B0], scale)); + writeBytePostWait(D::adjust(data[SPI_B1], scale)); + writeBytePostWait(D::adjust(data[SPI_B2], scale)); + } else { + writeByte(D::adjust(data[SPI_B0], scale)); + writeByte(D::adjust(data[SPI_B1], scale)); + writeByte(D::adjust(data[SPI_B2], scale)); + } + + data += 
SPI_ADVANCE; + } + D::postBlock(len); + release(); + } + + template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale); + } + template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, D, RGB_ORDER>(data, len, scale); + } + template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale); + } + void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB>(data, len, scale); + } + +}; +#endif + +#else +// #define FORCE_SOFTWARE_SPI +#endif + +#endif
\ No newline at end of file diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h new file mode 100644 index 00000000..f9c1a218 --- /dev/null +++ b/fastspi_bitbang.h @@ -0,0 +1,368 @@ +#ifndef __INC_FASTSPI_BITBANG_H +#define __INC_FASTSPI_BITBANG_H + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Software SPI (aka bit-banging) support - with aggressive optimizations for when the clock and data pin are on the same port +// +// TODO: Replace the select pin definition with a set of pins, to allow using mux hardware for routing in the future +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SPI_SPEED> +class AVRSoftwareSPIOutput { + // The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these + // are pointers to 8 bit values, while on arm they are 32 bit + typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t; + typedef typename FastPin<CLOCK_PIN>::port_ptr_t clock_ptr_t; + + // The data type for what's at a pin's port - typedef'd here from the Pin definition because on avr the ports + // are 8 bits wide while on arm they are 32. + typedef typename FastPin<DATA_PIN>::port_t data_t; + typedef typename FastPin<CLOCK_PIN>::port_t clock_t; + Selectable *m_pSelect; + +public: + AVRSoftwareSPIOutput() { m_pSelect = NULL; } + AVRSoftwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; } + void setSelect(Selectable *pSelect) { m_pSelect = pSelect; } + + void init() { + // set the pins to output and make sure the select is released (which apparently means hi? This is a bit + // confusing to me) + FastPin<DATA_PIN>::setOutput(); + FastPin<CLOCK_PIN>::setOutput(); + release(); + } + + // stop the SPI output. 
Pretty much a NOP with software, as there's no registers to kick + static void stop() { } + + // wait until the SPI subsystem is ready for more data to write. A NOP when bitbanging + static void wait() __attribute__((always_inline)) { } + static void waitFully() __attribute__((always_inline)) { wait(); } + + static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); } + static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); wait(); } + + static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); } + + // naive writeByte implelentation, simply calls writeBit on the 8 bits in the byte. + static void writeByte(uint8_t b) __attribute__((always_inline)) { + writeBit<7>(b); + writeBit<6>(b); + writeBit<5>(b); + writeBit<4>(b); + writeBit<3>(b); + writeBit<2>(b); + writeBit<1>(b); + writeBit<0>(b); + } + +private: + // writeByte implementation with data/clock registers passed in. + static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) __attribute__((always_inline)) { + writeBit<7>(b, clockpin, datapin); + writeBit<6>(b, clockpin, datapin); + writeBit<5>(b, clockpin, datapin); + writeBit<4>(b, clockpin, datapin); + writeBit<3>(b, clockpin, datapin); + writeBit<2>(b, clockpin, datapin); + writeBit<1>(b, clockpin, datapin); + writeBit<0>(b, clockpin, datapin); + } + + // writeByte implementation with the data register passed in and prebaked values for data hi w/clock hi and + // low and data lo w/clock hi and lo. This is to be used when clock and data are on the same GPIO register, + // can get close to getting a bit out the door in 2 clock cycles! 
+ static void writeByte(uint8_t b, data_ptr_t datapin, + data_t hival, data_t loval, + clock_t hiclock, clock_t loclock) __attribute__((always_inline, hot)) { + writeBit<7>(b, datapin, hival, loval, hiclock, loclock); + writeBit<6>(b, datapin, hival, loval, hiclock, loclock); + writeBit<5>(b, datapin, hival, loval, hiclock, loclock); + writeBit<4>(b, datapin, hival, loval, hiclock, loclock); + writeBit<3>(b, datapin, hival, loval, hiclock, loclock); + writeBit<2>(b, datapin, hival, loval, hiclock, loclock); + writeBit<1>(b, datapin, hival, loval, hiclock, loclock); + writeBit<0>(b, datapin, hival, loval, hiclock, loclock); + } + + // writeByte implementation with not just registers passed in, but pre-baked values for said registers for + // data hi/lo and clock hi/lo values. Note: weird things will happen if this method is called in cases where + // the data and clock pins are on the same port! Don't do that! + static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin, + data_t hival, data_t loval, + clock_t hiclock, clock_t loclock) __attribute__((always_inline)) { + writeBit<7>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<6>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<5>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<4>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<3>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<2>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<1>(b, clockpin, datapin, hival, loval, hiclock, loclock); + writeBit<0>(b, clockpin, datapin, hival, loval, hiclock, loclock); + } + +public: + #define SPI_DELAY delaycycles< (SPI_SPEED-2) / 2>(); + + // write the BIT'th bit out via spi, setting the data pin then strobing the clcok + template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) { + if(b & (1 << BIT)) { + FastPin<DATA_PIN>::hi(); + if(SPI_SPEED < 3) { + 
FastPin<CLOCK_PIN>::strobe(); + } else { + FastPin<CLOCK_PIN>::hi(); SPI_DELAY; + FastPin<CLOCK_PIN>::lo(); SPI_DELAY; + } + } else { + FastPin<DATA_PIN>::lo(); + if(SPI_SPEED < 3) { + FastPin<CLOCK_PIN>::strobe(); + } else { + FastPin<CLOCK_PIN>::hi(); SPI_DELAY; + FastPin<CLOCK_PIN>::lo(); SPI_DELAY; + } + } + } + +private: + // write the BIT'th bit out via spi, setting the data pin then strobing the clock, using the passed in pin registers to accelerate access if needed + template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) { + if(b & (1 << BIT)) { + FastPin<DATA_PIN>::hi(datapin); + FastPin<CLOCK_PIN>::hi(clockpin); SPI_DELAY; + FastPin<CLOCK_PIN>::lo(clockpin); SPI_DELAY; + } else { + FastPin<DATA_PIN>::lo(datapin); + FastPin<CLOCK_PIN>::hi(clockpin); SPI_DELAY; + FastPin<CLOCK_PIN>::lo(clockpin); SPI_DELAY; + } + + } + + // the version of write to use when clock and data are on separate pins with precomputed values for setting + // the clock and data pins + template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin, + data_t hival, data_t loval, clock_t hiclock, clock_t loclock) { + // // only need to explicitly set clock hi if clock and data are on different ports + if(b & (1 << BIT)) { + FastPin<DATA_PIN>::fastset(datapin, hival); + FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); SPI_DELAY; + FastPin<CLOCK_PIN>::fastset(clockpin, loclock); SPI_DELAY; + } else { + // NOP; + FastPin<DATA_PIN>::fastset(datapin, loval); + FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); SPI_DELAY; + FastPin<CLOCK_PIN>::fastset(clockpin, loclock); SPI_DELAY; + } + } + + // the version of write to use when clock and data are on the same port with precomputed values for the various + // combinations + template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, data_ptr_t clockdatapin, + data_t 
datahiclockhi, data_t dataloclockhi, + data_t datahiclocklo, data_t dataloclocklo) { +#if 0 + writeBit<BIT>(b); +#else + if(b & (1 << BIT)) { + FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); SPI_DELAY; + FastPin<DATA_PIN>::fastset(clockdatapin, datahiclockhi); SPI_DELAY; + FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); SPI_DELAY; + } else { + // NOP; + FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); SPI_DELAY; + FastPin<DATA_PIN>::fastset(clockdatapin, dataloclockhi); SPI_DELAY; + FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); SPI_DELAY; + } +#endif + } +public: + + // select the SPI output (TODO: research whether this really means hi or lo. Alt TODO: move select responsibility out of the SPI classes + // entirely, make it up to the caller to remember to lock/select the line?) + void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<SELECT_PIN>::hi(); } + + // release the SPI line + void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } // FastPin<SELECT_PIN>::lo(); } + + // Write out len bytes of the given value out over SPI. Useful for quickly flushing, say, a line of 0's down the line. + void writeBytesValue(uint8_t value, int len) { + select(); + writeBytesValueRaw(value, len); + release(); + } + + static void writeBytesValueRaw(uint8_t value, int len) { +#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS + // TODO: Weird things may happen if software bitbanging SPI output and other pins on the output reigsters are being twiddled. 
Need + // to allow specifying whether or not exclusive i/o access is allowed during this process, and if i/o access is not allowed fall + // back to the degenerative code below + while(len--) { + writeByte(value); + } +#else + register data_ptr_t datapin = FastPin<DATA_PIN>::port(); + + if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) { + // If data and clock are on different ports, then writing a bit will consist of writing the value foor + // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line + register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port(); + register data_t datahi = FastPin<DATA_PIN>::hival(); + register data_t datalo = FastPin<DATA_PIN>::loval(); + register clock_t clockhi = FastPin<CLOCK_PIN>::hival(); + register clock_t clocklo = FastPin<CLOCK_PIN>::loval(); + while(len--) { + writeByte(value, clockpin, datapin, datahi, datalo, clockhi, clocklo); + } + + } else { + // If data and clock are on the same port then we can combine setting the data and clock pins + register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask(); + register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask(); + + while(len--) { + writeByte(value, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + } + } +#endif + } + + // write a block of len uint8_ts out. Need to type this better so that explicit casts into the call aren't required. + // note that this template version takes a class parameter for a per-byte modifier to the data. 
+ template <class D> void writeBytes(register uint8_t *data, int len) { + select(); +#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS + uint8_t *end = data + len; + while(data != end) { + writeByte(D::adjust(*data++)); + } +#else + register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port(); + register data_ptr_t datapin = FastPin<DATA_PIN>::port(); + + if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) { + // If data and clock are on different ports, then writing a bit will consist of writing the value foor + // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line + register data_t datahi = FastPin<DATA_PIN>::hival(); + register data_t datalo = FastPin<DATA_PIN>::loval(); + register clock_t clockhi = FastPin<CLOCK_PIN>::hival(); + register clock_t clocklo = FastPin<CLOCK_PIN>::loval(); + uint8_t *end = data + len; + + while(data != end) { + writeByte(D::adjust(*data++), clockpin, datapin, datahi, datalo, clockhi, clocklo); + } + + } else { + // FastPin<CLOCK_PIN>::hi(); + // If data and clock are on the same port then we can combine setting the data and clock pins + register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask(); + register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask(); + + uint8_t *end = data + len; + + while(data != end) { + writeByte(D::adjust(*data++), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + } + // FastPin<CLOCK_PIN>::lo(); + } +#endif + D::postBlock(len); + release(); + } + + // default version of writing a block of data out to the SPI port, with no data modifications being made + void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); } + + + // write a block of uint8_ts out in groups of 
three. len is the total number of uint8_ts to write out. The template + // parameters indicate how many uint8_ts to skip at the beginning of each grouping, as well as a class specifying a per + // byte of data modification to be made. (See DATA_NOP above) + template <uint8_t SKIP, class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + select(); + +#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS + // If interrupts or other things may be generating output while we're working on things, then we need + // to use this block + uint8_t *end = data + len; + while(data != end) { + if(SKIP & FLAG_START_BIT) { + writeBit<0>(1); + } + writeByte(D::adjust(data[SPI_B0], scale)); + writeByte(D::adjust(data[SPI_B1], scale)); + writeByte(D::adjust(data[SPI_B2], scale)); + data += SPI_ADVANCE; + } +#else + // If we can guaruntee that no one else will be writing data while we are running (namely, changing the values of the PORT/PDOR pins) + // then we can use a bunch of optimizations in here + register data_ptr_t datapin = FastPin<DATA_PIN>::port(); + + if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) { + register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port(); + // If data and clock are on different ports, then writing a bit will consist of writing the value foor + // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line + register data_t datahi = FastPin<DATA_PIN>::hival(); + register data_t datalo = FastPin<DATA_PIN>::loval(); + register clock_t clockhi = FastPin<CLOCK_PIN>::hival(); + register clock_t clocklo = FastPin<CLOCK_PIN>::loval(); + uint8_t *end = data + len; + + while(data != end) { + if(SKIP & FLAG_START_BIT) { + writeBit<0>(1, clockpin, datapin, datahi, datalo, clockhi, clocklo); + } + writeByte(D::adjust(data[SPI_B0], scale), clockpin, datapin, datahi, datalo, clockhi, clocklo); + writeByte(D::adjust(data[SPI_B1], scale), clockpin, datapin, datahi, datalo, 
clockhi, clocklo); + writeByte(D::adjust(data[SPI_B2], scale), clockpin, datapin, datahi, datalo, clockhi, clocklo); + data += SPI_ADVANCE; + } + + } else { + // If data and clock are on the same port then we can combine setting the data and clock pins + register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask(); + register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask(); + register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask(); + + uint8_t *end = data + len; + + while(data != end) { + if(SKIP & FLAG_START_BIT) { + writeBit<0>(1, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + } + writeByte(D::adjust(data[SPI_B0], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + writeByte(D::adjust(data[SPI_B1], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + writeByte(D::adjust(data[SPI_B2], scale), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo); + data += SPI_ADVANCE; + } + } +#endif + D::postBlock(len); + release(); + } + + template <uint8_t SKIP, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<SKIP, DATA_NOP, RGB_ORDER>(data, len, scale); + } + template <class D, EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, D, RGB_ORDER>(data, len, scale); + } + template <EOrder RGB_ORDER> void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB_ORDER>(data, len, scale); + } + void writeBytes3(register uint8_t *data, int len, register uint8_t scale) { + writeBytes3<0, DATA_NOP, RGB>(data, len, scale); + } +}; + +#endif diff --git a/fastspi_dma.h b/fastspi_dma.h new file mode 100644 index 00000000..e69de29b --- 
/dev/null +++ b/fastspi_dma.h diff --git a/hsv2rgb.cpp b/hsv2rgb.cpp new file mode 100644 index 00000000..b0abdab9 --- /dev/null +++ b/hsv2rgb.cpp @@ -0,0 +1,495 @@ +#include <stdint.h> + +#include "lib8tion.h" +#include "hsv2rgb.h" + +// Functions to convert HSV colors to RGB colors. +// +// The basically fall into two groups: spectra, and rainbows. +// Spectra and rainbows are not the same thing. Wikipedia has a good +// illustration here +// http://upload.wikimedia.org/wikipedia/commons/f/f6/Prism_compare_rainbow_01.png +// from this article +// http://en.wikipedia.org/wiki/Rainbow#Number_of_colours_in_spectrum_or_rainbow +// that shows a 'spectrum' and a 'rainbow' side by side. Among other +// differences, you'll see that a 'rainbow' has much more yellow than +// a plain spectrum. "Classic" LED color washes are spectrum based, and +// usually show very little yellow. +// +// Wikipedia's page on HSV color space, with pseudocode for conversion +// to RGB color space +// http://en.wikipedia.org/wiki/HSL_and_HSV +// Note that their conversion algorithm, which is (naturally) very popular +// is in the "maximum brightness at any given hue" style, vs the "uniform +// brightness for all hues" style. +// +// You can't have both; either purple is the same brightness as red, e.g +// red = #FF0000 and purple = #800080 -> same "total light" output +// OR purple is 'as bright as it can be', e.g. +// red = #FF0000 and purple = #FF00FF -> purple is much brighter than red. +// The colorspace conversions here try to keep the apparent brightness +// constant even as the hue varies. +// +// Adafruit's "Wheel" function, discussed here +// http://forums.adafruit.com/viewtopic.php?f=47&t=22483 +// is also of the "constant apparent brightness" variety. +// +// TODO: provide the 'maximum brightness no matter what' variation. 
+// +// See also some good, clear Arduino C code from Kasper Kamperman +// http://www.kasperkamperman.com/blog/arduino/arduino-programming-hsb-to-rgb/ +// which in turn was was based on Windows C code from "nico80" +// http://www.codeproject.com/Articles/9207/An-HSB-RGBA-colour-picker + + + + + +void hsv2rgb_raw_C (const struct CHSV & hsv, struct CRGB & rgb); +void hsv2rgb_raw_avr(const struct CHSV & hsv, struct CRGB & rgb); + +#if defined(__AVR__) && !defined( LIB8_ATTINY ) +void hsv2rgb_raw(const struct CHSV & hsv, struct CRGB & rgb) +{ + hsv2rgb_raw_avr( hsv, rgb); +} +#else +void hsv2rgb_raw(const struct CHSV & hsv, struct CRGB & rgb) +{ + hsv2rgb_raw_C( hsv, rgb); +} +#endif + + + +#define APPLY_DIMMING(X) (X) +#define HSV_SECTION_6 (0x20) +#define HSV_SECTION_3 (0x40) + +void hsv2rgb_raw_C (const struct CHSV & hsv, struct CRGB & rgb) +{ + // Convert hue, saturation and brightness ( HSV/HSB ) to RGB + // "Dimming" is used on saturation and brightness to make + // the output more visually linear. + + // Apply dimming curves + uint8_t value = APPLY_DIMMING( hsv.val); + uint8_t saturation = hsv.sat; + + // The brightness floor is minimum number that all of + // R, G, and B will be set to. + uint8_t invsat = APPLY_DIMMING( 255 - saturation); + uint8_t brightness_floor = (value * invsat) / 256; + + // The color amplitude is the maximum amount of R, G, and B + // that will be added on top of the brightness_floor to + // create the specific hue desired. 
+ uint8_t color_amplitude = value - brightness_floor; + + // Figure out which section of the hue wheel we're in, + // and how far offset we are withing that section + uint8_t section = hsv.hue / HSV_SECTION_3; // 0..2 + uint8_t offset = hsv.hue % HSV_SECTION_3; // 0..63 + + uint8_t rampup = offset; // 0..63 + uint8_t rampdown = (HSV_SECTION_3 - 1) - offset; // 63..0 + + // We now scale rampup and rampdown to a 0-255 range -- at least + // in theory, but here's where architecture-specific decsions + // come in to play: + // To scale them up to 0-255, we'd want to multiply by 4. + // But in the very next step, we multiply the ramps by other + // values and then divide the resulting product by 256. + // So which is faster? + // ((ramp * 4) * othervalue) / 256 + // or + // ((ramp ) * othervalue) / 64 + // It depends on your processor architecture. + // On 8-bit AVR, the "/ 256" is just a one-cycle register move, + // but the "/ 64" might be a multicycle shift process. So on AVR + // it's faster do multiply the ramp values by four, and then + // divide by 256. + // On ARM, the "/ 256" and "/ 64" are one cycle each, so it's + // faster to NOT multiply the ramp values by four, and just to + // divide the resulting product by 64 (instead of 256). + // Moral of the story: trust your profiler, not your insticts. 
+ + // Since there's an AVR assembly version elsewhere, we'll + // assume what we're on an architecture where any number of + // bit shifts has roughly the same cost, and we'll remove the + // redundant math at the source level: + + // // scale up to 255 range + // //rampup *= 4; // 0..252 + // //rampdown *= 4; // 0..252 + + // compute color-amplitude-scaled-down versions of rampup and rampdown + uint8_t rampup_amp_adj = (rampup * color_amplitude) / (256 / 4); + uint8_t rampdown_amp_adj = (rampdown * color_amplitude) / (256 / 4); + + // add brightness_floor offset to everything + uint8_t rampup_adj_with_floor = rampup_amp_adj + brightness_floor; + uint8_t rampdown_adj_with_floor = rampdown_amp_adj + brightness_floor; + + + if( section ) { + if( section == 1) { + // section 1: 0x40..0x7F + rgb.r = brightness_floor; + rgb.g = rampdown_adj_with_floor; + rgb.b = rampup_adj_with_floor; + } else { + // section 2; 0x80..0xBF + rgb.r = rampup_adj_with_floor; + rgb.g = brightness_floor; + rgb.b = rampdown_adj_with_floor; + } + } else { + // section 0: 0x00..0x3F + rgb.r = rampdown_adj_with_floor; + rgb.g = rampup_adj_with_floor; + rgb.b = brightness_floor; + } +} + + + +#if defined(__AVR__) && !defined( LIB8_ATTINY ) +void hsv2rgb_raw_avr(const struct CHSV & hsv, struct CRGB & rgb) +{ + uint8_t hue, saturation, value; + + hue = hsv.hue; + saturation = hsv.sat; + value = hsv.val; + + // Saturation more useful the other way around + saturation = 255 - saturation; + uint8_t invsat = APPLY_DIMMING( saturation ); + + // Apply dimming curves + value = APPLY_DIMMING( value ); + + // The brightness floor is minimum number that all of + // R, G, and B will be set to, which is value * invsat + uint8_t brightness_floor; + + asm volatile( + "mul %[value], %[invsat] \n" + "mov %[brightness_floor], r1 \n" + : [brightness_floor] "=r" (brightness_floor) + : [value] "r" (value), + [invsat] "r" (invsat) + : "r0", "r1" + ); + + // The color amplitude is the maximum amount of R, G, and B + // 
that will be added on top of the brightness_floor to + // create the specific hue desired. + uint8_t color_amplitude = value - brightness_floor; + + // Figure how far we are offset into the section of the + // color wheel that we're in + uint8_t offset = hsv.hue & (HSV_SECTION_3 - 1); // 0..63 + uint8_t rampup = offset * 4; // 0..252 + + + // compute color-amplitude-scaled-down versions of rampup and rampdown + uint8_t rampup_amp_adj; + uint8_t rampdown_amp_adj; + + asm volatile( + "mul %[rampup], %[color_amplitude] \n" + "mov %[rampup_amp_adj], r1 \n" + "com %[rampup] \n" + "mul %[rampup], %[color_amplitude] \n" + "mov %[rampdown_amp_adj], r1 \n" + : [rampup_amp_adj] "=&r" (rampup_amp_adj), + [rampdown_amp_adj] "=&r" (rampdown_amp_adj), + [rampup] "+r" (rampup) + : [color_amplitude] "r" (color_amplitude) + : "r0", "r1" + ); + + + // add brightness_floor offset to everything + uint8_t rampup_adj_with_floor = rampup_amp_adj + brightness_floor; + uint8_t rampdown_adj_with_floor = rampdown_amp_adj + brightness_floor; + + + // keep gcc from using "X" as the index register for storing + // results back in the return structure. AVR's X register can't + // do "std X+q, rnn", but the Y and Z registers can. + // if the pointer to 'rgb' is in X, gcc will add all kinds of crazy + // extra instructions. Simply killing X here seems to help it + // try Y or Z first. 
+ asm volatile( "" : : : "r26", "r27" ); + + + if( hue & 0x80 ) { + // section 2: 0x80..0xBF + rgb.r = rampup_adj_with_floor; + rgb.g = brightness_floor; + rgb.b = rampdown_adj_with_floor; + } else { + if( hue & 0x40) { + // section 1: 0x40..0x7F + rgb.r = brightness_floor; + rgb.g = rampdown_adj_with_floor; + rgb.b = rampup_adj_with_floor; + } else { + // section 0: 0x00..0x3F + rgb.r = rampdown_adj_with_floor; + rgb.g = rampup_adj_with_floor; + rgb.b = brightness_floor; + } + } + + cleanup_R1(); +} +// End of AVR asm implementation + +#endif + +void hsv2rgb_spectrum( const CHSV& hsv, CRGB& rgb) +{ + CHSV hsv2(hsv); + hsv2.hue = scale8( hsv2.hue, 192); + hsv2rgb_raw(hsv2, rgb); +} + + +// Sometimes the compiler will do clever things to reduce +// code size that result in a net slowdown, if it thinks that +// a variable is not used in a certain location. +// This macro does its best to convince the compiler that +// the variable is used in this location, to help control +// code motion and de-duplication that would result in a slowdown. +#define FORCE_REFERENCE(var) asm volatile( "" : : "r" (var) ) + + +#define K255 255 +#define K171 171 +#define K85 85 + +void hsv2rgb_rainbow( const CHSV& hsv, CRGB& rgb) +{ + // Yellow has a higher inherent brightness than + // any other color; 'pure' yellow is perceived to + // be 93% as bright as white. In order to make + // yellow appear the correct relative brightness, + // it has to be rendered brighter than all other + // colors. + // Level Y1 is a moderate boost, the default. + // Level Y2 is a strong boost. + const uint8_t Y1 = 1; + const uint8_t Y2 = 0; + + // G2: Whether to divide all greens by two. + // Depends GREATLY on your particular LEDs + const uint8_t G2 = 0; + + // Gscale: what to scale green down by. 
+ // Depends GREATLY on your particular LEDs + const uint8_t Gscale = 0; + + + uint8_t hue = hsv.hue; + uint8_t sat = hsv.sat; + uint8_t val = hsv.val; + + uint8_t offset = hue & 0x1F; // 0..31 + + // offset8 = offset * 8 + uint8_t offset8 = offset; + { + offset8 <<= 1; + asm volatile(""); + offset8 <<= 1; + asm volatile(""); + offset8 <<= 1; + } + + uint8_t third = scale8( offset8, (256 / 3)); + + uint8_t r, g, b; + + if( ! (hue & 0x80) ) { + // 0XX + if( ! (hue & 0x40) ) { + // 00X + //section 0-1 + if( ! (hue & 0x20) ) { + // 000 + //case 0: // R -> O + r = K255 - third; + g = third; + b = 0; + FORCE_REFERENCE(b); + } else { + // 001 + //case 1: // O -> Y + if( Y1 ) { + r = K171; + g = K85 + third ; + b = 0; + FORCE_REFERENCE(b); + } + if( Y2 ) { + r = K171 + third; + uint8_t twothirds = (third << 1); + g = K85 + twothirds; + b = 0; + FORCE_REFERENCE(b); + } + } + } else { + //01X + // section 2-3 + if( ! (hue & 0x20) ) { + // 010 + //case 2: // Y -> G + if( Y1 ) { + uint8_t twothirds = (third << 1); + r = K171 - twothirds; + g = K171 + third; + b = 0; + FORCE_REFERENCE(b); + } + if( Y2 ) { + r = K255 - offset8; + g = K255; + b = 0; + FORCE_REFERENCE(b); + } + } else { + // 011 + // case 3: // G -> A + r = 0; + FORCE_REFERENCE(r); + g = K255 - third; + b = third; + } + } + } else { + // section 4-7 + // 1XX + if( ! (hue & 0x40) ) { + // 10X + if( ! ( hue & 0x20) ) { + // 100 + //case 4: // A -> B + r = 0; + FORCE_REFERENCE(r); + uint8_t twothirds = (third << 1); + g = K171 - twothirds; + b = K85 + twothirds; + + } else { + // 101 + //case 5: // B -> P + r = third; + g = 0; + FORCE_REFERENCE(g); + b = K255 - third; + + } + } else { + if( ! 
(hue & 0x20) ) { + // 110 + //case 6: // P -- K + r = K85 + third; + g = 0; + FORCE_REFERENCE(g); + b = K171 - third; + + } else { + // 111 + //case 7: // K -> R + r = K171 + third; + g = 0; + FORCE_REFERENCE(g); + b = K85 - third; + + } + } + } + + // This is one of the good places to scale the green down, + // although the client can scale green down as well. + if( G2 ) g = g >> 1; + if( Gscale ) g = scale8_video_LEAVING_R1_DIRTY( g, Gscale); + + // Scale down colors if we're desaturated at all + // and add the brightness_floor to r, g, and b. + if( sat != 255 ) { + + nscale8x3_video( r, g, b, sat); + + uint8_t desat = 255 - sat; + desat = scale8( desat, desat); + + uint8_t brightness_floor = desat; + r += brightness_floor; + g += brightness_floor; + b += brightness_floor; + } + + // Now scale everything down if we're at value < 255. + if( val != 255 ) { + + val = scale8_video_LEAVING_R1_DIRTY( val, val); + nscale8x3_video( r, g, b, val); + } + + // Here we have the old AVR "missing std X+n" problem again + // It turns out that fixing it winds up costing more than + // not fixing it. + // To paraphrase Dr Bronner, profile! profile! profile! 
+ //asm volatile( "" : : : "r26", "r27" ); + //asm volatile (" movw r30, r26 \n" : : : "r30", "r31"); + rgb.r = r; + rgb.g = g; + rgb.b = b; +} + + +void hsv2rgb_raw(const struct CHSV * phsv, struct CRGB * prgb, int numLeds) { + for(int i = 0; i < numLeds; i++) { + hsv2rgb_raw(phsv[i], prgb[i]); + } +} + +void hsv2rgb_rainbow( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) { + for(int i = 0; i < numLeds; i++) { + hsv2rgb_rainbow(phsv[i], prgb[i]); + } +} + +void hsv2rgb_spectrum( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) { + for(int i = 0; i < numLeds; i++) { + hsv2rgb_spectrum(phsv[i], prgb[i]); + } +} + +void fill_solid( struct CRGB * pFirstLED, int numToFill, + const struct CRGB& color) +{ + for( int i = 0; i < numToFill; i++) { + pFirstLED[i] = color; + } +} + +void fill_rainbow( struct CRGB * pFirstLED, int numToFill, + uint8_t initialhue, + uint8_t deltahue ) +{ + CHSV hsv; + hsv.hue = initialhue; + hsv.val = 255; + hsv.sat = 255; + for( int i = 0; i < numToFill; i++) { + hsv2rgb_rainbow( hsv, pFirstLED[i]); + hsv.hue += deltahue; + } +} diff --git a/hsv2rgb.h b/hsv2rgb.h new file mode 100644 index 00000000..d30e9aef --- /dev/null +++ b/hsv2rgb.h @@ -0,0 +1,59 @@ +#ifndef __INC_HSV2RGB_H +#define __INC_HSV2RGB_H + +#include "pixeltypes.h" + + +// hsv2rgb_rainbow - convert a hue, saturation, and value to RGB +// using a visually balanced rainbow (vs a straight +// mathematical spectrum). +// This 'rainbow' yields better yellow and orange +// than a straight 'spectrum'. +// +// NOTE: here hue is 0-255, not just 0-191 + +void hsv2rgb_rainbow( const struct CHSV& hsv, struct CRGB& rgb); +void hsv2rgb_rainbow( const struct CHSV* phsv, struct CRGB * prgb, int numLeds); +#define HUE_MAX_RAINBOW 255 + + +// hsv2rgb_spectrum - convert a hue, saturation, and value to RGB +// using a mathematically straight spectrum (vs +// a visually balanced rainbow). 
+// This 'spectrum' will have more green & blue +// than a 'rainbow', and less yellow and orange. +// +// NOTE: here hue is 0-255, not just 0-191 + +void hsv2rgb_spectrum( const struct CHSV& hsv, struct CRGB& rgb); +void hsv2rgb_spectrum( const struct CHSV* phsv, struct CRGB * prgb, int numLeds); +#define HUE_MAX_SPECTRUM 255 + + +// hsv2rgb_raw - convert hue, saturation, and value to RGB. +// This 'spectrum' conversion will be more green & blue +// than a real 'rainbow', and the hue is specified just +// in the range 0-191. Together, these result in a +// slightly faster conversion speed, at the expense of +// color balance. +// +// NOTE: Hue is 0-191 only! +// Saturation & value are 0-255 each. +// + +void hsv2rgb_raw(const struct CHSV& hsv, struct CRGB & rgb); +void hsv2rgb_raw(const struct CHSV* phsv, struct CRGB * prgb, int numLeds); +#define HUE_MAX 191 + + +// fill_solid - fill a range of LEDs with a solid color +void fill_solid( struct CRGB * pFirstLED, int numToFill, + const struct CRGB& color); + +// fill_rainbow - fill a range of LEDs with a rainbow of colors, at +// full saturation and full value (brightness) +void fill_rainbow( struct CRGB * pFirstLED, int numToFill, + uint8_t initialhue, + uint8_t deltahue = 5); + +#endif diff --git a/lib8tion.cpp b/lib8tion.cpp new file mode 100644 index 00000000..224d44f2 --- /dev/null +++ b/lib8tion.cpp @@ -0,0 +1,242 @@ +#include <stdint.h> + +#define RAND16_SEED 1337 +uint16_t rand16seed = RAND16_SEED; + + +// memset8, memcpy8, memmove8: +// optimized avr replacements for the standard "C" library +// routines memset, memcpy, and memmove. +// +// There are two techniques that make these routines +// faster than the standard avr-libc routines. +// First, the loops are unrolled 2X, meaning that +// the average loop overhead is cut in half. +// And second, the compare-and-branch at the bottom +// of each loop decrements the low byte of the +// counter, and if the carry is clear, it branches +// back up immediately. 
Only if the low byte math +// causes carry do we bother to decrement the high +// byte and check that result for carry as well. +// Results for a 100-byte buffer are 20-40% faster +// than standard avr-libc, at a cost of a few extra +// bytes of code. + +#if defined(__AVR__) +extern "C" { +//__attribute__ ((noinline)) +void * memset8 ( void * ptr, uint8_t val, uint16_t num ) +{ + asm volatile( + " movw r26, %[ptr] \n\t" + " sbrs %A[num], 0 \n\t" + " rjmp Lseteven_%= \n\t" + " rjmp Lsetodd_%= \n\t" + "Lsetloop_%=: \n\t" + " st X+, %[val] \n\t" + "Lsetodd_%=: \n\t" + " st X+, %[val] \n\t" + "Lseteven_%=: \n\t" + " subi %A[num], 2 \n\t" + " brcc Lsetloop_%= \n\t" + " sbci %B[num], 0 \n\t" + " brcc Lsetloop_%= \n\t" + : [num] "+r" (num) + : [ptr] "r" (ptr), + [val] "r" (val) + : "memory" + ); + return ptr; +} + + + +//__attribute__ ((noinline)) +void * memcpy8 ( void * dst, void* src, uint16_t num ) +{ + asm volatile( + " movw r30, %[src] \n\t" + " movw r26, %[dst] \n\t" + " sbrs %A[num], 0 \n\t" + " rjmp Lcpyeven_%= \n\t" + " rjmp Lcpyodd_%= \n\t" + "Lcpyloop_%=: \n\t" + " ld __tmp_reg__, Z+ \n\t" + " st X+, __tmp_reg__ \n\t" + "Lcpyodd_%=: \n\t" + " ld __tmp_reg__, Z+ \n\t" + " st X+, __tmp_reg__ \n\t" + "Lcpyeven_%=: \n\t" + " subi %A[num], 2 \n\t" + " brcc Lcpyloop_%= \n\t" + " sbci %B[num], 0 \n\t" + " brcc Lcpyloop_%= \n\t" + : [num] "+r" (num) + : [src] "r" (src), + [dst] "r" (dst) + : "memory" + ); + return dst; +} + +//__attribute__ ((noinline)) +void * memmove8 ( void * dst, void* src, uint16_t num ) +{ + if( src < dst) { + // if src < dst then we can use the forward-stepping memcpy8 + return memcpy8( dst, src, num); + } else { + // if src > dst then we have to step backward: + dst = (char*)dst + num; + src = (char*)src + num; + asm volatile( + " movw r30, %[src] \n\t" + " movw r26, %[dst] \n\t" + " sbrs %A[num], 0 \n\t" + " rjmp Lmoveven_%= \n\t" + " rjmp Lmovodd_%= \n\t" + "Lmovloop_%=: \n\t" + " ld __tmp_reg__, -Z \n\t" + " st -X, __tmp_reg__ \n\t" + 
"Lmovodd_%=: \n\t" + " ld __tmp_reg__, -Z \n\t" + " st -X, __tmp_reg__ \n\t" + "Lmoveven_%=: \n\t" + " subi %A[num], 2 \n\t" + " brcc Lmovloop_%= \n\t" + " sbci %B[num], 0 \n\t" + " brcc Lmovloop_%= \n\t" + : [num] "+r" (num) + : [src] "r" (src), + [dst] "r" (dst) + : "memory" + ); + return dst; + } +} + + +} /* end extern "C" */ + +#endif /* AVR */ + +#if 0 +// TEST / VERIFICATION CODE ONLY BELOW THIS POINT +#include <Arduino.h> +#include "lib8tion.h" + +void test1abs( int8_t i) +{ + Serial.print("abs("); Serial.print(i); Serial.print(") = "); + int8_t j = abs8(i); + Serial.print(j); Serial.println(" "); +} + +void testabs() +{ + delay(5000); + for( int8_t q = -128; q != 127; q++) { + test1abs(q); + } + for(;;){}; +} + + +void testmul8() +{ + delay(5000); + byte r, c; + + Serial.println("mul8:"); + for( r = 0; r <= 20; r += 1) { + Serial.print(r); Serial.print(" : "); + for( c = 0; c <= 20; c += 1) { + byte t; + t = mul8( r, c); + Serial.print(t); Serial.print(' '); + } + Serial.println(' '); + } + Serial.println("done."); + for(;;){}; +} + + +void testscale8() +{ + delay(5000); + byte r, c; + + Serial.println("scale8:"); + for( r = 0; r <= 240; r += 10) { + Serial.print(r); Serial.print(" : "); + for( c = 0; c <= 240; c += 10) { + byte t; + t = scale8( r, c); + Serial.print(t); Serial.print(' '); + } + Serial.println(' '); + } + + Serial.println(' '); + Serial.println("scale8_video:"); + + for( r = 0; r <= 100; r += 4) { + Serial.print(r); Serial.print(" : "); + for( c = 0; c <= 100; c += 4) { + byte t; + t = scale8_video( r, c); + Serial.print(t); Serial.print(' '); + } + Serial.println(' '); + } + + Serial.println("done."); + for(;;){}; +} + + + +void testqadd8() +{ + delay(5000); + byte r, c; + for( r = 0; r <= 240; r += 10) { + Serial.print(r); Serial.print(" : "); + for( c = 0; c <= 240; c += 10) { + byte t; + t = qadd8( r, c); + Serial.print(t); Serial.print(' '); + } + Serial.println(' '); + } + Serial.println("done."); + for(;;){}; +} + +void 
testnscale8x3() +{ + delay(5000); + byte r, g, b, sc; + for( byte z = 0; z < 10; z++) { + r = random8(); g = random8(); b = random8(); sc = random8(); + + Serial.print("nscale8x3_video( "); + Serial.print(r); Serial.print(", "); + Serial.print(g); Serial.print(", "); + Serial.print(b); Serial.print(", "); + Serial.print(sc); Serial.print(") = [ "); + + nscale8x3_video( r, g, b, sc); + + Serial.print(r); Serial.print(", "); + Serial.print(g); Serial.print(", "); + Serial.print(b); Serial.print("]"); + + Serial.println(' '); + } + Serial.println("done."); + for(;;){}; +} + +#endif diff --git a/lib8tion.h b/lib8tion.h new file mode 100644 index 00000000..5fb812cb --- /dev/null +++ b/lib8tion.h @@ -0,0 +1,1272 @@ +#ifndef __INC_LIB8TION_H +#define __INC_LIB8TION_H + +/* + + Fast, efficient 8-bit math functions specifically + designed for high-performance LED programming. + + Because of the AVR(Arduino) and ARM assembly language + implementations provided, using these functions often + results in smaller and faster code than the equivalent + program using plain "C" arithmetic and logic. + + + Included are: + + + - Saturating unsigned 8-bit add and subtract. + Instead of wrapping around if an overflow occurs, + these routines just 'clamp' the output at a maxumum + of 255, or a minimum of 0. Useful for adding pixel + values. E.g., qadd8( 200, 100) = 255. + + qadd8( i, j) == MIN( (i + j), 0xFF ) + qsub8( i, j) == MAX( (i - j), 0 ) + + - Saturating signed 8-bit ("7-bit") add. + qadd7( i, j) == MIN( (i + j), 0x7F) + + + - Scaling (down) of unsigned 8- and 16- bit values. + Scaledown value is specified in 1/256ths. 
+ scale8( i, sc) == (i * sc) / 256 + scale16by8( i, sc) == (i * sc) / 256 + + Example: scaling a 0-255 value down into a + range from 0-99: + downscaled = scale8( originalnumber, 100); + + A special version of scale8 is provided for scaling + LED brightness values, to make sure that they don't + accidentally scale down to total black at low + dimming levels, since that would look wrong: + scale8_video( i, sc) = ((i * sc) / 256) +? 1 + + Example: reducing an LED brightness by a + dimming factor: + new_bright = scale8_video( orig_bright, dimming); + + + - Fast 8- and 16- bit unsigned random numbers. + Significantly faster than Arduino random(), but + also somewhat less random. You can add entropy. + random8() == random from 0..255 + random8( n) == random from 0..(N-1) + random8( n, m) == random from N..(M-1) + + random16() == random from 0..65535 + random16( n) == random from 0..(N-1) + random16( n, m) == random from N..(M-1) + + random16_set_seed( k) == seed = k + random16_add_entropy( k) == seed += k + + + - Absolute value of a signed 8-bit value. + abs8( i) == abs( i) + + + - 8-bit math operations which return 8-bit values. + These are provided mostly for completeness, + not particularly for performance. + mul8( i, j) == (i * j) & 0xFF + add8( i, j) == (i + j) & 0xFF + sub8( i, j) == (i - j) & 0xFF + + + - Fast 16-bit approximations of sin and cos. + Input angle is a uint16_t from 0-65535. + Output is a signed int16_t from -32767 to 32767. + sin16( x) == sin( (x/32768.0) * pi) * 32767 + cos16( x) == cos( (x/32768.0) * pi) * 32767 + Accurate to more than 99% in all cases. + + + - Dimming and brightening functions for 8-bit + light values. + dim8_video( x) == scale8_video( x, x) + dim8_raw( x) == scale8( x, x) + brighten8_video( x) == 255 - dim8_video( 255 - x) + brighten8_raw( x) == 255 - dim8_raw( 255 - x) + The dimming functions in particular are suitable + for making LED light output appear more 'linear'. + + + - Fast 8-bit "easing in/out" function. 
+ ease8InOutCubic(x) == 3(x^2) - 2(x^3) + ease8InOutApprox(x) == + faster, rougher, approximation of cubic easing + + + - Linear interpolation between two values, with the + fraction between them expressed as an 8- or 16-bit + fixed point fraction (fract8 or fract16). + lerp8by8( fromU8, toU8, fract8 ) + lerp16by8( fromU16, toU16, fract8 ) + lerp15by8( fromS16, toS16, fract8 ) + == from + (( to - from ) * fract8) / 256) + lerp16by16( fromU16, toU16, fract16 ) + == from + (( to - from ) * fract16) / 65536) + + - Optimized memmove, memcpy, and memset, that are + faster than standard avr-libc 1.8. + memmove8( dest, src, bytecount) + memcpy8( dest, src, bytecount) + memset8( buf, value, bytecount) + + +Lib8tion is pronounced like 'libation': lie-BAY-shun + +*/ + + + +#include <stdint.h> + +#define LIB8STATIC __attribute__ ((unused)) static + + +#if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) +#define LIB8_ATTINY 1 +#endif + + +#if defined(__arm__) + +#if defined(__MK20DX128__) +// Can use Cortex M4 DSP instructions +#define QADD8_C 0 +#define QADD7_C 0 +#define QADD8_ARM_DSP_ASM 1 +#define QADD7_ARM_DSP_ASM 1 +#else +// Generic ARM +#define QADD8_C 1 +#define QADD7_C 1 +#endif + +#define QSUB8_C 1 +#define SCALE8_C 1 +#define SCALE16BY8_C 1 +#define SCALE16_C 1 +#define ABS8_C 1 +#define MUL8_C 1 +#define QMUL8_C 1 +#define ADD8_C 1 +#define SUB8_C 1 +#define EASE8_C 1 + + +#elif defined(__AVR__) + +// AVR ATmega and friends Arduino + +#define QADD8_C 0 +#define QADD7_C 0 +#define QSUB8_C 0 +#define ABS8_C 0 +#define ADD8_C 0 +#define SUB8_C 0 + +#define QADD8_AVRASM 1 +#define QADD7_AVRASM 1 +#define QSUB8_AVRASM 1 +#define ABS8_AVRASM 1 +#define ADD8_AVRASM 1 +#define SUB8_AVRASM 1 + +// Note: these require hardware MUL instruction +// -- sorry, ATtiny!
+#if !defined(LIB8_ATTINY) +#define SCALE8_C 0 +#define SCALE16BY8_C 0 +#define SCALE16_C 0 +#define MUL8_C 0 +#define QMUL8_C 0 +#define EASE8_C 0 +#define SCALE8_AVRASM 1 +#define SCALE16BY8_AVRASM 1 +#define SCALE16_AVRASM 1 +#define MUL8_AVRASM 1 +#define QMUL8_AVRASM 1 +#define EASE8_AVRASM 1 +#define CLEANUP_R1_AVRASM 1 +#else +// On ATtiny, we just use C implementations +#define SCALE8_C 1 +#define SCALE16BY8_C 1 +#define SCALE16_C 1 +#define MUL8_C 1 +#define QMUL8_C 1 +#define EASE8_C 1 +#define SCALE8_AVRASM 0 +#define SCALE16BY8_AVRASM 0 +#define SCALE16_AVRASM 0 +#define MUL8_AVRASM 0 +#define QMUL8_AVRASM 0 +#define EASE8_AVRASM 0 +#endif + +#else + +// unspecified architecture, so +// no ASM, everything in C +#define QADD8_C 1 +#define QADD7_C 1 +#define QSUB8_C 1 +#define SCALE8_C 1 +#define SCALE16BY8_C 1 +#define SCALE16_C 1 +#define ABS8_C 1 +#define MUL8_C 1 +#define ADD8_C 1 +#define SUB8_C 1 +#define EASE8_C 1 + +#endif + + +/////////////////////////////////////////////////////////////////////// +// +// typdefs for fixed-point fractional types. +// +// sfract7 should be interpreted as signed 128ths. +// fract8 should be interpreted as unsigned 256ths. +// sfract15 should be interpreted as signed 32768ths. +// fract16 should be interpreted as unsigned 65536ths. +// +// Example: if a fract8 has the value "64", that should be interpreted +// as 64/256ths, or one-quarter. 
+// +// +// fract8 range is 0 to 0.99609375 +// in steps of 0.00390625 +// +// sfract7 range is -0.9921875 to 0.9921875 +// in steps of 0.0078125 +// +// fract16 range is 0 to 0.99998474121 +// in steps of 0.00001525878 +// +// sfract15 range is -0.99996948242 to 0.99996948242 +// in steps of 0.00003051757 +// + +typedef uint8_t fract8; // ANSI: unsigned short _Fract +typedef int8_t sfract7; // ANSI: signed short _Fract +typedef uint16_t fract16; // ANSI: unsigned _Fract +typedef int16_t sfract15; // ANSI: signed _Fract + + +// accumXY types should be interpreted as X bits of integer, +// and Y bits of fraction. +// E.g., accum88 has 8 bits of int, 8 bits of fraction + +typedef uint16_t accum88; // ANSI: unsigned short _Accum +typedef int16_t saccum78; // ANSI: signed short _Accum +typedef uint32_t accum1616;// ANSI: signed _Accum +typedef int32_t saccum1516;//ANSI: signed _Accum +typedef uint16_t accum124; // no direct ANSI counterpart +typedef int32_t saccum114;// no direct ANSI counterpart + + +// typedef for IEEE754 "binary32" float type internals + +typedef union { + uint32_t i; + float f; + struct { + uint32_t mantissa: 23; + uint32_t exponent: 8; + uint32_t signbit: 1; + }; + struct { + uint32_t mant7 : 7; + uint32_t mant16: 16; + uint32_t exp_ : 8; + uint32_t sb_ : 1; + }; + struct { + uint32_t mant_lo8 : 8; + uint32_t mant_hi16_exp_lo1 : 16; + uint32_t sb_exphi7 : 8; + }; +} IEEE754binary32_t; + + + +/////////////////////////////////////////////////////////////////////// + +// qadd8: add one byte to another, saturating at 0xFF +LIB8STATIC uint8_t qadd8( uint8_t i, uint8_t j) +{ +#if QADD8_C == 1 + int t = i + j; + if( t > 255) t = 255; + return t; +#elif QADD8_AVRASM == 1 + asm volatile( + /* First, add j to i, conditioning the C flag */ + "add %0, %1 \n\t" + + /* Now test the C flag. + If C is clear, we branch around a load of 0xFF into i. + If C is set, we go ahead and load 0xFF into i. 
+ */ + "brcc L_%= \n\t" + "ldi %0, 0xFF \n\t" + "L_%=: " + : "+a" (i) + : "a" (j) ); + return i; +#elif QADD8_ARM_DSP_ASM == 1 + asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j)); + return i; +#else +#error "No implementation for qadd8 available." +#endif +} + + +// qadd7: add one signed byte to another, +// saturating at 0x7F. +LIB8STATIC int8_t qadd7( int8_t i, int8_t j) +{ +#if QADD7_C == 1 + int16_t t = i + j; + if( t > 127) t = 127; + return t; +#elif QADD7_AVRASM == 1 + asm volatile( + /* First, add j to i, conditioning the V flag */ + "add %0, %1 \n\t" + + /* Now test the V flag. + If V is clear, we branch around a load of 0x7F into i. + If V is set, we go ahead and load 0x7F into i. + */ + "brvc L_%= \n\t" + "ldi %0, 0x7F \n\t" + "L_%=: " + : "+a" (i) + : "a" (j) ); + + return i; +#elif QADD7_ARM_DSP_ASM == 1 + asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j)); + return i; +#else +#error "No implementation for qadd7 available." +#endif +} + +// qsub8: subtract one byte from another, saturating at 0x00 +LIB8STATIC uint8_t qsub8( uint8_t i, uint8_t j) +{ +#if QSUB8_C == 1 + int t = i - j; + if( t < 0) t = 0; + return t; +#elif QSUB8_AVRASM == 1 + + asm volatile( + /* First, subtract j from i, conditioning the C flag */ + "sub %0, %1 \n\t" + + /* Now test the C flag. + If C is clear, we branch around a load of 0x00 into i. + If C is set, we go ahead and load 0x00 into i. + */ + "brcc L_%= \n\t" + "ldi %0, 0x00 \n\t" + "L_%=: " + : "+a" (i) + : "a" (j) ); + + return i; +#else +#error "No implementation for qsub8 available." +#endif +} + +// add8: add one byte to another, with one byte result +LIB8STATIC uint8_t add8( uint8_t i, uint8_t j) +{ +#if ADD8_C == 1 + int t = i + j; + return t; +#elif ADD8_AVRASM == 1 + // Add j to i, period. + asm volatile( "add %0, %1" : "+a" (i) : "a" (j)); + return i; +#else +#error "No implementation for add8 available." 
+#endif +} + + +// sub8: subtract one byte from another, 8-bit result +LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j) +{ +#if SUB8_C == 1 + int t = i - j; + return t; +#elif SUB8_AVRASM == 1 + // Subtract j from i, period. + asm volatile( "sub %0, %1" : "+a" (i) : "a" (j)); + return i; +#else +#error "No implementation for sub8 available." +#endif +} + + +// scale8: scale one byte by a second one, which is treated as +// the numerator of a fraction whose denominator is 256 +// In other words, it computes i * (scale / 256) +// 4 clocks AVR, 2 clocks ARM +LIB8STATIC uint8_t scale8( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 + return ((int)i * (int)(scale) ) >> 8; +#elif SCALE8_AVRASM == 1 + asm volatile( + /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ + "mul %0, %1 \n\t" + /* Move the high 8-bits of the product (r1) back to i */ + "mov %0, r1 \n\t" + /* Restore r1 to "0"; it's expected to always be that */ + "clr __zero_reg__ \n\t" + + : "+a" (i) /* writes to i */ + : "a" (scale) /* uses scale */ + : "r0", "r1" /* clobbers r0, r1 */ ); + + /* Return the result */ + return i; +#else +#error "No implementation for scale8 available." +#endif +} + + +// The "video" version of scale8 guarantees that the output will +// be only be zero if one or both of the inputs are zero. If both +// inputs are non-zero, the output is guaranteed to be non-zero. +// This makes for better 'video'/LED dimming, at the cost of +// several additional cycles. +LIB8STATIC uint8_t scale8_video( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 + uint8_t nonzeroscale = (scale != 0) ? 1 : 0; + uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; + return j; +#elif SCALE8_AVRASM == 1 + + uint8_t nonzeroscale = (scale != 0) ? 
1 : 0; + asm volatile( + " tst %0 \n" + " breq L_%= \n" + " mul %0, %1 \n" + " mov %0, r1 \n" + " add %0, %2 \n" + " clr __zero_reg__ \n" + "L_%=: \n" + + : "+a" (i) + : "a" (scale), "a" (nonzeroscale) + : "r0", "r1"); + + // Return the result + return i; +#else +#error "No implementation for scale8_video available." +#endif +} + + +// This version of scale8 does not clean up the R1 register on AVR +// If you are doing several 'scale8's in a row, use this, and +// then explicitly call cleanup_R1. +LIB8STATIC uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 + return ((int)i * (int)(scale) ) >> 8; +#elif SCALE8_AVRASM == 1 + asm volatile( + /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ + "mul %0, %1 \n\t" + /* Move the high 8-bits of the product (r1) back to i */ + "mov %0, r1 \n\t" + /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ + /* "clr __zero_reg__ \n\t" */ + + : "+a" (i) /* writes to i */ + : "a" (scale) /* uses scale */ + : "r0", "r1" /* clobbers r0, r1 */ ); + + // Return the result + return i; +#else +#error "No implementation for scale8_LEAVING_R1_DIRTY available." +#endif +} + +// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENT DIRECTLY IN PLACE + +LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale) +{ +#if SCALE8_C == 1 + i = ((int)i * (int)(scale) ) >> 8; +#elif SCALE8_AVRASM == 1 + asm volatile( + /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ + "mul %0, %1 \n\t" + /* Move the high 8-bits of the product (r1) back to i */ + "mov %0, r1 \n\t" + /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ + /* "clr __zero_reg__ \n\t" */ + + : "+a" (i) /* writes to i */ + : "a" (scale) /* uses scale */ + : "r0", "r1" /* clobbers r0, r1 */ ); +#else +#error "No implementation for nscale8_LEAVING_R1_DIRTY available." +#endif +} + + + +LIB8STATIC uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 + uint8_t nonzeroscale = (scale != 0) ? 
1 : 0; + uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; + return j; +#elif SCALE8_AVRASM == 1 + + uint8_t nonzeroscale = (scale != 0) ? 1 : 0; + asm volatile( + " tst %0 \n" + " breq L_%= \n" + " mul %0, %1 \n" + " mov %0, r1 \n" + " add %0, %2 \n" + /* R1 IS LEFT DIRTY, YOU MUST ZERO IT OUT YOURSELF */ + "L_%=: \n" + + : "+a" (i) + : "a" (scale), "a" (nonzeroscale) + : "r0", "r1"); + + // Return the result + return i; +#else +#error "No implementation for scale8_video available." +#endif +} + + + +LIB8STATIC void cleanup_R1() +{ +#if CLEANUP_R1_AVRASM == 1 + // Restore r1 to "0"; it's expected to always be that + asm volatile( "clr __zero_reg__ \n\t" : : : "r1" ); +#endif +} + + +// nscale8x3: scale three one byte values by a fourth one, which is treated as +// the numerator of a fraction whose demominator is 256 +// In other words, it computes r,g,b * (scale / 256) +// +// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE + +LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale) +{ +#if SCALE8_C == 1 + r = ((int)r * (int)(scale) ) >> 8; + g = ((int)g * (int)(scale) ) >> 8; + b = ((int)b * (int)(scale) ) >> 8; +#elif SCALE8_AVRASM == 1 + r = scale8_LEAVING_R1_DIRTY(r, scale); + g = scale8_LEAVING_R1_DIRTY(g, scale); + b = scale8_LEAVING_R1_DIRTY(b, scale); + cleanup_R1(); +#else +#error "No implementation for nscale8x3 available." +#endif +} + + +LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale) +{ +#if SCALE8_C == 1 + uint8_t nonzeroscale = (scale != 0) ? 1 : 0; + r = (r == 0) ? 0 : (((int)r * (int)(scale) ) >> 8) + nonzeroscale; + g = (g == 0) ? 0 : (((int)g * (int)(scale) ) >> 8) + nonzeroscale; + b = (b == 0) ? 
0 : (((int)b * (int)(scale) ) >> 8) + nonzeroscale; +#elif SCALE8_AVRASM == 1 + r = scale8_video_LEAVING_R1_DIRTY( r, scale); + g = scale8_video_LEAVING_R1_DIRTY( g, scale); + b = scale8_video_LEAVING_R1_DIRTY( b, scale); + cleanup_R1(); +#else +#error "No implementation for nscale8x3 available." +#endif +} + +// nscale8x2: scale two one byte values by a third one, which is treated as +// the numerator of a fraction whose demominator is 256 +// In other words, it computes i,j * (scale / 256) +// +// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE + +LIB8STATIC void nscale8x2( uint8_t& i, uint8_t& j, fract8 scale) +{ +#if SCALE8_C == 1 + i = ((int)i * (int)(scale) ) >> 8; + j = ((int)j * (int)(scale) ) >> 8; +#elif SCALE8_AVRASM == 1 + i = scale8_LEAVING_R1_DIRTY(i, scale); + j = scale8_LEAVING_R1_DIRTY(j, scale); + cleanup_R1(); +#else +#error "No implementation for nscale8x2 available." +#endif +} + + +LIB8STATIC void nscale8x2_video( uint8_t& i, uint8_t& j, fract8 scale) +{ +#if SCALE8_C == 1 + uint8_t nonzeroscale = (scale != 0) ? 1 : 0; + i = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; + j = (j == 0) ? 0 : (((int)j * (int)(scale) ) >> 8) + nonzeroscale; +#elif SCALE8_AVRASM == 1 + i = scale8_video_LEAVING_R1_DIRTY( i, scale); + j = scale8_video_LEAVING_R1_DIRTY( j, scale); + cleanup_R1(); +#else +#error "No implementation for nscale8x2 available." +#endif +} + + +// scale16by8: scale a 16-bit unsigned value by an 8-bit value, +// considered as numerator of a fraction whose denominator +// is 256. 
In other words, it computes i * (scale / 256) + +#if SCALE16BY8_C == 1 +LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale ) +{ + uint16_t result; + result = (i * scale) / 256; + return result; +} +#elif SCALE16BY8_AVRASM == 1 +LIB8STATIC uint16_t scale16by8( uint16_t i, fract8 scale ) +{ + uint16_t result; + asm volatile( + // result.A = HighByte(i.A x j ) + " mul %A[i], %[scale] \n\t" + " mov %A[result], r1 \n\t" + " clr %B[result] \n\t" + + // result.A-B += i.B x j + " mul %B[i], %[scale] \n\t" + " add %A[result], r0 \n\t" + " adc %B[result], r1 \n\t" + + // cleanup r1 + " clr __zero_reg__ \n\t" + + : [result] "=r" (result) + : [i] "r" (i), [scale] "r" (scale) + : "r0", "r1" + ); + return result; +} +#else +#error "No implementation for scale16by8 available." +#endif + +// scale16: scale a 16-bit unsigned value by a 16-bit value, +// considered as numerator of a fraction whose denominator +// is 65536. In other words, it computes i * (scale / 65536) + +#if SCALE16_C == 1 +LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) +{ + uint16_t result; + result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; + return result; +} +#elif SCALE16_AVRASM == 1 +LIB8STATIC +uint16_t scale16( uint16_t i, fract16 scale ) +{ + uint32_t result = 0; + const uint8_t zero = 0; + asm volatile( + // result.A-B = i.A x scale.A + " mul %A[i], %A[scale] \n\t" + // save results... + // basic idea: + //" mov %A[result], r0 \n\t" + //" mov %B[result], r1 \n\t" + // which can be written as... + " movw %A[result], r0 \n\t" + // We actually need to do anything with r0, + // as result.A is never used again here, so we + // could just move the high byte, but movw is + // one clock cycle, just like mov, so might as + // well, in case we want to use this code for + // a generic 16x16 multiply somewhere. 
+ + // result.C-D = i.B x scale.B + " mul %B[i], %B[scale] \n\t" + //" mov %C[result], r0 \n\t" + //" mov %D[result], r1 \n\t" + " movw %C[result], r0 \n\t" + + // result.B-D += i.B x scale.A + " mul %B[i], %A[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // result.B-D += i.A x scale.B + " mul %A[i], %B[scale] \n\t" + + " add %B[result], r0 \n\t" + " adc %C[result], r1 \n\t" + " adc %D[result], %[zero] \n\t" + + // cleanup r1 + " clr r1 \n\t" + + : [result] "+r" (result) + : [i] "r" (i), + [scale] "r" (scale), + [zero] "r" (zero) + : "r0", "r1" + ); + result = result >> 16; + return result; +} +#else +#error "No implementation for scale16 available." +#endif + + + +// mul8: 8x8 bit multiplication, with 8 bit result +LIB8STATIC uint8_t mul8( uint8_t i, uint8_t j) +{ +#if MUL8_C == 1 + return ((int)i * (int)(j) ) & 0xFF; +#elif MUL8_AVRASM == 1 + asm volatile( + /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ + "mul %0, %1 \n\t" + /* Extract the LOW 8-bits (r0) */ + "mov %0, r0 \n\t" + /* Restore r1 to "0"; it's expected to always be that */ + "clr __zero_reg__ \n\t" + : "+a" (i) + : "a" (j) + : "r0", "r1"); + + return i; +#else +#error "No implementation for mul8 available." +#endif +} + + +// mul8: saturating 8x8 bit multiplication, with 8 bit result +LIB8STATIC uint8_t qmul8( uint8_t i, uint8_t j) +{ +#if QMUL8_C == 1 + int p = ((int)i * (int)(j) ); + if( p > 255) p = 255; + return p; +#elif QMUL8_AVRASM == 1 + asm volatile( + /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ + " mul %0, %1 \n\t" + /* If high byte of result is zero, all is well. 
*/ + " tst r1 \n\t" + " breq Lnospill_%= \n\t" + /* If high byte of result > 0, saturate low byte to 0xFF */ + " ldi %0,0xFF \n\t" + " rjmp Ldone_%= \n\t" + "Lnospill_%=: \n\t" + /* Extract the LOW 8-bits (r0) */ + " mov %0, r0 \n\t" + "Ldone_%=: \n\t" + /* Restore r1 to "0"; it's expected to always be that */ + " clr __zero_reg__ \n\t" + : "+a" (i) + : "a" (j) + : "r0", "r1"); + + return i; +#else +#error "No implementation for qmul8 available." +#endif +} + + +// abs8: take abs() of a signed 8-bit uint8_t +LIB8STATIC int8_t abs8( int8_t i) +{ +#if ABS8_C == 1 + if( i < 0) i = -i; + return i; +#elif ABS8_AVRASM == 1 + + + asm volatile( + /* First, check the high bit, and prepare to skip if it's clear */ + "sbrc %0, 7 \n" + + /* Negate the value */ + "neg %0 \n" + + : "+r" (i) : "r" (i) ); + return i; +#else +#error "No implementation for abs8 available." +#endif +} + + +/////////////////////////////////////////////////////////////////////// +// +// float-to-fixed and fixed-to-float conversions +// +// Note that anything involving a 'float' on AVR will be slower. + +// floatToSfract15: conversion from IEEE754 float in the range (-1,1) +// to 16-bit fixed point. Note that the extremes of +// one and negative one are NOT representable. The +// representable range is basically +// +// sfract15ToFloat: conversion from sfract15 fixed point to +// IEEE754 32-bit float. + +LIB8STATIC +float sfract15ToFloat( sfract15 y) +{ + return y / 32768.0; +} + +LIB8STATIC +sfract15 floatToSfract15( float f) +{ + return f * 32768.0; +} + + + +/////////////////////////////////////////////////////////////////////// + +// Dimming and brightening functions +// +// The eye does not respond in a linear way to light. +// High speed PWM'd LEDs at 50% duty cycle appear far +// brighter then the 'half as bright' you might expect. +// +// If you want your midpoint brightness leve (128) to +// appear half as bright as 'full' brightness (255), you +// have to apply a 'dimming function'. 
+// +// + +LIB8STATIC uint8_t dim8_raw( uint8_t x) +{ + return scale8( x, x); +} + +LIB8STATIC uint8_t dim8_video( uint8_t x) +{ + return scale8_video( x, x); +} + +LIB8STATIC uint8_t brighten8_raw( uint8_t x) +{ + uint8_t ix = 255 - x; + return 255 - scale8( ix, ix); +} + +LIB8STATIC uint8_t brighten8_video( uint8_t x) +{ + uint8_t ix = 255 - x; + return 255 - scale8_video( ix, ix); +} + +/////////////////////////////////////////////////////////////////////// + +// A 16-bit PNRG good enough for LED animations + +// X(n+1) = (2053 * X(n)) + 13849) +#define RAND16_2053 2053 +#define RAND16_13849 13849 + +extern uint16_t rand16seed;// = RAND16_SEED; + + +LIB8STATIC uint8_t random8() +{ + rand16seed = (rand16seed * RAND16_2053) + RAND16_13849; + return rand16seed; +} + +LIB8STATIC uint16_t random16() +{ + rand16seed = (rand16seed * RAND16_2053) + RAND16_13849; + return rand16seed; +} + + +LIB8STATIC uint8_t random8(uint8_t lim) +{ + uint8_t r = random8(); + r = scale8( r, lim); + return r; +} + +LIB8STATIC uint8_t random8(uint8_t min, uint8_t lim) +{ + uint8_t delta = lim - min; + uint8_t r = random8(delta) + min; + return r; +} + +LIB8STATIC uint16_t random16( uint16_t lim) +{ + uint16_t r = random16(); + uint32_t p = (uint32_t)lim * (uint32_t)r; + r = p >> 16; + return r; +} + +LIB8STATIC uint16_t random16( uint16_t min, uint16_t lim) +{ + uint16_t delta = lim - min; + uint16_t r = random16( delta) + min; + return r; +} + +LIB8STATIC void random16_set_seed( uint16_t seed) +{ + rand16seed = seed; +} + +LIB8STATIC uint16_t random16_get_seed() +{ + return rand16seed; +} + +LIB8STATIC void random16_add_entropy( uint16_t entropy) +{ + rand16seed += entropy; +} + + +/////////////////////////////////////////////////////////////////////// + +// sin16 & cos16: +// Fast 16-bit approximations of sin(x) & cos(x). +// Input angle is an unsigned int from 0-65535. +// Output is signed int from -32767 to 32767. 
+// +// This approximation never varies more than 0.69% +// from the floating point value you'd get by doing +// float s = sin( x ) * 32767.0; +// +// Don't use this approximation for calculating the +// trajectory of a rocket to Mars, but it's great +// for art projects and LED displays. +// +// On Arduino/AVR, this approximation is more than +// 10X faster than floating point sin(x) and cos(x) + +#if defined(__AVR__) +#define sin16 sin16_avr +#else +#define sin16 sin16_C +#endif + +LIB8STATIC int16_t sin16_avr( uint16_t theta ) +{ + static const uint8_t data[] = + { 0, 0, 49, 0, 6393%256, 6393/256, 48, 0, + 12539%256, 12539/256, 44, 0, 18204%256, 18204/256, 38, 0, + 23170%256, 23170/256, 31, 0, 27245%256, 27245/256, 23, 0, + 30273%256, 30273/256, 14, 0, 32137%256, 32137/256, 4 /*,0*/ }; + + uint16_t offset = (theta & 0x3FFF); + + // AVR doesn't have a multi-bit shift instruction, + // so if we say "offset >>= 3", gcc makes a tiny loop. + // Inserting empty volatile statements between each + // bit shift forces gcc to unroll the loop. 
+ offset >>= 1; // 0..8191 + asm volatile(""); + offset >>= 1; // 0..4095 + asm volatile(""); + offset >>= 1; // 0..2047 + + if( theta & 0x4000 ) offset = 2047 - offset; + + uint8_t sectionX4; + sectionX4 = offset / 256; + sectionX4 *= 4; + + uint8_t m; + + union { + uint16_t b; + struct { + uint8_t blo; + uint8_t bhi; + }; + } u; + + //in effect u.b = blo + (256 * bhi); + u.blo = data[ sectionX4 ]; + u.bhi = data[ sectionX4 + 1]; + m = data[ sectionX4 + 2]; + + uint8_t secoffset8 = (uint8_t)(offset) / 2; + + uint16_t mx = m * secoffset8; + + int16_t y = mx + u.b; + if( theta & 0x8000 ) y = -y; + + return y; +} + +LIB8STATIC int16_t sin16_C( uint16_t theta ) +{ + static const uint16_t base[] = + { 0, 6393, 12539, 18204, 23170, 27245, 30273, 32137 }; + static const uint8_t slope[] = + { 49, 48, 44, 38, 31, 23, 14, 4 }; + + uint16_t offset = (theta & 0x3FFF) >> 3; // 0..2047 + if( theta & 0x4000 ) offset = 2047 - offset; + + uint8_t section = offset / 256; // 0..7 + uint16_t b = base[section]; + uint8_t m = slope[section]; + + uint8_t secoffset8 = (uint8_t)(offset) / 2; + + uint16_t mx = m * secoffset8; + int16_t y = mx + b; + + if( theta & 0x8000 ) y = -y; + + return y; +} + +LIB8STATIC int16_t cos16( uint16_t theta) +{ + return sin16( theta + 16384); +} + + +/////////////////////////////////////////////////////////////////////// +// +// memmove8, memcpy8, and memset8: +// alternatives to memmove, memcpy, and memset that are +// faster on AVR than standard avr-libc 1.8 + +#if defined(__AVR__) +extern "C" { +void * memmove8( void * dst, const void * src, uint16_t num ); +void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline)); +void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ; +} +#else +// on non-AVR platforms, these names just call standard libc. 
+#define memmove8 memmove +#define memcpy8 memcpy +#define memset8 memset +#endif + + +/////////////////////////////////////////////////////////////////////// +// +// linear interpolation, such as could be used for Perlin noise, etc. +// + +// linear interpolation between two unsigned 8-bit values, +// with 8-bit fraction +LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac) +{ + uint8_t delta = b - a; + uint8_t scaled = scale8( delta, frac); + uint8_t result = a + scaled; + return result; +} + +// linear interpolation between two unsigned 16-bit values, +// with 16-bit fraction +LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac) +{ + uint16_t delta = b - a; + uint32_t prod = (uint32_t)delta * (uint32_t)frac; + uint16_t scaled = prod >> 16; + uint16_t result = a + scaled; + return result; +} + + +// A note on the structure of lerp16by8 (and lerp15by8) : +// The cases for b>a and b<=a are handled separately for +// speed: without knowing the relative order of a and b, +// the value (a-b) might be a signed 17-bit value, which +// would have to be stored in a 32-bit signed int and +// processed as such. To avoid that, we separate the +// two cases, and are able to do all the math with 16-bit +// unsigned values, which is much faster and smaller on AVR. 
+ +// linear interpolation between two unsigned 16-bit values, +// with 8-bit fraction +LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac) +{ + uint16_t result; + if( b > a) { + uint16_t delta = b - a; + uint16_t scaled = scale16by8( delta, frac); + result = a + scaled; + } else { + uint16_t delta = a - b; + uint16_t scaled = scale16by8( delta, frac); + result = a - scaled; + } + return result; +} + +// linear interpolation between two signed 15-bit values, +// with 8-bit fraction +LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac) +{ + int16_t result; + if( b > a) { + uint16_t delta = b - a; + uint16_t scaled = scale16by8( delta, frac); + result = a + scaled; + } else { + uint16_t delta = a - b; + uint16_t scaled = scale16by8( delta, frac); + result = a - scaled; + } + return result; +} + + +/////////////////////////////////////////////////////////////////////// +// +// easing functions; see http://easings.net +// + +// ease8InOuCubic: 8-bit cubic ease-in / ease-out function +// Takes around 18 cycles on AVR +LIB8STATIC fract8 ease8InOutCubic( fract8 i) +{ + uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i); + uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i); + + uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii)); + + /* the code generated for the above *'s automatically + cleans up R1, so there's no need to explicitily call + cleanup_R1(); */ + + uint8_t result = r1; + + // if we got "256", return 255: + if( r1 & 0x100 ) { + result = 255; + } + return result; +} + +// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function +// shaped approximately like 'ease8InOutCubic', +// it's never off by more than a couple of percent +// from the actual cubic S-curve, and it executes +// more than twice as fast. Use when the cycles +// are more important than visual smoothness. +// Asm version takes around 7 cycles on AVR. 
+ +#if EASE8_C == 1 +LIB8STATIC fract8 ease8InOutApprox( fract8 i) +{ + if( i < 64) { + // start with slope 0.5 + i /= 2; + } else if( i > (255 - 64)) { + // end with slope 0.5 + i = 255 - i; + i /= 2; + i = 255 - i; + } else { + // in the middle, use slope 192/128 = 1.5 + i -= 64; + i += (i / 2); + i += 32; + } + + return i; +} + +#elif EASE8_AVRASM == 1 +LIB8STATIC uint8_t ease8InOutApprox( fract8 i) +{ + // takes around 7 cycles on AVR + asm volatile ( + " subi %[i], 64 \n\t" + " cpi %[i], 128 \n\t" + " brcc Lshift_%= \n\t" + + // middle case + " mov __tmp_reg__, %[i] \n\t" + " lsr __tmp_reg__ \n\t" + " add %[i], __tmp_reg__ \n\t" + " subi %[i], 224 \n\t" + " rjmp Ldone_%= \n\t" + + // start or end case + "Lshift_%=: \n\t" + " lsr %[i] \n\t" + " subi %[i], 96 \n\t" + + "Ldone_%=: \n\t" + + : [i] "+a" (i) + : + : "r0", "r1" + ); + return i; +} +#else +#error "No implementation for ease8 available." +#endif + + + + + +#endif diff --git a/pixeltypes.h b/pixeltypes.h new file mode 100644 index 00000000..6d3f67f2 --- /dev/null +++ b/pixeltypes.h @@ -0,0 +1,659 @@ +#ifndef __INC_PIXELS_H +#define __INC_PIXELS_H + +#include <stdint.h> +#include "lib8tion.h" + +struct CRGB; +struct CHSV; + +// Forward declaration of hsv2rgb_rainbow here, +// to avoid circular dependencies. 
+extern void hsv2rgb_rainbow( const CHSV& hsv, CRGB& rgb); + + +struct CHSV { + union { + struct { + union { + uint8_t hue; + uint8_t h; }; + union { + uint8_t saturation; + uint8_t sat; + uint8_t s; }; + union { + uint8_t value; + uint8_t val; + uint8_t v; }; + }; + uint8_t raw[3]; + }; + + // default values are UNITIALIZED + inline CHSV() __attribute__((always_inline)) + { + } + + // allow construction from H, S, V + inline CHSV( uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline)) + : h(ih), s(is), v(iv) + { + } + + // allow copy construction + inline CHSV(const CHSV& rhs) __attribute__((always_inline)) + { + h = rhs.h; + s = rhs.s; + v = rhs.v; + } + + inline CHSV& operator= (const CHSV& rhs) __attribute__((always_inline)) + { + h = rhs.h; + s = rhs.s; + v = rhs.v; + return *this; + } + + inline CHSV& setHSV(uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline)) + { + h = ih; + s = is; + v = iv; + return *this; + } +}; + + +struct CRGB { + union { + struct { + union { + uint8_t r; + uint8_t red; + }; + union { + uint8_t g; + uint8_t green; + }; + union { + uint8_t b; + uint8_t blue; + }; + }; + uint8_t raw[3]; + }; + + inline uint8_t& operator[] (uint8_t x) __attribute__((always_inline)) + { + return raw[x]; + } + + inline const uint8_t& operator[] (uint8_t x) const __attribute__((always_inline)) + { + return raw[x]; + } + + // default values are UNINITIALIZED + inline CRGB() __attribute__((always_inline)) + { + } + + // allow construction from R, G, B + inline CRGB( uint8_t ir, uint8_t ig, uint8_t ib) __attribute__((always_inline)) + : r(ir), g(ig), b(ib) + { + } + + // allow construction from 32-bit (really 24-bit) bit 0xRRGGBB color code + inline CRGB( uint32_t colorcode) __attribute__((always_inline)) + : r((colorcode >> 16) & 0xFF), g((colorcode >> 8) & 0xFF), b((colorcode >> 0) & 0xFF) + { + } + + // allow copy construction + inline CRGB(const CRGB& rhs) __attribute__((always_inline)) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + 
} + + // allow construction from HSV color + inline CRGB(const CHSV& rhs) __attribute__((always_inline)) + { + hsv2rgb_rainbow( rhs, *this); + } + + // allow assignment from one RGB struct to another + inline CRGB& operator= (const CRGB& rhs) __attribute__((always_inline)) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + return *this; + } + + // allow assignment from 32-bit (really 24-bit) 0xRRGGBB color code + inline CRGB& operator= (const uint32_t colorcode) __attribute__((always_inline)) + { + r = (colorcode >> 16) & 0xFF; + g = (colorcode >> 8) & 0xFF; + b = (colorcode >> 0) & 0xFF; + return *this; + } + + // allow assignment from R, G, and B + inline CRGB& setRGB (uint8_t nr, uint8_t ng, uint8_t nb) __attribute__((always_inline)) + { + r = nr; + g = ng; + b = nb; + return *this; + } + + // allow assignment from H, S, and V + inline CRGB& setHSV (uint8_t hue, uint8_t sat, uint8_t val) __attribute__((always_inline)) + { + hsv2rgb_rainbow( CHSV(hue, sat, val), *this); + return *this; + } + + // allow assignment from just a Hue, saturation and value automatically at max. 
+ inline CRGB& setHue (uint8_t hue) __attribute__((always_inline)) + { + hsv2rgb_rainbow( CHSV(hue, 255, 255), *this); + return *this; + } + + // allow assignment from HSV color + inline CRGB& operator= (const CHSV& rhs) __attribute__((always_inline)) + { + hsv2rgb_rainbow( rhs, *this); + return *this; + } + + // allow assignment from 32-bit (really 24-bit) 0xRRGGBB color code + inline CRGB& setColorCode (uint32_t colorcode) __attribute__((always_inline)) + { + r = (colorcode >> 16) & 0xFF; + g = (colorcode >> 8) & 0xFF; + b = (colorcode >> 0) & 0xFF; + return *this; + } + + + // add one RGB to another, saturating at 0xFF for each channel + inline CRGB& operator+= (const CRGB& rhs ) + { + r = qadd8( r, rhs.r); + g = qadd8( g, rhs.g); + b = qadd8( b, rhs.b); + return *this; + } + + // add a contstant to each channel, saturating at 0xFF + // this is NOT an operator+= overload because the compiler + // can't usefully decide when it's being passed a 32-bit + // constant (e.g. CRGB::Red) and an 8-bit one (CRGB::Blue) + inline CRGB& addToRGB (uint8_t d ) + { + r = qadd8( r, d); + g = qadd8( g, d); + b = qadd8( b, d); + return *this; + } + + // subtract one RGB from another, saturating at 0x00 for each channel + inline CRGB& operator-= (const CRGB& rhs ) + { + r = qsub8( r, rhs.r); + g = qsub8( g, rhs.g); + b = qsub8( b, rhs.b); + return *this; + } + + // subtract a constant from each channel, saturating at 0x00 + // this is NOT an operator+= overload because the compiler + // can't usefully decide when it's being passed a 32-bit + // constant (e.g. 
CRGB::Red) and an 8-bit one (CRGB::Blue) + inline CRGB& subtractFromRGB(uint8_t d ) + { + r = qsub8( r, d); + g = qsub8( g, d); + b = qsub8( b, d); + return *this; + } + + // subtract a constant of '1' from each channel, saturating at 0x00 + inline CRGB& operator-- () __attribute__((always_inline)) + { + subtractFromRGB(1); + return *this; + } + + // subtract a constant of '1' from each channel, saturating at 0x00 + inline CRGB operator-- (int DUMMY_ARG) __attribute__((always_inline)) + { + CRGB retval(*this); + --(*this); + return retval; + } + + // add a constant of '1' from each channel, saturating at 0xFF + inline CRGB& operator++ () __attribute__((always_inline)) + { + addToRGB(1); + return *this; + } + + // add a constant of '1' from each channel, saturating at 0xFF + inline CRGB operator++ (int DUMMY_ARG) __attribute__((always_inline)) + { + CRGB retval(*this); + ++(*this); + return retval; + } + + // divide each of the channels by a constant + inline CRGB& operator/= (uint8_t d ) + { + r /= d; + g /= d; + b /= d; + return *this; + } + + // multiply each of the channels by a constant, + // saturating each channel at 0xFF + inline CRGB& operator*= (uint8_t d ) + { + r = qmul8( r, d); + g = qmul8( g, d); + b = qmul8( b, d); + return *this; + } + + // scale down a RGB to N 256ths of it's current brightness, using + // 'video' dimming rules, which means that unless the scale factor is ZERO + // each channel is guaranteed NOT to dim down to zero. If it's already + // nonzero, it'll stay nonzero, even if that means the hue shifts a little + // at low brightness levels. + inline CRGB& nscale8_video (uint8_t scaledown ) + { + nscale8x3_video( r, g, b, scaledown); + return *this; + } + + // %= is a synonym for nscale8_video. 
Think of it is scaling down + // by "a percentage" + inline CRGB& operator%= (uint8_t scaledown ) + { + nscale8x3_video( r, g, b, scaledown); + return *this; + } + + // fadeLightBy is a synonym for nscale8_video( ..., 255-fadefactor) + inline CRGB& fadeLightBy (uint8_t fadefactor ) + { + nscale8x3_video( r, g, b, 255 - fadefactor); + return *this; + } + + // scale down a RGB to N 256ths of it's current brightness, using + // 'plain math' dimming rules, which means that if the low light levels + // may dim all the way to 100% black. + inline CRGB& nscale8 (uint8_t scaledown ) + { + nscale8x3( r, g, b, scaledown); + return *this; + } + + // fadeToBlackBy is a synonym for nscale8( ..., 255-fadefactor) + inline CRGB& fadeToBlackBy (uint8_t fadefactor ) + { + nscale8x3( r, g, b, 255 - fadefactor); + return *this; + } + + // "or" operator brings each channel up to the higher of the two values + inline CRGB& operator|= (const CRGB& rhs ) + { + if( rhs.r > r) r = rhs.r; + if( rhs.g > g) g = rhs.g; + if( rhs.b > b) b = rhs.b; + return *this; + } + inline CRGB& operator|= (uint8_t d ) + { + if( d > r) r = d; + if( d > g) g = d; + if( d > b) b = d; + return *this; + } + + // "and" operator brings each channel down to the lower of the two values + inline CRGB& operator&= (const CRGB& rhs ) + { + if( rhs.r < r) r = rhs.r; + if( rhs.g < g) g = rhs.g; + if( rhs.b < b) b = rhs.b; + return *this; + } + inline CRGB& operator&= (uint8_t d ) + { + if( d < r) r = d; + if( d < g) g = d; + if( d < b) b = d; + return *this; + } + + // this allows testing a CRGB for zero-ness + inline operator bool() const __attribute__((always_inline)) + { + return r || g || b; + } + + // invert each channel + inline CRGB operator- () + { + CRGB retval; + retval.r = 255 - r; + retval.g = 255 - g; + retval.b = 255 - b; + return retval; + } + + + inline uint8_t getLuma ( ) { + //Y' = 0.2126 R' + 0.7152 G' + 0.0722 B' + // 54 183 18 (!) 
+ + uint8_t luma = scale8_LEAVING_R1_DIRTY( r, 54) + \ + scale8_LEAVING_R1_DIRTY( g, 183) + \ + scale8_LEAVING_R1_DIRTY( b, 18); + cleanup_R1(); + return luma; + } + + inline uint8_t getAverageLight( ) { + const uint8_t eightysix = 86; + uint8_t avg = scale8_LEAVING_R1_DIRTY( r, eightysix) + \ + scale8_LEAVING_R1_DIRTY( g, eightysix) + \ + scale8_LEAVING_R1_DIRTY( b, eightysix); + cleanup_R1(); + return avg; + } + + inline void maximizeBrightness( uint8_t limit = 255 ) { + uint8_t max = red; + if( green > max) max = green; + if( blue > max) max = blue; + uint16_t factor = ((uint16_t)(limit) * 256) / max; + red = (red * factor) / 256; + green = (green * factor) / 256; + blue = (blue * factor) / 256; + } + + typedef enum { + AliceBlue=0xF0F8FF, + Amethyst=0x9966CC, + AntiqueWhite=0xFAEBD7, + Aqua=0x00FFFF, + Aquamarine=0x7FFFD4, + Azure=0xF0FFFF, + Beige=0xF5F5DC, + Bisque=0xFFE4C4, + Black=0x000000, + BlanchedAlmond=0xFFEBCD, + Blue=0x0000FF, + BlueViolet=0x8A2BE2, + Brown=0xA52A2A, + BurlyWood=0xDEB887, + CadetBlue=0x5F9EA0, + Chartreuse=0x7FFF00, + Chocolate=0xD2691E, + Coral=0xFF7F50, + CornflowerBlue=0x6495ED, + Cornsilk=0xFFF8DC, + Crimson=0xDC143C, + Cyan=0x00FFFF, + DarkBlue=0x00008B, + DarkCyan=0x008B8B, + DarkGoldenrod=0xB8860B, + DarkGray=0xA9A9A9, + DarkGreen=0x006400, + DarkKhaki=0xBDB76B, + DarkMagenta=0x8B008B, + DarkOliveGreen=0x556B2F, + DarkOrange=0xFF8C00, + DarkOrchid=0x9932CC, + DarkRed=0x8B0000, + DarkSalmon=0xE9967A, + DarkSeaGreen=0x8FBC8F, + DarkSlateBlue=0x483D8B, + DarkSlateGray=0x2F4F4F, + DarkTurquoise=0x00CED1, + DarkViolet=0x9400D3, + DeepPink=0xFF1493, + DeepSkyBlue=0x00BFFF, + DimGray=0x696969, + DodgerBlue=0x1E90FF, + FireBrick=0xB22222, + FloralWhite=0xFFFAF0, + ForestGreen=0x228B22, + Fuchsia=0xFF00FF, + Gainsboro=0xDCDCDC, + GhostWhite=0xF8F8FF, + Gold=0xFFD700, + Goldenrod=0xDAA520, + Gray=0x808080, + Green=0x008000, + GreenYellow=0xADFF2F, + Honeydew=0xF0FFF0, + HotPink=0xFF69B4, + IndianRed=0xCD5C5C, + Indigo=0x4B0082, + 
Ivory=0xFFFFF0, + Khaki=0xF0E68C, + Lavender=0xE6E6FA, + LavenderBlush=0xFFF0F5, + LawnGreen=0x7CFC00, + LemonChiffon=0xFFFACD, + LightBlue=0xADD8E6, + LightCoral=0xF08080, + LightCyan=0xE0FFFF, + LightGoldenrodYellow=0xFAFAD2, + LightGreen=0x90EE90, + LightGrey=0xD3D3D3, + LightPink=0xFFB6C1, + LightSalmon=0xFFA07A, + LightSeaGreen=0x20B2AA, + LightSkyBlue=0x87CEFA, + LightSlateGray=0x778899, + LightSteelBlue=0xB0C4DE, + LightYellow=0xFFFFE0, + Lime=0x00FF00, + LimeGreen=0x32CD32, + Linen=0xFAF0E6, + Magenta=0xFF00FF, + Maroon=0x800000, + MediumAquamarine=0x66CDAA, + MediumBlue=0x0000CD, + MediumOrchid=0xBA55D3, + MediumPurple=0x9370DB, + MediumSeaGreen=0x3CB371, + MediumSlateBlue=0x7B68EE, + MediumSpringGreen=0x00FA9A, + MediumTurquoise=0x48D1CC, + MediumVioletRed=0xC71585, + MidnightBlue=0x191970, + MintCream=0xF5FFFA, + MistyRose=0xFFE4E1, + Moccasin=0xFFE4B5, + NavajoWhite=0xFFDEAD, + Navy=0x000080, + OldLace=0xFDF5E6, + Olive=0x808000, + OliveDrab=0x6B8E23, + Orange=0xFFA500, + OrangeRed=0xFF4500, + Orchid=0xDA70D6, + PaleGoldenrod=0xEEE8AA, + PaleGreen=0x98FB98, + PaleTurquoise=0xAFEEEE, + PaleVioletRed=0xDB7093, + PapayaWhip=0xFFEFD5, + PeachPuff=0xFFDAB9, + Peru=0xCD853F, + Pink=0xFFC0CB, + Plaid=0xCC5533, + Plum=0xDDA0DD, + PowderBlue=0xB0E0E6, + Purple=0x800080, + Red=0xFF0000, + RosyBrown=0xBC8F8F, + RoyalBlue=0x4169E1, + SaddleBrown=0x8B4513, + Salmon=0xFA8072, + SandyBrown=0xF4A460, + SeaGreen=0x2E8B57, + Seashell=0xFFF5EE, + Sienna=0xA0522D, + Silver=0xC0C0C0, + SkyBlue=0x87CEEB, + SlateBlue=0x6A5ACD, + SlateGray=0x708090, + Snow=0xFFFAFA, + SpringGreen=0x00FF7F, + SteelBlue=0x4682B4, + Tan=0xD2B48C, + Teal=0x008080, + Thistle=0xD8BFD8, + Tomato=0xFF6347, + Turquoise=0x40E0D0, + Violet=0xEE82EE, + Wheat=0xF5DEB3, + White=0xFFFFFF, + WhiteSmoke=0xF5F5F5, + Yellow=0xFFFF00, + YellowGreen=0x9ACD32 + } HTMLColorCode; + static uint32_t Squant; +}; + + +inline __attribute__((always_inline)) bool operator== (const CRGB& lhs, const CRGB& rhs) +{ + return 
(lhs.r == rhs.r) && (lhs.g == rhs.g) && (lhs.b == rhs.b); +} + +inline __attribute__((always_inline)) bool operator!= (const CRGB& lhs, const CRGB& rhs) +{ + return !(lhs == rhs); +} + +inline __attribute__((always_inline)) bool operator< (const CRGB& lhs, const CRGB& rhs) +{ + uint16_t sl, sr; + sl = lhs.r + lhs.g + lhs.b; + sr = rhs.r + rhs.g + rhs.b; + return sl < sr; +} + +inline __attribute__((always_inline)) bool operator> (const CRGB& lhs, const CRGB& rhs) +{ + uint16_t sl, sr; + sl = lhs.r + lhs.g + lhs.b; + sr = rhs.r + rhs.g + rhs.b; + return sl > sr; +} + +inline __attribute__((always_inline)) bool operator>= (const CRGB& lhs, const CRGB& rhs) +{ + uint16_t sl, sr; + sl = lhs.r + lhs.g + lhs.b; + sr = rhs.r + rhs.g + rhs.b; + return sl >= sr; +} + +inline __attribute__((always_inline)) bool operator<= (const CRGB& lhs, const CRGB& rhs) +{ + uint16_t sl, sr; + sl = lhs.r + lhs.g + lhs.b; + sr = rhs.r + rhs.g + rhs.b; + return sl <= sr; +} + + +__attribute__((always_inline)) +inline CRGB operator+( const CRGB& p1, const CRGB& p2) +{ + return CRGB( qadd8( p1.r, p2.r), + qadd8( p1.g, p2.g), + qadd8( p1.b, p2.b)); +} + +__attribute__((always_inline)) +inline CRGB operator-( const CRGB& p1, const CRGB& p2) +{ + return CRGB( qsub8( p1.r, p2.r), + qsub8( p1.g, p2.g), + qsub8( p1.b, p2.b)); +} + +__attribute__((always_inline)) +inline CRGB operator*( const CRGB& p1, uint8_t d) +{ + return CRGB( qmul8( p1.r, d), + qmul8( p1.g, d), + qmul8( p1.b, d)); +} + +__attribute__((always_inline)) +inline CRGB operator/( const CRGB& p1, uint8_t d) +{ + return CRGB( p1.r/d, p1.g/d, p1.b/d); +} + + +__attribute__((always_inline)) +inline CRGB operator&( const CRGB& p1, const CRGB& p2) +{ + return CRGB( p1.r < p2.r ? p1.r : p2.r, + p1.g < p2.g ? p1.g : p2.g, + p1.b < p2.b ? p1.b : p2.b); +} + +__attribute__((always_inline)) +inline CRGB operator|( const CRGB& p1, const CRGB& p2) +{ + return CRGB( p1.r > p2.r ? p1.r : p2.r, + p1.g > p2.g ? p1.g : p2.g, + p1.b > p2.b ? 
p1.b : p2.b); +} + +__attribute__((always_inline)) +inline CRGB operator%( const CRGB& p1, uint8_t d) +{ + CRGB retval( p1); + retval.nscale8_video( d); + return retval; +} + + + +// Define RGB orderings +enum EOrder { + RGB=0012, + RBG=0021, + GRB=0102, + GBR=0120, + BRG=0201, + BGR=0210 +}; + + +#endif diff --git a/preview_changes.txt b/preview_changes.txt new file mode 100644 index 00000000..9b1a8831 --- /dev/null +++ b/preview_changes.txt @@ -0,0 +1,57 @@ +Release Candidate 5 +* Gemma and Trinket: supported except for global "setBrightness" + +Release Candidate 4 +* Added NEOPIXEL as a synonym for WS2811 +* Fix WS2811/WS2812B timings, bring it in line to exactly 1.25ns/bit. +* Fix handling of constant color definitions (damn you, gcc!) + +Release Candidate 3 +* Fixed bug when Clock and Data were on the same port +* Added ability to set pixel color directly from HSV +* Added ability to retrieve current random16 seed + +Release Candidate 2 +* mostly bug fixes +* Fix SPI macro definitions for latest teensy3 software update +* Teensy 2 compilation fix +* hsv2rgb_rainbow performance fix + +Release Candidate 1 +* New unified/simplified API for adding/using controllers +* fleshout clockless chip support +* add hsv (spectrum and rainbow style colors) +* high speed memory management operations +* library for interpolation/easing functions +* various api changes, addition of clear and showColor functions +* scale value applied to all show methods +* bug fixes for SM16716 +* performance improvements, lpd8806 exceeds 22Mbit now +* hardware def fixes +* allow alternate rgb color orderings +* high speed math methods +* rich CRGB structure + +Preview 3 +* True hardware SPI support for teensy (up to 20Mbit output!) 
+* Minor bug fixes/tweaks + +Preview 2 +* Rename pin class to FastPin +* Replace latch with select, more accurate description of what it does +* Enforce intra-frame timing for ws2801s +* SM16716 support +* Add #define FAST_SPI_INTERRUPTS_WRITE_PINS to make sure world is ok w/interrupts and SPI +* Add #define FORCE_SOFTWARE_SPI for those times when you absolutely don't want to use hardware SPI, even if you're using the hardware SPI pins +* Add pin definitions for the arduino megas - should fix ws2811 support +* Add pin definitions for the leonardo - should fix spi support and pin mappings +* Add warnings when pin definitions are missing +* Added google+ community for fastspi users - https://plus.google.com/communities/109127054924227823508 +* Add pin definitions for Teensy++ 2.0 + + +Preview 1 +* Initial release + + |