Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FastLED/FastLED.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'platforms/esp/32/clockless_i2s_esp32.h')
-rw-r--r--platforms/esp/32/clockless_i2s_esp32.h767
1 files changed, 767 insertions, 0 deletions
diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
new file mode 100644
index 00000000..a4d15ba7
--- /dev/null
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -0,0 +1,767 @@
+/*
+ * I2S Driver
+ *
+ * Copyright (c) 2019 Yves Bazin
+ * Copyright (c) 2019 Samuel Z. Guyer
+ * Derived from lots of code examples from other people.
+ *
+ * The I2S implementation can drive up to 24 strips in parallel, but
+ * with the following limitation: all the strips must have the same
+ * timing (i.e., they must all use the same chip).
+ *
+ * To enable the I2S driver, add the following line *before* including
+ * FastLED.h (no other changes are necessary):
+ *
+ * #define FASTLED_ESP32_I2S true
+ *
+ * The overall strategy is to use the parallel mode of the I2S "audio"
+ * peripheral to send up to 24 bits in parallel to 24 different pins.
+ * Unlike the RMT peripheral the I2S system cannot send bits of
+ * different lengths. Instead, we set the I2S data clock fairly high
+ * and then encode a signal as a series of bits.
+ *
+ * For example, with a clock divider of 10 the data clock will be
+ * 8MHz, so each bit is 125ns. The WS2812 expects a "1" bit to be
+ * encoded as a HIGH signal for around 875ns, followed by LOW for
+ * 375ns. Sending the following pattern results in the right shape
+ * signal:
+ *
+ * 1111111000 WS2812 "1" bit encoded as 10 125ns pulses
+ *
+ * The I2S peripheral expects the bits for all 24 outputs to be packed
+ * into a single 32-bit word. The complete signal is a series of these
+ * 32-bit values -- one for each bit for each strip. The pixel data,
+ * however, is stored "serially" as a series of RGB values separately
+ * for each strip. To prepare the data we need to do three things: (1)
+ * take 1 pixel from each strip, and (2) tranpose the bits so that
+ * they are in the parallel form, (3) translate each data bit into the
+ * bit pattern that encodes the signal for that bit. This code is in
+ * the fillBuffer() method:
+ *
+ * 1. Read 1 pixel from each strip into an array; store this data by
+ * color channel (e.g., all the red bytes, then all the green
+ * bytes, then all the blue bytes). For three color channels, the
+ * array is 3 X 24 X 8 bits.
+ *
+ * 2. Tranpose the array so that it is 3 X 8 X 24 bits. The hardware
+ * wants the data in 32-bit chunks, so the actual form is 3 X 8 X
+ * 32, with the low 8 bits unused.
+ *
+ * 3. Take each group of 24 parallel bits and "expand" them into a
+ * pattern according to the encoding. For example, with a 8MHz
+ * data clock, each data bit turns into 10 I2s pulses, so 24
+ * parallel data bits turn into 10 X 24 pulses.
+ *
+ * We send data to the I2S peripheral using the DMA interface. We use
+ * two DMA buffers, so that we can fill one buffer while the other
+ * buffer is being sent. Each DMA buffer holds the fully-expanded
+ * pulse pattern for one pixel on up to 24 strips. The exact amount of
+ * memory required depends on the number of color channels and the
+ * number of pulses used to encode each bit.
+ *
+ * We get an interrupt each time a buffer is sent; we then fill that
+ * buffer while the next one is being sent. The DMA interface allows
+ * us to configure the buffers as a circularly linked list, so that it
+ * can automatically start on the next buffer.
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#pragma message "NOTE: ESP32 support using I2S parallel driver. All strips must use the same chipset"
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp_heap_caps.h"
+#include "soc/soc.h"
+#include "soc/gpio_sig_map.h"
+#include "soc/i2s_reg.h"
+#include "soc/i2s_struct.h"
+#include "soc/io_mux_reg.h"
+#include "driver/gpio.h"
+#include "driver/periph_ctrl.h"
+#include "rom/lldesc.h"
+#include "esp_intr.h"
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+ uint32_t cyc;
+ __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+ return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3
+
+// -- Choose which I2S device to use
+#ifndef I2S_DEVICE
+#define I2S_DEVICE 0
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_I2S_MAX_CONTROLLERS
+#define FASTLED_I2S_MAX_CONTROLLERS 24
+#endif
+
+// -- I2S clock
+#define I2S_BASE_CLK (80000000L)
+#define I2S_MAX_CLK (20000000L) //more tha a certain speed and the I2s looses some bits
+#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Array of all controllers
+static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+
+// -- Global semaphore for the whole show process
+// Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- One-time I2S initialization
+static bool gInitialized = false;
+
+// -- Interrupt handler
+static intr_handle_t gI2S_intr_handle = NULL;
+
+// -- A pointer to the memory-mapped structure: I2S0 or I2S1
+static i2s_dev_t * i2s;
+
+// -- I2S goes to these pins until we remap them using the GPIO matrix
+static int i2s_base_pin_index;
+
+// --- I2S DMA buffers
+struct DMABuffer {
+ lldesc_t descriptor;
+ uint8_t * buffer;
+};
+
+#define NUM_DMA_BUFFERS 2
+static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
+
+// -- Bit patterns
+// For now, we require all strips to be the same chipset, so these
+// are global variables.
+
+static int gPulsesPerBit = 0;
+static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+// -- Counters to track progress
+static int gCurBuffer = 0;
+static bool gDoneFilling = false;
+static int ones_for_one;
+static int ones_for_zero;
+
+// -- Temp buffers for pixels and bits being formatted for DMA
+static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];
+static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4];
+static int CLOCK_DIVIDER_N;
+static int CLOCK_DIVIDER_A;
+static int CLOCK_DIVIDER_B;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+ // -- Store the GPIO pin
+ gpio_num_t mPin;
+
+ // -- This instantiation forces a check on the pin choice
+ FastPin<DATA_PIN> mFastPin;
+
+ // -- Save the pixel controller
+ PixelController<RGB_ORDER> * mPixels;
+
+public:
+
+ void init()
+ {
+ i2sInit();
+
+ // -- Allocate space to save the pixel controller
+ // during parallel output
+ mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
+
+ gControllers[gNumControllers] = this;
+ int my_index = gNumControllers;
+ gNumControllers++;
+
+ // -- Set up the pin We have to do two things: configure the
+ // actual GPIO pin, and route the output from the default
+ // pin (determined by the I2S device) to the pin we
+ // want. We compute the default pin using the index of this
+ // controller in the array. This order is crucial because
+ // the bits must go into the DMA buffer in the same order.
+ mPin = gpio_num_t(DATA_PIN);
+
+ PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
+ gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
+ pinMode(mPin,OUTPUT);
+ gpio_matrix_out(mPin, i2s_base_pin_index + my_index, false, false);
+ }
+
+ virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+ static int pgcd(int smallest,int precision,int a,int b,int c)
+ {
+ int pgc_=1;
+ for( int i=smallest;i>0;i--)
+ {
+
+ if( a%i<=precision && b%i<=precision && c%i<=precision)
+ {
+ pgc_=i;
+ break;
+ }
+ }
+ return pgc_;
+ }
+
+ /** Compute pules/bit patterns
+ *
+ * This is Yves Bazin's mad code for computing the pulse pattern
+ * and clock timing given the target signal given by T1, T2, and
+ * T3. In general, these parameters are interpreted as follows:
+ *
+ * a "1" bit is encoded by setting the pin HIGH to T1+T2 ns, then LOW for T3 ns
+ * a "0" bit is encoded by setting the pin HIGH to T1 ns, then LOW for T2+T3 ns
+ *
+ */
+ static void initBitPatterns()
+ {
+ // Precompute the bit patterns based on the I2S sample rate
+ // Serial.println("Setting up fastled using I2S");
+
+ // -- First, convert back to ns from CPU clocks
+ uint32_t T1ns = ESPCLKS_TO_NS(T1);
+ uint32_t T2ns = ESPCLKS_TO_NS(T2);
+ uint32_t T3ns = ESPCLKS_TO_NS(T3);
+
+ // Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
+ // Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
+ // Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+
+ /*
+ We calculate the best pcgd to the timing
+ ie
+ WS2811 77 77 154 => 1 1 2 => nb pulses= 4
+ WS2812 60 150 90 => 2 5 3 => nb pulses=10
+ */
+ int smallest=0;
+ if (T1>T2)
+ smallest=T2;
+ else
+ smallest=T1;
+ if(smallest>T3)
+ smallest=T3;
+ double freq=(double)1/(double)(T1ns + T2ns + T3ns);
+ // Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
+ // Serial.printf("smallest %d\n",smallest);
+ int pgc_=1;
+ int precision=0;
+ pgc_=pgcd(smallest,precision,T1,T2,T3);
+ //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
+ while(pgc_==1 || (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 || (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
+ {
+ precision++;
+ pgc_=pgcd(smallest,precision,T1,T2,T3);
+ //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
+ }
+ pgc_=pgcd(smallest,precision,T1,T2,T3);
+ // Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
+ // Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
+ gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
+ /*
+ we calculate the duration of one pulse nd htre base frequency of the led
+ ie WS2812B F=1/(250+625+375)=800kHz or 1250ns
+ as we need 10 pulses each pulse is 125ns => frequency 800Khz*10=8MHz
+ WS2811 T=320+320+641=1281ns qnd we need 4 pulses => pulse duration 320.25ns =>frequency 3.1225605Mhz
+
+ */
+
+ freq=1000000000L*freq*gPulsesPerBit;
+ // Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
+
+ /*
+ we do calculate the needed N a and b
+ as f=basefred/(N+b/a);
+ as a is max 63 the precision for the decimal is 1/63
+
+ */
+
+ CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
+ double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
+
+ double prec=(double)1/63;
+ int a=1;
+ int b=0;
+ CLOCK_DIVIDER_A=1;
+ CLOCK_DIVIDER_B=0;
+ for(a=1;a<64;a++)
+ {
+ for(b=0;b<a;b++)
+ {
+ //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabsf(v-(double)b/a));
+ if(fabsf(v-(double)b/a) <= prec/2)
+ break;
+ }
+ if(fabsf(v-(double)b/a) ==0)
+ {
+ CLOCK_DIVIDER_A=a;
+ CLOCK_DIVIDER_B=b;
+ break;
+ }
+ if(fabsf(v-(double)b/a) < prec/2)
+ {
+ if (fabsf(v-(double)b/a) <fabsf(v-(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A))
+ {
+ CLOCK_DIVIDER_A=a;
+ CLOCK_DIVIDER_B=b;
+ }
+
+ }
+ }
+ //top take care of an issue with double 0.9999999999
+ if(CLOCK_DIVIDER_A==CLOCK_DIVIDER_B)
+ {
+ CLOCK_DIVIDER_A=1;
+ CLOCK_DIVIDER_B=0;
+ CLOCK_DIVIDER_N++;
+ }
+
+ //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
+ //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
+ freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
+ freq=freq*I2S_BASE_CLK;
+ // Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
+ double pulseduration=1000000000/freq;
+ // Serial.printf("Pulse duration: %f ns\n",pulseduration);
+ // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+
+ //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
+
+ //int ones_for_one = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+ ones_for_one = T1/pgc_ +T2/pgc_;
+ //Serial.print("One bit: target ");
+ //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
+ //Serial.print(ones_for_one); Serial.print(" 1 bits");
+ //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+ // Serial.printf("one bit : target %d ns --- %d pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
+
+
+ int i = 0;
+ while ( i < ones_for_one ) {
+ gOneBit[i] = 0xFFFFFF00;
+ i++;
+ }
+ while ( i < gPulsesPerBit ) {
+ gOneBit[i] = 0x00000000;
+ i++;
+ }
+
+ //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+ ones_for_zero =T1/pgc_ ;
+ // Serial.print("Zero bit: target ");
+ // Serial.print(T1ns); Serial.print("ns --- ");
+ //Serial.print(ones_for_zero); Serial.print(" 1 bits");
+ //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+ // Serial.printf("Zero bit : target %d ns --- %d pulses 1 bit = %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
+ i = 0;
+ while ( i < ones_for_zero ) {
+ gZeroBit[i] = 0xFFFFFF00;
+ i++;
+ }
+ while ( i < gPulsesPerBit ) {
+ gZeroBit[i] = 0x00000000;
+ i++;
+ }
+
+ memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
+ memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
+ }
+
+ static DMABuffer * allocateDMABuffer(int bytes)
+ {
+ DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
+
+ b->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
+ memset(b->buffer, 0, bytes);
+
+ b->descriptor.length = bytes;
+ b->descriptor.size = bytes;
+ b->descriptor.owner = 1;
+ b->descriptor.sosf = 1;
+ b->descriptor.buf = b->buffer;
+ b->descriptor.offset = 0;
+ b->descriptor.empty = 0;
+ b->descriptor.eof = 1;
+ b->descriptor.qe.stqe_next = 0;
+
+ return b;
+ }
+
+ static void i2sInit()
+ {
+ // -- Only need to do this once
+ if (gInitialized) return;
+
+ // -- Construct the bit patterns for ones and zeros
+ initBitPatterns();
+
+ // -- Choose whether to use I2S device 0 or device 1
+ // Set up the various device-specific parameters
+ int interruptSource;
+ if (I2S_DEVICE == 0) {
+ i2s = &I2S0;
+ periph_module_enable(PERIPH_I2S0_MODULE);
+ interruptSource = ETS_I2S0_INTR_SOURCE;
+ i2s_base_pin_index = I2S0O_DATA_OUT0_IDX;
+ } else {
+ i2s = &I2S1;
+ periph_module_enable(PERIPH_I2S1_MODULE);
+ interruptSource = ETS_I2S1_INTR_SOURCE;
+ i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
+ }
+
+ // -- Reset everything
+ i2sReset();
+ i2sReset_DMA();
+ i2sReset_FIFO();
+
+ // -- Main configuration
+ i2s->conf.tx_msb_right = 1;
+ i2s->conf.tx_mono = 0;
+ i2s->conf.tx_short_sync = 0;
+ i2s->conf.tx_msb_shift = 0;
+ i2s->conf.tx_right_first = 1; // 0;//1;
+ i2s->conf.tx_slave_mod = 0;
+
+ // -- Set parallel mode
+ i2s->conf2.val = 0;
+ i2s->conf2.lcd_en = 1;
+ i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
+ i2s->conf2.lcd_tx_sdx2_en = 0; // HN
+
+ // -- Set up the clock rate and sampling
+ i2s->sample_rate_conf.val = 0;
+ i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
+ i2s->sample_rate_conf.tx_bck_div_num = 1;
+ i2s->clkm_conf.val = 0;
+ i2s->clkm_conf.clka_en = 0;
+
+ // -- Data clock is computed as Base/(div_num + (div_b/div_a))
+ // Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz
+ // One cycle is 125ns
+ i2s->clkm_conf.clkm_div_a = CLOCK_DIVIDER_A;
+ i2s->clkm_conf.clkm_div_b = CLOCK_DIVIDER_B;
+ i2s->clkm_conf.clkm_div_num = CLOCK_DIVIDER_N;
+
+ i2s->fifo_conf.val = 0;
+ i2s->fifo_conf.tx_fifo_mod_force_en = 1;
+ i2s->fifo_conf.tx_fifo_mod = 3; // 32-bit single channel data
+ i2s->fifo_conf.tx_data_num = 32; // fifo length
+ i2s->fifo_conf.dscr_en = 1; // fifo will use dma
+
+ i2s->conf1.val = 0;
+ i2s->conf1.tx_stop_en = 0;
+ i2s->conf1.tx_pcm_bypass = 1;
+
+ i2s->conf_chan.val = 0;
+ i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
+
+ i2s->timing.val = 0;
+
+ // -- Allocate two DMA buffers
+ dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+ dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+
+ // -- Arrange them as a circularly linked list
+ dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
+ dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
+
+ // -- Allocate i2s interrupt
+ SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
+ esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
+ &interruptHandler, 0, &gI2S_intr_handle);
+
+ // -- Create a semaphore to block execution until all the controllers are done
+ if (gTX_sem == NULL) {
+ gTX_sem = xSemaphoreCreateBinary();
+ xSemaphoreGive(gTX_sem);
+ }
+
+ // Serial.println("Init I2S");
+ gInitialized = true;
+ }
+
+ /** Clear DMA buffer
+ *
+ * Yves' clever trick: initialize the bits that we know must be 0
+ * or 1 regardless of what bit they encode.
+ */
+ static void empty( uint32_t *buf)
+ {
+ for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
+ {
+ int offset=gPulsesPerBit*i;
+ for(int j=0;j<ones_for_zero;j++)
+ buf[offset+j]=0xffffffff;
+
+ for(int j=ones_for_one;j<gPulsesPerBit;j++)
+ buf[offset+j]=0;
+ }
+ }
+
+ // -- Show pixels
+ // This is the main entry point for the controller.
+ virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+ {
+ if (gNumStarted == 0) {
+ // -- First controller: make sure everything is set up
+ xSemaphoreTake(gTX_sem, portMAX_DELAY);
+ }
+
+ // -- Initialize the local state, save a pointer to the pixel
+ // data. We need to make a copy because pixels is a local
+ // variable in the calling function, and this data structure
+ // needs to outlive this call to showPixels.
+ (*mPixels) = pixels;
+
+ // -- Keep track of the number of strips we've seen
+ gNumStarted++;
+
+ // Serial.print("Show pixels ");
+ // Serial.println(gNumStarted);
+
+ // -- The last call to showPixels is the one responsible for doing
+ // all of the actual work
+ if (gNumStarted == gNumControllers) {
+ empty((uint32_t*)dmaBuffers[0]->buffer);
+ empty((uint32_t*)dmaBuffers[1]->buffer);
+ gCurBuffer = 0;
+ gDoneFilling = false;
+
+ // -- Prefill both buffers
+ fillBuffer();
+ fillBuffer();
+
+ i2sStart();
+
+ // -- Wait here while the rest of the data is sent. The interrupt handler
+ // will keep refilling the DMA buffers until it is all sent; then it
+ // gives the semaphore back.
+ xSemaphoreTake(gTX_sem, portMAX_DELAY);
+ xSemaphoreGive(gTX_sem);
+
+ i2sStop();
+
+ // -- Reset the counters
+ gNumStarted = 0;
+ }
+ }
+
+ // -- Custom interrupt handler
+ static IRAM_ATTR void interruptHandler(void *arg)
+ {
+ if (i2s->int_st.out_eof) {
+ i2s->int_clr.val = i2s->int_raw.val;
+
+ if ( ! gDoneFilling) {
+ fillBuffer();
+ } else {
+ portBASE_TYPE HPTaskAwoken = 0;
+ xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+ if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+ }
+ }
+ }
+
+ /** Fill DMA buffer
+ *
+ * This is where the real work happens: take a row of pixels (one
+ * from each strip), transpose and encode the bits, and store
+ * them in the DMA buffer for the I2S peripheral to read.
+ */
+ static void fillBuffer()
+ {
+ // -- Alternate between buffers
+ volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
+ gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
+
+ // -- Get the requested pixel from each controller. Store the
+ // data for each color channel in a separate array.
+ uint32_t has_data_mask = 0;
+ for (int i = 0; i < gNumControllers; i++) {
+ // -- Store the pixels in reverse controller order starting at index 23
+ // This causes the bits to come out in the right position after we
+ // transpose them.
+ int bit_index = 23-i;
+ ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+ if (pController->mPixels->has(1)) {
+ gPixelRow[0][bit_index] = pController->mPixels->loadAndScale0();
+ gPixelRow[1][bit_index] = pController->mPixels->loadAndScale1();
+ gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
+ pController->mPixels->advanceData();
+ pController->mPixels->stepDithering();
+
+ // -- Record that this controller still has data to send
+ has_data_mask |= (1 << (i+8));
+ }
+ }
+
+ // -- None of the strips has data? We are done.
+ if (has_data_mask == 0) {
+ gDoneFilling = true;
+ return;
+ }
+
+ // -- Transpose and encode the pixel data for the DMA buffer
+ int buf_index = 0;
+ for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+
+ // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
+ transpose32(gPixelRow[channel], gPixelBits[channel][0] );
+
+ //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
+ for (int bitnum = 0; bitnum < 8; bitnum++) {
+ uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
+ uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
+
+ /* SZG: More general, but too slow:
+ for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+ buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+ }
+ */
+
+ // -- Only fill in the pulses that are different between the "0" and "1" encodings
+ for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; pulse_num++) {
+ buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
+ }
+ }
+ }
+ }
+
+ static void transpose32(uint8_t * pixels, uint8_t * bits)
+ {
+ transpose8rS32(& pixels[0], 1, 4, & bits[0]);
+ transpose8rS32(& pixels[8], 1, 4, & bits[1]);
+ transpose8rS32(& pixels[16], 1, 4, & bits[2]);
+ //transpose8rS32(& pixels[24], 1, 4, & bits[3]); Can only use 24 bits
+ }
+
+ /** Transpose 8x8 bit matrix
+ * From Hacker's Delight
+ */
+ static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B)
+ {
+ uint32_t x, y, t;
+
+ // Load the array and pack it into x and y.
+
+ x = (A[0]<<24) | (A[m]<<16) | (A[2*m]<<8) | A[3*m];
+ y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
+
+ t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7);
+ t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7);
+
+ t = (x ^ (x >>14)) & 0x0000CCCC; x = x ^ t ^ (t <<14);
+ t = (y ^ (y >>14)) & 0x0000CCCC; y = y ^ t ^ (t <<14);
+
+ t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+ y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+ x = t;
+
+ B[0]=x>>24; B[n]=x>>16; B[2*n]=x>>8; B[3*n]=x;
+ B[4*n]=y>>24; B[5*n]=y>>16; B[6*n]=y>>8; B[7*n]=y;
+ }
+
+ /** Start I2S transmission
+ */
+ static void i2sStart()
+ {
+ // esp_intr_disable(gI2S_intr_handle);
+ // Serial.println("I2S start");
+ i2sReset();
+ //Serial.println(dmaBuffers[0]->sampleCount());
+ i2s->lc_conf.val=I2S_OUT_DATA_BURST_EN | I2S_OUTDSCR_BURST_EN | I2S_OUT_DATA_BURST_EN;
+ i2s->out_link.addr = (uint32_t) & (dmaBuffers[0]->descriptor);
+ i2s->out_link.start = 1;
+ ////vTaskDelay(5);
+ i2s->int_clr.val = i2s->int_raw.val;
+ // //vTaskDelay(5);
+ i2s->int_ena.out_dscr_err = 1;
+ //enable interrupt
+ ////vTaskDelay(5);
+ esp_intr_enable(gI2S_intr_handle);
+ // //vTaskDelay(5);
+ i2s->int_ena.val = 0;
+ i2s->int_ena.out_eof = 1;
+
+ //start transmission
+ i2s->conf.tx_start = 1;
+ }
+
+ static void i2sReset()
+ {
+ // Serial.println("I2S reset");
+ const unsigned long lc_conf_reset_flags = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
+ i2s->lc_conf.val |= lc_conf_reset_flags;
+ i2s->lc_conf.val &= ~lc_conf_reset_flags;
+
+ const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
+ i2s->conf.val |= conf_reset_flags;
+ i2s->conf.val &= ~conf_reset_flags;
+ }
+
+ static void i2sReset_DMA()
+ {
+ i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;
+ i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0;
+ }
+
+ static void i2sReset_FIFO()
+ {
+ i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0;
+ i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0;
+ }
+
+ static void i2sStop()
+ {
+ // Serial.println("I2S stop");
+ esp_intr_disable(gI2S_intr_handle);
+ i2sReset();
+ i2s->conf.rx_start = 0;
+ i2s->conf.tx_start = 0;
+ }
+};
+
+FASTLED_NAMESPACE_END