The moon. The mother fucking moon. Working dithering, RGB adjust, inline scale8_video on an 8Mhz, hardware-mul-less -trinket-

author: Daniel Garcia <danielgarcia@gmail.com> 2014-02-19 08:37:09 +0400
committer: Daniel Garcia <danielgarcia@gmail.com> 2014-02-19 08:37:09 +0400
commit: 4ff103d94afcc33afc389dcd31537f15b98ae64a (patch)
tree: b5231757b3032ac6de990fa4db36b987b105cd2c
parent: 902bf544a44ad0ec5bd35c6b462c0f4b2e7da5c9 (diff)
5 files changed, 134 insertions, 84 deletions
diff --git a/FastLED.h b/FastLED.h
index 08f747b8..e735feca 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -38,7 +38,11 @@ enum EBlockChipsets {
 	WS2811_PORTC
 };
 
+#if defined(LIB8_ATTINY)
+#define NUM_CONTROLLERS 2
+#else
 #define NUM_CONTROLLERS 32
+#endif
 
 class CFastLED {
 	struct CControllerInfo { 
diff --git a/clockless_trinket.h b/clockless_trinket.h
index 45eea2de..52831296 100644
--- a/clockless_trinket.h
+++ b/clockless_trinket.h
@@ -11,6 +11,10 @@
 // Scaling macro choice
 #ifndef TRINKET_SCALE
 #define TRINKET_SCALE 1
+// whether or not to use dithering
+#define DITHER 1
+// whether or not to enable scale_video adjustments
+#define SCALE_VIDEO 1
 #endif
 
 // Variations on the functions in delay.h - w/a loop var passed in to preserve registers across calls by the optimizer/compiler
@@ -87,11 +91,13 @@ public:
 	}
 
 #define USE_ASM_MACROS
-				
+	
+// The variables that our various asm statements use.  The same block of variables needs to be declared for
+// all the asm blocks because GCC is pretty stupid and it would clobber variables happily or optimize code away too aggressively			
 #define ASM_VARS : /* write variables */				\
-				[b0] "+r" (b0),							\
-				[b1] "+r" (b1),							\
-				[b2] "+r" (b2),							\
+				[b0] "+a" (b0),							\
+				[b1] "+a" (b1),							\
+				[b2] "+a" (b2),							\
 				[count] "+x" (count),					\
 				[scale_base] "+a" (scale_base),			\
 				[data] "+z" (data),						\
@@ -106,7 +112,10 @@ public:
 				[d0] "r" (d0),							\
 				[d1] "r" (d1),							\
 				[d2] "r" (d2),							\
-				[PORT] "M" (FastPin<DATA_PIN>::port()-0x20),		\
+				[e0] "r" (e0),							\
+				[e1] "r" (e1),							\
+				[e2] "r" (e2),							\
+				[PORT] "M" (FastPin<DATA_PIN>::port()-0x20),			\
 				[O0] "M" (RGB_BYTE0(RGB_ORDER)),		\
 				[O1] "M" (RGB_BYTE1(RGB_ORDER)),		\
 				[O2] "M" (RGB_BYTE2(RGB_ORDER))		\
@@ -114,27 +123,31 @@ public:
 
 
 // 1 cycle, write hi to the port
-#define HI1 asm __volatile__("out 0x02, %[hi]" ASM_VARS );
+#define HI1 asm __volatile__("out %[PORT], %[hi]" ASM_VARS );
 // 1 cycle, write lo to the port
-#define LO1 asm __volatile__("out 0x02, %[lo]" ASM_VARS );
+#define LO1 asm __volatile__("out %[PORT], %[lo]" ASM_VARS );
 // 2 cycles, sbrs on flipping the lne to lo if we're pushing out a 0
 #define QLO2(B, N) asm __volatile__("sbrs %[" #B "], " #N ASM_VARS ); LO1;
 // load a byte from ram into the given var with the given offset
 #define LD2(B,O) asm __volatile__("ldd %[" #B "], Z + %[" #O "]" ASM_VARS );
 // 3 cycles - load a byte from ram into the scaling scratch space with the given offset, clear the target var
 #define LDSCL3(B,O) asm __volatile__("ldd %[scale_base], Z + %[" #O "]\n\tclr %[" #B "]" ASM_VARS );
-// 2 cycles - increment the data pointer
-#define IDATA2 asm __volatile__("add %A[data], %A[ADV]\n\tadc %B[data], %B[ADV]"  ASM_VARS );
-// 2 cycles - decrement the counter
-#define DCOUNT2 asm __volatile__("sbiw %[count], 1" ASM_VARS );
-// 2 cycles - jump to the beginning of the loop
-#define JMPLOOP2 asm __volatile__("rjmp 1b" ASM_VARS );
-// 2 cycles - jump out of the loop
-#define BRLOOP1 asm __volatile__("breq 2f" ASM_VARS );
 // 2 cycles - perform one step of the scaling (if a given bit is set in scale, add scale-base to the scratch space)
 #define SCALE02(B, N) asm __volatile__("sbrc %[s0], " #N "\n\tadd %[" #B "], %[scale_base]" ASM_VARS );
 #define SCALE12(B, N) asm __volatile__("sbrc %[s1], " #N "\n\tadd %[" #B "], %[scale_base]" ASM_VARS );
 #define SCALE22(B, N) asm __volatile__("sbrc %[s2], " #N "\n\tadd %[" #B "], %[scale_base]" ASM_VARS );
+
+// apply dithering value  before we do anything with scale_base
+#define PRESCALE4(D) if(DITHER) { asm __volatile__("cpse %[scale_base], __zero_reg__\n\t add %[scale_base],%[" #D "]\n\tbrcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\t" ASM_VARS); } \
+				      else { _dc<4>(loopvar); }
+
+// Do a (rough) approximation of the nscale8_video jump
+#if (SCALE_VIDEO == 1) 
+#define VIDADJ2(B) asm __volatile__("cpse %[scale_base], __zero_reg__\n\tsubi %[" #B "], 0xFF\n\t" ASM_VARS);
+#else
+#define VIDADJ2(B) asm __volatile__("rjmp .+0" ASM_VARS);
+#endif
+
 // 1 cycle - rotate right, pulling in from carry
 #define ROR1(B) asm __volatile__("ror %[" #B "]" ASM_VARS );
 // 1 cycle, clear the carry bit
@@ -149,18 +162,32 @@ public:
 #define SCROR04(B, N) SCALE02(B,N) ROR1(B) CLC1
 #define SCROR14(B, N) SCALE12(B,N) ROR1(B) CLC1
 #define SCROR24(B, N) SCALE22(B,N) ROR1(B) CLC1
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Loop life cycle
+
+// #define ADJUST_DITHER  d0 += DADVANCE; d1 += DADVANCE; d2 += DADVANCE; d0 &= e0; d1 &= e1; d2 &= d2; 
+#define ADJDITHER2(D, E) D += DADVANCE; D &= E;
+// #define xstr(a) str(a)
+// #define str(a) #a
+// #define ADJDITHER2(D,E) asm __volatile__("subi %[" #D "], " xstr(DUSE) "\n\tand %[" #D "], %[" #E "]\n\t" ASM_VARS);
+
 // define the beginning of the loop
-#define LOOP asm __volatile__("1:" ASM_VARS ); d0 += DADVANCE; d1 += DADVANCE; d2 += DADVANCE; d0 &= e0; d1 &= e1; d2 &= d2; 
+#define LOOP asm __volatile__("1:" ASM_VARS );
+// define the end of the loop
 #define DONE asm __volatile__("2:" ASM_VARS );
-// delay time
+
+// 2 cycles - increment the data pointer
+#define IDATA2 asm __volatile__("add %A[data], %A[ADV]\n\tadc %B[data], %B[ADV]"  ASM_VARS );
+// 2 cycles - decrement the counter
+#define DCOUNT2 asm __volatile__("sbiw %[count], 1" ASM_VARS );
+// 2 cycles - jump to the beginning of the loop
+#define JMPLOOP2 asm __volatile__("rjmp 1b" ASM_VARS );
+// 2 cycles - jump out of the loop
+#define BRLOOP1 asm __volatile__("breq 2f" ASM_VARS );
+
 #define DADVANCE 3
-#define DITHER 1
-#define PRESCALE0_4() if(DITHER) { asm __volatile__("cpse %[scale_base], r1\n\t add %[scale_base],%[d0]\n\tbrcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\t" ASM_VARS); } \
-				      else { _dc<4>(loopvar); }
-#define PRESCALE1_4() if(DITHER) { asm __volatile__("cpse %[scale_base], r1\n\t add %[scale_base],%[d1]\n\tbrcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\t" ASM_VARS); } \
-				      else { _dc<4>(loopvar); }
-#define PRESCALE2_4() if(DITHER) { asm __volatile__("cpse %[scale_base], r1\n\t add %[scale_base],%[d2]\n\tbrcc L_%=\n\tldi %[scale_base], 0xFF\n\tL_%=:\n\t" ASM_VARS); } \
-				      else { _dc<4>(loopvar); }
+#define DUSE (0xFF - (DADVANCE-1))
 
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then 
 	// gcc will use register Y for the this pointer.
@@ -173,8 +200,8 @@ public:
 		data_t lo = *port & ~mask;
 		*port = lo;
 
-		uint8_t d0, d1, d2;
-		uint8_t e0, e1, e2;
+		register uint8_t d0, d1, d2;
+		register uint8_t e0, e1, e2;
 		uint8_t s0, s1, s2;
 		uint8_t b0, b1, b2;
 		static uint8_t d[3] = {0,0,0};
@@ -182,6 +209,7 @@ public:
 		uint16_t count = nLeds;
 		uint8_t scale_base = 0;
 		uint16_t advanceBy = advance ? (skip+3) : 0;
+		// uint8_t dadv = DADVANCE;
 
 		// initialize the scales
 		s0 = scale.raw[B0];
@@ -190,24 +218,29 @@ public:
 
 		// initialize the e & d values
 		uint8_t S;
-		S = s0; e0 = 0xFF; while(s0 >>= 1) { e0 >>= 1; }
+		S = s0; e0 = 0xFF; while(S >>= 1) { e0 >>= 1; }
 		d0 = d[0] & e0;
-		S = s1; e1 = 0xFF; while(s1 >>= 1) { e1 >>= 1; }
+		S = s1; e1 = 0xFF; while(S >>= 1) { e1 >>= 1; }
 		d1 = d[1] & e1;
-		S = s2; e2 = 0xFF; while(s2 >>= 1) { e2 >>= 1; }
+		S = s2; e2 = 0xFF; while(S >>= 1) { e2 >>= 1; }
 		d2 = d[2] & e0;
 
 		b0 = data[RGB_BYTE0(RGB_ORDER)];
 		if(DITHER && b0) { b0 = qadd8(b0, d0); }
-		b0 = scale8(b0, scale);
+		b0 = scale8_video(b0, s0);
 		b1 = 0;
 		b2 = 0;
 		register uint8_t loopvar=0;
 
 		{
 			{
-				/* asm */
-				LOOP
+				// Loop beginning, does some stuff that's outside of the pixel write cycle, namely incrementing d0-2 and masking off
+				// by the E values (see the definition of ADJDITHER2)
+				LOOP; 
+				ADJDITHER2(d0,e0)
+				ADJDITHER2(d1,e1) 
+				ADJDITHER2(d2,e2) 
+				VIDADJ2(b0);
 				// Sum of the clock counts across each row should be 10 for 8Mhz, WS2811
 				// The values in the D1/D2/D3 indicate how many cycles the previous column takes
 				// to allow things to line back up.
@@ -219,23 +252,23 @@ public:
 #if TRINKET_SCALE
 				// Inline scaling - RGB ordering
 				HI1 D1(1) QLO2(b0, 7) LDSCL3(b1,O1) 	D2(3)	LO1					D3(0)	
-				HI1	D1(1) QLO2(b0, 6) PRESCALE1_4()		D2(4)	LO1	SCALE12(b1,0)	D3(2)		
+				HI1	D1(1) QLO2(b0, 6) PRESCALE4(d1)		D2(4)	LO1	SCALE12(b1,0)	D3(2)		
 				HI1 D1(1) QLO2(b0, 5) RORSC14(b1,1) 	D2(4)	LO1 ROR1(b1) CLC1	D3(2)
 				HI1 D1(1) QLO2(b0, 4) SCROR14(b1,2)		D2(4)	LO1 SCALE12(b1,3)	D3(2)			
 				HI1 D1(1) QLO2(b0, 3) RORSC14(b1,4) 	D2(4)	LO1 ROR1(b1) CLC1	D3(2)			
 				HI1 D1(1) QLO2(b0, 2) SCROR14(b1,5) 	D2(4)	LO1 SCALE12(b1,6)	D3(2)			
 				HI1 D1(1) QLO2(b0, 1) RORSC14(b1,7) 	D2(4)	LO1 ROR1(b1) CLC1	D3(2)		
-				HI1 D1(1) QLO2(b0, 0) 				 	D2(0)	LO1 				D3(0)			
-				HI1 D1(1) QLO2(b1, 7) LDSCL3(b2,O1) 	D2(3)	LO1					D3(0)	
-				HI1	D1(1) QLO2(b1, 6) PRESCALE2_4()		D2(4)	LO1	SCALE22(b2,0)	D3(2)		
+				HI1 D1(1) QLO2(b0, 0) 				 	D2(0)	LO1 VIDADJ2(b1)		D3(2)			
+				HI1 D1(1) QLO2(b1, 7) LDSCL3(b2,O2) 	D2(3)	LO1					D3(0)	
+				HI1	D1(1) QLO2(b1, 6) PRESCALE4(d2)		D2(4)	LO1	SCALE22(b2,0)	D3(2)		
 				HI1 D1(1) QLO2(b1, 5) RORSC24(b2,1) 	D2(4)	LO1 ROR1(b2) CLC1	D3(2)
 				HI1 D1(1) QLO2(b1, 4) SCROR24(b2,2)		D2(4)	LO1 SCALE22(b2,3)	D3(2)	
 				HI1 D1(1) QLO2(b1, 3) RORSC24(b2,4) 	D2(4)	LO1 ROR1(b2) CLC1	D3(2)	
 				HI1 D1(1) QLO2(b1, 2) SCROR24(b2,5) 	D2(4)	LO1 SCALE22(b2,6)	D3(2)	
 				HI1 D1(1) QLO2(b1, 1) RORSC24(b2,7) 	D2(4)	LO1 ROR1(b2) CLC1	D3(2)
-				HI1 D1(1) QLO2(b1, 0) IDATA2 			D2(2) 	LO1 				D3(0)
-				HI1 D1(1) QLO2(b2, 7) LDSCL3(b2,O1) 	D2(3)	LO1					D3(0)	
-				HI1	D1(1) QLO2(b2, 6) PRESCALE0_4()		D2(4)	LO1	SCALE22(b0,0)	D3(2)		
+				HI1 D1(1) QLO2(b1, 0) IDATA2 			D2(2) 	LO1 VIDADJ2(b2)		D3(0)
+				HI1 D1(1) QLO2(b2, 7) LDSCL3(b0,O0) 	D2(3)	LO1					D3(0)	
+				HI1	D1(1) QLO2(b2, 6) PRESCALE4(d0)		D2(4)	LO1	SCALE22(b0,0)	D3(2)		
 				HI1 D1(1) QLO2(b2, 5) RORSC04(b0,1) 	D2(4)	LO1 ROR1(b0) CLC1	D3(2)
 				HI1 D1(1) QLO2(b2, 4) SCROR04(b0,2)		D2(4)	LO1 SCALE02(b0,3)	D3(2)	
 				HI1 D1(1) QLO2(b2, 3) RORSC04(b0,4) 	D2(4)	LO1 ROR1(b0) CLC1	D3(2)	
diff --git a/examples/Cylon/Cylon.ino b/examples/Cylon/Cylon.ino
index 8d133da2..57a3117e 100644
--- a/examples/Cylon/Cylon.ino
+++ b/examples/Cylon/Cylon.ino
@@ -1,12 +1,12 @@
 #include "FastLED.h"
 
 // How many leds in your strip?
-#define NUM_LEDS 6
+#define NUM_LEDS 32
 
 // For led chips like Neopixels, which have a data line, ground, and power, you just
 // need to define DATA_PIN.  For led chipsets that are SPI based (four wires - data, clock,
 // ground, and power), like the LPD8806, define both DATA_PIN and CLOCK_PIN
-#define DATA_PIN 11
+#define DATA_PIN 3
 #define CLOCK_PIN 13
 
 // Define the array of leds
diff --git a/examples/RGBCalibrate/RGBCalibrate.ino b/examples/RGBCalibrate/RGBCalibrate.ino
index a8af8fc8..e37560c4 100644
--- a/examples/RGBCalibrate/RGBCalibrate.ino
+++ b/examples/RGBCalibrate/RGBCalibrate.ino
@@ -26,7 +26,7 @@
 #define NUM_LEDS 6
 
 // Data pin that led data will be written out over
-#define DATA_PIN 6
+#define DATA_PIN 3
 // Clock pin only needed for SPI based chipsets when not using hardware SPI
 //#define CLOCK_PIN 8
 
@@ -43,6 +43,7 @@ void setup() {
       // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
       // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
       FastLED.addLeds<WS2812B, DATA_PIN, GRB>(leds, NUM_LEDS);
+      FastLED.setBrightness(CRGB(16,16,16));
       // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
       // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
       // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
@@ -58,12 +59,12 @@ void setup() {
 }
 
 void loop() {
-   leds[0] = CRGB::Red; 
-   leds[1] = CRGB::Green;
-   leds[2] = CRGB::Green;
-   leds[3] = CRGB::Blue;
-   leds[4] = CRGB::Blue;
-   leds[5] = CRGB::Blue;
+   leds[0] = CRGB(255,0,0); 
+   leds[1] = CRGB(0,255,0);
+   leds[2] = CRGB(0,255,0);
+   leds[3] = CRGB(0,0,255);
+   leds[4] = CRGB(0,0,255);
+   leds[5] = CRGB(0,0,255);
    FastLED.show();
    // delay(1000);
 }
diff --git a/lib8tion.h b/lib8tion.h
index 2ccd7601..f94ec9f0 100644
--- a/lib8tion.h
+++ b/lib8tion.h
@@ -464,23 +464,34 @@ LIB8STATIC uint8_t scale8_video( uint8_t i, fract8 scale)
     uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
     return j;
 #elif SCALE8_AVRASM == 1
-    
-    uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+    uint8_t j;
     asm volatile(
-         "      tst %0           \n"
-         "      breq L_%=        \n"
-         "      mul %0, %1       \n"
-         "      mov %0, r1       \n"
-         "      add %0, %2       \n"
-         "      clr __zero_reg__ \n"
-         "L_%=:                  \n"
+        "mul %[i], %[scale]\n\t"
+        "mov %[j], r1\n\t"
+        "clr __zero_reg__\n\t"
+        "cpse %[i], r1\n\t"
+        "addi %[j], 1\n\t"
+        : "+a" (j)
+        : "a" (i), "a" (scale)
+        : "r0", "r1");
+
+    return j;
+    // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+    // asm volatile(
+    //      "      tst %0           \n"
+    //      "      breq L_%=        \n"
+    //      "      mul %0, %1       \n"
+    //      "      mov %0, r1       \n"
+    //      "      add %0, %2       \n"
+    //      "      clr __zero_reg__ \n"
+    //      "L_%=:                  \n"
          
-         : "+a" (i)
-         : "a" (scale), "a" (nonzeroscale)
-         : "r0", "r1");
+    //      : "+a" (i)
+    //      : "a" (scale), "a" (nonzeroscale)
+    //      : "r0", "r1");
     
-    // Return the result
-    return i;
+    // // Return the result
+    // return i;
 #else
 #error "No implementation for scale8_video available."
 #endif
@@ -541,31 +552,32 @@ LIB8STATIC void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale)
 
 LIB8STATIC uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
 {
-#if SCALE8_C == 1
-    uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
-    uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
-    return j;
-#elif SCALE8_AVRASM == 1
+  return scale8_video(i,scale);
+// #if SCALE8_C == 1
+//     uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+//     uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
+//     return j;
+// #elif SCALE8_AVRASM == 1
     
-    uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
-    asm volatile(
-         "      tst %0          \n"
-         "      breq L_%=       \n"
-         "      mul %0, %1      \n"
-         "      mov %0, r1      \n"
-         "      add %0, %2      \n"
-         /* R1 IS LEFT DIRTY, YOU MUST ZERO IT OUT YOURSELF */
-         "L_%=:                 \n"
+//     uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+//     asm volatile(
+//          "      tst %0          \n"
+//          "      breq L_%=       \n"
+//          "      mul %0, %1      \n"
+//          "      mov %0, r1      \n"
+//          "      add %0, %2      \n"
+//          /* R1 IS LEFT DIRTY, YOU MUST ZERO IT OUT YOURSELF */
+//          "L_%=:                 \n"
          
-         : "+a" (i)
-         : "a" (scale), "a" (nonzeroscale)
-         : "r0", "r1");
+//          : "+a" (i)
+//          : "a" (scale), "a" (nonzeroscale)
+//          : "r0", "r1");
     
-    // Return the result
-    return i;
-#else
-#error "No implementation for scale8_video available."
-#endif
+//     // Return the result
+//     return i;
+// #else
+// #error "No implementation for scale8_video available."
+// #endif
 }
author	Daniel Garcia <danielgarcia@gmail.com>	2014-02-19 08:37:09 +0400
committer	Daniel Garcia <danielgarcia@gmail.com>	2014-02-19 08:37:09 +0400
commit	4ff103d94afcc33afc389dcd31537f15b98ae64a (patch)
tree	b5231757b3032ac6de990fa4db36b987b105cd2c
parent	902bf544a44ad0ec5bd35c6b462c0f4b2e7da5c9 (diff)