Deep Shadow Buffer

Since the deep shadow buffer summer of code project is not actively under development anymore, I decided to build my own DSM implementation from scratch, based on reusing as much existing shadow buffer code as possible. It's not very advanced, but implements the basic algorithm. Just enough so we can do shading tests with it; optimizations and other improvements can be done later.

Supported:
* Classical shadow buffer options: filter, soft, bias, ..
* Multiple sample buffers, merged into one.
* Halfway trick to support lower bias.
* Compression with user defined threshold.
* Non-textured alpha transparency, using Casting Alpha value.
* Strand render.

Not Supported:
* Tiling disk cache, so it can use a lot of memory.
* Per part rendering for lower memory usage during creation.
* Colored shadow.
* Textured color/alpha shadow.
* Mipmaps for faster filtering.
* Volume shadows.

Usage Hints:
* Use sample buffers + smaller size rather than large size.
* For example 512 size x 9 sample buffers instead of 2048 x 1.
* Compression threshold 0.05 works, but is on the conservative side.
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2009-10-12 23:41:40 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2009-10-12 23:41:40 +0400
commit: 10198e99ff398380696e3408f752280e6bb5106d (patch)
tree: 834b81d39ae9561ace7b2c607f2015cc03ce7af7
parent: b5f820cd874a7b3ca1de81103b99969429adfd6c (diff)
15 files changed, 650 insertions, 137 deletions
diff --git a/release/scripts/ui/buttons_data_lamp.py b/release/scripts/ui/buttons_data_lamp.py
index 2879da8d8d5..4e495d158eb 100644
--- a/release/scripts/ui/buttons_data_lamp.py
+++ b/release/scripts/ui/buttons_data_lamp.py
@@ -202,7 +202,7 @@ class DATA_PT_shadow(DataButtonsPanel):
 			col.itemL(text="Buffer Type:")
 			col.row().itemR(lamp, "shadow_buffer_type", expand=True)
 
-			if lamp.shadow_buffer_type in ('REGULAR', 'HALFWAY'):
+			if lamp.shadow_buffer_type in ('REGULAR', 'HALFWAY', 'DEEP'):
 				split = layout.split()
 				
 				col = split.column()
@@ -218,6 +218,8 @@ class DATA_PT_shadow(DataButtonsPanel):
 				sub = col.column(align=True)
 				sub.itemR(lamp, "shadow_buffer_size", text="Size")
 				sub.itemR(lamp, "shadow_buffer_samples", text="Samples")
+				if lamp.shadow_buffer_type == 'DEEP':
+					col.itemR(lamp, "compression_threshold")
 				
 			elif lamp.shadow_buffer_type == 'IRREGULAR':
 				layout.itemR(lamp, "shadow_buffer_bias", text="Bias")
diff --git a/source/blender/blenkernel/BKE_blender.h b/source/blender/blenkernel/BKE_blender.h
index a79bf43c354..e91e434b97d 100644
--- a/source/blender/blenkernel/BKE_blender.h
+++ b/source/blender/blenkernel/BKE_blender.h
@@ -43,7 +43,7 @@ struct bContext;
 struct ReportList;
 
 #define BLENDER_VERSION			250
-#define BLENDER_SUBVERSION		5
+#define BLENDER_SUBVERSION		6
 
 #define BLENDER_MINVERSION		250
 #define BLENDER_MINSUBVERSION	0
diff --git a/source/blender/blenkernel/intern/object.c b/source/blender/blenkernel/intern/object.c
index 64e22c85251..8494fdae954 100644
--- a/source/blender/blenkernel/intern/object.c
+++ b/source/blender/blenkernel/intern/object.c
@@ -772,6 +772,7 @@ void *add_lamp(char *name)
 	la->samp= 3;
 	la->bias= 1.0f;
 	la->soft= 3.0f;
+	la->compressthresh= 0.05f;
 	la->ray_samp= la->ray_sampy= la->ray_sampz= 1; 
 	la->area_size=la->area_sizey=la->area_sizez= 1.0f; 
 	la->buffers= 1;
diff --git a/source/blender/blenloader/intern/readfile.c b/source/blender/blenloader/intern/readfile.c
index ad51892a94f..0004187b1c6 100644
--- a/source/blender/blenloader/intern/readfile.c
+++ b/source/blender/blenloader/intern/readfile.c
@@ -9920,8 +9920,9 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
 	}
 
 	/* put 2.50 compatibility code here until next subversion bump */
-	{
+	if (main->versionfile < 250 || (main->versionfile == 250 && main->subversionfile < 6)) {
 		Object *ob;
+		Lamp *la;
 		
 		/* New variables for axis-angle rotations and/or quaternion rotations were added, and need proper initialisation */
 		for (ob= main->object.first; ob; ob= ob->id.next) {
@@ -9939,6 +9940,9 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
 				}
 			}
 		}
+
+		for(la = main->lamp.first; la; la=la->id.next)
+			la->compressthresh= 0.05f;
 	}
 
 	/* WATCH IT!!!: pointers from libdata have not been converted yet here! */
diff --git a/source/blender/makesdna/DNA_lamp_types.h b/source/blender/makesdna/DNA_lamp_types.h
index 78c8d1a5607..0a0046f8470 100644
--- a/source/blender/makesdna/DNA_lamp_types.h
+++ b/source/blender/makesdna/DNA_lamp_types.h
@@ -63,7 +63,7 @@ typedef struct Lamp {
 	short pad2;
 	
 	float clipsta, clipend, shadspotsize;
-	float bias, soft;
+	float bias, soft, compressthresh, pad5[3];
 	short bufsize, samp, buffers, filtertype;
 	char bufflag, buftype;
 	
@@ -167,6 +167,7 @@ typedef struct Lamp {
 #define LA_SHADBUF_REGULAR		0
 #define LA_SHADBUF_IRREGULAR	1
 #define LA_SHADBUF_HALFWAY		2
+#define LA_SHADBUF_DEEP			3
 
 /* bufflag, auto clipping */
 #define LA_SHADBUF_AUTO_START	1
diff --git a/source/blender/makesrna/intern/rna_lamp.c b/source/blender/makesrna/intern/rna_lamp.c
index 4ad1f942b33..a518bd28d24 100644
--- a/source/blender/makesrna/intern/rna_lamp.c
+++ b/source/blender/makesrna/intern/rna_lamp.c
@@ -570,6 +570,7 @@ static void rna_def_spot_lamp(BlenderRNA *brna)
 		{LA_SHADBUF_REGULAR	, "REGULAR", 0, "Classical", "Classic shadow buffer."},
 		{LA_SHADBUF_HALFWAY, "HALFWAY", 0, "Classic-Halfway", "Regular buffer, averaging the closest and 2nd closest Z value to reducing bias artifaces."},
 		{LA_SHADBUF_IRREGULAR, "IRREGULAR", 0, "Irregular", "Irregular buffer produces sharp shadow always, but it doesn't show up for raytracing."},
+		{LA_SHADBUF_DEEP, "DEEP", 0, "Deep", "Deep shadow buffer supports transparency and better filtering, at the cost of more memory usage and processing time."},
 		{0, NULL, 0, NULL, NULL}};
 
 	static EnumPropertyItem prop_shadbuffiltertype_items[] = {
@@ -690,6 +691,12 @@ static void rna_def_spot_lamp(BlenderRNA *brna)
 	RNA_def_property_boolean_sdna(prop, NULL, "bufflag", LA_SHADBUF_AUTO_END);
 	RNA_def_property_ui_text(prop, "Autoclip End", "Automatic calculation of clipping-end, based on visible vertices.");
 	RNA_def_property_update(prop, 0, "rna_Lamp_draw_update");
+
+	prop= RNA_def_property(srna, "compression_threshold", PROP_FLOAT, PROP_NONE);
+	RNA_def_property_float_sdna(prop, NULL, "compressthresh");
+	RNA_def_property_range(prop, 0.0f, 1.0f);
+	RNA_def_property_ui_text(prop, "Compress", "Deep shadow map compression threshold.");
+	RNA_def_property_update(prop, 0, "rna_Lamp_update");
 }
 
 static void rna_def_sun_lamp(BlenderRNA *brna)
diff --git a/source/blender/makesrna/intern/rna_material.c b/source/blender/makesrna/intern/rna_material.c
index b05cf1afa84..e03e221f822 100644
--- a/source/blender/makesrna/intern/rna_material.c
+++ b/source/blender/makesrna/intern/rna_material.c
@@ -1559,7 +1559,7 @@ void RNA_def_material(BlenderRNA *brna)
 	prop= RNA_def_property(srna, "shadow_casting_alpha", PROP_FLOAT, PROP_FACTOR);
 	RNA_def_property_float_sdna(prop, NULL, "shad_alpha");
 	RNA_def_property_range(prop, 0.001, 1);
-	RNA_def_property_ui_text(prop, "Shadow Casting Alpha", "Shadow casting alpha, only in use for Irregular Shadowbuffer.");
+	RNA_def_property_ui_text(prop, "Shadow Casting Alpha", "Shadow casting alpha, in use for Irregular and Deep shadow buffer.");
 	RNA_def_property_update(prop, 0, "rna_Material_update");
 
 	prop= RNA_def_property(srna, "light_group", PROP_POINTER, PROP_NONE);
diff --git a/source/blender/render/intern/include/render_types.h b/source/blender/render/intern/include/render_types.h
index 8f16d636e79..48bf34d0696 100644
--- a/source/blender/render/intern/include/render_types.h
+++ b/source/blender/render/intern/include/render_types.h
@@ -245,10 +245,17 @@ struct Render
 
 struct ISBData;
 
+typedef struct DeepSample { /* one (depth, visibility) entry of a deep shadow buffer pixel */
+	int z; /* depth; shadbuf.c compares it against integer lookup depths and uses 0x7FFFFFFF as the far end */
+	float v; /* visibility at that depth; shadbuf.c starts at 1.0 and multiplies by (1 - alpha) per sample */
+} DeepSample;
+ 
 typedef struct ShadSampleBuf {
 	struct ShadSampleBuf *next, *prev;
 	intptr_t *zbuf;
 	char *cbuf;
+	DeepSample **deepbuf;
+	int *totbuf;
 } ShadSampleBuf;
 
 typedef struct ShadBuf {
@@ -258,7 +265,7 @@ typedef struct ShadBuf {
 	float viewmat[4][4];
 	float winmat[4][4];
 	float *jit, *weight;
-	float d, clipend, pixsize, soft;
+	float d, clipend, pixsize, soft, compressthresh;
 	int co[3];
 	int size, bias;
 	ListBase buffers;
@@ -527,6 +534,8 @@ typedef struct LampRen {
 	float clipend;
 	/** A small depth offset to prevent self-shadowing. */
 	float bias;
+	/* Compression threshold for deep shadow maps */
+	float compressthresh;
 	
 	short ray_samp, ray_sampy, ray_sampz, ray_samp_method, ray_samp_type, area_shape, ray_totsamp;
 	short xold[BLENDER_MAX_THREADS], yold[BLENDER_MAX_THREADS];	/* last jitter table for area lights */
diff --git a/source/blender/render/intern/include/zbuf.h b/source/blender/render/intern/include/zbuf.h
index b6d0c656f63..a0665daf916 100644
--- a/source/blender/render/intern/include/zbuf.h
+++ b/source/blender/render/intern/include/zbuf.h
@@ -37,6 +37,7 @@ struct VlakRen;
 struct ListBase;
 struct ZSpan;
 struct APixstrand;
+struct APixstr;
 struct StrandShadeCache;
 
 void fillrect(int *rect, int x, int y, int val);
@@ -50,11 +51,12 @@ void projectverto(float *v1, float winmat[][4], float *adr);
 int testclip(float *v); 
 
 void zbuffer_shadow(struct Render *re, float winmat[][4], struct LampRen *lar, int *rectz, int size, float jitx, float jity);
+void zbuffer_abuf_shadow(struct Render *re, struct LampRen *lar, float winmat[][4], struct APixstr *APixbuf, struct APixstrand *apixbuf, struct ListBase *apsmbase, int size, int samples, float (*jit)[2]);
 void zbuffer_solid(struct RenderPart *pa, struct RenderLayer *rl, void (*fillfunc)(struct RenderPart*, struct ZSpan*, int, void*), void *data);
 
 unsigned short *zbuffer_transp_shade(struct RenderPart *pa, struct RenderLayer *rl, float *pass, struct ListBase *psmlist);
 void zbuffer_sss(RenderPart *pa, unsigned int lay, void *handle, void (*func)(void*, int, int, int, int, int));
-int zbuffer_strands_abuf(struct Render *re, struct RenderPart *pa, struct RenderLayer *rl, struct APixstrand *apixbuf, struct ListBase *apsmbase, struct StrandShadeCache *cache);
+int zbuffer_strands_abuf(struct Render *re, struct RenderPart *pa, struct APixstrand *apixbuf, struct ListBase *apsmbase, unsigned int lay, int negzmask, float winmat[][4], int winx, int winy, int sample, float (*jit)[2], float clipcrop, int shadow, struct StrandShadeCache *cache);
 
 typedef struct APixstr {
     unsigned short mask[4];		/* jitter mask */
@@ -118,6 +120,7 @@ typedef struct ZSpan {
 /* exported to shadbuf.c */
 void zbufclip4(struct ZSpan *zspan, int obi, int zvlnr, float *f1, float *f2, float *f3, float *f4, int c1, int c2, int c3, int c4);
 void zbuf_free_span(struct ZSpan *zspan);
+void freepsA(struct ListBase *lb);
 
 /* to rendercore.c */
 void zspan_scanconvert(struct ZSpan *zpan, void *handle, float *v1, float *v2, float *v3, void (*func)(void *, int, int, float, float) );
@@ -128,7 +131,7 @@ void zbuf_alloc_span(struct ZSpan *zspan, int rectx, int recty, float clipcrop);
 void zbufclipwire(struct ZSpan *zspan, int obi, int zvlnr, int ec, float *ho1, float *ho2, float *ho3, float *ho4, int c1, int c2, int c3, int c4);
 
 /* exported to shadeinput.c */
-void zbuf_make_winmat(Render *re, float duplimat[][4], float winmat[][4]);
+void zbuf_make_winmat(Render *re, float winmat[][4]);
 void zbuf_render_project(float winmat[][4], float *co, float *ho);
 
 #endif
diff --git a/source/blender/render/intern/source/convertblender.c b/source/blender/render/intern/source/convertblender.c
index af7d7a02bba..072083e58a7 100644
--- a/source/blender/render/intern/source/convertblender.c
+++ b/source/blender/render/intern/source/convertblender.c
@@ -3402,9 +3402,10 @@ static void initshadowbuf(Render *re, LampRen *lar, float mat[][4])
 	shb->bias= shb->bias*(100/re->r.size);
 	
 	/* halfway method (average of first and 2nd z) reduces bias issues */
-	if(lar->buftype==LA_SHADBUF_HALFWAY)
+	if(ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP))
 		shb->bias= 0.1f*shb->bias;
 	
+	shb->compressthresh= lar->compressthresh;
 }
 
 static void area_lamp_vectors(LampRen *lar)
@@ -3486,6 +3487,7 @@ static GroupObject *add_render_lamp(Render *re, Object *ob)
 	lar->clipend = la->clipend;
 	
 	lar->bias = la->bias;
+	lar->compressthresh = la->compressthresh;
 
 	lar->type= la->type;
 	lar->mode= la->mode;
diff --git a/source/blender/render/intern/source/rendercore.c b/source/blender/render/intern/source/rendercore.c
index f3db64295a3..6c18592b8d2 100644
--- a/source/blender/render/intern/source/rendercore.c
+++ b/source/blender/render/intern/source/rendercore.c
@@ -524,7 +524,7 @@ static void add_filt_passes(RenderLayer *rl, int curmask, int rectx, int offset,
 
 			case SCE_PASS_RAYHITS:
 				/*  */
-				col= &shr->rayhits;
+				col= shr->rayhits;
 				pixsize= 4;
 				break;
 		}
diff --git a/source/blender/render/intern/source/shadbuf.c b/source/blender/render/intern/source/shadbuf.c
index 48305d31e10..50e0321a6eb 100644
--- a/source/blender/render/intern/source/shadbuf.c
+++ b/source/blender/render/intern/source/shadbuf.c
@@ -49,8 +49,8 @@
 #include "render_types.h"
 #include "renderdatabase.h"
 #include "rendercore.h"
-
 #include "shadbuf.h"
+#include "shading.h"
 #include "zbuf.h"
 
 /* XXX, could be better implemented... this is for endian issues
@@ -166,6 +166,326 @@ static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype)
 	}
 }
 
+static int verg_deepsample(const void *poin1, const void *poin2) /* qsort() comparator: orders by increasing DeepSample z */
+{
+	const DeepSample *ds1= (const DeepSample*)poin1;
+	const DeepSample *ds2= (const DeepSample*)poin2;
+
+	if(ds1->z < ds2->z) return -1; /* only the first DeepSample behind each pointer is inspected */
+	else if(ds1->z == ds2->z) return 0;
+	else return 1;
+}
+
+static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon) /* compresses dsample[0..tot-1] in place, returns the new sample count */
+{
+	/* slope math uses doubles to avoid overflows and other numerical issues
+	   with the integer z range, could be improved */
+	DeepSample *ds, *newds;
+	float v;
+	double slope, slopemin, slopemax, min, max, div, newmin, newmax;
+	int a, first, z, newtot= 0;
+
+	/*if(print) {
+		for(a=0, ds=dsample; a<tot; a++, ds++)
+			printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
+		printf("\n");
+	}*/
+
+	/* read from and write into same array: ds is the read cursor, newds the last sample kept */
+	ds= dsample;
+	newds= dsample;
+	a= 0;
+
+	/* as long as we are not at the end of the array; each pass appends one output sample */
+	for(a++, ds++; a<tot; a++, ds++) {
+		slopemin= 0.0f; /* slope range is only meaningful once first is cleared below */
+		slopemax= 0.0f;
+		first= 1;
+
+		for(; a<tot; a++, ds++) { /* extend the segment from newds while some line stays within epsilon of every sample */
+			//dz= ds->z - newds->z;
+			if(ds->z == newds->z) {
+				/* still in same z position, simply check
+				   visibility difference against epsilon */
+				if(!(fabs(newds->v - ds->v) <= epsilon)) {
+					break;
+				}
+			}
+			else {
+				/* compute slopes from newds through v-epsilon and v+epsilon, with z scaled by 0x7FFFFFFF */
+				div= (double)0x7FFFFFFF/((double)ds->z - (double)newds->z);
+				min= ((ds->v - epsilon) - newds->v)*div;
+				max= ((ds->v + epsilon) - newds->v)*div;
+
+				/* adapt existing slopes: intersect the new range with the range so far */
+				if(first) {
+					newmin= min;
+					newmax= max;
+					first= 0;
+				}
+				else {
+					newmin= MAX2(slopemin, min);
+					newmax= MIN2(slopemax, max);
+
+					/* verify if there is still space between the slopes */
+					if(newmin > newmax) {
+						ds--; /* this sample does not fit: step back so the outer loop increment revisits it */
+						a--;
+						break;
+					}
+				}
+
+				slopemin= newmin;
+				slopemax= newmax;
+			}
+		}
+
+		if(a == tot) { /* inner loop ran off the end: point back at the last input sample */
+			ds--;
+			a--;
+		}
+
+		/* always previous z */
+		z= ds->z;
+
+		if(first || a==tot-1) {
+			/* if slopes were not initialized, or this is the last input sample, use its visibility */
+			v= ds->v;
+		}
+		else {
+			/* compute visibility at center between slopes at z */
+			slope= (slopemin+slopemax)*0.5;
+			v= newds->v + slope*((z - newds->z)/(double)0x7FFFFFFF);
+		}
+
+		newds++; /* append the output sample right after the last kept one */
+		newtot++;
+
+		newds->z= z;
+		newds->v= v;
+	}
+
+	if(newtot == 0 || (newds->v != (newds-1)->v)) /* newtot has not counted dsample[0]; the last entry written is left out of the count when it repeats the previous visibility */
+		newtot++;
+
+	/*if(print) {
+		for(a=0, ds=dsample; a<newtot; a++, ds++)
+			printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
+		printf("\n");
+	}*/
+
+	return newtot;
+}
+
+static float deep_alpha(Render *re, int obinr, int facenr, int strand) /* returns shad_alpha of the material on the given face, or strand when 'strand' is nonzero */
+{
+	ObjectInstanceRen *obi= &re->objectinstance[obinr];
+	Material *ma;
+
+	if(strand) {
+		StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1); /* NOTE(review): shadows the int parameter 'strand'; facenr looks 1-based (callers skip 0) - confirm */
+		ma= strand->buffer->ma;
+	}
+	else {
+		VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
+		ma= vlr->mat;
+	}
+
+	return ma->shad_alpha;
+}
+
+static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand) /* builds one merged, compressed deep sample buffer from the A-buffers and appends it to shb->buffers */
+{
+	ShadSampleBuf *shsample;
+	DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
+	APixstr *ap, *apn;
+	APixstrand *aps, *apns;
+	float visibility, totbuf= shb->totbuf;
+	int a, b, c, tot, minz, found, size= shb->size, prevtot, newtot;
+	int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
+	
+	shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
+	BLI_addtail(&shb->buffers, shsample);
+
+	shsample->totbuf= MEM_callocN(sizeof(int)*size*size, "deeptotbuf");
+	shsample->deepbuf= MEM_callocN(sizeof(DeepSample*)*size*size, "deepbuf");
+
+	ap= apixbuf;
+	aps= apixbufstrand;
+	for(a=0; a<size*size; a++, ap++, aps++) { /* one pass per shadow buffer pixel */
+		/* count number of samples per sample buffer (mask bit c) */
+		for(c=0; c<totbuf; c++)
+			sampletot[c]= 0;
+
+		tot= 0;
+		for(apn=ap; apn; apn=apn->next)
+			for(b=0; b<4; b++)
+				if(apn->p[b])
+					for(c=0; c<totbuf; c++)
+						if(apn->mask[b] & (1<<c))
+							sampletot[c]++;
+
+		if(apixbufstrand) {
+			for(apns=aps; apns; apns=apns->next)
+				for(b=0; b<4; b++)
+					if(apns->p[b])
+						for(c=0; c<totbuf; c++)
+							if(apns->mask[b] & (1<<c))
+								sampletot[c]++;
+		}
+
+		for(c=0; c<totbuf; c++)
+			tot += sampletot[c];
+
+		if(tot == 0) { /* nothing was rasterized into this pixel */
+			shsample->deepbuf[a]= NULL;
+			shsample->totbuf[a]= 0;
+			continue;
+		}
+
+		/* fill samples: one allocation partitioned per sample buffer, two entries per hit */
+		ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
+		for(c=1; c<totbuf; c++)
+			ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;
+
+		for(apn=ap; apn; apn=apn->next) {
+			for(b=0; b<4; b++) {
+				if(apn->p[b]) {
+					for(c=0; c<totbuf; c++) {
+						if(apn->mask[b] & (1<<c)) {
+							/* two entries to create step profile */
+							ds[c]->z= apn->z[b];
+							ds[c]->v= 1.0f; /* not used, overwritten by the visibility pass below */
+							ds[c]++;
+							ds[c]->z= apn->z[b];
+							ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
+							ds[c]++;
+						}
+					}
+				}
+			}
+		}
+
+		if(apixbufstrand) {
+			for(apns=aps; apns; apns=apns->next) {
+				for(b=0; b<4; b++) {
+					if(apns->p[b]) {
+						for(c=0; c<totbuf; c++) {
+							if(apns->mask[b] & (1<<c)) {
+								/* two entries to create step profile */
+								ds[c]->z= apns->z[b];
+								ds[c]->v= 1.0f; /* not used, overwritten by the visibility pass below */
+								ds[c]++;
+								ds[c]->z= apns->z[b];
+								ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
+								ds[c]++;
+							}
+						}
+					}
+				}
+			}
+		}
+
+		for(c=0; c<totbuf; c++) {
+			/* sort by increasing z; each qsort element is a pair of entries */
+			qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);
+
+			/* sum visibility, replacing alpha values */
+			visibility= 1.0f;
+			ds[c]= sampleds[c];
+
+			for(b=0; b<sampletot[c]; b++) {
+				/* two entries creating step profile */
+				ds[c]->v= visibility;
+				ds[c]++;
+
+				visibility *= 1.0f-ds[c]->v; /* ds[c]->v still holds the alpha stored above */
+				ds[c]->v= visibility;
+				ds[c]++;
+			}
+
+			/* halfway trick, probably won't work well for volumes? */
+			ds[c]= sampleds[c];
+			for(b=0; b<sampletot[c]; b++) {
+				if(b+1 < sampletot[c]) { /* average z with the matching entry of the next pair */
+					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
+					ds[c]++;
+					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
+					ds[c]++;
+				}
+				else { /* last pair: average z with the maximum depth value */
+					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
+					ds[c]++;
+					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
+					ds[c]++;
+				}
+			}
+
+			/* init for merge loop: sampletot now counts entries instead of pairs */
+			ds[c]= sampleds[c];
+			sampletot[c] *= 2;
+		}
+
+		shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
+		shsample->totbuf[a]= 0;
+
+		/* merge buffers: visit the remaining z values in increasing order, averaging visibility over all sample buffers */
+		dsb= shsample->deepbuf[a];
+		while(1) {
+			minz= 0;
+			found= 0;
+
+			for(c=0; c<totbuf; c++) { /* smallest z among the current heads of all sample buffers */
+				if(sampletot[c] && (!found || ds[c]->z < minz)) {
+					minz= ds[c]->z;
+					found= 1;
+				}
+			}
+
+			if(!found)
+				break;
+
+			dsb->z= minz;
+			dsb->v= 0.0f;
+
+			visibility= 0.0f;
+			for(c=0; c<totbuf; c++) {
+				if(sampletot[c] && ds[c]->z == minz) { /* consume at most one entry at this z per sample buffer */
+					ds[c]++;
+					sampletot[c]--;
+				}
+
+				if(sampleds[c] == ds[c]) /* nothing consumed from this sample buffer yet: counts as fully visible */
+					visibility += 1.0f/totbuf;
+				else
+					visibility += (ds[c]-1)->v/totbuf;
+			}
+
+			dsb->v= visibility;
+			dsb++;
+			shsample->totbuf[a]++;
+		}
+
+		prevtot= shsample->totbuf[a];
+		totsample += prevtot;
+
+		newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
+		shsample->totbuf[a]= newtot;
+		totsamplec += newtot;
+
+		if(newtot < prevtot) { /* shrink the allocation to the compressed size */
+			newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
+			memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
+			MEM_freeN(shsample->deepbuf[a]);
+			shsample->deepbuf[a]= newbuf;
+		}
+
+		MEM_freeN(sampleds[0]); /* frees the scratch block that every sampleds[c] points into */
+	}
+
+	//printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
+}
+
 /* create Z tiles (for compression): this system is 24 bits!!! */
 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
 {
@@ -176,7 +496,7 @@ static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
 	int a, x, y, minx, miny, byt1, byt2;
 	char *rc, *rcline, *ctile, *zt;
 	
-	shsample= MEM_mallocN( sizeof(ShadSampleBuf), "shad sample buf");
+	shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
 	BLI_addtail(&shb->buffers, shsample);
 	
 	shsample->zbuf= MEM_mallocN( sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
@@ -277,7 +597,6 @@ static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
 	}
 
 	MEM_freeN(rcline);
-
 }
 
 /* sets start/end clipping. lar->shb should be initialized */
@@ -381,11 +700,54 @@ static void shadowbuf_autoclip(Render *re, LampRen *lar)
 	}
 }
 
+static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf) /* renders shb->totbuf jittered z-buffers, storing each through compress_shadowbuf() */
+{
+	ShadBuf *shb= lar->shb;
+	int *rectz, samples;
+
+	/* zbuffering: a single rectz is reused for every sample buffer */
+	rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
+	
+	for(samples=0; samples<shb->totbuf; samples++) {
+		zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]); /* jitbuf holds one x,y offset pair per sample buffer */
+		/* create Z tiles (for compression): this system is 24 bits!!! */
+		compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
+
+		if(re->test_break(re->tbh)) /* stop early when test_break reports a break; rectz is still freed below */
+			break;
+	}
+	
+	MEM_freeN(rectz);
+}
+
+static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf) /* rasterizes all sample buffers into A-buffers, then compresses them into one deep buffer */
+{
+	ShadBuf *shb= lar->shb;
+	APixstr *apixbuf;
+	APixstrand *apixbufstrand= NULL;
+	ListBase apsmbase= {NULL, NULL};
+
+	/* zbuffering into per-pixel sample lists; the strand buffer is only allocated when re->totstrand is set */
+	apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
+	if(re->totstrand)
+		apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
+
+	zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
+		shb->totbuf, (float(*)[2])jitbuf);
+
+	/* merge and compress the sample lists into a deep sample buffer appended to shb->buffers */
+	compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
+	
+	MEM_freeN(apixbuf);
+	if(apixbufstrand)
+		MEM_freeN(apixbufstrand);
+	freepsA(&apsmbase);
+}
+
 void makeshadowbuf(Render *re, LampRen *lar)
 {
 	ShadBuf *shb= lar->shb;
 	float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
-	int *rectz, samples;
 	
 	if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
 		shadowbuf_autoclip(re, lar);
@@ -405,31 +767,26 @@ void makeshadowbuf(Render *re, LampRen *lar)
 	i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
 	Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
 
-	if(ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY)) {
+	if(ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
+		shb->totbuf= lar->buffers;
+
 		/* jitter, weights - not threadsafe! */
 		BLI_lock_thread(LOCK_CUSTOM1);
 		shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
 		make_jitter_weight_tab(re, shb, lar->filtertype);
 		BLI_unlock_thread(LOCK_CUSTOM1);
 		
-		shb->totbuf= lar->buffers;
 		if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
 		else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
 		else jitbuf= twozero;
 		
 		/* zbuffering */
-		rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
-		
-		for(samples=0; samples<shb->totbuf; samples++) {
-			zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
-			/* create Z tiles (for compression): this system is 24 bits!!! */
-			compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
-
-			if(re->test_break(re->tbh))
-				break;
+		if(lar->buftype == LA_SHADBUF_DEEP) {
+			makedeepshadowbuf(re, lar, jitbuf);
+			shb->totbuf= 1;
 		}
-		
-		MEM_freeN(rectz);
+		else
+			makeflatshadowbuf(re, lar, jitbuf);
 
 		/* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
 	}
@@ -539,17 +896,27 @@ void freeshadowbuf(LampRen *lar)
 		ShadSampleBuf *shsample;
 		int b, v;
 		
-		v= (shb->size*shb->size)/256;
-		
 		for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
-			intptr_t *ztile= shsample->zbuf;
-			char *ctile= shsample->cbuf;
-			
-			for(b=0; b<v; b++, ztile++, ctile++)
-				if(*ctile) MEM_freeN((void *) *ztile);
-			
-			MEM_freeN(shsample->zbuf);
-			MEM_freeN(shsample->cbuf);
+			if(shsample->deepbuf) {
+				v= shb->size*shb->size;
+				for(b=0; b<v; b++)
+					if(shsample->deepbuf[b])
+						MEM_freeN(shsample->deepbuf[b]);
+					
+				MEM_freeN(shsample->deepbuf);
+				MEM_freeN(shsample->totbuf);
+			}
+			else {
+				intptr_t *ztile= shsample->zbuf;
+				char *ctile= shsample->cbuf;
+				
+				v= (shb->size*shb->size)/256;
+				for(b=0; b<v; b++, ztile++, ctile++)
+					if(*ctile) MEM_freeN((void *) *ztile);
+				
+				MEM_freeN(shsample->zbuf);
+				MEM_freeN(shsample->cbuf);
+			}
 		}
 		BLI_freelistN(&shb->buffers);
 		
@@ -567,6 +934,9 @@ static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int
 	int ofs;
 	char *ct;
 
+	if(shsample->deepbuf)
+		return 0;
+
 	/* always test borders of shadowbuffer */
 	if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
 	if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
@@ -587,6 +957,67 @@ static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int
 	return 0;
 }
 
+static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast) /* visibility at depth z-bias; *biast (may be NULL) receives the bias blend factor */
+{
+	DeepSample *ds, *prevds;
+	float t;
+	int a;
+
+	/* tricky stuff here; we use ints which can overflow easily with bias values */
+
+	ds= dsample;
+	for(a=0; a<tot && (z-bias > ds->z); a++, ds++)
+		; /* skip every sample in front of z-bias */
+
+	if(a == tot) {
+		if(biast)
+			*biast= 0.0f;
+		return (ds-1)->v; /* completely behind all samples */
+	}
+	
+	/* check if this read needs bias blending: z is behind the found sample although z-bias is not */
+	if(biast) {
+		if(z > ds->z)
+			*biast= (float)(z - ds->z)/(float)bias;
+		else
+			*biast= 0.0f;
+	}
+
+	if(a == 0)
+		return 1.0f; /* completely in front of all samples */
+
+	prevds= ds-1;
+	t= (float)(z-bias - prevds->z)/(float)(ds->z - prevds->z); /* position of z-bias between the two surrounding samples */
+	return t*ds->v + (1.0f-t)*prevds->v;
+}
+
+static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs) /* visibility for pixel xs,ys at depth zs; xs,ys are not range checked here */
+{
+	float v, biasv, biast;
+	int ofs, tot;
+
+	if(zs < - 0x7FFFFE00 + bias)
+		return 1.0;	/* extreme close to clipstart */
+
+	/* look up the sample list of this pixel */
+	ofs= ys*shb->size + xs;
+	tot= shsample->totbuf[ofs];
+	if(tot == 0)
+		return 1.0f; /* no samples stored for this pixel */
+
+	v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
+
+	if(biast != 0.0f) {
+		/* in soft bias area: blend the biased and unbiased lookups, weighted by biast squared */
+		biasv= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, 0);
+
+		biast= biast*biast;
+		return (1.0f-biast)*v + biast*biasv;
+	}
+
+	return v;
+}
+
 /* return 1.0 : fully in light */
 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)	
 {
@@ -603,6 +1034,9 @@ static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int
 	if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
 	if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
 
+	if(shsample->deepbuf)
+		return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
+
 	/* calc z */
 	ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
 	ct= shsample->cbuf+ofs;
diff --git a/source/blender/render/intern/source/shadeinput.c b/source/blender/render/intern/source/shadeinput.c
index 30ff213b95b..79ee6c89460 100644
--- a/source/blender/render/intern/source/shadeinput.c
+++ b/source/blender/render/intern/source/shadeinput.c
@@ -1232,7 +1232,7 @@ void shade_input_set_shade_texco(ShadeInput *shi)
 			s3= RE_vertren_get_sticky(obr, v3, 0);
 			
 			if(s1 && s2 && s3) {
-				float winmat[4][4], ho1[4], ho2[4], ho3[4];
+				float obwinmat[4][4], winmat[4][4], ho1[4], ho2[4], ho3[4];
 				float Zmulx, Zmuly;
 				float hox, hoy, l, dl, u, v;
 				float s00, s01, s10, s11, detsh;
@@ -1240,14 +1240,15 @@ void shade_input_set_shade_texco(ShadeInput *shi)
 				/* old globals, localized now */
 				Zmulx=  ((float)R.winx)/2.0f; Zmuly=  ((float)R.winy)/2.0f;
 
+				zbuf_make_winmat(&R, winmat);
 				if(shi->obi->flag & R_TRANSFORMED)
-					zbuf_make_winmat(&R, shi->obi->mat, winmat);
+					Mat4MulMat4(obwinmat, obi->mat, winmat);
 				else
-					zbuf_make_winmat(&R, NULL, winmat);
+					Mat4CpyMat4(obwinmat, winmat);
 
-				zbuf_render_project(winmat, v1->co, ho1);
-				zbuf_render_project(winmat, v2->co, ho2);
-				zbuf_render_project(winmat, v3->co, ho3);
+				zbuf_render_project(obwinmat, v1->co, ho1);
+				zbuf_render_project(obwinmat, v2->co, ho2);
+				zbuf_render_project(obwinmat, v3->co, ho3);
 				
 				s00= ho3[0]/ho3[3] - ho1[0]/ho1[3];
 				s01= ho3[1]/ho3[3] - ho1[1]/ho1[3];
diff --git a/source/blender/render/intern/source/strand.c b/source/blender/render/intern/source/strand.c
index d00076a80e8..61080c7d807 100644
--- a/source/blender/render/intern/source/strand.c
+++ b/source/blender/render/intern/source/strand.c
@@ -417,6 +417,8 @@ typedef struct StrandPart {
 	intptr_t *rectdaps;
 	int rectx, recty;
 	int sample;
+	int shadow;
+	float (*jit)[2];
 
 	StrandSegment *segment;
 	float t[3], s[3];
@@ -525,7 +527,7 @@ static void do_strand_fillac(void *handle, int x, int y, float u, float v, float
 		}
 	}
 	else {
-		bufferz= spart->rectz[offset];
+		bufferz= (spart->rectz)? spart->rectz[offset]: 0x7FFFFFFF;
 		if(spart->rectmask)
 			maskz= spart->rectmask[offset];
 	}
@@ -560,8 +562,10 @@ static void do_strand_fillac(void *handle, int x, int y, float u, float v, float
 				CHECK_ASSIGN(0);
 			}
 
-			strand_shade_refcount(cache, sseg->v[1]);
-			strand_shade_refcount(cache, sseg->v[2]);
+			if(cache) {
+				strand_shade_refcount(cache, sseg->v[1]);
+				strand_shade_refcount(cache, sseg->v[2]);
+			}
 			spart->totapixbuf[offset]++;
 		}
 	}
@@ -596,23 +600,16 @@ static void do_scanconvert_strand(Render *re, StrandPart *spart, ZSpan *zspan, f
 	VECCOPY(jco3, co3);
 	VECCOPY(jco4, co4);
 
-	if(re->osa) {
-		jx= -re->jit[sample][0];
-		jy= -re->jit[sample][1];
+	if(spart->jit) {
+		jx= -spart->jit[sample][0];
+		jy= -spart->jit[sample][1];
 
 		jco1[0] += jx; jco1[1] += jy;
 		jco2[0] += jx; jco2[1] += jy;
 		jco3[0] += jx; jco3[1] += jy;
 		jco4[0] += jx; jco4[1] += jy;
-	}
-	else if(re->i.curblur) {
-		jx= -re->jit[re->i.curblur-1][0];
-		jy= -re->jit[re->i.curblur-1][1];
 
-		jco1[0] += jx; jco1[1] += jy;
-		jco2[0] += jx; jco2[1] += jy;
-		jco3[0] += jx; jco3[1] += jy;
-		jco4[0] += jx; jco4[1] += jy;
+		/* XXX mblur? */
 	}
 
 	spart->sample= sample;
@@ -756,7 +753,7 @@ void render_strand_segment(Render *re, float winmat[][4], StrandPart *spart, ZSp
 }
 
 /* render call to fill in strands */
-int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand *apixbuf, ListBase *apsmbase, StrandShadeCache *cache)
+int zbuffer_strands_abuf(Render *re, RenderPart *pa, APixstrand *apixbuf, ListBase *apsmbase, unsigned int lay, int negzmask, float winmat[][4], int winx, int winy, int sample, float (*jit)[2], float clipcrop, int shadow, StrandShadeCache *cache)
 {
 	ObjectRen *obr;
 	ObjectInstanceRen *obi;
@@ -768,7 +765,7 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 	StrandSegment sseg;
 	StrandSortSegment *sortsegments = NULL, *sortseg, *firstseg;
 	MemArena *memarena;
-	float z[4], bounds[4], winmat[4][4];
+	float z[4], bounds[4], obwinmat[4][4];
 	int a, b, c, i, totsegment, clip[4];
 
 	if(re->test_break(re->tbh))
@@ -788,27 +785,31 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 	spart.rectz= pa->rectz;
 	spart.rectmask= pa->rectmask;
 	spart.cache= cache;
+	spart.shadow= shadow;
+	spart.jit= jit;
 
-	zbuf_alloc_span(&zspan, pa->rectx, pa->recty, re->clipcrop);
+	zbuf_alloc_span(&zspan, pa->rectx, pa->recty, clipcrop);
 
 	/* needed for transform from hoco to zbuffer co */
-	zspan.zmulx= ((float)re->winx)/2.0;
-	zspan.zmuly= ((float)re->winy)/2.0;
+	zspan.zmulx= ((float)winx)/2.0;
+	zspan.zmuly= ((float)winy)/2.0;
 	
 	zspan.zofsx= -pa->disprect.xmin;
 	zspan.zofsy= -pa->disprect.ymin;
 
 	/* to center the sample position */
-	zspan.zofsx -= 0.5f;
-	zspan.zofsy -= 0.5f;
+	if(!shadow) {
+		zspan.zofsx -= 0.5f;
+		zspan.zofsy -= 0.5f;
+	}
 
 	zspan.apsmbase= apsmbase;
 
 	/* clipping setup */
-	bounds[0]= (2*pa->disprect.xmin - re->winx-1)/(float)re->winx;
-	bounds[1]= (2*pa->disprect.xmax - re->winx+1)/(float)re->winx;
-	bounds[2]= (2*pa->disprect.ymin - re->winy-1)/(float)re->winy;
-	bounds[3]= (2*pa->disprect.ymax - re->winy+1)/(float)re->winy;
+	bounds[0]= (2*pa->disprect.xmin - winx-1)/(float)winx;
+	bounds[1]= (2*pa->disprect.xmax - winx+1)/(float)winx;
+	bounds[2]= (2*pa->disprect.ymin - winy-1)/(float)winy;
+	bounds[3]= (2*pa->disprect.ymax - winy+1)/(float)winy;
 
 	memarena= BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE);
 	firstseg= NULL;
@@ -819,14 +820,14 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 	for(obi=re->instancetable.first, i=0; obi; obi=obi->next, i++) {
 		obr= obi->obr;
 
-		if(!obr->strandbuf || !(obr->strandbuf->lay & rl->lay))
+		if(!obr->strandbuf || !(obr->strandbuf->lay & lay))
 			continue;
 
 		/* compute matrix and try clipping whole object */
 		if(obi->flag & R_TRANSFORMED)
-			zbuf_make_winmat(re, obi->mat, winmat);
+			Mat4MulMat4(obwinmat, obi->mat, winmat);
 		else
-			zbuf_make_winmat(re, NULL, winmat);
+			Mat4CpyMat4(obwinmat, winmat);
 
 		if(clip_render_object(obi->obr->boundbox, bounds, winmat))
 			continue;
@@ -843,14 +844,14 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 				svert= strand->vert;
 
 				/* keep clipping and z depth for 4 control points */
-				clip[1]= strand_test_clip(winmat, &zspan, bounds, svert->co, &z[1]);
-				clip[2]= strand_test_clip(winmat, &zspan, bounds, (svert+1)->co, &z[2]);
+				clip[1]= strand_test_clip(obwinmat, &zspan, bounds, svert->co, &z[1]);
+				clip[2]= strand_test_clip(obwinmat, &zspan, bounds, (svert+1)->co, &z[2]);
 				clip[0]= clip[1]; z[0]= z[1];
 
 				for(b=0; b<strand->totvert-1; b++, svert++) {
 					/* compute 4th point clipping and z depth */
 					if(b < strand->totvert-2) {
-						clip[3]= strand_test_clip(winmat, &zspan, bounds, (svert+2)->co, &z[3]);
+						clip[3]= strand_test_clip(obwinmat, &zspan, bounds, (svert+2)->co, &z[3]);
 					}
 					else {
 						clip[3]= clip[2]; z[3]= z[2];
@@ -900,7 +901,11 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 
 			obi= &re->objectinstance[sortseg->obi];
 			obr= obi->obr;
-			zbuf_make_winmat(re, NULL, winmat);
+
+			if(obi->flag & R_TRANSFORMED)
+				Mat4MulMat4(obwinmat, obi->mat, winmat);
+			else
+				Mat4CpyMat4(obwinmat, winmat);
 
 			sseg.obi= obi;
 			sseg.strand= RE_findOrAddStrand(obr, sortseg->strand);
@@ -917,7 +922,7 @@ int zbuffer_strands_abuf(Render *re, RenderPart *pa, RenderLayer *rl, APixstrand
 
 			spart.segment= &sseg;
 
-			render_strand_segment(re, winmat, &spart, &zspan, 1, &sseg);
+			render_strand_segment(re, obwinmat, &spart, &zspan, 1, &sseg);
 		}
 	}
 
diff --git a/source/blender/render/intern/source/zbuf.c b/source/blender/render/intern/source/zbuf.c
index 3b3a8568933..a7b9867715f 100644
--- a/source/blender/render/intern/source/zbuf.c
+++ b/source/blender/render/intern/source/zbuf.c
@@ -271,7 +271,7 @@ static APixstr *addpsmainA(ListBase *lb)
 	return psm->ps;
 }
 
-static void freepsA(ListBase *lb)
+void freepsA(ListBase *lb)
 {
 	APixstrMain *psm, *psmnext;
 
@@ -1760,12 +1760,12 @@ static int zbuf_shadow_project(ZbufProjectCache *cache, int index, float winmat[
 	}
 }
 
-static void zbuffer_part_bounds(Render *re, RenderPart *pa, float *bounds)
+static void zbuffer_part_bounds(int winx, int winy, RenderPart *pa, float *bounds)	/* writes bounds[0..3] from pa->disprect (xmin, xmax, ymin, ymax) relative to the given window size instead of re->winx/winy */
 {
-	bounds[0]= (2*pa->disprect.xmin - re->winx-1)/(float)re->winx;
-	bounds[1]= (2*pa->disprect.xmax - re->winx+1)/(float)re->winx;
-	bounds[2]= (2*pa->disprect.ymin - re->winy-1)/(float)re->winy;
-	bounds[3]= (2*pa->disprect.ymax - re->winy+1)/(float)re->winy;
+	bounds[0]= (2*pa->disprect.xmin - winx-1)/(float)winx;	/* equals 2*xmin/winx - 1, lowered by a further 1/winx */
+	bounds[1]= (2*pa->disprect.xmax - winx+1)/(float)winx;	/* equals 2*xmax/winx - 1, raised by a further 1/winx */
+	bounds[2]= (2*pa->disprect.ymin - winy-1)/(float)winy;	/* same mapping as bounds[0], for the y axis */
+	bounds[3]= (2*pa->disprect.ymax - winy+1)/(float)winy;	/* same mapping as bounds[1], for the y axis */
 }
 
 static int zbuf_part_project(ZbufProjectCache *cache, int index, float winmat[][4], float *bounds, float *co, float *ho)
@@ -1803,7 +1803,7 @@ void zbuf_render_project(float winmat[][4], float *co, float *ho)
 	projectvert(vec, winmat, ho);
 }
 
-void zbuf_make_winmat(Render *re, float duplimat[][4], float winmat[][4])
+void zbuf_make_winmat(Render *re, float winmat[][4])
 {
 	float panomat[4][4];
 
@@ -1814,13 +1814,8 @@ void zbuf_make_winmat(Render *re, float duplimat[][4], float winmat[][4])
 		panomat[2][0]= -re->panosi;
 		panomat[2][2]= re->panoco;
 
-		if(duplimat)
-			Mat4MulSerie(winmat, re->winmat, panomat, duplimat, 0, 0, 0, 0, 0);
-		else
-			Mat4MulMat4(winmat, panomat, re->winmat);
+		Mat4MulMat4(winmat, panomat, re->winmat);
 	}
-	else if(duplimat)
-		Mat4MulMat4(winmat, duplimat, re->winmat);
 	else
 		Mat4CpyMat4(winmat, re->winmat);
 }
@@ -2047,12 +2042,15 @@ void zbuffer_solid(RenderPart *pa, RenderLayer *rl, void(*fillfunc)(RenderPart*,
 	Material *ma=0;
 	ObjectInstanceRen *obi;
 	ObjectRen *obr;
-	float winmat[4][4], bounds[4], ho1[4], ho2[4], ho3[4], ho4[4]={0};
+	float obwinmat[4][4], winmat[4][4], bounds[4];
+	float ho1[4], ho2[4], ho3[4], ho4[4]={0};
 	unsigned int lay= rl->lay, lay_zmask= rl->lay_zmask;
 	int i, v, zvlnr, zsample, samples, c1, c2, c3, c4=0;
 	short nofill=0, env=0, wire=0, zmaskpass=0;
 	short all_z= (rl->layflag & SCE_LAY_ALL_Z) && !(rl->layflag & SCE_LAY_ZMASK);
 	short neg_zmask= (rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK);
+
+	zbuf_make_winmat(&R, winmat);
 	
 	samples= (R.osa? R.osa: 1);
 	samples= MIN2(4, samples-pa->sample);
@@ -2060,7 +2058,7 @@ void zbuffer_solid(RenderPart *pa, RenderLayer *rl, void(*fillfunc)(RenderPart*,
 	for(zsample=0; zsample<samples; zsample++) {
 		zspan= &zspans[zsample];
 
-		zbuffer_part_bounds(&R, pa, bounds);
+		zbuffer_part_bounds(R.winx, R.winy, pa, bounds);
 		zbuf_alloc_span(zspan, pa->rectx, pa->recty, R.clipcrop);
 		
 		/* needed for transform from hoco to zbuffer co */
@@ -2135,9 +2133,9 @@ void zbuffer_solid(RenderPart *pa, RenderLayer *rl, void(*fillfunc)(RenderPart*,
 				continue;
 			
 			if(obi->flag & R_TRANSFORMED)
-				zbuf_make_winmat(&R, obi->mat, winmat);
+				Mat4MulMat4(obwinmat, obi->mat, winmat);
 			else
-				zbuf_make_winmat(&R, NULL, winmat);
+				Mat4CpyMat4(obwinmat, winmat);
 
 			if(clip_render_object(obi->obr->boundbox, bounds, winmat))
 				continue;
@@ -2182,14 +2180,14 @@ void zbuffer_solid(RenderPart *pa, RenderLayer *rl, void(*fillfunc)(RenderPart*,
 					v3= vlr->v3;
 					v4= vlr->v4;
 
-					c1= zbuf_part_project(cache, v1->index, winmat, bounds, v1->co, ho1);
-					c2= zbuf_part_project(cache, v2->index, winmat, bounds, v2->co, ho2);
-					c3= zbuf_part_project(cache, v3->index, winmat, bounds, v3->co, ho3);
+					c1= zbuf_part_project(cache, v1->index, obwinmat, bounds, v1->co, ho1);
+					c2= zbuf_part_project(cache, v2->index, obwinmat, bounds, v2->co, ho2);
+					c3= zbuf_part_project(cache, v3->index, obwinmat, bounds, v3->co, ho3);
 
 					/* partclipping doesn't need viewplane clipping */
 					partclip= c1 & c2 & c3;
 					if(v4) {
-						c4= zbuf_part_project(cache, v4->index, winmat, bounds, v4->co, ho4);
+						c4= zbuf_part_project(cache, v4->index, obwinmat, bounds, v4->co, ho4);
 						partclip &= c4;
 					}
 
@@ -2511,11 +2509,13 @@ void zbuffer_sss(RenderPart *pa, unsigned int lay, void *handle, void (*func)(vo
 	VlakRen *vlr= NULL;
 	VertRen *v1, *v2, *v3, *v4;
 	Material *ma=0, *sss_ma= R.sss_mat;
-	float winmat[4][4], bounds[4], ho1[4], ho2[4], ho3[4], ho4[4]={0};
+	float obwinmat[4][4], winmat[4][4], bounds[4];
+	float ho1[4], ho2[4], ho3[4], ho4[4]={0};
 	int i, v, zvlnr, c1, c2, c3, c4=0;
 	short nofill=0, env=0, wire=0;
 	
-	zbuffer_part_bounds(&R, pa, bounds);
+	zbuf_make_winmat(&R, winmat);
+	zbuffer_part_bounds(R.winx, R.winy, pa, bounds);
 	zbuf_alloc_span(&zspan, pa->rectx, pa->recty, R.clipcrop);
 
 	zspan.sss_handle= handle;
@@ -2551,9 +2551,9 @@ void zbuffer_sss(RenderPart *pa, unsigned int lay, void *handle, void (*func)(vo
 			continue;
 
 		if(obi->flag & R_TRANSFORMED)
-			zbuf_make_winmat(&R, obi->mat, winmat);
+			Mat4MulMat4(obwinmat, obi->mat, winmat);
 		else
-			zbuf_make_winmat(&R, NULL, winmat);
+			Mat4CpyMat4(obwinmat, winmat);
 
 		if(clip_render_object(obi->obr->boundbox, bounds, winmat))
 			continue;
@@ -3181,6 +3181,11 @@ static void copyto_abufz(RenderPart *pa, int *arectz, int *rectmask, int sample)
 	int x, y, *rza, *rma;
 	intptr_t *rd;
 	
+	if((R.osa==0 && !pa->rectz) || !pa->rectdaps) {
+		fillrect(arectz, pa->rectx, pa->recty, 0x7FFFFFFE);
+		return;
+	}
+
 	if(R.osa==0) {
 		memcpy(arectz, pa->rectz, sizeof(int)*pa->rectx*pa->recty);
 		if(rectmask && pa->rectmask)
@@ -3222,7 +3227,7 @@ static void copyto_abufz(RenderPart *pa, int *arectz, int *rectmask, int sample)
  * Do accumulation z buffering.
  */
 
-static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, RenderLayer *rl, unsigned int lay)
+static int zbuffer_abuf(Render *re, RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, unsigned int lay, int negzmask, float winmat[][4], int winx, int winy, int samples, float (*jit)[2], float clipcrop, int shadow)
 {
 	ZbufProjectCache cache[ZBUF_PROJECT_CACHE_SIZE];
 	ZSpan zspans[16], *zspan;	/* MAX_OSA */
@@ -3232,28 +3237,27 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 	VlakRen *vlr=NULL;
 	VertRen *v1, *v2, *v3, *v4;
 	float vec[3], hoco[4], mul, zval, fval;
-	float winmat[4][4], bounds[4], ho1[4], ho2[4], ho3[4], ho4[4]={0};
+	float obwinmat[4][4], bounds[4], ho1[4], ho2[4], ho3[4], ho4[4]={0};
 	int i, v, zvlnr, c1, c2, c3, c4=0, dofill= 0;
-	int zsample, samples, polygon_offset;
+	int zsample, polygon_offset;
 
-	zbuffer_part_bounds(&R, pa, bounds);
-	samples= (R.osa? R.osa: 1);
+	zbuffer_part_bounds(winx, winy, pa, bounds);
 
 	for(zsample=0; zsample<samples; zsample++) {
 		zspan= &zspans[zsample];
 
-		zbuf_alloc_span(zspan, pa->rectx, pa->recty, R.clipcrop);
+		zbuf_alloc_span(zspan, pa->rectx, pa->recty, re->clipcrop);
 		
 		/* needed for transform from hoco to zbuffer co */
-		zspan->zmulx=  ((float)R.winx)/2.0;
-		zspan->zmuly=  ((float)R.winy)/2.0;
+		zspan->zmulx=  ((float)winx)/2.0;
+		zspan->zmuly=  ((float)winy)/2.0;
 		
 		/* the buffers */
 		zspan->arectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "Arectz");
 		zspan->apixbuf= APixbuf;
 		zspan->apsmbase= apsmbase;
 		
-		if((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK))
+		if(negzmask)
 			zspan->rectmask= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "Arectmask");
 
 		/* filling methods */
@@ -3263,36 +3267,35 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 		copyto_abufz(pa, zspan->arectz, zspan->rectmask, zsample);	/* init zbuffer */
 		zspan->mask= 1<<zsample;
 
-		if(R.osa) {
-			zspan->zofsx= -pa->disprect.xmin - R.jit[zsample][0];
-			zspan->zofsy= -pa->disprect.ymin - R.jit[zsample][1];
-		}
-		else if(R.i.curblur) {
-			zspan->zofsx= -pa->disprect.xmin - R.jit[R.i.curblur-1][0];
-			zspan->zofsy= -pa->disprect.ymin - R.jit[R.i.curblur-1][1];
+		if(jit) {
+			zspan->zofsx= -pa->disprect.xmin + jit[zsample][0];
+			zspan->zofsy= -pa->disprect.ymin + jit[zsample][1];
 		}
 		else {
 			zspan->zofsx= -pa->disprect.xmin;
 			zspan->zofsy= -pa->disprect.ymin;
 		}
-		/* to center the sample position */
-		zspan->zofsx -= 0.5f;
-		zspan->zofsy -= 0.5f;
+
+		if(!shadow) {
+			/* to center the sample position */
+			zspan->zofsx -= 0.5f;
+			zspan->zofsy -= 0.5f;
+		}
 	}
 	
 	/* we use this to test if nothing was filled in */
 	zvlnr= 0;
 		
-	for(i=0, obi=R.instancetable.first; obi; i++, obi=obi->next) {
+	for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
 		obr= obi->obr;
 
 		if(!(obi->lay & lay))
 			continue;
 
 		if(obi->flag & R_TRANSFORMED)
-			zbuf_make_winmat(&R, obi->mat, winmat);
+			Mat4MulMat4(obwinmat, obi->mat, winmat);
 		else
-			zbuf_make_winmat(&R, NULL, winmat);
+			Mat4CpyMat4(obwinmat, winmat);
 
 		if(clip_render_object(obi->obr->boundbox, bounds, winmat))
 			continue;
@@ -3306,7 +3309,7 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 			
 			if(vlr->mat!=ma) {
 				ma= vlr->mat;
-				dofill= ((ma->mode & MA_TRANSP) && (ma->mode & MA_ZTRANSP)) && !(ma->mode & MA_ONLYCAST);
+				dofill= shadow || (((ma->mode & MA_TRANSP) && (ma->mode & MA_ZTRANSP)) && !(ma->mode & MA_ONLYCAST));
 			}
 			
 			if(dofill) {
@@ -3318,27 +3321,27 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 					v3= vlr->v3;
 					v4= vlr->v4;
 
-					c1= zbuf_part_project(cache, v1->index, winmat, bounds, v1->co, ho1);
-					c2= zbuf_part_project(cache, v2->index, winmat, bounds, v2->co, ho2);
-					c3= zbuf_part_project(cache, v3->index, winmat, bounds, v3->co, ho3);
+					c1= zbuf_part_project(cache, v1->index, obwinmat, bounds, v1->co, ho1);
+					c2= zbuf_part_project(cache, v2->index, obwinmat, bounds, v2->co, ho2);
+					c3= zbuf_part_project(cache, v3->index, obwinmat, bounds, v3->co, ho3);
 
 					/* partclipping doesn't need viewplane clipping */
 					partclip= c1 & c2 & c3;
 					if(v4) {
-						c4= zbuf_part_project(cache, v4->index, winmat, bounds, v4->co, ho4);
+						c4= zbuf_part_project(cache, v4->index, obwinmat, bounds, v4->co, ho4);
 						partclip &= c4;
 					}
 
 					if(partclip==0) {
 						/* a little advantage for transp rendering (a z offset) */
-						if( ma->zoffs != 0.0) {
+						if(!shadow && ma->zoffs != 0.0) {
 							mul= 0x7FFFFFFF;
 							zval= mul*(1.0+ho1[2]/ho1[3]);
 
 							VECCOPY(vec, v1->co);
 							/* z is negative, otherwise its being clipped */ 
 							vec[2]-= ma->zoffs;
-							projectverto(vec, R.winmat, hoco);
+							projectverto(vec, obwinmat, hoco);
 							fval= mul*(1.0+hoco[2]/hoco[3]);
 
 							polygon_offset= (int) fabs(zval - fval );
@@ -3376,13 +3379,13 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 						}
 					}
 					if((v & 255)==255) 
-						if(R.test_break(R.tbh)) 
+						if(re->test_break(re->tbh)) 
 							break; 
 				}
 			}
 		}
 
-		if(R.test_break(R.tbh)) break;
+		if(re->test_break(re->tbh)) break;
 	}
 	
 	for(zsample=0; zsample<samples; zsample++) {
@@ -3396,6 +3399,51 @@ static int zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, Re
 	return zvlnr;
 }
 
+static int zbuffer_abuf_render(RenderPart *pa, APixstr *APixbuf, APixstrand *APixbufstrand, ListBase *apsmbase, RenderLayer *rl, StrandShadeCache *sscache)
+{	/* Fills the accumulation buffers for one part using settings from the global R; returns the sum of what the two fill calls return (the caller treats 0 as "nothing filled in"). */
+	float winmat[4][4], (*jit)[2];
+	int samples, negzmask, doztra= 0;
+
+	samples= (R.osa)? R.osa: 1;	/* number of z-spans to set up: one per OSA sample, otherwise one */
+	negzmask= ((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK));	/* true only when both layer flags are set */
+
+	if(R.osa)
+		jit= R.jit;	/* whole jitter table; zbuffer_abuf indexes it per sample */
+	else if(R.i.curblur)
+		jit= &R.jit[R.i.curblur-1];	/* without OSA samples is 1, so index 0 of this pointer reads the entry for the current blur step */
+	else
+		jit= NULL;	/* callee then applies no jitter offset */
+	
+	zbuf_make_winmat(&R, winmat);	/* window matrix built once and handed to both fill calls below */
+
+	if(rl->layflag & SCE_LAY_ZTRA)
+		doztra+= zbuffer_abuf(&R, pa, APixbuf, apsmbase, rl->lay, negzmask, winmat, R.winx, R.winy, samples, jit, R.clipcrop, 0);	/* last argument: shadow=0 */
+	if((rl->layflag & SCE_LAY_STRAND) && APixbufstrand)
+		doztra+= zbuffer_strands_abuf(&R, pa, APixbufstrand, apsmbase, rl->lay, negzmask, winmat, R.winx, R.winy, samples, jit, R.clipcrop, 0, sscache);	/* NOTE(review): `samples` lands in a parameter named `sample` - confirm intent */
+
+	return doztra;
+}
+
+void zbuffer_abuf_shadow(Render *re, LampRen *lar, float winmat[][4], APixstr *APixbuf, APixstrand *APixbufstrand, ListBase *apsmbase, int size, int samples, float (*jit)[2])
+{	/* Fills the accumulation buffers for a lamp using the caller's winmat and a temporary size x size RenderPart, with shadow=1 on both fill calls; return values of the fill calls are ignored. */
+	RenderPart pa;
+	int lay= -1;	/* all bits set; narrowed to the lamp's layers below when LA_LAYER is set */
+
+	if(lar->mode & LA_LAYER) lay= lar->lay;
+
+	memset(&pa, 0, sizeof(RenderPart));	/* every field not assigned below stays zeroed, including the rect pointers */
+	pa.rectx= size;
+	pa.recty= size;
+	pa.disprect.xmin= 0;
+	pa.disprect.ymin= 0;
+	pa.disprect.xmax= size;	/* disprect spans 0..size, matching the winx/winy=size passed below */
+	pa.disprect.ymax= size;
+
+	zbuffer_abuf(re, &pa, APixbuf, apsmbase, lay, 0, winmat, size, size, samples, jit, 1.0f, 1);	/* negzmask=0, clipcrop=1.0f, shadow=1; NOTE(review): zbuffer_abuf as shown allocates its spans with re->clipcrop, so the 1.0f appears unused - confirm */
+	if(APixbufstrand)
+		zbuffer_strands_abuf(re, &pa, APixbufstrand, apsmbase, lay, 0, winmat, size, size, samples, jit, 1.0f, 1, NULL);	/* no strand shade cache; NOTE(review): `samples` lands in a parameter named `sample` - confirm intent */
+}
+
 /* different rules for speed in transparent pass...  */
 /* speed pointer NULL = sky, we clear */
 /* else if either alpha is full or no solid was filled in: copy speed */
@@ -3902,11 +3950,7 @@ unsigned short *zbuffer_transp_shade(RenderPart *pa, RenderLayer *rl, float *pas
 		sampalpha= 1.0f;
 	
 	/* fill the Apixbuf */
-	doztra= 0;
-	if(rl->layflag & SCE_LAY_ZTRA)
-		doztra+= zbuffer_abuf(pa, APixbuf, &apsmbase, rl, rl->lay);
-	if((rl->layflag & SCE_LAY_STRAND) && APixbufstrand)
-		doztra+= zbuffer_strands_abuf(&R, pa, rl, APixbufstrand, &apsmbase, sscache);
+	doztra= zbuffer_abuf_render(pa, APixbuf, APixbufstrand, &apsmbase, rl, sscache);
 
 	if(doztra == 0) {
 		/* nothing filled in */
author	Brecht Van Lommel <brechtvanlommel@pandora.be>	2009-10-12 23:41:40 +0400
committer	Brecht Van Lommel <brechtvanlommel@pandora.be>	2009-10-12 23:41:40 +0400
commit	10198e99ff398380696e3408f752280e6bb5106d (patch)
tree	834b81d39ae9561ace7b2c607f2015cc03ce7af7
parent	b5f820cd874a7b3ca1de81103b99969429adfd6c (diff)