57 files changed, 33599 insertions, 431 deletions
diff --git a/source/blender/render/CMakeLists.txt b/source/blender/render/CMakeLists.txt
index 0f0060c7578..359369228f8 100644
--- a/source/blender/render/CMakeLists.txt
+++ b/source/blender/render/CMakeLists.txt
@@ -24,7 +24,7 @@
 # ***** END GPL LICENSE BLOCK *****
 
 
-set(INC 
+set(INC
 	extern/include
 	intern/include
 	../blenkernel
diff --git a/source/blender/render/extern/include/RE_pipeline.h b/source/blender/render/extern/include/RE_pipeline.h
index 1b0707bafc0..660e81eb022 100644
--- a/source/blender/render/extern/include/RE_pipeline.h
+++ b/source/blender/render/extern/include/RE_pipeline.h
@@ -103,11 +103,11 @@ typedef struct RenderPass {
 /* after render, the Combined pass is in combined, for renderlayers read from files it is a real pass */
 typedef struct RenderLayer {
 	struct RenderLayer *next, *prev;
-	
+
 	/* copy of RenderData */
 	char name[RE_MAXNAME];
 	int layflag, passflag, pass_xor;
-	
+
 	/* MULTIVIEW_TODO: acolrect and scolrect are not supported by multiview at the moment.
 	 * If they are really required they should be in RenderView instead */
 
@@ -121,16 +121,16 @@ typedef struct RenderLayer {
 	void *exrhandle;
 
 	ListBase passes;
-	
+
 } RenderLayer;
 
 typedef struct RenderResult {
 	struct RenderResult *next, *prev;
-	
+
 	/* target image size */
 	int rectx, recty;
 	short crop, sample_nr;
-	
+
 	/* the following rect32, rectf and rectz buffers are for temporary storage only, for RenderResult structs
 	 * created in #RE_AcquireResultImage - which do not have RenderView */
 
@@ -140,25 +140,25 @@ typedef struct RenderResult {
 	float *rectf;
 	/* if this exists, a copy of one of layers, or result of composited layers */
 	float *rectz;
-	
+
 	/* coordinates within final image (after cropping) */
 	rcti tilerect;
 	/* offset to apply to get a border render in full image */
 	int xof, yof;
-	
+
 	/* the main buffers */
 	ListBase layers;
-	
+
 	/* multiView maps to a StringVector in OpenEXR */
 	ListBase views;  /* RenderView */
 
 	/* allowing live updates: */
 	volatile rcti renrect;
 	volatile RenderLayer *renlay;
-	
+
 	/* optional saved endresult on disk */
 	int do_exr_tile;
-	
+
 	/* for render results in Image, verify validity for sequences */
 	int framenr;
 
diff --git a/source/blender/render/intern/include/envmap.h b/source/blender/render/intern/include/envmap.h
new file mode 100644
index 00000000000..c66427ae788
--- /dev/null
+++ b/source/blender/render/intern/include/envmap.h
@@ -0,0 +1,54 @@
+/*
+ * envmap.h
+ *
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): none yet.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/envmap.h
+ *  \ingroup render
+ */
+
+
+#ifndef __ENVMAP_H__
+#define __ENVMAP_H__
+
+/**
+ * Make environment maps for all objects in the scene that have an
+ * environment map as texture.
+ * (initrender.c)
+ */
+
+struct Render;
+struct TexResult;
+struct ImagePool;
+
+void make_envmaps(struct Render *re);
+int envmaptex(struct Tex *tex, const float texvec[3], float dxt[3], float dyt[3], int osatex, struct TexResult *texres, struct ImagePool *pool, const bool skip_image_load);
+void env_rotate_scene(struct Render *re, float mat[4][4], int do_rotate);
+
+#endif /* __ENVMAP_H__ */
+
diff --git a/source/blender/render/intern/include/initrender.h b/source/blender/render/intern/include/initrender.h
index e7ff3c7097c..b8732e7cc5c 100644
--- a/source/blender/render/intern/include/initrender.h
+++ b/source/blender/render/intern/include/initrender.h
@@ -31,7 +31,7 @@
 
 
 #ifndef __INITRENDER_H__
-#define __INITRENDER_H__ 
+#define __INITRENDER_H__
 
 /* Functions */
 
diff --git a/source/blender/render/intern/include/pixelblending.h b/source/blender/render/intern/include/pixelblending.h
new file mode 100644
index 00000000000..022510c7132
--- /dev/null
+++ b/source/blender/render/intern/include/pixelblending.h
@@ -0,0 +1,65 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): 2004-2006 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/pixelblending.h
+ *  \ingroup render
+ */
+
+
+#ifndef __PIXELBLENDING_H__
+#define __PIXELBLENDING_H__
+
+
+/**
+ * Add 1 pixel into three filtered lines
+ * (float vecs to float vec)
+ */
+void add_filt_fmask(unsigned int mask, const float col[4], float *rowbuf, int row_w);
+void add_filt_fmask_pixsize(unsigned int mask, float *in, float *rowbuf, int row_w, int pixsize);
+void add_filt_fmask_coord(float filt[3][3], const float col[4], float *rowbuf, int row_stride, int x, int y, rcti *mask);
+void mask_array(unsigned int mask, float filt[3][3]);
+
+/**
+ * Alpha-over blending for floats.
+ */
+void addAlphaOverFloat(float dest[4], const float source[4]);
+
+/**
+ * Alpha-under blending for floats.
+ */
+void addAlphaUnderFloat(float dest[4], const float source[4]);
+
+
+/**
+ * Blend variant for floats that takes an extra addfac argument (formula: see the definition)
+ */
+void addalphaAddfacFloat(float dest[4], const float source[4], char addfac);
+
+/**
+ * dest = dest + source
+ */
+void addalphaAddFloat(float dest[4], const float source[4]);
+
+#endif /* __PIXELBLENDING_H__ */
diff --git a/source/blender/render/intern/include/pixelshading.h b/source/blender/render/intern/include/pixelshading.h
new file mode 100644
index 00000000000..0e630eda475
--- /dev/null
+++ b/source/blender/render/intern/include/pixelshading.h
@@ -0,0 +1,62 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): 2004-2006, Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/pixelshading.h
+ *  \ingroup render
+ *
+ * These functions determine what actual color a pixel will have.
+ */
+
+#ifndef __PIXELSHADING_H__
+#define __PIXELSHADING_H__
+
+
+/**
+ * Render the pixel at (x,y) for object ap. Apply the jitter mask.
+ * Output is given in float collector[4]. The type vector:
+ * t[0] - min. distance
+ * t[1] - face/halo index
+ * t[2] - jitter mask
+ * t[3] - type ZB_POLY or ZB_HALO
+ * t[4] - max. distance
+ * mask is pixel coverage in bits
+ * \return an int, not a pointer (TODO: document its meaning from the definition)
+ */
+int shadeHaloFloat(HaloRen *har,
+                   float *col, int zz,
+                   float dist, float xn,
+                   float yn, short flarec);
+
+/**
+ * Render the sky at pixel (x, y).
+ */
+void shadeSkyPixel(float collector[4], float fx, float fy, short thread);
+void shadeSkyView(float col_r[3], const float rco[3], const float view[3], const float dxyview[2], short thread);
+void shadeAtmPixel(struct SunSky *sunsky, float *collector, float fx, float fy, float distance);
+void shadeSunView(float col_r[3], const float view[3]);
+/* ------------------------------------------------------------------------- */
+
+#endif
+
diff --git a/source/blender/render/intern/include/pointdensity.h b/source/blender/render/intern/include/pointdensity.h
new file mode 100644
index 00000000000..eadf714c1ba
--- /dev/null
+++ b/source/blender/render/intern/include/pointdensity.h
@@ -0,0 +1,51 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Matt Ebb
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/pointdensity.h
+ *  \ingroup render
+ */
+
+
+#ifndef __POINTDENSITY_H__
+#define __POINTDENSITY_H__
+
+/**
+ * Make point density kd-trees for all point density textures in the scene
+ */
+
+struct PointDensity;
+struct Render;
+struct TexResult;
+
+void free_pointdensity(struct PointDensity *pd);
+void cache_pointdensity(struct Render *re, struct PointDensity *pd);
+void make_pointdensities(struct Render *re);
+void free_pointdensities(struct Render *re);
+int pointdensitytex(struct Tex *tex, const float texvec[3], struct TexResult *texres);
+
+#endif /* __POINTDENSITY_H__ */
+
diff --git a/source/blender/render/intern/include/raycounter.h b/source/blender/render/intern/include/raycounter.h
new file mode 100644
index 00000000000..e16c6e13c7e
--- /dev/null
+++ b/source/blender/render/intern/include/raycounter.h
@@ -0,0 +1,74 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/raycounter.h
+ *  \ingroup render
+ */
+
+
+#ifndef __RAYCOUNTER_H__
+#define __RAYCOUNTER_H__
+
+//#define RE_RAYCOUNTER			/* enable counters per ray, useful for measuring raytrace structures performance */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef RE_RAYCOUNTER
+
+/* ray counter functions */
+
+typedef struct RayCounter {
+	struct {
+		unsigned long long test, hit;
+	} faces, bb, simd_bb, raycast, raytrace_hint, rayshadow_last_hit;
+} RayCounter;
+
+#define RE_RC_INIT(isec, shi) (isec).raycounter = &((shi).shading.raycounter)
+void RE_RC_INFO(RayCounter *rc);
+void RE_RC_MERGE(RayCounter *rc, RayCounter *tmp);
+#define RE_RC_COUNT(var) (var)++
+
+extern RayCounter re_rc_counter[];
+
+#else
+
+/* ray counter stubs */
+
+#define RE_RC_INIT(isec,shi)
+#define RE_RC_INFO(rc)
+#define RE_RC_MERGE(dest,src)
+#define	RE_RC_COUNT(var)
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/source/blender/render/intern/include/rayintersection.h b/source/blender/render/intern/include/rayintersection.h
new file mode 100644
index 00000000000..a303301ad3b
--- /dev/null
+++ b/source/blender/render/intern/include/rayintersection.h
@@ -0,0 +1,136 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2007 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ * rayintersection.h: ray tracing api, can be used independently from the renderer.
+ */
+
+/** \file blender/render/intern/include/rayintersection.h
+ *  \ingroup render
+ */
+
+
+#ifndef __RAYINTERSECTION_H__
+#define __RAYINTERSECTION_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "BLI_math_geom.h"
+
+struct RayObject;
+
+/* Ray Hints */
+
+#define RE_RAY_LCTS_MAX_SIZE	256
+#define RT_USE_LAST_HIT			/* last shadow hit is reused before raycasting on whole tree */
+//#define RT_USE_HINT			/* last hit object is reused before raycasting on whole tree */
+
+typedef struct LCTSHint {
+	int size;
+	struct RayObject *stack[RE_RAY_LCTS_MAX_SIZE];
+} LCTSHint;
+
+typedef struct RayHint {
+	union { LCTSHint lcts; } data;
+} RayHint;
+
+/* Ray Intersection */
+
+typedef struct Isect {
+	/* ray start, direction (normalized vector), and max distance. on hit,
+	 * the distance is modified to be the distance to the hit point. */
+	float start[3];
+	float dir[3];
+	float dist;
+
+	/* for envmap and incremental view update renders */
+	float origstart[3];
+	float origdir[3];
+
+	/* precomputed values to accelerate bounding box intersection */
+	int bv_index[6];
+	float idot_axis[3];
+
+	/* intersection options */
+	int mode;				/* RE_RAY_SHADOW, RE_RAY_MIRROR, RE_RAY_SHADOW_TRA */
+	int lay;				/* -1 default, set for layer lamps */
+	int skip;				/* skip flags */
+	int check;				/* check flags */
+	void *userdata;			/* used by bake check */
+
+	/* hit information */
+	float u, v;
+	int isect;				/* which half of quad */
+
+	struct {
+		void *ob;
+		void *face;
+	} hit, orig;
+
+	/* last hit optimization */
+	struct RayObject *last_hit;
+
+	/* hints (NOTE(review): with RT_USE_HINT defined, 'hint' is declared twice below) */
+#ifdef RT_USE_HINT
+	RayTraceHint *hint, *hit_hint;
+#endif
+	RayHint *hint;
+
+	/* ray counter */
+#ifdef RE_RAYCOUNTER
+	RayCounter *raycounter;
+#endif
+
+	/* Precalculated coefficients for watertight intersection check. */
+	struct IsectRayPrecalc isect_precalc;
+} Isect;
+
+/* ray types */
+#define RE_RAY_SHADOW 0
+#define RE_RAY_MIRROR 1
+#define RE_RAY_SHADOW_TRA 2
+
+/* skip options */
+#define RE_SKIP_CULLFACE                (1 << 0)
+/* if using this flag then *face should be a pointer to a VlakRen */
+#define RE_SKIP_VLR_NEIGHBOUR           (1 << 1)
+
+/* check options */
+#define RE_CHECK_VLR_NONE               0
+#define RE_CHECK_VLR_RENDER             1
+#define RE_CHECK_VLR_NON_SOLID_MATERIAL 2
+#define RE_CHECK_VLR_BAKE               3
+
+/* arbitrary, but can't use e.g. FLT_MAX because of precision issues */
+#define RE_RAYTRACE_MAXDIST	1e15f
+#define RE_RAYTRACE_EPSILON 0.0f
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RAYINTERSECTION_H__ */
+
diff --git a/source/blender/render/intern/include/render_types.h b/source/blender/render/intern/include/render_types.h
index 8308b5e76e4..fd24f4eb053 100644
--- a/source/blender/render/intern/include/render_types.h
+++ b/source/blender/render/intern/include/render_types.h
@@ -52,10 +52,10 @@ struct Main;
 /* this is handed over to threaded hiding/passes/shading engine */
 typedef struct RenderPart {
 	struct RenderPart *next, *prev;
-	
+
 	RenderResult *result;			/* result of part rendering */
 	ListBase fullresult;			/* optional full sample buffers */
-	
+
 	rcti disprect;					/* part coordinates within total picture */
 	int rectx, recty;				/* the size */
 	int nr;							/* nr is partnr */
@@ -74,10 +74,10 @@ struct Render {
 	struct Render *next, *prev;
 	char name[RE_MAXNAME];
 	int slot;
-	
+
 	/* state settings */
 	short flag, ok, result_ok;
-	
+
 	/* result of rendering */
 	RenderResult *result;
 	/* if render with single-layer option, other rendered layers are stored here */
@@ -88,29 +88,29 @@ struct Render {
 	 * write lock, all external code must use a read lock. internal code is assumed
 	 * to not conflict with writes, so no lock used for that */
 	ThreadRWMutex resultmutex;
-	
+
 	/* window size, display rect, viewplane */
 	int winx, winy;			/* buffer width and height with percentage applied
 							 * without border & crop. convert to long before multiplying together to avoid overflow. */
 	rcti disprect;			/* part within winx winy */
 	rctf viewplane;			/* mapped on winx winy */
-	
+
 	/* final picture width and height (within disprect) */
 	int rectx, recty;
-	
-	/* real maximum size of parts after correction for minimum 
+
+	/* real maximum size of parts after correction for minimum
 	 * partx*xparts can be larger than rectx, in that case last part is smaller */
 	int partx, party;
-	
+
 	/* Camera transform, only used by Freestyle. */
 	float viewmat[4][4], viewinv[4][4];
 	float viewmat_orig[4][4];	/* for incremental render */
 	float winmat[4][4];
-	
+
 	/* clippping */
 	float clipsta;
 	float clipend;
-	
+
 	/* main, scene, and its full copy of renderdata and world */
 	struct Main *main;
 	Scene *scene;
@@ -119,13 +119,13 @@ struct Render {
 	int active_view_layer;
 	struct Object *camera_override;
 	unsigned int lay, layer_override;
-	
+
 	ThreadRWMutex partsmutex;
 	ListBase parts;
-	
+
 	/* render engine */
 	struct RenderEngine *engine;
-	
+
 #ifdef WITH_FREESTYLE
 	struct Main *freestyle_bmain;
 	ListBase freestyle_renders;
@@ -140,17 +140,17 @@ struct Render {
 	void *duh;
 	void (*current_scene_update)(void *handle, struct Scene *scene);
 	void *suh;
-	
+
 	void (*stats_draw)(void *handle, RenderStats *ri);
 	void *sdh;
 	void (*progress)(void *handle, float i);
 	void *prh;
-	
+
 	void (*draw_lock)(void *handle, int i);
 	void *dlh;
 	int (*test_break)(void *handle);
 	void *tbh;
-	
+
 	RenderStats i;
 
 	struct ReportList *reports;
diff --git a/source/blender/render/intern/include/rendercore.h b/source/blender/render/intern/include/rendercore.h
new file mode 100644
index 00000000000..aa3efca9e5b
--- /dev/null
+++ b/source/blender/render/intern/include/rendercore.h
@@ -0,0 +1,105 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): none yet.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __RENDERCORE_H__
+#define __RENDERCORE_H__
+
+/** \file blender/render/intern/include/rendercore.h
+ *  \ingroup render
+ */
+
+#include "render_types.h"
+
+#include "RE_engine.h"
+
+#include "DNA_node_types.h"
+
+#include "NOD_composite.h"
+
+struct ShadeInput;
+struct ShadeResult;
+struct World;
+struct RenderPart;
+struct RenderLayer;
+struct RayObject;
+
+/* ------------------------------------------------------------------------- */
+
+typedef struct PixStr {
+	struct PixStr *next;
+	int obi, facenr, z, maskz;
+	unsigned short mask;
+	short shadfac;
+} PixStr;
+
+typedef struct PixStrMain {
+	struct PixStrMain *next, *prev;
+	struct PixStr *ps;
+	int counter;
+} PixStrMain;
+
+/* ------------------------------------------------------------------------- */
+
+
+void	calc_view_vector(float view[3], float x, float y);
+float   mistfactor(float zcor, const float co[3]); /* dist and height, return alpha */
+
+void	renderspothalo(struct ShadeInput *shi, float col[4], float alpha);
+void	add_halo_flare(Render *re);
+
+void calc_renderco_zbuf(float co[3], const float view[3], int z);
+void calc_renderco_ortho(float co[3], float x, float y, int z);
+
+int count_mask(unsigned short mask);
+
+void zbufshade_tile(struct RenderPart *pa);
+void zbufshadeDA_tile(struct RenderPart *pa);
+
+void zbufshade_sss_tile(struct RenderPart *pa);
+
+int get_sample_layers(struct RenderPart *pa, struct RenderLayer *rl, struct RenderLayer **rlpp);
+
+void render_internal_update_passes(struct RenderEngine *engine, struct Scene *scene, struct SceneRenderLayer *srl);
+
+
+/* -------- ray.c ------- */
+
+struct RayObject *RE_rayobject_create(int type, int size, int octree_resolution);
+
+extern void freeraytree(Render *re);
+extern void makeraytree(Render *re);
+struct RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi);
+
+extern void ray_shadow(ShadeInput *shi, LampRen *lar, float shadfac[4]);
+extern void ray_trace(ShadeInput *shi, ShadeResult *);
+extern void ray_ao(ShadeInput *shi, float ao[3], float env[3]);
+extern void init_jitter_plane(LampRen *lar);
+extern void init_ao_sphere(Render *re, struct World *wrld);
+extern void init_render_qmcsampler(Render *re);
+extern void free_render_qmcsampler(Render *re);
+
+#endif  /* __RENDERCORE_H__ */
diff --git a/source/blender/render/intern/include/shading.h b/source/blender/render/intern/include/shading.h
new file mode 100644
index 00000000000..e306c3c075c
--- /dev/null
+++ b/source/blender/render/intern/include/shading.h
@@ -0,0 +1,105 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2006 Blender Foundation
+ * All rights reserved.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/shading.h
+ *  \ingroup render
+ */
+
+
+struct ShadeInput;
+struct ShadeResult;
+struct RenderPart;
+struct RenderLayer;
+struct PixStr;
+struct LampRen;
+struct VlakRen;
+struct StrandPoint;
+struct ObjectInstanceRen;
+struct Isect;
+
+/* shadeinput.c */
+
+#define RE_MAX_OSA 16
+
+/* needed to calculate shadow and AO for an entire pixel */
+typedef struct ShadeSample {
+	int tot;						/* amount of shi in use, can be 1 for not FULL_OSA */
+
+	RenderLayer *rlpp[RE_MAX_OSA];	/* fast lookup from sample to renderlayer (fullsample buf) */
+
+	/* could be malloced once */
+	ShadeInput shi[RE_MAX_OSA];
+	ShadeResult shr[RE_MAX_OSA];
+} ShadeSample;
+
+
+	/* also the node shader callback */
+void shade_material_loop(struct ShadeInput *shi, struct ShadeResult *shr);
+
+void shade_input_set_triangle_i(struct ShadeInput *shi, struct ObjectInstanceRen *obi, struct VlakRen *vlr, short i1, short i2, short i3);
+void shade_input_set_triangle(struct ShadeInput *shi, int obi, int facenr, int normal_flip);
+void shade_input_copy_triangle(struct ShadeInput *shi, struct ShadeInput *from);
+void shade_input_calc_viewco(struct ShadeInput *shi, float x, float y, float z, float view[3], float dxyview[2], float co[3], float dxco[3], float dyco[3]);
+void shade_input_set_viewco(struct ShadeInput *shi, float x, float y, float sx, float sy, float z);
+void shade_input_set_uv(struct ShadeInput *shi);
+void shade_input_set_normals(struct ShadeInput *shi);
+void shade_input_set_vertex_normals(struct ShadeInput *shi);
+void shade_input_flip_normals(struct ShadeInput *shi);
+void shade_input_set_shade_texco(struct ShadeInput *shi);
+void shade_input_set_strand(struct ShadeInput *shi, struct StrandRen *strand, struct StrandPoint *spoint);
+void shade_input_set_strand_texco(struct ShadeInput *shi, struct StrandRen *strand, struct StrandVert *svert, struct StrandPoint *spoint);
+void shade_input_do_shade(struct ShadeInput *shi, struct ShadeResult *shr);
+
+void shade_input_init_material(struct ShadeInput *shi);
+void shade_input_initialize(struct ShadeInput *shi, struct RenderPart *pa, struct RenderLayer *rl, int sample);
+
+void shade_sample_initialize(struct ShadeSample *ssamp, struct RenderPart *pa, struct RenderLayer *rl);
+void shade_samples_do_AO(struct ShadeSample *ssamp);
+void shade_samples_fill_with_ps(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y);
+int shade_samples(struct ShadeSample *ssamp, struct PixStr *ps, int x, int y);
+
+void vlr_set_uv_indices(struct VlakRen *vlr, int *i1, int *i2, int *i3);
+
+void	calc_R_ref(struct ShadeInput *shi);
+
+void barycentric_differentials_from_position(
+	const float co[3], const float v1[3], const float v2[3], const float v3[3],
+	const float dxco[3], const float dyco[3], const float facenor[3], const bool differentials,
+	float *u, float *v, float *dx_u, float *dx_v, float *dy_u, float *dy_v);
+
+/* shadeoutput.c */
+void shade_lamp_loop(struct ShadeInput *shi, struct ShadeResult *shr);
+
+void shade_color(struct ShadeInput *shi, ShadeResult *shr);
+
+void ambient_occlusion(struct ShadeInput *shi);
+void environment_lighting_apply(struct ShadeInput *shi, struct ShadeResult *shr);
+
+ListBase *get_lights(struct ShadeInput *shi);
+float lamp_get_visibility(struct LampRen *lar, const float co[3], float lv[3], float *dist);
+void lamp_get_shadow(struct LampRen *lar, ShadeInput *shi, float inp, float shadfac[4], int do_real);
+
+float fresnel_fac(const float view[3], const float vn[3], float fresnel, float fac);
+
+/* rayshade.c */
+extern void shade_ray(struct Isect *is, struct ShadeInput *shi, struct ShadeResult *shr);
diff --git a/source/blender/render/intern/include/strand.h b/source/blender/render/intern/include/strand.h
new file mode 100644
index 00000000000..f4e22c78b42
--- /dev/null
+++ b/source/blender/render/intern/include/strand.h
@@ -0,0 +1,99 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Contributor(s): Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/strand.h
+ *  \ingroup render
+ */
+
+
+#ifndef __STRAND_H__
+#define __STRAND_H__
+
+struct StrandVert;
+struct StrandRen;
+struct StrandBuffer;
+struct ShadeSample;
+struct StrandPart;
+struct Render;
+struct ZSpan;
+struct ObjectInstanceRen;
+struct StrandSurface;
+struct DerivedMesh;
+struct ObjectRen;
+
+typedef struct StrandPoint {
+	/* position within segment */
+	float t;
+
+	/* camera space */
+	float co[3];
+	float nor[3];
+	float tan[3];
+	float strandco;
+	float width;
+
+	/* derivatives */
+	float dtco[3], dsco[3];
+	float dtstrandco;
+
+	/* outer points */
+	float co1[3], co2[3];
+	float hoco1[4], hoco2[4];
+	float zco1[3], zco2[3];
+	int clip1, clip2;
+
+	/* screen space */
+	float hoco[4];
+	float x, y;
+
+	/* simplification */
+	float alpha;
+} StrandPoint;
+
+typedef struct StrandSegment {
+	struct StrandVert *v[4];
+	struct StrandRen *strand;
+	struct StrandBuffer *buffer;
+	struct ObjectInstanceRen *obi;
+	float sqadaptcos;
+
+	StrandPoint point1, point2;
+	int shaded;
+} StrandSegment;
+
+struct StrandShadeCache;
+typedef struct StrandShadeCache StrandShadeCache;
+
+void strand_eval_point(StrandSegment *sseg, StrandPoint *spoint);
+void render_strand_segment(struct Render *re, float winmat[4][4], struct StrandPart *spart, struct ZSpan *zspan, int totzspan, StrandSegment *sseg);
+void strand_minmax(struct StrandRen *strand, float min[3], float max[3], const float width);
+
+struct StrandSurface *cache_strand_surface(struct Render *re, struct ObjectRen *obr, struct DerivedMesh *dm, float mat[4][4], int timeoffset);
+void free_strand_surface(struct Render *re);
+
+struct StrandShadeCache *strand_shade_cache_create(void);
+void strand_shade_cache_free(struct StrandShadeCache *cache);
+void strand_shade_segment(struct Render *re, struct StrandShadeCache *cache, struct StrandSegment *sseg, struct ShadeSample *ssamp, float t, float s, int addpassflag);
+void strand_shade_unref(struct StrandShadeCache *cache, struct ObjectInstanceRen *obi, struct StrandVert *svert);
+
+#endif
+
diff --git a/source/blender/render/intern/include/sunsky.h b/source/blender/render/intern/include/sunsky.h
new file mode 100644
index 00000000000..c608f9fc48c
--- /dev/null
+++ b/source/blender/render/intern/include/sunsky.h
@@ -0,0 +1,81 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Contributor(s): zaghaghi
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/sunsky.h
+ *  \ingroup render
+ */
+
+#ifndef __SUNSKY_H__
+#define __SUNSKY_H__
+
+// #define SPECTRUM_MAX_COMPONENTS     100
+
+typedef struct SunSky {  /* state passed to InitSunSky(), InitAtmosphere(), GetSkyXYZRadiance*() and AtmospherePixleShader() */
+	short effect_type, skyblendtype, sky_colorspace;  /* NOTE(review): sky_colorspace is a short here but InitSunSky() takes it as float -- confirm intended */
+	float turbidity;
+	float theta, phi;
+
+	float toSun[3];
+
+	/*float sunSpectralRaddata[SPECTRUM_MAX_COMPONENTS];*/
+	float sunSolidAngle;
+
+	float zenith_Y, zenith_x, zenith_y;
+
+	float perez_Y[5], perez_x[5], perez_y[5];
+
+	/* suggested by glome in patch [#8063] */
+	float horizon_brightness;
+	float spread;
+	float sun_brightness;
+	float sun_size;
+	float backscattered_light;
+	float skyblendfac;
+	float sky_exposure;
+
+	float atm_HGg;
+
+	float atm_SunIntensity;
+	float atm_InscatteringMultiplier;
+	float atm_ExtinctionMultiplier;
+	float atm_BetaRayMultiplier;
+	float atm_BetaMieMultiplier;
+	float atm_DistanceMultiplier;
+
+	float atm_BetaRay[3];
+	float atm_BetaDashRay[3];
+	float atm_BetaMie[3];
+	float atm_BetaDashMie[3];
+	float atm_BetaRM[3];
+} SunSky;
+
+void InitSunSky(struct SunSky *sunsky, float turb, const float toSun[3], float horizon_brightness,
+                float spread, float sun_brightness, float sun_size, float back_scatter,
+                float skyblendfac, short skyblendtype, float sky_exposure, float sky_colorspace);
+
+void GetSkyXYZRadiance(struct SunSky *sunsky, float theta, float phi, float color_out[3]);
+void GetSkyXYZRadiancef(struct SunSky *sunsky, const float varg[3], float color_out[3]);
+void InitAtmosphere(struct SunSky *sunSky, float sun_intens, float mief, float rayf, float inscattf, float extincf, float disf);
+void AtmospherePixleShader(struct SunSky *sunSky, float view[3], float s, float rgb[3]);
+void ClipColor(float c[3]);
+
+#endif /*__SUNSKY_H__*/
diff --git a/source/blender/render/intern/include/texture_ocean.h b/source/blender/render/intern/include/texture_ocean.h
new file mode 100644
index 00000000000..6d7bc6fe7b0
--- /dev/null
+++ b/source/blender/render/intern/include/texture_ocean.h
@@ -0,0 +1,35 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributors: Matt Ebb
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __TEXTURE_OCEAN_H__
+#define __TEXTURE_OCEAN_H__
+
+/** \file blender/render/intern/include/texture_ocean.h
+ *  \ingroup render
+ */
+
+int ocean_texture(struct Tex *tex, const float texvec[2], struct TexResult *texres);
+
+#endif  /* __TEXTURE_OCEAN_H__ */
diff --git a/source/blender/render/intern/include/voxeldata.h b/source/blender/render/intern/include/voxeldata.h
new file mode 100644
index 00000000000..041ca78a799
--- /dev/null
+++ b/source/blender/render/intern/include/voxeldata.h
@@ -0,0 +1,47 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Raul Fernandez Hernandez (Farsthary), Matt Ebb.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/include/voxeldata.h
+ *  \ingroup render
+ */
+
+#ifndef __VOXELDATA_H__
+#define __VOXELDATA_H__
+
+struct Render;
+struct TexResult;
+
+typedef struct VoxelDataHeader {  /* NOTE(review): presumably the header record of a voxel data file -- confirm against the reader */
+	int resolX, resolY, resolZ;
+	int frames;
+} VoxelDataHeader;
+
+void cache_voxeldata(struct Tex *tex, int scene_frame);  /* 'struct Tex', as in voxeldatatex() below, so the bare Tex typedef need not be in scope */
+void make_voxeldata(struct Render *re);
+int  voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texres);
+
+#endif /* __VOXELDATA_H__ */
diff --git a/source/blender/render/intern/include/zbuf.h b/source/blender/render/intern/include/zbuf.h
index 3dfcbc355c4..0654a4f8df6 100644
--- a/source/blender/render/intern/include/zbuf.h
+++ b/source/blender/render/intern/include/zbuf.h
@@ -36,7 +36,7 @@
 /* span fill in method, is also used to localize data for zbuffering */
 typedef struct ZSpan {
 	int rectx, recty;						/* range for clipping */
-	
+
 	int miny1, maxy1, miny2, maxy2;			/* actual filled in range */
 	const float *minp1, *maxp1, *minp2, *maxp2;	/* vertex pointers detect min/max range in */
 	float *span1, *span2;
diff --git a/source/blender/render/intern/raytrace/bvh.h b/source/blender/render/intern/raytrace/bvh.h
new file mode 100644
index 00000000000..0f9a506762b
--- /dev/null
+++ b/source/blender/render/intern/raytrace/bvh.h
@@ -0,0 +1,407 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/bvh.h
+ *  \ingroup render
+ */
+
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+
+#include "raycounter.h"
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "rayobject_hint.h"
+#include "rayobject_rtbuild.h"
+
+#include <assert.h>
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifndef __BVH_H__
+#define __BVH_H__
+
+#ifdef __SSE__
+inline int test_bb_group4(__m128 *bb_group, const Isect *isec)  /* tests the ray against 4 boxes at once; returns a 4-bit mask, bit i set when lane i passes */
+{
+	const __m128 tmin0 = _mm_setzero_ps();
+	const __m128 tmax0 = _mm_set_ps1(isec->dist);  /* every lane starts with the interval [0, isec->dist] */
+
+	float start[3], idot_axis[3];
+	copy_v3_v3(start, isec->start);
+	copy_v3_v3(idot_axis, isec->idot_axis);
+
+	const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );  /* bb_group[k]: bound component k of the 4 boxes, chosen through bv_index */
+	const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
+	const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
+	const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
+	const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
+	const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
+
+	return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));  /* a lane passes when its clipped interval is non-empty (tmax >= tmin) */
+}
+#endif
+
+/*
+ * Tests whether the ray in isec overlaps the box _bb within [0, isec->dist]; returns 1 on overlap, 0 otherwise.
+ * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
+ *  [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
+ */
+static inline int rayobject_bb_intersect_test(const Isect *isec, const float *_bb)
+{
+	const float *bb = _bb;
+
+	float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];  /* per-axis slab distances; bv_index picks which bound goes into t1 and which into t2 */
+	float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
+	float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
+	float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
+	float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
+	float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
+
+	RE_RC_COUNT(isec->raycounter->bb.test);
+
+	if (t1x > t2y  || t2x < t1y  || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;  /* the per-axis intervals do not all overlap */
+	if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0;  /* some t2 value lies behind the ray start */
+	if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;  /* some t1 value lies beyond the current ray length */
+	RE_RC_COUNT(isec->raycounter->bb.hit);
+
+	return 1;
+}
+
+/* bvh tree generics */
+template<class Tree> static void bvh_add(Tree *obj, RayObject *ob)  /* forwards ob to rtbuild_add() on the tree's builder */
+{
+	rtbuild_add(obj->builder, ob);
+}
+
+template<class Node>
+inline bool is_leaf(Node *node)  /* a node pointer counts as a leaf when RE_rayobject_isAligned() is false for it */
+{
+	return !RE_rayobject_isAligned(node);
+}
+
+template<class Tree> static void bvh_done(Tree *obj);
+
+template<class Tree>
+static void bvh_free(Tree *obj)
+{
+	/* the builder and the node arena may be absent: release whichever exists */
+	if (obj->builder != NULL) rtbuild_free(obj->builder);
+
+	if (obj->node_arena != NULL) BLI_memarena_free(obj->node_arena);
+
+	/* then the tree struct itself */
+	MEM_freeN(obj);
+}
+
+template<class Tree>
+static void bvh_bb(Tree *obj, float *min, float *max)
+{
+	if (!obj->root) return;  /* no root: min/max are left untouched */
+	bvh_node_merge_bb(obj->root, min, max);
+}
+
+
+template<class Tree>
+static float bvh_cost(Tree *obj)  /* returns the cost value stored on the tree */
+{
+	assert(obj->cost >= 0.0f);  /* a negative stored cost is treated as a programming error */
+	return obj->cost;
+}
+
+
+
+/* bvh tree nodes generics */
+template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec)  /* 1 when the ray passes the box test against node->bb, else 0 */
+{
+	return rayobject_bb_intersect_test(isec, (const float *)node->bb);
+}
+
+
+template<class Node>
+static inline void bvh_node_merge_bb(Node *node, float min[3], float max[3])
+{
+	if (!is_leaf(node)) {
+		DO_MIN(node->bb,     min);
+		DO_MAX(node->bb + 3, max);
+	}
+	else {
+		RE_rayobject_merge_bb((RayObject *)node, min, max);
+	}
+}
+
+
+
+/*
+ * traverse a BVH looking for a rayhit using a local (explicit) stack instead of recursion
+ */
+template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos);
+
+template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW>
+static int bvh_node_stack_raycast(Node *root, Isect *isec)  /* returns non-zero when any leaf reported a hit */
+{
+	Node *stack[MAX_STACK_SIZE];
+	int hit = 0, stack_pos = 0;
+
+	if (!TEST_ROOT && !is_leaf(root))  /* TEST_ROOT == false: skip the root's own box test and start from its children */
+		bvh_node_push_childs(root, isec, stack, stack_pos);
+	else
+		stack[stack_pos++] = root;
+
+	while (stack_pos) {
+		Node *node = stack[--stack_pos];
+		if (!is_leaf(node)) {
+			if (bvh_node_hit_test(node, isec)) {
+				bvh_node_push_childs(node, isec, stack, stack_pos);
+				assert(stack_pos <= MAX_STACK_SIZE);  /* NOTE(review): checked only after the push, so it cannot prevent a write past stack[] */
+			}
+		}
+		else {
+			hit |= RE_rayobject_intersect( (RayObject *)node, isec);
+			if (SHADOW && hit) return hit;  /* SHADOW == true: return at the first hit */
+		}
+	}
+	return hit;
+}
+
+
+#ifdef __SSE__
+/*
+ * Generic SIMD bvh traversal
+ * this was created to be able to use any simd (with the cost of some memmoves)
+ * it can take advantage of any SIMD width and doesn't need any special tree care
+ */
+template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT>
+static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)  /* returns non-zero when any leaf reported a hit */
+{
+	Node *stack[MAX_STACK_SIZE];
+
+	int hit = 0, stack_pos = 0;
+
+	if (!TEST_ROOT) {
+		if (!is_leaf(root)) {
+			if (!is_leaf(root->child))
+				bvh_node_push_childs(root, isec, stack, stack_pos);
+			else
+				return RE_rayobject_intersect( (RayObject *)root->child, isec);
+		}
+		else
+			return RE_rayobject_intersect( (RayObject *)root, isec);
+	}
+	else {
+		if (!is_leaf(root))
+			stack[stack_pos++] = root;
+		else
+			return RE_rayobject_intersect( (RayObject *)root, isec);
+	}
+
+	while (true) {
+		//Use SIMD 4
+		if (stack_pos >= 4) {  /* at least 4 entries stacked: pop them together and box-test them in one SIMD call */
+			__m128 t_bb[6];
+			Node *t_node[4];
+
+			stack_pos -= 4;
+
+			/* prepare the 4BB for SIMD */
+			t_node[0] = stack[stack_pos + 0]->child;
+			t_node[1] = stack[stack_pos + 1]->child;
+			t_node[2] = stack[stack_pos + 2]->child;
+			t_node[3] = stack[stack_pos + 3]->child;
+
+			const float *bb0 = stack[stack_pos + 0]->bb;
+			const float *bb1 = stack[stack_pos + 1]->bb;
+			const float *bb2 = stack[stack_pos + 2]->bb;
+			const float *bb3 = stack[stack_pos + 3]->bb;
+
+			const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) );  /* NOTE(review): _mm_load_ps is the aligned load; assumes Node::bb is 16-byte aligned -- confirm */
+			const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) );
+			t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) );
+			t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) );
+
+			const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) );
+			const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) );
+			t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) );
+			t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) );
+
+			const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) );  /* loads bb[4..7]: only lanes 0-1 are used, but 8 floats must be readable behind bb */
+			const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
+			t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) );
+			t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) );
+#if 0
+			for (int i = 0; i < 4; i++)
+			{
+				Node *t = stack[stack_pos + i];
+				assert(!is_leaf(t));
+
+				float *bb = ((float *)t_bb) + i;
+				bb[4 * 0] = t->bb[0];
+				bb[4 * 1] = t->bb[1];
+				bb[4 * 2] = t->bb[2];
+				bb[4 * 3] = t->bb[3];
+				bb[4 * 4] = t->bb[4];
+				bb[4 * 5] = t->bb[5];
+				t_node[i] = t->child;
+			}
+#endif
+			RE_RC_COUNT(isec->raycounter->simd_bb.test);
+			int res = test_bb_group4(t_bb, isec);
+
+			for (int i = 0; i < 4; i++)
+				if (res & (1 << i)) {  /* bit i of the mask: box i passed the test */
+					RE_RC_COUNT(isec->raycounter->simd_bb.hit);
+					if (!is_leaf(t_node[i])) {
+						for (Node *t = t_node[i]; t; t = t->sibling) {  /* push the child and every sibling chained after it */
+							assert(stack_pos < MAX_STACK_SIZE);
+							stack[stack_pos++] = t;
+						}
+					}
+					else {
+						hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec);
+						if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+					}
+				}
+		}
+		else if (stack_pos > 0) {  /* fewer than 4 entries left: test them one at a time */
+			Node *node = stack[--stack_pos];
+			assert(!is_leaf(node));
+
+			if (bvh_node_hit_test(node, isec)) {
+				if (!is_leaf(node->child)) {
+					bvh_node_push_childs(node, isec, stack, stack_pos);
+					assert(stack_pos <= MAX_STACK_SIZE);  /* NOTE(review): checked only after the push */
+				}
+				else {
+					hit |= RE_rayobject_intersect( (RayObject *)node->child, isec);
+					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+				}
+			}
+		}
+		else break;
+	}
+	return hit;
+}
+#endif
+
+/*
+ * recursively traverse a BVH looking for a rayhit using system stack
+ */
+#if 0
+template<class Node>
+static int bvh_node_raycast(Node *node, Isect *isec)
+{
+	int hit = 0;
+	if (bvh_test_node(node, isec))
+	{
+		if (isec->idot_axis[node->split_axis] > 0.0f)
+		{
+			int i;
+			for (i = 0; i < BVH_NCHILDS; i++)
+				if (!is_leaf(node->child[i]))
+				{
+					if (node->child[i] == 0) break;
+
+					hit |= bvh_node_raycast(node->child[i], isec);
+					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+				}
+				else {
+					hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
+					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+				}
+		}
+		else {
+			int i;
+			for (i = BVH_NCHILDS - 1; i >= 0; i--)
+				if (!is_leaf(node->child[i]))
+				{
+					if (node->child[i])
+					{
+						hit |= dfs_raycast(node->child[i], isec);
+						if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+					}
+				}
+				else {
+					hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
+					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
+				}
+		}
+	}
+	return hit;
+}
+#endif
+
+template<class Node, class HintObject>
+static void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject)  /* appends nodes to hint->stack, descending only where hint_test_bb() says HINT_RECURSE */
+{
+	assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE);
+
+	if (is_leaf(node)) {
+		hint->stack[hint->size++] = (RayObject *)node;
+	}
+	else {
+		int childs = count_childs(node);
+		if (hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) {
+			int result = hint_test_bb(hintObject, node->bb, node->bb + 3);
+			if (result == HINT_RECURSE) {
+				/* We are 100% sure the ray will pass inside this node */
+				bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject);
+			}
+			else if (result == HINT_ACCEPT) {  /* any other result (e.g. HINT_DISCARD) adds nothing */
+				hint->stack[hint->size++] = (RayObject *)node;
+			}
+		}
+		else {
+			hint->stack[hint->size++] = (RayObject *)node;  /* no room for all children: keep the node itself */
+		}
+	}
+}
+
+
+template<class Tree>
+static RayObjectAPI *bvh_get_api(int maxstacksize);
+
+
+template<class Tree, int DFS_STACK_SIZE>
+static inline RayObject *bvh_create_tree(int size)  /* allocates a zeroed Tree with a builder created for `size`; returns it through RE_rayobject_unalignRayAPI() */
+{
+	Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree");
+	assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */
+
+	obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE);
+	obj->root = NULL;
+
+	obj->node_arena = NULL;
+	obj->builder    = rtbuild_create(size);
+
+	return RE_rayobject_unalignRayAPI((RayObject *) obj);
+}
+
+#endif
diff --git a/source/blender/render/intern/raytrace/rayobject.cpp b/source/blender/render/intern/raytrace/rayobject.cpp
new file mode 100644
index 00000000000..fee877b311d
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject.cpp
@@ -0,0 +1,534 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject.cpp
+ *  \ingroup render
+ */
+
+
+#include <assert.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_material_types.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "raycounter.h"
+#include "render_types.h"
+#include "renderdatabase.h"
+
+/* RayFace
+ *
+ * Always-inline is forced on these helpers: the compiler otherwise declines
+ * to inline them for being too long, and this code runs billions of times,
+ * so inlining is really wanted. */
+
+MALWAYS_INLINE RayObject *rayface_from_coords(RayFace *rayface, void *ob, void *face,
+                                              float *v1, float *v2, float *v3, float *v4)
+{
+	/* a NULL v4 marks a triangle, anything else a quad */
+	const bool has_v4 = (v4 != NULL);
+
+	rayface->ob = ob;
+	rayface->face = face;
+	rayface->quad = has_v4 ? 1 : 0;
+
+	copy_v3_v3(rayface->v1, v1);
+	copy_v3_v3(rayface->v2, v2);
+	copy_v3_v3(rayface->v3, v3);
+
+	if (has_v4) {
+		copy_v3_v3(rayface->v4, v4);
+	}
+
+	return RE_rayobject_unalignRayFace(rayface);
+}
+
+MALWAYS_INLINE void rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr)  /* fills rayface from vlr's vertices, transformed by obi->mat when obi->transform_primitives is set */
+{
+	rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : NULL);
+
+	if (obi->transform_primitives) {
+		mul_m4_v3(obi->mat, rayface->v1);
+		mul_m4_v3(obi->mat, rayface->v2);
+		mul_m4_v3(obi->mat, rayface->v3);
+
+		if (RE_rayface_isQuad(rayface))  /* v4 was only copied for quads */
+			mul_m4_v3(obi->mat, rayface->v4);
+	}
+}
+
+RayObject *RE_rayface_from_vlak(RayFace *rayface, ObjectInstanceRen *obi, VlakRen *vlr)  /* NOTE(review): unlike the inline rayface_from_vlak(), this never applies obi->mat -- confirm intended */
+{
+	return rayface_from_coords(rayface, obi, vlr, vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4 ? vlr->v4->co : NULL);
+}
+
+RayObject *RE_rayface_from_coords(RayFace *rayface, void *ob, void *face, float *v1, float *v2, float *v3, float *v4)  /* exported wrapper around the inline rayface_from_coords(); v4 may be NULL for a triangle */
+{
+	return rayface_from_coords(rayface, ob, face, v1, v2, v3, v4);
+}
+
+/* VlakPrimitive: stores only the instance and face pointers, no vertex coordinates are copied */
+
+RayObject *RE_vlakprimitive_from_vlak(VlakPrimitive *face, struct ObjectInstanceRen *obi, struct VlakRen *vlr)
+{
+	face->ob = obi;
+	face->face = vlr;
+
+	return RE_rayobject_unalignVlakPrimitive(face);
+}
+
+/* Checks for ignoring faces or materials */
+
+MALWAYS_INLINE int vlr_check_intersect(Isect *is, ObjectInstanceRen *obi, VlakRen *vlr)  /* non-zero when this face should be intersected by the ray */
+{
+	/* for baking selected to active non-traceable materials might still
+	 * be in the raytree */
+	if (!(vlr->flag & R_TRACEBLE))
+		return 0;
+
+	/* I know... cpu cycle waste, might do smarter once */
+	if (is->mode == RE_RAY_MIRROR)
+		return !(vlr->mat->mode & MA_ONLYCAST);  /* mirror rays: skip materials flagged MA_ONLYCAST */
+	else
+		return (vlr->mat->mode2 & MA_CASTSHADOW) && (is->lay & obi->lay);  /* other modes: material must have MA_CASTSHADOW and the layer masks must overlap */
+}
+
+MALWAYS_INLINE int vlr_check_intersect_solid(Isect *UNUSED(is), ObjectInstanceRen *UNUSED(obi), VlakRen *vlr)
+{
+	/* only surface-type (solid) materials qualify */
+	const int is_surface = (vlr->mat->material_type == MA_TYPE_SURFACE);
+
+	/* the result is exactly 0 or 1 */
+	return is_surface ? 1 : 0;
+}
+
+MALWAYS_INLINE int vlr_check_bake(Isect *is, ObjectInstanceRen *obi, VlakRen *UNUSED(vlr))  /* non-zero when the face's object is not is->userdata and has SELECT set */
+{
+	return (obi->obr->ob != is->userdata) && (obi->obr->ob->flag & SELECT);
+}
+
+/* Ray Triangle/Quad Intersection: the variant below accepts hits whatever the sign of the distance; it only writes *r_lambda and r_uv when it returns true */
+
+static bool isect_ray_tri_watertight_no_sign_check_v3(
+        const float ray_origin[3], const struct IsectRayPrecalc *isect_precalc,
+        const float v0[3], const float v1[3], const float v2[3],
+        float *r_lambda, float r_uv[2])
+{
+	const int kx = isect_precalc->kx;
+	const int ky = isect_precalc->ky;
+	const int kz = isect_precalc->kz;
+	const float sx = isect_precalc->sx;
+	const float sy = isect_precalc->sy;
+	const float sz = isect_precalc->sz;
+
+	/* Calculate vertices relative to ray origin. */
+	const float a[3] = {v0[0] - ray_origin[0], v0[1] - ray_origin[1], v0[2] - ray_origin[2]};
+	const float b[3] = {v1[0] - ray_origin[0], v1[1] - ray_origin[1], v1[2] - ray_origin[2]};
+	const float c[3] = {v2[0] - ray_origin[0], v2[1] - ray_origin[1], v2[2] - ray_origin[2]};
+
+	const float a_kx = a[kx], a_ky = a[ky], a_kz = a[kz];
+	const float b_kx = b[kx], b_ky = b[ky], b_kz = b[kz];
+	const float c_kx = c[kx], c_ky = c[ky], c_kz = c[kz];
+
+	/* Perform shear and scale of vertices. */
+	const float ax = a_kx - sx * a_kz;
+	const float ay = a_ky - sy * a_kz;
+	const float bx = b_kx - sx * b_kz;
+	const float by = b_ky - sy * b_kz;
+	const float cx = c_kx - sx * c_kz;
+	const float cy = c_ky - sy * c_kz;
+
+	/* Calculate scaled barycentric coordinates. */
+	const float u = cx * by - cy * bx;
+	const float v = ax * cy - ay * cx;
+	const float w = bx * ay - by * ax;
+	float det;
+
+	if ((u < 0.0f || v < 0.0f || w < 0.0f) &&
+	    (u > 0.0f || v > 0.0f || w > 0.0f))
+	{
+		return false;  /* u, v, w have mixed signs: reject */
+	}
+
+	/* Calculate determinant. */
+	det = u + v + w;
+	if (UNLIKELY(det == 0.0f)) {
+		return false;
+	}
+	else {
+		/* Calculate scaled z-coordinates of vertices and use them to calculate
+		 * the hit distance.
+		 */
+		const float t = (u * a_kz + v * b_kz + w * c_kz) * sz;
+		/* Normalize u, v and t. */
+		const float inv_det = 1.0f / det;
+		if (r_uv) {  /* r_uv is optional */
+			r_uv[0] = u * inv_det;
+			r_uv[1] = v * inv_det;
+		}
+		*r_lambda = t * inv_det;  /* no sign test on t: the distance may be negative */
+		return true;
+	}
+}
+
+MALWAYS_INLINE int isec_tri_quad(const float start[3],
+                                 const struct IsectRayPrecalc *isect_precalc,
+                                 const RayFace *face,
+                                 float r_uv[2], float *r_lambda)  /* returns 0 = miss, 1 = hit on (v1,v2,v3), 2 = hit on (v1,v3,v4); *r_lambda is in/out (max distance in, hit distance out) */
+{
+	float uv[2], l;
+
+	if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v2, face->v3, &l, uv)) {
+		/* check if intersection is within ray length */
+		if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) {
+			r_uv[0] = -uv[0];  /* uv is stored negated */
+			r_uv[1] = -uv[1];
+			*r_lambda = l;
+			return 1;
+		}
+	}
+
+	/* intersect second triangle in quad */
+	if (RE_rayface_isQuad(face)) {
+		if (isect_ray_tri_watertight_v3(start, isect_precalc, face->v1, face->v3, face->v4, &l, uv)) {
+			/* check if intersection is within ray length */
+			if (l > -RE_RAYTRACE_EPSILON && l < *r_lambda) {
+				r_uv[0] = -uv[0];
+				r_uv[1] = -uv[1];
+				*r_lambda = l;
+				return 2;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Simpler yes/no Ray Triangle/Quad Intersection: no distance or uv is reported, the result is 0 (miss), 1 (first triangle) or 2 (second triangle of a quad) */
+
+MALWAYS_INLINE int isec_tri_quad_neighbour(const float start[3],
+                                           const float dir[3],
+                                           const RayFace *face)
+{
+	float r[3];
+	struct IsectRayPrecalc isect_precalc;
+	float uv[2], l;  /* written by the intersection helper but never read here */
+
+	negate_v3_v3(r, dir); /* note, different than above function */
+
+	isect_ray_tri_watertight_v3_precalc(&isect_precalc, r);  /* precalc is rebuilt per call, for the negated direction */
+
+	if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v2, face->v3, &l, uv)) {
+		return 1;
+	}
+
+	/* intersect second triangle in quad */
+	if (RE_rayface_isQuad(face)) {
+		if (isect_ray_tri_watertight_no_sign_check_v3(start, &isect_precalc, face->v1, face->v3, face->v4, &l, uv)) {
+			return 2;
+		}
+	}
+
+	return 0;
+}
+
+/* RayFace intersection with checks and neighbor verification included,
+ * Isect is modified if the face is hit. */
+
+MALWAYS_INLINE int intersect_rayface(RayObject *hit_obj, RayFace *face, Isect *is)  /* returns 1 on an accepted hit (is->dist, u, v, hit.* updated), 0 otherwise */
+{
+	float dist, uv[2];
+	int ok = 0;
+
+	/* avoid self-intersection */
+	if (is->orig.ob == face->ob && is->orig.face == face->face)
+		return 0;
+
+	/* check if we should intersect this face */
+	if (is->check == RE_CHECK_VLR_RENDER) {
+		if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0)
+			return 0;
+	}
+	else if (is->check == RE_CHECK_VLR_NON_SOLID_MATERIAL) {  /* NOTE(review): despite the name, this branch rejects faces whose material is not MA_TYPE_SURFACE -- confirm naming */
+		if (vlr_check_intersect(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0)
+			return 0;
+		if (vlr_check_intersect_solid(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0)
+			return 0;
+	}
+	else if (is->check == RE_CHECK_VLR_BAKE) {
+		if (vlr_check_bake(is, (ObjectInstanceRen *)face->ob, (VlakRen *)face->face) == 0)
+			return 0;
+	}
+
+	/* ray counter */
+	RE_RC_COUNT(is->raycounter->faces.test);
+
+	dist = is->dist;  /* upper bound in; isec_tri_quad() overwrites it with the hit distance */
+	ok = isec_tri_quad(is->start, &is->isect_precalc, face, uv, &dist);
+
+	if (ok) {
+
+		/* when a shadow ray leaves a face, it can be little outside the edges
+		 * of it, causing intersection to be detected in its neighbor face */
+		if (is->skip & RE_SKIP_VLR_NEIGHBOUR) {
+			if (dist < 0.1f && is->orig.ob == face->ob) {
+				VlakRen *a = (VlakRen *)is->orig.face;
+				VlakRen *b = (VlakRen *)face->face;
+				ObjectRen *obr = ((ObjectInstanceRen *)face->ob)->obr;
+
+				VertRen **va, **vb;
+				int *org_idx_a, *org_idx_b;
+				int i, j;
+				bool is_neighbor = false;
+
+				/* "same" vertex means either the actual same VertRen, or the same 'final org index', if available
+				 * (autosmooth only, currently). */
+				for (i = 0, va = &a->v1; !is_neighbor && i < 4 && *va; ++i, ++va) {  /* walks v1..v4 as an array of pointers; stops at a NULL vertex */
+					org_idx_a = RE_vertren_get_origindex(obr, *va, false);
+					for (j = 0, vb = &b->v1; !is_neighbor && j < 4 && *vb; ++j, ++vb) {
+						if (*va == *vb) {
+							is_neighbor = true;
+						}
+						else if (org_idx_a) {
+							org_idx_b = RE_vertren_get_origindex(obr, *vb, 0);
+							if (org_idx_b && *org_idx_a == *org_idx_b) {
+								is_neighbor = true;
+							}
+						}
+					}
+				}
+
+				/* So there's a shared edge or vertex, let's intersect ray with self, if that's true
+				 * we can safely return 1, otherwise we assume the intersection is invalid, 0 */
+				if (is_neighbor) {
+					/* create RayFace from original face, transformed if necessary */
+					RayFace origface;
+					ObjectInstanceRen *ob = (ObjectInstanceRen *)is->orig.ob;
+					rayface_from_vlak(&origface, ob, (VlakRen *)is->orig.face);
+
+					if (!isec_tri_quad_neighbour(is->start, is->dir, &origface)) {
+						return 0;
+					}
+				}
+			}
+		}
+
+		RE_RC_COUNT(is->raycounter->faces.hit);
+
+		is->isect = ok;  // which half of the quad
+		is->dist = dist;
+		is->u = uv[0]; is->v = uv[1];
+
+		is->hit.ob   = face->ob;
+		is->hit.face = face->face;
+#ifdef RT_USE_LAST_HIT
+		is->last_hit = hit_obj;
+#endif
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Intersection: RE_rayobject_raycast() prepares the per-ray fields of isec and then intersects r; returns 1 on a hit, 0 otherwise */
+
+int RE_rayobject_raycast(RayObject *r, Isect *isec)
+{
+	int i;
+
+	/* Pre-calculate orientation for watertight intersection checks. */
+	isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir);
+
+	RE_RC_COUNT(isec->raycounter->raycast.test);
+
+	/* setup vars used on raycast */
+	for (i = 0; i < 3; i++) {
+		isec->idot_axis[i]          = 1.0f / isec->dir[i];  /* no guard against a zero direction component */
+
+		isec->bv_index[2 * i]       = isec->idot_axis[i] < 0.0f ? 1 : 0;  /* first a 0/1 flag per axis ... */
+		isec->bv_index[2 * i + 1]   = 1 - isec->bv_index[2 * i];
+
+		isec->bv_index[2 * i]       = i + 3 * isec->bv_index[2 * i];  /* ... then turned into an index (i or i + 3) into a 6-float box */
+		isec->bv_index[2 * i + 1]   = i + 3 * isec->bv_index[2 * i + 1];
+	}
+
+#ifdef RT_USE_LAST_HIT
+	/* last hit heuristic */
+	if (isec->mode == RE_RAY_SHADOW && isec->last_hit) {  /* shadow rays first retry the object stored in isec->last_hit */
+		RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.test);
+
+		if (RE_rayobject_intersect(isec->last_hit, isec)) {
+			RE_RC_COUNT(isec->raycounter->raycast.hit);
+			RE_RC_COUNT(isec->raycounter->rayshadow_last_hit.hit);
+			return 1;
+		}
+	}
+#endif
+
+#ifdef RT_USE_HINT
+	isec->hit_hint = 0;
+#endif
+
+	if (RE_rayobject_intersect(r, isec)) {
+		RE_RC_COUNT(isec->raycounter->raycast.hit);
+
+#ifdef RT_USE_HINT
+		isec->hint = isec->hit_hint;
+#endif
+		return 1;
+	}
+
+	return 0;
+}
+
+int RE_rayobject_intersect(RayObject *r, Isect *i)  /* dispatches on the kind of r: RayFace, VlakPrimitive, or an API-backed object */
+{
+	if (RE_rayobject_isRayFace(r)) {
+		return intersect_rayface(r, (RayFace *) RE_rayobject_align(r), i);
+	}
+	else if (RE_rayobject_isVlakPrimitive(r)) {
+		//TODO optimize (useless copy to RayFace to avoid duplicate code)
+		VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r);
+		RayFace nface;  /* temporary RayFace built on the stack for this one test */
+		rayface_from_vlak(&nface, face->ob, face->face);
+
+		return intersect_rayface(r, &nface, i);
+	}
+	else if (RE_rayobject_isRayAPI(r)) {
+		r = RE_rayobject_align(r);
+		return r->api->raycast(r, i);
+	}
+	else {
+		assert(0);  /* none of the three known kinds */
+		return 0;
+	}
+}
+
+/* Building */
+
+void RE_rayobject_add(RayObject *r, RayObject *o)
+{
+	RayObject *target = RE_rayobject_align(r);  /* align() yields the pointer that can be dereferenced */
+	target->api->add(target, o);
+}
+
+void RE_rayobject_done(RayObject *r)  /* forwards to the object's api->done callback */
+{
+	r = RE_rayobject_align(r);
+	r->api->done(r);
+}
+
+void RE_rayobject_free(RayObject *r)  /* forwards to the object's api->free callback; r is not checked for RayAPI kind first */
+{
+	r = RE_rayobject_align(r);
+	r->api->free(r);
+}
+
+float RE_rayobject_cost(RayObject *r)
+{
+	/* plain faces have unit cost */
+	if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r))
+		return 1.0f;
+
+	/* API-backed objects report their own cost through the callback table */
+	if (RE_rayobject_isRayAPI(r)) {
+		RayObject *aligned = RE_rayobject_align(r);
+		return aligned->api->cost(aligned);
+	}
+	assert(0); /* none of the known object kinds */
+	return 1.0f;
+}
+
+/* Bounding Boxes: RE_rayobject_merge_bb() folds the bounds of r into the caller's min/max */
+
+void RE_rayobject_merge_bb(RayObject *r, float min[3], float max[3])
+{
+	if (RE_rayobject_isRayFace(r)) {
+		RayFace *face = (RayFace *) RE_rayobject_align(r);
+
+		DO_MINMAX(face->v1, min, max);
+		DO_MINMAX(face->v2, min, max);
+		DO_MINMAX(face->v3, min, max);
+		if (RE_rayface_isQuad(face)) DO_MINMAX(face->v4, min, max);
+	}
+	else if (RE_rayobject_isVlakPrimitive(r)) {
+		VlakPrimitive *face = (VlakPrimitive *) RE_rayobject_align(r);
+		RayFace nface;  /* temporary RayFace so the (possibly transformed) vertices can be read */
+		rayface_from_vlak(&nface, face->ob, face->face);
+
+		DO_MINMAX(nface.v1, min, max);
+		DO_MINMAX(nface.v2, min, max);
+		DO_MINMAX(nface.v3, min, max);
+		if (RE_rayface_isQuad(&nface)) DO_MINMAX(nface.v4, min, max);
+	}
+	else if (RE_rayobject_isRayAPI(r)) {
+		r = RE_rayobject_align(r);
+		r->api->bb(r, min, max);
+	}
+	else
+		assert(0);  /* none of the three known kinds */
+}
+
+/* Hints */
+
+void RE_rayobject_hint_bb(RayObject *r, RayHint *hint, float *min, float *max)
+{
+	/* plain faces contribute nothing to a hint */
+	if (RE_rayobject_isRayFace(r) || RE_rayobject_isVlakPrimitive(r))
+		return;
+	if (!RE_rayobject_isRayAPI(r)) {
+		assert(0);
+		return;
+	}
+	r = RE_rayobject_align(r);
+	r->api->hint_bb(r, hint, min, max);
+}
+
+/* RayObjectControl */
+
+int RE_rayobjectcontrol_test_break(RayObjectControl *control)
+{
+	/* without a callback installed the answer is always 0 */
+	if (!control->test_break)
+		return 0;
+	return control->test_break(control->data);
+}
+
+void RE_rayobject_set_control(RayObject *r, void *data, RE_rayobjectcontrol_test_break_callback test_break)  /* stores the callback and its data on r->control */
+{
+	if (RE_rayobject_isRayAPI(r)) {  /* objects of any other kind are silently ignored */
+		r = RE_rayobject_align(r);
+		r->control.data = data;
+		r->control.test_break = test_break;
+	}
+}
+
diff --git a/source/blender/render/intern/raytrace/rayobject_hint.h b/source/blender/render/intern/raytrace/rayobject_hint.h
new file mode 100644
index 00000000000..88a32819bd2
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_hint.h
@@ -0,0 +1,72 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_hint.h
+ *  \ingroup render
+ */
+
+
+#ifndef __RAYOBJECT_HINT_H__
+#define __RAYOBJECT_HINT_H__
+
+#define HINT_RECURSE     1
+#define HINT_ACCEPT      0
+#define HINT_DISCARD    -1
+/* Result codes of hint_test_bb(); in this header HINT_DISCARD is only returned by the disabled frustum variant. */
+struct HintBB {
+	float bb[6];  /* handed to bb_fits_inside() as two 3-float corners: bb and bb + 3 */
+};
+/* HINT_RECURSE when bb_fits_inside(Nmin, Nmax, bb, bb + 3) holds, HINT_ACCEPT otherwise. */
+inline int hint_test_bb(HintBB *obj, float *Nmin, float *Nmax)
+{
+	if (bb_fits_inside(Nmin, Nmax, obj->bb, obj->bb + 3) )
+		return HINT_RECURSE;
+	else
+		return HINT_ACCEPT;
+}
+#if 0
+struct HintFrustum {
+	float co[3];
+	float no[4][3];
+};
+
+inline int hint_test_bb(HintFrustum &obj, float *Nmin, float *Nmax)
+{
+	//if frustum inside BB
+	{
+		return HINT_RECURSE;
+	}
+	//if BB outside frustum
+	{
+		return HINT_DISCARD;
+	}
+
+	return HINT_ACCEPT;
+}
+#endif
+
+#endif /* __RAYOBJECT_HINT_H__ */
diff --git a/source/blender/render/intern/raytrace/rayobject_instance.cpp b/source/blender/render/intern/raytrace/rayobject_instance.cpp
new file mode 100644
index 00000000000..361e7963d96
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_instance.cpp
@@ -0,0 +1,211 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_instance.cpp
+ *  \ingroup render
+ */
+
+
+#include <assert.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+
+#define RE_COST_INSTANCE (1.0f)
+
+static int  RE_rayobject_instance_intersect(RayObject *o, Isect *isec);
+static void RE_rayobject_instance_free(RayObject *o);
+static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max);
+static float RE_rayobject_instance_cost(RayObject *o);
+
+static void RE_rayobject_instance_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint),
+                                          float *UNUSED(min), float *UNUSED(max))
+{}  /* intentionally empty: every argument is unused */
+/* Callback table installed on every instance object; the add and done slots are left NULL. */
+static RayObjectAPI instance_api =
+{
+	RE_rayobject_instance_intersect,  /* intersect */
+	NULL, //static void RE_rayobject_instance_add(RayObject *o, RayObject *ob);
+	NULL, //static void RE_rayobject_instance_done(RayObject *o);
+	RE_rayobject_instance_free,  /* free */
+	RE_rayobject_instance_bb,  /* bb */
+	RE_rayobject_instance_cost,  /* cost */
+	RE_rayobject_instance_hint_bb  /* hint_bb */
+};
+
+typedef struct InstanceRayObject {
+	RayObject rayobj;  /* first member: the callbacks cast RayObject* straight to InstanceRayObject* */
+	RayObject *target;  /* the RayObject this instance forwards intersect/bb/cost requests to */
+
+	void *ob; //Object represented by this instance
+	void *target_ob; //Object represented by the inner RayObject, needed to handle self-intersection
+
+	float global2target[4][4];  /* inverse of target2global, computed once at creation */
+	float target2global[4][4];  /* copy of the transform passed to RE_rayobject_instance_create() */
+
+} InstanceRayObject;
+
+
+RayObject *RE_rayobject_instance_create(RayObject *target, float transform[4][4], void *ob, void *target_ob)
+{
+	InstanceRayObject *inst = (InstanceRayObject *)MEM_callocN(sizeof(*inst), "InstanceRayObject");
+	assert(RE_rayobject_isAligned(inst));  /* the RayObject API relies on 4-byte aligned data */
+
+	copy_m4_m4(inst->target2global, transform);  /* keep the forward matrix ... */
+	invert_m4_m4(inst->global2target, inst->target2global);  /* ... and store its inverse next to it */
+
+	inst->target_ob = target_ob;
+	inst->ob = ob;
+	inst->target = target;
+	inst->rayobj.api = &instance_api;
+
+	return RE_rayobject_unalignRayAPI((RayObject *) inst);
+}
+
+static int  RE_rayobject_instance_intersect(RayObject *o, Isect *isec)
+{
+	InstanceRayObject *obj = (InstanceRayObject *)o;
+	float start[3], dir[3], idot_axis[3], dist;
+	int changed = 0, i, res;
+	/* Intersects the ray with obj->target in the target's own space, then puts isec back. */
+	// TODO - this is disabling self intersection on instances
+	if (isec->orig.ob == obj->ob && obj->ob) {
+		changed = 1;
+		isec->orig.ob = obj->target_ob;  /* undone near the end of this function */
+	}
+
+	// back up the ray fields that get overwritten below
+	copy_v3_v3(start, isec->start);
+	copy_v3_v3(dir, isec->dir);
+	copy_v3_v3(idot_axis, isec->idot_axis);
+	dist = isec->dist;
+
+	// transform the ray into the target's coordinate system
+	mul_m4_v3(obj->global2target, isec->start);
+	mul_mat3_m4_v3(obj->global2target, isec->dir);
+	isec->dist *= normalize_v3(isec->dir);  /* presumably the pre-normalisation length - TODO confirm */
+
+	// update idot_axis and bv_index for the transformed direction
+	for (i = 0; i < 3; i++) {
+		isec->idot_axis[i]        = 1.0f / isec->dir[i];
+		/* first a 0/1 flag per axis (1 when the inverse direction is negative) ... */
+		isec->bv_index[2 * i]     = isec->idot_axis[i] < 0.0f ? 1 : 0;
+		isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i];
+		/* ... then the flag is turned into the index i or i + 3 */
+		isec->bv_index[2 * i]     = i + 3 * isec->bv_index[2 * i];
+		isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1];
+	}
+
+	// Pre-calculate orientation for watertight intersection checks.
+	isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir);
+
+	// raycast against the wrapped object
+	res = RE_rayobject_intersect(obj->target, isec);
+
+	// map dist into original coordinate space
+	if (res == 0) {
+		isec->dist = dist;  /* miss: put the caller's distance back unchanged */
+	}
+	else {
+		// note we don't just multiply dist, because of possible
+		// non-uniform scaling in the transform matrix
+		float vec[3];
+
+		mul_v3_v3fl(vec, isec->dir, isec->dist);
+		mul_mat3_m4_v3(obj->target2global, vec);
+
+		isec->dist = len_v3(vec);
+		isec->hit.ob = obj->ob;  /* the hit is reported against the instance's object */
+
+#ifdef RT_USE_LAST_HIT
+		// TODO support for last hit optimization in instances that can jump
+		// directly to the last hit face.
+		// For now it jumps directly to the last-hit instance root node.
+		isec->last_hit = RE_rayobject_unalignRayAPI((RayObject *) obj);
+#endif
+	}
+
+	// restore the backed-up ray fields (dist was handled above)
+	copy_v3_v3(isec->start, start);
+	copy_v3_v3(isec->dir, dir);
+	copy_v3_v3(isec->idot_axis, idot_axis);
+
+	if (changed)
+		isec->orig.ob = obj->ob;
+
+	// restore bv_index by recomputing it from the restored idot_axis
+	for (i = 0; i < 3; i++) {
+		isec->bv_index[2 * i]     = isec->idot_axis[i] < 0.0f ? 1 : 0;
+		isec->bv_index[2 * i + 1] = 1 - isec->bv_index[2 * i];
+
+		isec->bv_index[2 * i]     = i + 3 * isec->bv_index[2 * i];
+		isec->bv_index[2 * i + 1] = i + 3 * isec->bv_index[2 * i + 1];
+	}
+
+	// Pre-calculate orientation for watertight intersection checks (again, for the restored direction).
+	isect_ray_tri_watertight_v3_precalc(&isec->isect_precalc, isec->dir);
+
+	return res;
+}
+
+static void RE_rayobject_instance_free(RayObject *o)
+{
+	/* Releases only the instance wrapper itself; the target pointer is not touched. */
+	MEM_freeN((InstanceRayObject *)o);
+}
+
+static float RE_rayobject_instance_cost(RayObject *o)
+{
+	/* Cost of the wrapped target plus the fixed RE_COST_INSTANCE term. */
+	return RE_rayobject_cost(((InstanceRayObject *)o)->target) + RE_COST_INSTANCE;
+}
+
+static void RE_rayobject_instance_bb(RayObject *o, float *min, float *max)
+{
+	/* Merges the target's local bounding box, mapped through target2global, into min/max.
+	 * TODO: compute a better-fitting box without rotating the local one, and maybe cache
+	 * it on the InstanceRayObject. */
+	InstanceRayObject *inst = (InstanceRayObject *)o;
+	float lo[3], hi[3];
+
+	INIT_MINMAX(lo, hi);
+	RE_rayobject_merge_bb(inst->target, lo, hi);
+
+	/* Visit all 8 box corners: bit `axis` of `corner` picks hi or lo on that axis. */
+	for (int corner = 0; corner < 8; corner++) {
+		float co[3];
+		for (int axis = 0; axis < 3; axis++) co[axis] = ((corner >> axis) & 1) ? hi[axis] : lo[axis];
+		mul_m4_v3(inst->target2global, co);
+		DO_MINMAX(co, min, max);
+	}
+}
+
diff --git a/source/blender/render/intern/raytrace/rayobject_octree.cpp b/source/blender/render/intern/raytrace/rayobject_octree.cpp
new file mode 100644
index 00000000000..4b73e64ca45
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_octree.cpp
@@ -0,0 +1,1101 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
+ * All rights reserved.
+ *
+ * Contributors: 2004/2005 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_octree.cpp
+ *  \ingroup render
+ */
+
+
+/* IMPORTANT NOTE: this code must be independent of any other render code
+ * to use it outside the renderer! */
+
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <float.h>
+#include <assert.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_material_types.h"
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+
+/* ********** structs *************** */
+#define BRANCH_ARRAY 1024
+#define NODE_ARRAY 4096
+
+typedef struct Branch {
+	struct Branch *b[8];  /* children; at the deepest level ocwrite()/ocread() keep Node pointers here */
+} Branch;
+
+typedef struct OcVal {
+	short ocx, ocy, ocz;  /* per-axis bit masks built with BROW16 */
+} OcVal;
+
+typedef struct Node {
+	struct RayFace *v[8];  /* faces of this cell, filled from index 0; NULL marks the first free slot */
+	struct OcVal ov[8];  /* ov[i] is the mask set stored together with v[i] */
+	struct Node *next;  /* overflow node, appended by ocwrite() once v[7] is taken */
+} Node;
+
+typedef struct Octree {
+	RayObject rayobj;  /* first member: the callbacks cast RayObject* straight to Octree* */
+
+	struct Branch **adrbranch;  /* BRANCH_ARRAY pointers to blocks of 4096 Branch, allocated on demand */
+	struct Node **adrnode;  /* NODE_ARRAY pointers to blocks of 4096 Node, allocated on demand */
+	float ocsize;   /* length of the bounding-box diagonal, set in RE_rayobject_octree_done() */
+	float ocfacx, ocfacy, ocfacz;  /* per-axis scale from world units to cell units */
+	float min[3], max[3];  /* bounds of all added faces, enlarged by 0.01 on every side */
+	int ocres;
+	int branchcount, nodecount;  /* pool slot most recently handed out by addbranch()/addnode() */
+
+	/* during building only */
+	char *ocface;
+
+	RayFace **ro_nodes;
+	int ro_nodes_size, ro_nodes_used;
+
+} Octree;
+
+static int  RE_rayobject_octree_intersect(RayObject *o, Isect *isec);
+static void RE_rayobject_octree_add(RayObject *o, RayObject *ob);
+static void RE_rayobject_octree_done(RayObject *o);
+static void RE_rayobject_octree_free(RayObject *o);
+static void RE_rayobject_octree_bb(RayObject *o, float *min, float *max);
+
+/*
+ * Constant placeholder cost. This function is not expected to be called by current code state.
+ */
+static float RE_rayobject_octree_cost(RayObject *UNUSED(o))
+{
+	return 1.0;
+}
+/* Hint callback stub: ignores every argument and does nothing. */
+static void RE_rayobject_octree_hint_bb(RayObject *UNUSED(o), RayHint *UNUSED(hint),
+                                        float *UNUSED(min), float *UNUSED(max))
+{
+	return;
+}
+/* Callback table installed on every octree by RE_rayobject_octree_create(). */
+static RayObjectAPI octree_api =
+{
+	RE_rayobject_octree_intersect,  /* intersect */
+	RE_rayobject_octree_add,  /* add */
+	RE_rayobject_octree_done,  /* done */
+	RE_rayobject_octree_free,  /* free */
+	RE_rayobject_octree_bb,  /* bb */
+	RE_rayobject_octree_cost,  /* cost */
+	RE_rayobject_octree_hint_bb  /* hint_bb */
+};
+
+/* **************** ocval method ******************* */
+/* Within one octree node, three bit masks (x, y, z) mark the sub-range a face or ray covers; testnode() ANDs them. */
+/* BROW16(min, max): bits min..max set. NOTE(review): max < -1 would shift by a negative count - confirm callers. */
+#define OCVALRES    15
+#define BROW16(min, max) \
+	(((max) >= OCVALRES ? 0xFFFF : (1 << ((max) + 1)) - 1) - (((min) > 0) ? ((1 << (min)) - 1) : 0))
+
+static void calc_ocval_face(float *v1, float *v2, float *v3, float *v4, short x, short y, short z, OcVal *ov)
+{
+	float min[3], max[3];
+	int ocmin, ocmax;
+	/* Bounds of the face; v4 may be NULL, in which case only three vertices are used. */
+	copy_v3_v3(min, v1);
+	copy_v3_v3(max, v1);
+	DO_MINMAX(v2, min, max);
+	DO_MINMAX(v3, min, max);
+	if (v4) {
+		DO_MINMAX(v4, min, max);
+	}
+	/* Per axis: offset from the cell origin (x, y, z), scaled by OCVALRES, turned into a bit range. */
+	ocmin = OCVALRES * (min[0] - x);
+	ocmax = OCVALRES * (max[0] - x);
+	ov->ocx = BROW16(ocmin, ocmax);
+
+	ocmin = OCVALRES * (min[1] - y);
+	ocmax = OCVALRES * (max[1] - y);
+	ov->ocy = BROW16(ocmin, ocmax);
+
+	ocmin = OCVALRES * (min[2] - z);
+	ocmax = OCVALRES * (max[2] - z);
+	ov->ocz = BROW16(ocmin, ocmax);
+
+}
+
+static void calc_ocval_ray(OcVal *ov, float xo, float yo, float zo, float *vec1, float *vec2)
+{
+	int ocmin, ocmax;
+	/* Per axis: order the two end points, offset by the cell origin, scale by OCVALRES, build the bit range. */
+	if (vec1[0] < vec2[0]) {
+		ocmin = OCVALRES * (vec1[0] - xo);
+		ocmax = OCVALRES * (vec2[0] - xo);
+	}
+	else {
+		ocmin = OCVALRES * (vec2[0] - xo);
+		ocmax = OCVALRES * (vec1[0] - xo);
+	}
+	ov->ocx = BROW16(ocmin, ocmax);  /* x mask */
+
+	if (vec1[1] < vec2[1]) {
+		ocmin = OCVALRES * (vec1[1] - yo);
+		ocmax = OCVALRES * (vec2[1] - yo);
+	}
+	else {
+		ocmin = OCVALRES * (vec2[1] - yo);
+		ocmax = OCVALRES * (vec1[1] - yo);
+	}
+	ov->ocy = BROW16(ocmin, ocmax);  /* y mask */
+
+	if (vec1[2] < vec2[2]) {
+		ocmin = OCVALRES * (vec1[2] - zo);
+		ocmax = OCVALRES * (vec2[2] - zo);
+	}
+	else {
+		ocmin = OCVALRES * (vec2[2] - zo);
+		ocmax = OCVALRES * (vec1[2] - zo);
+	}
+	ov->ocz = BROW16(ocmin, ocmax);  /* z mask */
+}
+
+/* ************* octree ************** */
+
+static Branch *addbranch(Octree *oc, Branch *br, short ocb)
+{
+	/* Returns child `ocb` of `br`, creating it from the next free slot of the branch pool if needed. */
+	if (br->b[ocb]) return br->b[ocb];
+
+	oc->branchcount++;
+	/* The pool is BRANCH_ARRAY blocks of 4096 branches. Test for exhaustion BEFORE indexing:
+	 * at the limit `branchcount >> 12` equals BRANCH_ARRAY, one past the end of adrbranch. */
+	if (oc->branchcount >= BRANCH_ARRAY * 4096) {
+		printf("error; octree branches full\n");
+		oc->branchcount = 0;  /* wraps onto slot 0 (the root): tree contents are unreliable from here */
+	}
+
+	const int index = oc->branchcount >> 12;
+	if (oc->adrbranch[index] == NULL)
+		oc->adrbranch[index] = (Branch *)MEM_callocN(4096 * sizeof(Branch), "new oc branch");
+
+	return br->b[ocb] = oc->adrbranch[index] + (oc->branchcount & 4095);
+}
+
+static Node *addnode(Octree *oc)
+{
+	/* Hands out the next slot of the node pool, allocating its 4096-node block on demand. */
+	oc->nodecount++;
+
+	/* adrnode holds NODE_ARRAY blocks of 4096 nodes. Test for exhaustion BEFORE indexing
+	 * and with >=: a count of exactly NODE_ARRAY * 4096 would already select block
+	 * NODE_ARRAY, one past the end of adrnode. */
+	if (oc->nodecount >= NODE_ARRAY * 4096) {
+		printf("error; octree nodes full\n");
+		oc->nodecount = 0;  /* wrap: slots handed out after this alias nodes already in use */
+	}
+	const int index = oc->nodecount >> 12;
+	if (oc->adrnode[index] == NULL)
+		oc->adrnode[index] = (Node *)MEM_callocN(4096 * sizeof(Node), "addnode");
+	return oc->adrnode[index] + (oc->nodecount & 4095);
+}
+
+static bool face_in_node(RayFace *face, short x, short y, short z, float rtf[4][3])
+{
+	static float nor[3], d;
+	float fx, fy, fz;
+	/* Two modes. NOTE(review): the cached plane lives in function statics, so calls are not re-entrant. */
+	// init mode (face != NULL): cache the plane through rtf[0..2] and return 0
+	if (face) {
+		normal_tri_v3(nor, rtf[0], rtf[1], rtf[2]);
+		d = -nor[0] * rtf[0][0] - nor[1] * rtf[0][1] - nor[2] * rtf[0][2];
+		return 0;
+	}
+	/* test mode (face == NULL): does the cached plane separate corners of the unit cell at (x, y, z)? */
+	fx = x;
+	fy = y;
+	fz = z;
+
+	if ((fx) * nor[0] + (fy) * nor[1] + (fz) * nor[2] + d > 0.0f) {
+		if ((fx + 1) * nor[0] + (fy    ) * nor[1] + (fz    ) * nor[2] + d < 0.0f) return 1;
+		if ((fx    ) * nor[0] + (fy + 1) * nor[1] + (fz    ) * nor[2] + d < 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz    ) * nor[2] + d < 0.0f) return 1;
+
+		if ((fx    ) * nor[0] + (fy    ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy    ) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1;
+		if ((fx    ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d < 0.0f) return 1;
+	}
+	else {
+		if ((fx + 1) * nor[0] + (fy    ) * nor[1] + (fz    ) * nor[2] + d > 0.0f) return 1;
+		if ((fx    ) * nor[0] + (fy + 1) * nor[1] + (fz    ) * nor[2] + d > 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz    ) * nor[2] + d > 0.0f) return 1;
+
+		if ((fx    ) * nor[0] + (fy    ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy    ) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1;
+		if ((fx    ) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1;
+		if ((fx + 1) * nor[0] + (fy + 1) * nor[1] + (fz + 1) * nor[2] + d > 0.0f) return 1;
+	}
+	/* no corner lies strictly on the other side of the plane than corner (x, y, z) */
+	return 0;
+}
+
+static void ocwrite(Octree *oc, RayFace *face, int quad, short x, short y, short z, float rtf[4][3])
+{
+	Branch *br;
+	Node *no;
+	short a, oc0, oc1, oc2, oc3, oc4, oc5;
+	/* Stores `face` in the node of cell (x, y, z), creating branches and nodes on the way down. */
+	x <<= 2;
+	y <<= 1;
+	/* after the shifts, one bit each of x, y and z adds up to a 3-bit child index per level */
+	br = oc->adrbranch[0];
+	/* extra top levels, taken only for the larger resolutions */
+	if (oc->ocres == 512) {
+		oc0 = ((x & 1024) + (y & 512) + (z & 256)) >> 8;
+		br = addbranch(oc, br, oc0);
+	}
+	if (oc->ocres >= 256) {
+		oc0 = ((x & 512) + (y & 256) + (z & 128)) >> 7;
+		br = addbranch(oc, br, oc0);
+	}
+	if (oc->ocres >= 128) {
+		oc0 = ((x & 256) + (y & 128) + (z & 64)) >> 6;
+		br = addbranch(oc, br, oc0);
+	}
+	/* the six levels that are always present */
+	oc0 = ((x & 128) + (y & 64) + (z & 32)) >> 5;
+	oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4;
+	oc2 = ((x & 32) + (y & 16) + (z & 8)) >> 3;
+	oc3 = ((x & 16) + (y & 8) + (z & 4)) >> 2;
+	oc4 = ((x & 8) + (y & 4) + (z & 2)) >> 1;
+	oc5 = ((x & 4) + (y & 2) + (z & 1));
+
+	br = addbranch(oc, br, oc0);
+	br = addbranch(oc, br, oc1);
+	br = addbranch(oc, br, oc2);
+	br = addbranch(oc, br, oc3);
+	br = addbranch(oc, br, oc4);
+	no = (Node *)br->b[oc5];  /* the last level stores a Node pointer in the Branch slot */
+	if (no == NULL) br->b[oc5] = (Branch *)(no = addnode(oc));
+	/* walk to the last node of the chain */
+	while (no->next) no = no->next;
+
+	a = 0;
+	if (no->v[7]) {     /* node full */
+		no->next = addnode(oc);
+		no = no->next;
+	}
+	else {
+		while (no->v[a] != NULL) a++;  /* first free slot */
+	}
+
+	no->v[a] = (RayFace *) RE_rayobject_align(face);
+	/* store the face's masks relative to the unshifted cell coordinates */
+	if (quad)
+		calc_ocval_face(rtf[0], rtf[1], rtf[2], rtf[3], x >> 2, y >> 1, z, &no->ov[a]);
+	else
+		calc_ocval_face(rtf[0], rtf[1], rtf[2], NULL, x >> 2, y >> 1, z, &no->ov[a]);
+}
+
+static void d2dda(Octree *oc, short b1, short b2, short c1, short c2, char *ocface, short rts[4][3], float rtf[4][3])
+{
+	int ocx1, ocx2, ocy1, ocy2;
+	int x, y, dx = 0, dy = 0;
+	float ox1, ox2, oy1, oy2;
+	float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy;
+	/* Marks, in the 2D grid `ocface` (index ocres * x + y), the cells along edge b1 -> b2 seen on axes c1/c2. */
+	ocx1 = rts[b1][c1];
+	ocy1 = rts[b1][c2];
+	ocx2 = rts[b2][c1];
+	ocy2 = rts[b2][c2];
+
+	if (ocx1 == ocx2 && ocy1 == ocy2) {
+		ocface[oc->ocres * ocx1 + ocy1] = 1;  /* both ends in one cell; written without a range test */
+		return;
+	}
+
+	ox1 = rtf[b1][c1];
+	oy1 = rtf[b1][c2];
+	ox2 = rtf[b2][c1];
+	oy2 = rtf[b2][c2];
+	/* per axis: lambda_* is the parameter of the next cell boundary, ld* the step per cell, d* the direction */
+	if (ox1 != ox2) {
+		if (ox2 - ox1 > 0.0f) {
+			lambda_x = (ox1 - ocx1 - 1.0f) / (ox1 - ox2);
+			ldx = -1.0f / (ox1 - ox2);
+			dx = 1;
+		}
+		else {
+			lambda_x = (ox1 - ocx1) / (ox1 - ox2);
+			ldx = 1.0f / (ox1 - ox2);
+			dx = -1;
+		}
+	}
+	else {
+		lambda_x = 1.0f;
+		ldx = 0;
+	}
+
+	if (oy1 != oy2) {
+		if (oy2 - oy1 > 0.0f) {
+			lambda_y = (oy1 - ocy1 - 1.0f) / (oy1 - oy2);
+			ldy = -1.0f / (oy1 - oy2);
+			dy = 1;
+		}
+		else {
+			lambda_y = (oy1 - ocy1) / (oy1 - oy2);
+			ldy = 1.0f / (oy1 - oy2);
+			dy = -1;
+		}
+	}
+	else {
+		lambda_y = 1.0f;
+		ldy = 0;
+	}
+
+	x = ocx1; y = ocy1;
+	lambda = MIN2(lambda_x, lambda_y);
+
+	while (true) {
+		/* mark the current cell, but only when it lies inside the grid */
+		if (x < 0 || y < 0 || x >= oc->ocres || y >= oc->ocres) {
+			/* outside the grid: skip */
+		}
+		else {
+			ocface[oc->ocres * x + y] = 1;
+		}
+
+		lambda_o = lambda;
+		if (lambda_x == lambda_y) {
+			lambda_x += ldx;
+			x += dx;
+			lambda_y += ldy;
+			y += dy;
+		}
+		else {
+			if (lambda_x < lambda_y) {
+				lambda_x += ldx;
+				x += dx;
+			}
+			else {
+				lambda_y += ldy;
+				y += dy;
+			}
+		}
+		lambda = MIN2(lambda_x, lambda_y);
+		if (lambda == lambda_o) break;  /* no progress any more */
+		if (lambda >= 1.0f) break;  /* reached the end of the edge */
+	}
+	ocface[oc->ocres * ocx2 + ocy2] = 1;  /* end cell; NOTE(review): written without the range test used in the loop */
+}
+
+static void filltriangle(Octree *oc, short c1, short c2, char *ocface, short *ocmin, short *ocmax)
+{
+	int a, x, y, y1, y2;
+	/* For every row x in [ocmin[c1], ocmax[c1]]: fill the gap between the first run of set cells and the last set cell. */
+	for (x = ocmin[c1]; x <= ocmax[c1]; x++) {
+		a = oc->ocres * x;
+		for (y = ocmin[c2]; y <= ocmax[c2]; y++) {
+			if (ocface[a + y]) {
+				y++;
+				while (ocface[a + y] && y != ocmax[c2]) y++;  /* NOTE(review): y may already be ocmax[c2] + 1 here */
+				for (y1 = ocmax[c2]; y1 > y; y1--) {
+					if (ocface[a + y1]) {
+						for (y2 = y; y2 <= y1; y2++) ocface[a + y2] = 1;
+						y1 = 0;  /* ends the downward search */
+					}
+				}
+				y = ocmax[c2];  /* one fill per row: this ends the y loop */
+			}
+		}
+	}
+}
+
+static void RE_rayobject_octree_free(RayObject *tree)
+{
+	/* Releases every buffer owned by the octree, then the Octree itself. */
+	Octree *oc = (Octree *)tree;
+
+	if (oc->ocface)
+		MEM_freeN(oc->ocface);
+
+	/* ro_nodes is normally released by RE_rayobject_octree_done(); free it here as well so a
+	 * tree that is destroyed before being built does not leak the queued face list. */
+	if (oc->ro_nodes)
+		MEM_freeN(oc->ro_nodes);
+
+	if (oc->adrbranch) {
+		/* The first NULL ends the list; the bound keeps the scan inside a completely full table. */
+		int a = 0;
+		while (a < BRANCH_ARRAY && oc->adrbranch[a]) {
+			MEM_freeN(oc->adrbranch[a]);
+			oc->adrbranch[a] = NULL;
+			a++;
+		}
+		MEM_freeN(oc->adrbranch);
+		oc->adrbranch = NULL;
+	}
+	oc->branchcount = 0;
+
+	if (oc->adrnode) {
+		int a = 0;
+		while (a < NODE_ARRAY && oc->adrnode[a]) {
+			MEM_freeN(oc->adrnode[a]);
+			oc->adrnode[a] = NULL;
+			a++;
+		}
+		MEM_freeN(oc->adrnode);
+		oc->adrnode = NULL;
+	}
+	oc->nodecount = 0;
+	MEM_freeN(oc);
+}
+
+
+RayObject *RE_rayobject_octree_create(int ocres, int size)
+{
+	/* Builds an empty octree of resolution `ocres` with room to queue `size` faces.
+	 * The branch and node tables are only allocated later, by the `done` callback. */
+	Octree *octree = (Octree *)MEM_callocN(sizeof(*octree), "Octree");
+	assert(RE_rayobject_isAligned(octree));  /* the RayObject API relies on 4-byte aligned data */
+
+	/* Face queue, filled by the `add` callback. */
+	octree->ro_nodes_used = 0;
+	octree->ro_nodes_size = size;
+	octree->ro_nodes = (RayFace **)MEM_callocN(sizeof(RayFace *) * size, "octree rayobject nodes");
+
+	octree->ocres = ocres;
+	octree->rayobj.api = &octree_api;
+	return RE_rayobject_unalignRayAPI((RayObject *) octree);
+}
+
+
+static void RE_rayobject_octree_add(RayObject *tree, RayObject *node)
+{
+	/* Queues one face in ro_nodes; both preconditions are checked by assert only. */
+	Octree *octree = (Octree *)tree;
+	assert(RE_rayobject_isRayFace(node));
+	assert(octree->ro_nodes_used < octree->ro_nodes_size);
+	octree->ro_nodes[octree->ro_nodes_used++] = (RayFace *)RE_rayobject_align(node);
+}
+
+static void octree_fill_rayface(Octree *oc, RayFace *face)
+{
+	float ocfac[3], rtf[4][3];
+	float co1[3], co2[3], co3[3], co4[3];
+	short rts[4][3];
+	short ocmin[3], ocmax[3];
+	char *ocface = oc->ocface;   // front, top, side view of face, to fill in
+	int a, b, c, oc1, oc2, oc3, oc4, x, y, z, ocres2;
+	/* Writes `face` into every octree cell it is found to touch. */
+	ocfac[0] = oc->ocfacx;
+	ocfac[1] = oc->ocfacy;
+	ocfac[2] = oc->ocfacz;
+
+	ocres2 = oc->ocres * oc->ocres;
+
+	copy_v3_v3(co1, face->v1);
+	copy_v3_v3(co2, face->v2);
+	copy_v3_v3(co3, face->v3);
+	if (RE_rayface_isQuad(face))
+		copy_v3_v3(co4, face->v4);
+	/* vertices in cell units: rtf keeps the float value, rts the value truncated to short */
+	for (c = 0; c < 3; c++) {
+		rtf[0][c] = (co1[c] - oc->min[c]) * ocfac[c];
+		rts[0][c] = (short)rtf[0][c];
+		rtf[1][c] = (co2[c] - oc->min[c]) * ocfac[c];
+		rts[1][c] = (short)rtf[1][c];
+		rtf[2][c] = (co3[c] - oc->min[c]) * ocfac[c];
+		rts[2][c] = (short)rtf[2][c];
+		if (RE_rayface_isQuad(face)) {
+			rtf[3][c] = (co4[c] - oc->min[c]) * ocfac[c];
+			rts[3][c] = (short)rtf[3][c];
+		}
+	}
+	/* cell range of the face per axis, clamped to [0, ocres - 1] */
+	for (c = 0; c < 3; c++) {
+		oc1 = rts[0][c];
+		oc2 = rts[1][c];
+		oc3 = rts[2][c];
+		if (!RE_rayface_isQuad(face)) {
+			ocmin[c] = min_iii(oc1, oc2, oc3);
+			ocmax[c] = max_iii(oc1, oc2, oc3);
+		}
+		else {
+			oc4 = rts[3][c];
+			ocmin[c] = min_iiii(oc1, oc2, oc3, oc4);
+			ocmax[c] = max_iiii(oc1, oc2, oc3, oc4);
+		}
+		if (ocmax[c] > oc->ocres - 1) ocmax[c] = oc->ocres - 1;
+		if (ocmin[c] < 0) ocmin[c] = 0;
+	}
+
+	if (ocmin[0] == ocmax[0] && ocmin[1] == ocmax[1] && ocmin[2] == ocmax[2]) {
+		ocwrite(oc, face, RE_rayface_isQuad(face), ocmin[0], ocmin[1], ocmin[2], rtf);  /* face fits in one cell */
+	}
+	else {
+		/* draw every edge into three 2D grids: x-z at ocface, x-y at + ocres2, y-z at + 2 * ocres2 */
+		d2dda(oc, 0, 1, 0, 1, ocface + ocres2, rts, rtf);
+		d2dda(oc, 0, 1, 0, 2, ocface, rts, rtf);
+		d2dda(oc, 0, 1, 1, 2, ocface + 2 * ocres2, rts, rtf);
+		d2dda(oc, 1, 2, 0, 1, ocface + ocres2, rts, rtf);
+		d2dda(oc, 1, 2, 0, 2, ocface, rts, rtf);
+		d2dda(oc, 1, 2, 1, 2, ocface + 2 * ocres2, rts, rtf);
+		if (!RE_rayface_isQuad(face)) {
+			d2dda(oc, 2, 0, 0, 1, ocface + ocres2, rts, rtf);
+			d2dda(oc, 2, 0, 0, 2, ocface, rts, rtf);
+			d2dda(oc, 2, 0, 1, 2, ocface + 2 * ocres2, rts, rtf);
+		}
+		else {
+			d2dda(oc, 2, 3, 0, 1, ocface + ocres2, rts, rtf);
+			d2dda(oc, 2, 3, 0, 2, ocface, rts, rtf);
+			d2dda(oc, 2, 3, 1, 2, ocface + 2 * ocres2, rts, rtf);
+			d2dda(oc, 3, 0, 0, 1, ocface + ocres2, rts, rtf);
+			d2dda(oc, 3, 0, 0, 2, ocface, rts, rtf);
+			d2dda(oc, 3, 0, 1, 2, ocface + 2 * ocres2, rts, rtf);
+		}
+		/* despite the name this is not triangle specific: it fills the drawn outlines */
+		filltriangle(oc, 0, 1, ocface + ocres2, ocmin, ocmax);
+		filltriangle(oc, 0, 2, ocface, ocmin, ocmax);
+		filltriangle(oc, 1, 2, ocface + 2 * ocres2, ocmin, ocmax);
+
+		/* init static vars here */
+		face_in_node(face, 0, 0, 0, rtf);
+		/* a cell gets the face when it is set in all three grids and face_in_node() accepts it */
+		for (x = ocmin[0]; x <= ocmax[0]; x++) {
+			a = oc->ocres * x;
+			for (y = ocmin[1]; y <= ocmax[1]; y++) {
+				if (ocface[a + y + ocres2]) {
+					b = oc->ocres * y + 2 * ocres2;
+					for (z = ocmin[2]; z <= ocmax[2]; z++) {
+						if (ocface[b + z] && ocface[a + z]) {
+							if (face_in_node(NULL, x, y, z, rtf))
+								ocwrite(oc, face, RE_rayface_isQuad(face), x, y, z, rtf);
+						}
+					}
+				}
+			}
+		}
+
+		/* same loops to clear the three grids again, doubt it can be done smarter */
+		for (x = ocmin[0]; x <= ocmax[0]; x++) {
+			a = oc->ocres * x;
+			for (y = ocmin[1]; y <= ocmax[1]; y++) {
+				/* x-y */
+				ocface[a + y + ocres2] = 0;
+
+				b = oc->ocres * y + 2 * ocres2;
+				for (z = ocmin[2]; z <= ocmax[2]; z++) {
+					/* y-z */
+					ocface[b + z] = 0;
+					/* x-z */
+					ocface[a + z] = 0;
+				}
+			}
+		}
+	}
+}
+
+static void RE_rayobject_octree_done(RayObject *tree)
+{
+	Octree *oc = (Octree *)tree;
+	int c;
+	float t00, t01, t02;
+	int ocres2 = oc->ocres * oc->ocres;
+	/* Builds the octree from the faces queued in ro_nodes, then releases the build-only buffers. */
+	INIT_MINMAX(oc->min, oc->max);
+
+	/* Calculate Bounding Box */
+	for (c = 0; c < oc->ro_nodes_used; c++)
+		RE_rayobject_merge_bb(RE_rayobject_unalignRayFace(oc->ro_nodes[c]), oc->min, oc->max);
+
+	/* Alloc memory */
+	oc->adrbranch = (Branch **)MEM_callocN(sizeof(void *) * BRANCH_ARRAY, "octree branches");
+	oc->adrnode = (Node **)MEM_callocN(sizeof(void *) * NODE_ARRAY, "octree nodes");
+
+	oc->adrbranch[0] = (Branch *)MEM_callocN(4096 * sizeof(Branch), "makeoctree");  /* its first entry is the root */
+
+	/* the lookup table, per face, for which nodes to fill in */
+	oc->ocface = (char *)MEM_callocN(3 * ocres2 + 8, "ocface");
+	memset(oc->ocface, 0, 3 * ocres2);  /* NOTE(review): presumably redundant after MEM_callocN - confirm */
+
+	for (c = 0; c < 3; c++) { /* octree enlarge, still needed? */
+		oc->min[c] -= 0.01f;
+		oc->max[c] += 0.01f;
+	}
+
+	t00 = oc->max[0] - oc->min[0];
+	t01 = oc->max[1] - oc->min[1];
+	t02 = oc->max[2] - oc->min[2];
+
+	/* this minus 0.1 is old safety... seems to be needed? */
+	oc->ocfacx = (oc->ocres - 0.1f) / t00;
+	oc->ocfacy = (oc->ocres - 0.1f) / t01;
+	oc->ocfacz = (oc->ocres - 0.1f) / t02;
+
+	oc->ocsize = sqrtf(t00 * t00 + t01 * t01 + t02 * t02);  /* global, max size octree */
+	/* insert every queued face */
+	for (c = 0; c < oc->ro_nodes_used; c++) {
+		octree_fill_rayface(oc, oc->ro_nodes[c]);
+	}
+	/* build-only buffers are not needed once the tree is filled */
+	MEM_freeN(oc->ocface);
+	oc->ocface = NULL;
+	MEM_freeN(oc->ro_nodes);
+	oc->ro_nodes = NULL;
+
+#if 0
+	printf("%f %f - %f\n", oc->min[0], oc->max[0], oc->ocfacx);
+	printf("%f %f - %f\n", oc->min[1], oc->max[1], oc->ocfacy);
+	printf("%f %f - %f\n", oc->min[2], oc->max[2], oc->ocfacz);
+#endif
+}
+
+static void RE_rayobject_octree_bb(RayObject *tree, float *min, float *max)
+{
+	/* Merge both stored corners of the octree bounds into the caller's min/max. */
+	DO_MINMAX(((Octree *)tree)->min, min, max);
+	DO_MINMAX(((Octree *)tree)->max, min, max);
+}
+
+/* check all faces in this node chain; returns 1 when a face was hit, 0 otherwise */
+static int testnode(Octree *UNUSED(oc), Isect *is, Node *no, OcVal ocval)
+{
+	short nr = 0;
+
+	/* return on any first hit */
+	if (is->mode == RE_RAY_SHADOW) {
+
+		for (; no; no = no->next) {
+			for (nr = 0; nr < 8; nr++) {
+				RayFace *face = no->v[nr];
+				OcVal     *ov = no->ov + nr;
+
+				if (!face) break;  /* slots are filled from 0, so the first NULL ends this node */
+				/* only faces whose masks overlap the ray's masks on all three axes are intersected */
+				if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) {
+					if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) )
+						return 1;
+				}
+			}
+		}
+	}
+	else {
+		/* every other mode (mirror, glass, shadowtra): test all faces and report whether any was hit */
+		int found = 0;
+
+		for (; no; no = no->next) {
+			for (nr = 0; nr < 8; nr++) {
+				RayFace *face = no->v[nr];
+				OcVal     *ov = no->ov + nr;
+
+				if (!face) break;
+
+				if ( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) {
+					if (RE_rayobject_intersect(RE_rayobject_unalignRayFace(face), is) ) {
+						found = 1;
+					}
+				}
+			}
+		}
+
+		return found;
+	}
+
+	return 0;
+}
+
+/* find the Node for the octree coord x y z; NULL when any branch on the path is missing */
+static Node *ocread(Octree *oc, int x, int y, int z)
+{
+	Branch *br;
+	int oc1;
+	/* same bit layout as ocwrite(): after the shifts one bit of x, y and z forms each child index */
+	x <<= 2;
+	y <<= 1;
+
+	br = oc->adrbranch[0];
+	/* extra top levels, present only for the larger resolutions */
+	if (oc->ocres == 512) {
+		oc1 = ((x & 1024) + (y & 512) + (z & 256)) >> 8;
+		br = br->b[oc1];
+		if (br == NULL) {
+			return NULL;
+		}
+	}
+	if (oc->ocres >= 256) {
+		oc1 = ((x & 512) + (y & 256) + (z & 128)) >> 7;
+		br = br->b[oc1];
+		if (br == NULL) {
+			return NULL;
+		}
+	}
+	if (oc->ocres >= 128) {
+		oc1 = ((x & 256) + (y & 128) + (z & 64)) >> 6;
+		br = br->b[oc1];
+		if (br == NULL) {
+			return NULL;
+		}
+	}
+	/* the six levels that always exist */
+	oc1 = ((x & 128) + (y & 64) + (z & 32)) >> 5;
+	br = br->b[oc1];
+	if (br) {
+		oc1 = ((x & 64) + (y & 32) + (z & 16)) >> 4;
+		br = br->b[oc1];
+		if (br) {
+			oc1 = ((x & 32) + (y & 16) + (z & 8)) >> 3;
+			br = br->b[oc1];
+			if (br) {
+				oc1 = ((x & 16) + (y & 8) + (z & 4)) >> 2;
+				br = br->b[oc1];
+				if (br) {
+					oc1 = ((x & 8) + (y & 4) + (z & 2)) >> 1;
+					br = br->b[oc1];
+					if (br) {
+						oc1 = ((x & 4) + (y & 2) + (z & 1));
+						return (Node *)br->b[oc1];  /* the last level holds Node pointers (may be NULL) */
+					}
+				}
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int cliptest(float p, float q, float *u1, float *u2)
+{
+	/* One clip step against a single boundary described by p and q: may raise *u1 or lower
+	 * *u2. Returns 0 when the step rejects the segment, 1 otherwise. */
+	if (p < 0.0f) {
+		if (q < p) return 0;
+		if (q < 0.0f) {
+			const float ratio = q / p;
+			if (ratio > *u2) return 0;
+			if (ratio > *u1) *u1 = ratio;
+		}
+		return 1;
+	}
+	if (p > 0.0f) {
+		if (q < 0.0f) return 0;
+		if (q < p) {
+			const float ratio = q / p;
+			if (ratio < *u1) return 0;
+			if (ratio < *u2) *u2 = ratio;
+		}
+		return 1;
+	}
+	/* p is neither negative nor positive: only the sign of q decides. */
+	return (q < 0.0f) ? 0 : 1;
+}
+
+/* extensive coherence checks/storage cancels out the benefit of it, and gives errors... we
+ * need better methods, sample code commented out below (ton) */
+
+#if 0
+
+in top : static int coh_nodes[16 * 16 * 16][6];
+in makeoctree : memset(coh_nodes, 0, sizeof(coh_nodes));
+
+static void add_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
+{
+	short *sp;
+
+	sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)];
+	sp[0] = ocx1; sp[1] = ocy1; sp[2] = ocz1;
+	sp[3] = ocx2; sp[4] = ocy2; sp[5] = ocz2;
+
+}
+
+static int do_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
+{
+	short *sp;
+
+	sp = coh_nodes[(ocx2 & 15) + 16 * (ocy2 & 15) + 256 * (ocz2 & 15)];
+	if (sp[0] == ocx1 && sp[1] == ocy1 && sp[2] == ocz1 &&
+	    sp[3] == ocx2 && sp[4] == ocy2 && sp[5] == ocz2) return 1;
+	return 0;
+}
+
+#endif
+
+/* Walk the ray in `is` through the octree cell by cell; return 1: found valid intersection, 0 otherwise. */
+/* starts with is->orig.face (original note); the ray runs from is->start along is->dir over length is->dist */
+static int RE_rayobject_octree_intersect(RayObject *tree, Isect *is)
+{
+	Octree *oc = (Octree *)tree;
+	Node *no;
+	OcVal ocval;
+	float vec1[3], vec2[3], start[3], end[3];
+	float u1, u2, ox1, ox2, oy1, oy2, oz1, oz2;
+	float lambda_o, lambda_x, ldx, lambda_y, ldy, lambda_z, ldz, dda_lambda;
+	float o_lambda = 0;
+	int dx, dy, dz;
+	int xo, yo, zo, c1 = 0;
+	int ocx1, ocx2, ocy1, ocy2, ocz1, ocz2;
+
+	/* nothing to intersect when the octree has no branches */
+	if (oc->branchcount == 0) return 0;
+
+	/* do this before intersect calls */
+#if 0
+	is->facecontr = NULL;                /* to check shared edge */
+	is->obcontr = 0;
+	is->faceisect = is->isect = 0;        /* shared edge, quad half flag */
+	is->userdata = oc->userdata;
+#endif
+
+	copy_v3_v3(start, is->start);
+	madd_v3_v3v3fl(end, is->start, is->dir, is->dist);
+	ldx = is->dir[0] * is->dist;
+	o_lambda = is->dist; /* ray length as it was on entry, used for the distance test in the loop below */
+	u1 = 0.0f;
+	u2 = 1.0f;
+
+	/* clip with octree cube: six cliptest() calls (min and max side per axis) share u1/u2; c1 is only set when all pass */
+	if (cliptest(-ldx, start[0] - oc->min[0], &u1, &u2)) {
+		if (cliptest(ldx, oc->max[0] - start[0], &u1, &u2)) {
+			ldy = is->dir[1] * is->dist;
+			if (cliptest(-ldy, start[1] - oc->min[1], &u1, &u2)) {
+				if (cliptest(ldy, oc->max[1] - start[1], &u1, &u2)) {
+					ldz = is->dir[2] * is->dist;
+					if (cliptest(-ldz, start[2] - oc->min[2], &u1, &u2)) {
+						if (cliptest(ldz, oc->max[2] - start[2], &u1, &u2)) {
+							c1 = 1;
+							if (u2 < 1.0f) {
+								end[0] = start[0] + u2 * ldx;
+								end[1] = start[1] + u2 * ldy;
+								end[2] = start[2] + u2 * ldz;
+							}
+
+							if (u1 > 0.0f) {
+								start[0] += u1 * ldx;
+								start[1] += u1 * ldy;
+								start[2] += u1 * ldz;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	if (c1 == 0) return 0;
+
+	/* reset static variables in ocread */
+	//ocread(oc, oc->ocres, 0, 0);
+
+	/* setup 3dda to traverse octree: clipped start/end scaled to octree cell units, then truncated to cell indices */
+	ox1 = (start[0] - oc->min[0]) * oc->ocfacx;
+	oy1 = (start[1] - oc->min[1]) * oc->ocfacy;
+	oz1 = (start[2] - oc->min[2]) * oc->ocfacz;
+	ox2 = (end[0] - oc->min[0]) * oc->ocfacx;
+	oy2 = (end[1] - oc->min[1]) * oc->ocfacy;
+	oz2 = (end[2] - oc->min[2]) * oc->ocfacz;
+
+	ocx1 = (int)ox1;
+	ocy1 = (int)oy1;
+	ocz1 = (int)oz1;
+	ocx2 = (int)ox2;
+	ocy2 = (int)oy2;
+	ocz2 = (int)oz2;
+
+	if (ocx1 == ocx2 && ocy1 == ocy2 && ocz1 == ocz2) {
+		no = ocread(oc, ocx1, ocy1, ocz1);
+		if (no) {
+			/* exact intersection with node */
+			vec1[0] = ox1; vec1[1] = oy1; vec1[2] = oz1;
+			vec2[0] = ox2; vec2[1] = oy2; vec2[2] = oz2;
+			calc_ocval_ray(&ocval, (float)ocx1, (float)ocy1, (float)ocz1, vec1, vec2);
+			if (testnode(oc, is, no, ocval) ) return 1;
+		}
+	}
+	else {
+		int found = 0;
+		//static int coh_ocx1, coh_ocx2, coh_ocy1, coh_ocy2, coh_ocz1, coh_ocz2;
+		float dox, doy, doz;
+		int eqval;
+
+		/* calc lambda and ld: per axis the first crossing (lambda_*), the increment per cell (ld*) and the step direction (d*) */
+		dox = ox1 - ox2;
+		doy = oy1 - oy2;
+		doz = oz1 - oz2;
+
+		if (dox < -FLT_EPSILON) {
+			ldx = -1.0f / dox;
+			lambda_x = (ocx1 - ox1 + 1.0f) * ldx;
+			dx = 1;
+		}
+		else if (dox > FLT_EPSILON) {
+			ldx = 1.0f / dox;
+			lambda_x = (ox1 - ocx1) * ldx;
+			dx = -1;
+		}
+		else {
+			lambda_x = 1.0f;
+			ldx = 0;
+			dx = 0;
+		}
+
+		if (doy < -FLT_EPSILON) {
+			ldy = -1.0f / doy;
+			lambda_y = (ocy1 - oy1 + 1.0f) * ldy;
+			dy = 1;
+		}
+		else if (doy > FLT_EPSILON) {
+			ldy = 1.0f / doy;
+			lambda_y = (oy1 - ocy1) * ldy;
+			dy = -1;
+		}
+		else {
+			lambda_y = 1.0f;
+			ldy = 0;
+			dy = 0;
+		}
+
+		if (doz < -FLT_EPSILON) {
+			ldz = -1.0f / doz;
+			lambda_z = (ocz1 - oz1 + 1.0f) * ldz;
+			dz = 1;
+		}
+		else if (doz > FLT_EPSILON) {
+			ldz = 1.0f / doz;
+			lambda_z = (oz1 - ocz1) * ldz;
+			dz = -1;
+		}
+		else {
+			lambda_z = 1.0f;
+			ldz = 0;
+			dz = 0;
+		}
+
+		xo = ocx1; yo = ocy1; zo = ocz1;
+		dda_lambda = min_fff(lambda_x, lambda_y, lambda_z);
+
+		vec2[0] = ox1;
+		vec2[1] = oy1;
+		vec2[2] = oz1;
+
+		/* this loop has been constructed to make sure the first and last node of ray
+		 * are always included, even when dda_lambda==1.0f or larger */
+
+		while (true) {
+
+			no = ocread(oc, xo, yo, zo);
+			if (no) {
+
+				/* calculate ray intersection with octree node */
+				copy_v3_v3(vec1, vec2);
+				// dox, y, z is negative
+				vec2[0] = ox1 - dda_lambda * dox;
+				vec2[1] = oy1 - dda_lambda * doy;
+				vec2[2] = oz1 - dda_lambda * doz;
+				calc_ocval_ray(&ocval, (float)xo, (float)yo, (float)zo, vec1, vec2);
+
+				//is->dist = (u1 + dda_lambda * (u2 - u1)) * o_lambda;
+				if (testnode(oc, is, no, ocval) )
+					found = 1;
+				/* is->dist below the ray distance matching dda_lambda: return what was found so far */
+				if (is->dist < (u1 + dda_lambda * (u2 - u1)) * o_lambda)
+					return found;
+			}
+
+
+			lambda_o = dda_lambda;
+
+			/* traversing octree nodes need careful detection of smallest values, with proper
+			 * exceptions for equal lambdas */
+			eqval = (lambda_x == lambda_y);
+			if (lambda_y == lambda_z) eqval += 2;
+			if (lambda_x == lambda_z) eqval += 4;
+
+			if (eqval) {    // only 4 cases exist!
+				if (eqval == 7) { // x=y=z
+					xo += dx; lambda_x += ldx;
+					yo += dy; lambda_y += ldy;
+					zo += dz; lambda_z += ldz;
+				}
+				else if (eqval == 1) { // x=y
+					if (lambda_y < lambda_z) {
+						xo += dx; lambda_x += ldx;
+						yo += dy; lambda_y += ldy;
+					}
+					else {
+						zo += dz; lambda_z += ldz;
+					}
+				}
+				else if (eqval == 2) { // y=z
+					if (lambda_x < lambda_y) {
+						xo += dx; lambda_x += ldx;
+					}
+					else {
+						yo += dy; lambda_y += ldy;
+						zo += dz; lambda_z += ldz;
+					}
+				}
+				else { // x=z
+					if (lambda_y < lambda_x) {
+						yo += dy; lambda_y += ldy;
+					}
+					else {
+						xo += dx; lambda_x += ldx;
+						zo += dz; lambda_z += ldz;
+					}
+				}
+			}
+			else {  // all three different, just three cases exist
+				eqval = (lambda_x < lambda_y);
+				if (lambda_y < lambda_z) eqval += 2;
+				if (lambda_x < lambda_z) eqval += 4;
+
+				if (eqval == 7 || eqval == 5) { // x smallest
+					xo += dx; lambda_x += ldx;
+				}
+				else if (eqval == 2 || eqval == 6) { // y smallest
+					yo += dy; lambda_y += ldy;
+				}
+				else { // z smallest
+					zo += dz; lambda_z += ldz;
+				}
+
+			}
+
+			dda_lambda = min_fff(lambda_x, lambda_y, lambda_z);
+			if (dda_lambda == lambda_o) break;
+			/* to make sure the last node is always checked */
+			if (lambda_o >= 1.0f) break;
+		}
+	}
+
+	/* reached end, no intersections found */
+	return 0;
+}
+
+
+
diff --git a/source/blender/render/intern/raytrace/rayobject_qbvh.cpp b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp
new file mode 100644
index 00000000000..8e3dd87efd1
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_qbvh.cpp
@@ -0,0 +1,160 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_qbvh.cpp
+ *  \ingroup render
+ */
+
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_utildefines.h"
+
+#include "vbvh.h"
+#include "svbvh.h"
+#include "reorganize.h"
+
+#ifdef __SSE__
+
+#define DFS_STACK_SIZE  256
+
+struct QBVHTree {
+	RayObject rayobj; /* first member; its `control` field is what bvh_done() passes to the break test */
+	/* root and node_arena are assigned together by bvh_done() */
+	SVBVHNode *root; /* reorganized tree, or NULL when the build produced no root */
+	MemArena *node_arena; /* the arena that was handed to Reorganize_SVBVH in bvh_done() */
+
+	float cost; /* set to 1.0 by bvh_done() */
+	RTBuilder *builder; /* consumed by bvh_done(), which frees it and sets this to NULL */
+};
+
+
+template<>
+void bvh_done<QBVHTree>(QBVHTree *obj)
+{
+	/* Finalize the tree: sort the builder's primitives, build a binary VBVH in a
+	 * scratch arena and convert it into the node layout stored in obj->root.
+	 * On a break request both arenas are freed and the builder is left in place. */
+	rtbuild_done(obj->builder, &obj->rayobj.control);
+
+	//TODO find a way to exactly calculate the needed memory
+	/* scratch arena: only used for the intermediate binary tree */
+	MemArena *scratch_arena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena");
+	BLI_memarena_use_malloc(scratch_arena);
+
+	/* final arena: configured with an alignment of 16, kept in obj->node_arena */
+	MemArena *final_arena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "qbvh arena 2");
+	BLI_memarena_use_malloc(final_arena);
+	BLI_memarena_use_align(final_arena, 16);
+
+	//TODO do this in 1 pass (half memory usage during building)
+	VBVHNode *binary_root = BuildBinaryVBVH<VBVHNode>(scratch_arena, &obj->rayobj.control).transform(obj->builder);
+	if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) {
+		BLI_memarena_free(scratch_arena);
+		BLI_memarena_free(final_arena);
+		return;
+	}
+
+	obj->root = NULL;
+	if (binary_root) {
+		pushup_simd<VBVHNode, 4>(binary_root);
+		obj->root = Reorganize_SVBVH<VBVHNode>(final_arena).transform(binary_root);
+	}
+
+	BLI_memarena_free(scratch_arena);
+	obj->node_arena = final_arena;
+	obj->cost = 1.0;
+
+	rtbuild_free(obj->builder);
+	obj->builder = NULL;
+}
+
+template<int StackSize>
+static int intersect(QBVHTree *obj, Isect *isec)
+{
+	//TODO re-enable hint support
+	/* A root that is not reported as aligned goes through the generic ray-object call. */
+	if (!RE_rayobject_isAligned(obj->root))
+		return RE_rayobject_intersect((RayObject *)obj->root, isec);
+
+	/* The second template argument is a compile-time flag, true only for shadow rays. */
+	if (isec->mode == RE_RAY_SHADOW)
+		return svbvh_node_stack_raycast<StackSize, true>(obj->root, isec);
+	return svbvh_node_stack_raycast<StackSize, false>(obj->root, isec);
+}
+
+template<class Tree>
+static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max))
+{
+	//TODO re-enable hint support
+	/* For now the hint always consists of the tree root alone; the box
+	 * arguments are not looked at. */
+	hint->stack[0] = (RayObject *)tree->root;
+	hint->size = 1;
+}
+/* Builds the callback table for Tree; the cast to pointer function is needed to work around gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */
+template<class Tree, int STACK_SIZE>
+static RayObjectAPI make_api()
+{
+	static RayObjectAPI api =
+	{
+		(RE_rayobject_raycast_callback) ((int   (*)(Tree *, Isect *)) & intersect<STACK_SIZE>),
+		(RE_rayobject_add_callback)     ((void  (*)(Tree *, RayObject *)) & bvh_add<Tree>),
+		(RE_rayobject_done_callback)    ((void  (*)(Tree *))       & bvh_done<Tree>),
+		(RE_rayobject_free_callback)    ((void  (*)(Tree *))       & bvh_free<Tree>),
+		(RE_rayobject_merge_bb_callback)((void  (*)(Tree *, float *, float *)) & bvh_bb<Tree>),
+		(RE_rayobject_cost_callback)    ((float (*)(Tree *))      & bvh_cost<Tree>),
+		(RE_rayobject_hint_bb_callback) ((void  (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>)
+	};
+	/* returned by value: the caller receives a copy of the static table */
+	return api;
+}
+
+template<class Tree>
+RayObjectAPI *bvh_get_api(int maxstacksize)
+{
+	/* Only one table exists, instantiated with a stack size of 1024; larger requests assert and yield NULL. */
+	static RayObjectAPI bvh_api1024 = make_api<Tree, 1024>();
+	if (maxstacksize <= 1024) return &bvh_api1024;
+	assert(maxstacksize <= 1024);
+	return NULL;
+}
+/* Public entry point: forwards `size` to bvh_create_tree() instantiated for QBVHTree with DFS_STACK_SIZE. */
+RayObject *RE_rayobject_qbvh_create(int size)
+{
+	return bvh_create_tree<QBVHTree, DFS_STACK_SIZE>(size);
+}
+
+#else
+/* Variant compiled when __SSE__ is not defined: prints a warning and returns NULL instead of a tree. */
+RayObject *RE_rayobject_qbvh_create(int UNUSED(size))
+{
+	puts("WARNING: SSE disabled at compile time\n"); /* puts() appends a newline after the embedded one */
+	return NULL;
+}
+
+#endif
diff --git a/source/blender/render/intern/raytrace/rayobject_raycounter.cpp b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp
new file mode 100644
index 00000000000..429c47f1c0f
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_raycounter.cpp
@@ -0,0 +1,91 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_raycounter.cpp
+ *  \ingroup render
+ */
+
+
+#include "rayobject.h"
+#include "raycounter.h"
+
+#ifdef RE_RAYCOUNTER
+/* Print the counters in `info` to stdout: raw totals first, then averages obtained by dividing by raycast.test. */
+void RE_RC_INFO(RayCounter *info)
+{
+	printf("----------- Raycast counter --------\n");
+	printf("Rays total: %llu\n", info->raycast.test);
+	printf("Rays hit: %llu\n", info->raycast.hit);
+	printf("\n");
+	printf("BB tests: %llu\n", info->bb.test);
+	printf("BB hits: %llu\n", info->bb.hit);
+	printf("\n");
+	printf("SIMD BB tests: %llu\n", info->simd_bb.test);
+	printf("SIMD BB hits: %llu\n", info->simd_bb.hit);
+	printf("\n");
+	printf("Primitives tests: %llu\n", info->faces.test);
+	printf("Primitives hits: %llu\n", info->faces.hit);
+	printf("------------------------------------\n");
+	printf("Shadow last-hit tests per ray: %f\n", info->rayshadow_last_hit.test / (float)info->raycast.test);
+	printf("Shadow last-hit hits per ray: %f\n", info->rayshadow_last_hit.hit / (float)info->raycast.test);
+	printf("\n");
+	printf("Hint tests per ray: %f\n", info->raytrace_hint.test / (float)info->raycast.test);
+	printf("Hint hits per ray: %f\n", info->raytrace_hint.hit / (float)info->raycast.test);
+	printf("\n");
+	printf("BB tests per ray: %f\n", info->bb.test / (float)info->raycast.test);
+	printf("BB hits per ray: %f\n", info->bb.hit / (float)info->raycast.test);
+	printf("\n");
+	printf("SIMD tests per ray: %f\n", info->simd_bb.test / (float)info->raycast.test);
+	printf("SIMD hits per ray: %f\n", info->simd_bb.hit / (float)info->raycast.test);
+	printf("\n");
+	printf("Primitives tests per ray: %f\n", info->faces.test / (float)info->raycast.test);
+	printf("Primitives hits per ray: %f\n", info->faces.hit / (float)info->raycast.test);
+	printf("------------------------------------\n");
+}
+
+void RE_RC_MERGE(RayCounter *dest, RayCounter *tmp)
+{
+	/* Add every test/hit counter pair of `tmp` onto the matching pair of `dest`.
+	 * `tmp` is only read. The helper macro below exists for this function alone
+	 * and is undefined again before the function ends. */
+#define RC_ACCUMULATE(counter) \
+	do { \
+		dest->counter.test += tmp->counter.test; \
+		dest->counter.hit  += tmp->counter.hit; \
+	} while (0)
+
+	RC_ACCUMULATE(faces);
+	RC_ACCUMULATE(bb);
+	RC_ACCUMULATE(simd_bb);
+	RC_ACCUMULATE(raycast);
+	RC_ACCUMULATE(rayshadow_last_hit);
+	RC_ACCUMULATE(raytrace_hint);
+
+#undef RC_ACCUMULATE
+}
+
+#endif
diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp
new file mode 100644
index 00000000000..51f89784674
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.cpp
@@ -0,0 +1,531 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_rtbuild.cpp
+ *  \ingroup render
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <algorithm>
+
+#if __cplusplus >= 201103L
+#include <cmath>
+using std::isfinite;
+#else
+#include <math.h>
+#endif
+
+#include "rayobject_rtbuild.h"
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+/* Predicate: true for objects whose `selected` flag is non-zero. */
+static bool selected_node(RTBuilder::Object *node)
+{
+	return node->selected != 0;
+}
+
+static void rtbuild_init(RTBuilder *b)
+{
+	/* Reset `b` to an empty builder: no split axis, no primitive storage, depth 0,
+	 * no per-axis lists, and the box in its INIT_MINMAX start state. */
+	b->split_axis = -1;
+	b->depth = 0;
+	b->primitives.begin = b->primitives.end = NULL;
+	b->primitives.maxsize = 0;
+
+	/* child_offset has RTBUILD_MAX_CHILDS + 1 entries; clear the last one as well */
+	for (int child = 0; child <= RTBUILD_MAX_CHILDS; child++)
+		b->child_offset[child] = 0;
+	for (int axis = 0; axis < 3; axis++)
+		b->sorted_begin[axis] = b->sorted_end[axis] = NULL;
+	INIT_MINMAX(b->bb, b->bb + 3);
+}
+
+RTBuilder *rtbuild_create(int size)
+{
+	/* Allocates the builder, `size` Object slots and, per axis, an array of `size`
+	 * Object pointers. Every list starts empty (begin == end); the matching
+	 * release call is rtbuild_free(). */
+	RTBuilder *builder = (RTBuilder *) MEM_mallocN(sizeof(RTBuilder), "RTBuilder");
+	RTBuilder::Object *objects = (RTBuilder::Object *)MEM_mallocN(sizeof(RTBuilder::Object) * size, "RTBuilder.objects");
+
+	rtbuild_init(builder);
+	builder->primitives.maxsize = size;
+	builder->primitives.begin = objects;
+	builder->primitives.end = objects;
+
+	for (int axis = 0; axis < 3; axis++) {
+		RTBuilder::Object **refs = (RTBuilder::Object **)MEM_mallocN(sizeof(RTBuilder::Object *) * size, "RTBuilder.sorted_objects");
+		builder->sorted_begin[axis] = builder->sorted_end[axis] = refs;
+	}
+	return builder;
+}
+
+void rtbuild_free(RTBuilder *b)
+{
+	/* Frees the primitive storage, every per-axis array that exists, and finally the builder itself. */
+	if (b->primitives.begin != NULL)
+		MEM_freeN(b->primitives.begin);
+	for (int axis = 0; axis < 3; axis++)
+		if (b->sorted_begin[axis] != NULL)
+			MEM_freeN(b->sorted_begin[axis]);
+	MEM_freeN(b);
+}
+
+void rtbuild_add(RTBuilder *b, RayObject *o)
+{
+	RTBuilder::Object *slot = b->primitives.end;
+	float box[6];
+
+	/* there must still be a free slot in the primitive storage */
+	assert(b->primitives.begin + b->primitives.maxsize != slot);
+	INIT_MINMAX(box, box + 3);
+	RE_rayobject_merge_bb(o, box, box + 3);
+
+	/* Objects are silently dropped when their bounding box is unusable:
+	 * - min above max on some axis (nan makes DO_MINMAX do nothing, so the init
+	 *   values remain; unusual, but bugs earlier in the pipeline can cause it)
+	 * - any non-finite coordinate
+	 * - a zero-size box, which is of no use and would give problems in
+	 *   rtbuild_heuristic_object_split later */
+	if (box[0] > box[3] || box[1] > box[4] || box[2] > box[5])
+		return;
+	for (int i = 0; i < 6; i++) {
+		if (!isfinite(box[i]))
+			return;
+	}
+	if (box[0] == box[3] && box[1] == box[4] && box[2] == box[5])
+		return;
+
+	copy_v3_v3(slot->bb, box);
+	copy_v3_v3(slot->bb + 3, box + 3);
+	slot->obj = o;
+	slot->cost = RE_rayobject_cost(o);
+
+	/* append the new object to each of the per-axis lists */
+	for (int axis = 0; axis < 3; axis++)
+		*(b->sorted_end[axis]++) = slot;
+	b->primitives.end = slot + 1;
+}
+/* Number of entries currently in this builder's axis-0 list. */
+int rtbuild_size(RTBuilder *b)
+{
+	return (int)(b->sorted_end[0] - b->sorted_begin[0]);
+}
+
+
+/* Ordering used for sorting along Axis: by bb[Axis] first, entries with equal bb[Axis] by their `obj` pointer. */
+template<class Obj, int Axis>
+static bool obj_bb_compare(const Obj &a, const Obj &b)
+{
+	const bool differ = (a->bb[Axis] != b->bb[Axis]);
+	return differ ? (a->bb[Axis] < b->bb[Axis]) : (a->obj < b->obj);
+}
+/* Sort [begin, end) with the comparator instantiated for `axis` (0, 1 or 2); any other axis trips the assert. */
+template<class Item>
+static void object_sort(Item *begin, Item *end, int axis)
+{
+	if (axis == 0)      std::sort(begin, end, obj_bb_compare<Item, 0>);
+	else if (axis == 1) std::sort(begin, end, obj_bb_compare<Item, 1>);
+	else if (axis == 2) std::sort(begin, end, obj_bb_compare<Item, 2>);
+	else                assert(false);
+}
+/* Sort each existing per-axis list along its own axis; stops as soon as a break is requested through ctrl. */
+void rtbuild_done(RTBuilder *b, RayObjectControl *ctrl)
+{
+	for (int axis = 0; axis < 3; axis++) {
+		if (!b->sorted_begin[axis]) continue;
+		if (RE_rayobjectcontrol_test_break(ctrl))
+			return;
+		object_sort(b->sorted_begin[axis], b->sorted_end[axis], axis);
+	}
+}
+/* Ray object referenced by entry `index` of the axis-0 list; `index` is not range checked. */
+RayObject *rtbuild_get_primitive(RTBuilder *b, int index)
+{
+	return (*(b->sorted_begin[0] + index))->obj;
+}
+
+RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp)
+{
+	/* Fills `tmp` with a view on partition `child` of `b` and returns `tmp`.
+	 * Nothing is copied: the per-axis ranges of `tmp` point into the arrays of `b`,
+	 * delimited by child_offset[child] and child_offset[child + 1]. */
+	rtbuild_init(tmp);
+	tmp->depth = b->depth + 1;
+	for (int axis = 0; axis < 3; axis++) {
+		RTBuilder::Object **base = b->sorted_begin[axis];
+		if (base == NULL) {
+			tmp->sorted_begin[axis] = tmp->sorted_end[axis] = NULL;
+			continue;
+		}
+		tmp->sorted_begin[axis] = base + b->child_offset[child];
+		tmp->sorted_end[axis] = base + b->child_offset[child + 1];
+	}
+	return tmp;
+}
+
+static void rtbuild_calc_bb(RTBuilder *b)
+{
+	/* bb[0] == 1.0e30f is treated as "box not computed yet" (presumably the INIT_MINMAX start value - confirm) */
+	if (b->bb[0] != 1.0e30f) return;
+	for (RTBuilder::Object **it = b->sorted_begin[0]; it != b->sorted_end[0]; it++)
+		RE_rayobject_merge_bb((*it)->obj, b->bb, b->bb + 3);
+}
+/* Computes the builder box when needed, then applies DO_MIN/DO_MAX with it to min/max (presumably growing them - confirm against the macros). */
+void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3])
+{
+	rtbuild_calc_bb(b);
+	DO_MIN(b->bb, min);
+	DO_MAX(b->bb + 3, max);
+}
+
+#if 0
+int rtbuild_get_largest_axis(RTBuilder *b)
+{
+	rtbuild_calc_bb(b);
+	return bb_largest_axis(b->bb, b->bb + 3);
+}
+/* NOTE: everything up to the matching #endif is compiled out; split_leafs() used below is disabled as well */
+//Left balanced tree: children get leaf counts derived from powers of nchilds, filled from the left
+int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis)
+{
+	int i;
+	int mleafs_per_child, Mleafs_per_child;
+	int tot_leafs  = rtbuild_size(b);
+	int missing_leafs;
+
+	long long s;
+
+	assert(nchilds <= RTBUILD_MAX_CHILDS);
+	//s = first power of nchilds (starting at nchilds itself) that is not below tot_leafs
+	//TODO optimize calc of leafs_per_child
+	for (s = nchilds; s < tot_leafs; s *= nchilds) ;
+	Mleafs_per_child = s / nchilds;
+	mleafs_per_child = Mleafs_per_child / nchilds;
+
+	//split min leafs per child
+	b->child_offset[0] = 0;
+	for (i = 1; i <= nchilds; i++)
+		b->child_offset[i] = mleafs_per_child;
+
+	//split remaining leafs
+	missing_leafs = tot_leafs - mleafs_per_child * nchilds;
+	for (i = 1; i <= nchilds; i++)
+	{
+		if (missing_leafs > Mleafs_per_child - mleafs_per_child)
+		{
+			b->child_offset[i] += Mleafs_per_child - mleafs_per_child;
+			missing_leafs -= Mleafs_per_child - mleafs_per_child;
+		}
+		else {
+			b->child_offset[i] += missing_leafs;
+			missing_leafs = 0;
+			break;
+		}
+	}
+
+	//adjust for accumulative offsets
+	for (i = 1; i <= nchilds; i++)
+		b->child_offset[i] += b->child_offset[i - 1];
+
+	//Count created childs
+	for (i = nchilds; b->child_offset[i] == b->child_offset[i - 1]; i--) ;
+	split_leafs(b, b->child_offset, i, axis);
+
+	assert(b->child_offset[0] == 0 && b->child_offset[i] == tot_leafs);
+	return i;
+}
+
+/* mean split along the largest axis of the builder's bounding box */
+int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds)
+{
+	int axis = rtbuild_get_largest_axis(b);
+	return rtbuild_mean_split(b, nchilds, axis);
+}
+#endif
+
+/*
+ * "separators" is an array of NCHILDS-1 values
+ * that indicate where to cut the children (disabled code, kept for reference)
+ */
+#if 0
+int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis)
+{
+	int size = rtbuild_size(b);
+
+	assert(nchilds <= RTBUILD_MAX_CHILDS);
+	if (size <= nchilds)
+	{
+		return rtbuild_mean_split(b, nchilds, axis);
+	}
+	else {
+		int i;
+
+		b->split_axis = axis;
+
+		//Calculate child offsets
+		b->child_offset[0] = 0;
+		for (i = 0; i < nchilds - 1; i++)
+			b->child_offset[i + 1] = split_leafs_by_plane(b, b->child_offset[i], size, separators[i]);
+		b->child_offset[nchilds] = size;
+		//fall back to the mean split when a single child would receive every leaf
+		for (i = 0; i < nchilds; i++)
+			if (b->child_offset[i + 1] - b->child_offset[i] == size)
+				return rtbuild_mean_split(b, nchilds, axis);
+
+		return nchilds;
+	}
+}
+/* picks the largest axis of the builder box and passes nchilds - 1 evenly spaced separators to rtbuild_median_split() */
+int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds)
+{
+	int la, i;
+	float separators[RTBUILD_MAX_CHILDS];
+
+	rtbuild_calc_bb(b);
+
+	la = bb_largest_axis(b->bb, b->bb + 3);
+	for (i = 1; i < nchilds; i++)
+		separators[i - 1] = (b->bb[la + 3] - b->bb[la]) * i / nchilds;
+
+	return rtbuild_median_split(b, separators, nchilds, la);
+}
+#endif
+
+//Heuristics Object Splitter
+
+
+struct SweepCost {
+	float bb[6]; /* accumulated box of a run of objects: min in 0..2, max in 3..5 */
+	float cost; /* summed cost of the objects in that run */
+};
+
+/* Object Surface Area Heuristic splitter: fills b->child_offset[], reorders the sorted lists, returns the child count (1 or 2) */
+int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
+{
+	int size = rtbuild_size(b);
+	assert(nchilds == 2);
+	assert(size > 1);
+	int baxis = -1, boffset = 0;
+	/* NOTE(review): size == 1 is still handled further down, which only matters when asserts are compiled out */
+	if (size > nchilds) {
+		if (b->depth > RTBUILD_MAX_SAH_DEPTH) {
+			// for degenerate cases we avoid running out of stack space
+			// by simply splitting the children in the middle
+			b->child_offset[0] = 0;
+			b->child_offset[1] = (size+1)/2;
+			b->child_offset[2] = size;
+			return 2;
+		}
+
+		float bcost = FLT_MAX;
+		baxis = -1;
+		boffset = size / 2;
+
+		SweepCost *sweep = (SweepCost *)MEM_mallocN(sizeof(SweepCost) * size, "RTBuilder.HeuristicSweep");
+		/* sweep[i] accumulates box and cost of objects i..size-1; the array is refilled for every axis */
+		for (int axis = 0; axis < 3; axis++) {
+			SweepCost sweep_left;
+
+			RTBuilder::Object **obj = b->sorted_begin[axis];
+
+//			float right_cost = 0;
+			for (int i = size - 1; i >= 0; i--) {
+				if (i == size - 1) {
+					copy_v3_v3(sweep[i].bb, obj[i]->bb);
+					copy_v3_v3(sweep[i].bb + 3, obj[i]->bb + 3);
+					sweep[i].cost = obj[i]->cost;
+				}
+				else {
+					sweep[i].bb[0] = min_ff(obj[i]->bb[0], sweep[i + 1].bb[0]);
+					sweep[i].bb[1] = min_ff(obj[i]->bb[1], sweep[i + 1].bb[1]);
+					sweep[i].bb[2] = min_ff(obj[i]->bb[2], sweep[i + 1].bb[2]);
+					sweep[i].bb[3] = max_ff(obj[i]->bb[3], sweep[i + 1].bb[3]);
+					sweep[i].bb[4] = max_ff(obj[i]->bb[4], sweep[i + 1].bb[4]);
+					sweep[i].bb[5] = max_ff(obj[i]->bb[5], sweep[i + 1].bb[5]);
+					sweep[i].cost  = obj[i]->cost + sweep[i + 1].cost;
+				}
+//				right_cost += obj[i]->cost;
+			}
+			/* sweep_left accumulates box and cost of objects 0..i-1 while i advances in the loop below */
+			sweep_left.bb[0] = obj[0]->bb[0];
+			sweep_left.bb[1] = obj[0]->bb[1];
+			sweep_left.bb[2] = obj[0]->bb[2];
+			sweep_left.bb[3] = obj[0]->bb[3];
+			sweep_left.bb[4] = obj[0]->bb[4];
+			sweep_left.bb[5] = obj[0]->bb[5];
+			sweep_left.cost  = obj[0]->cost;
+
+//			right_cost -= obj[0]->cost;	if (right_cost < 0) right_cost = 0;
+
+			for (int i = 1; i < size; i++) {
+				//Worst case heuristic (cost of each child is linear)
+				float hcost, left_side, right_side;
+
+				// not using log seems to have no impact on raytracing perf, but
+				// makes tree construction quicker, left out for now to test (brecht)
+				// left_side  = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost + logf((float)i));
+				// right_side = bb_area(sweep[i].bb,   sweep[i].bb   + 3) * (sweep[i].cost   + logf((float)size - i));
+				left_side = bb_area(sweep_left.bb, sweep_left.bb + 3) * (sweep_left.cost);
+				right_side = bb_area(sweep[i].bb, sweep[i].bb + 3) * (sweep[i].cost);
+				hcost = left_side + right_side;
+
+				assert(left_side >= 0);
+				assert(right_side >= 0);
+
+				if (left_side > bcost) break;   //No way we can find a better heuristic in this axis
+
+				assert(hcost >= 0);
+				// this makes sure the tree built is the same whatever is the order of the sorting axis
+				if (hcost < bcost || (hcost == bcost && axis < baxis)) {
+					bcost = hcost;
+					baxis = axis;
+					boffset = i;
+				}
+				DO_MIN(obj[i]->bb,   sweep_left.bb);
+				DO_MAX(obj[i]->bb + 3, sweep_left.bb + 3);
+
+				sweep_left.cost += obj[i]->cost;
+//				right_cost -= obj[i]->cost; if (right_cost < 0) right_cost = 0;
+			}
+
+			//assert(baxis >= 0 && baxis < 3);
+			if (!(baxis >= 0 && baxis < 3))
+				baxis = 0;
+		}
+
+
+		MEM_freeN(sweep);
+	}
+	else if (size == 2) {
+		baxis = 0;
+		boffset = 1;
+	}
+	else if (size == 1) {
+		b->child_offset[0] = 0;
+		b->child_offset[1] = 1;
+		return 1;
+	}
+
+	b->child_offset[0] = 0;
+	b->child_offset[1] = boffset;
+	b->child_offset[2] = size;
+
+
+	/* Adjust sorted arrays for childs: flag the left part on the best axis, then partition every axis so flagged objects come first */
+	for (int i = 0; i < boffset; i++) b->sorted_begin[baxis][i]->selected = true;
+	for (int i = boffset; i < size; i++) b->sorted_begin[baxis][i]->selected = false;
+	for (int i = 0; i < 3; i++)
+		std::stable_partition(b->sorted_begin[i], b->sorted_end[i], selected_node);
+
+	return nchilds;
+}
+
+/*
+ * Helper code (disabled)
+ * PARTITION code / used on mean-split
+ * basically this is a std::nth_element call per partition (as in the C++ STL algorithms)
+ */
+#if 0
+static void split_leafs(RTBuilder *b, int *nth, int partitions, int split_axis)
+{
+	int i;
+	b->split_axis = split_axis;
+
+	for (i = 0; i < partitions - 1; i++)
+	{
+		assert(nth[i] < nth[i + 1] && nth[i + 1] < nth[partitions]);
+
+		if (split_axis == 0) std::nth_element(b, nth[i],  nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 0>);
+		if (split_axis == 1) std::nth_element(b, nth[i],  nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 1>);
+		if (split_axis == 2) std::nth_element(b, nth[i],  nth[i + 1], nth[partitions], obj_bb_compare<RTBuilder::Object, 2>);
+	}
+}
+#endif
+
+/* Bounding Box utils */
+/* bb_volume: product of the three extents (max - min); not clamped, so an inverted box can give a negative value. */
+float bb_volume(const float min[3], const float max[3])
+{
+	const float dx = max[0] - min[0], dy = max[1] - min[1], dz = max[2] - min[2];
+	return dx * dy * dz;
+}
+
+float bb_area(const float min[3], const float max[3])
+{
+	/* Surface area of the box: 2 * (dx*dy + dx*dz + dy*dz).
+	 * An assert() on negative results used to live here; such results are most
+	 * likely overflow or -ffast-math artifacts, so they are clamped to 0.0f. */
+	const float dx = max[0] - min[0];
+	const float dy = max[1] - min[1];
+	const float dz = max[2] - min[2];
+	const float area = (dx * dy + dx * dz + dy * dz) * 2.0f;
+	if (area < 0.0f)
+		return 0.0f;
+	return area;
+}
+
+int bb_largest_axis(const float min[3], const float max[3])
+{
+	/* Index (0 = x, 1 = y, 2 = z) of the axis with the largest extent (max - min).
+	 * All comparisons are strict, so ties fall through: x == y continues with
+	 * y against z, and z wins whenever the x/y winner is not strictly larger. */
+	const float ex = max[0] - min[0];
+	const float ey = max[1] - min[1];
+	const float ez = max[2] - min[2];
+	int axis;
+
+	if (ex > ey) {
+		/* x beats y: the answer is x or z */
+		axis = (ex > ez) ? 0 : 2;
+	}
+	else {
+		/* x does not beat y: the answer is y or z */
+		axis = (ey > ez) ? 1 : 2;
+	}
+	return axis;
+}
+
+/* Returns 1 when the inner box stays within the outer box on all three axes (touching counts), i.e. it
+ * only returns 0 if merging inner and outer box would create a box larger than the outer box. */
+int bb_fits_inside(const float outer_min[3], const float outer_max[3],
+                   const float inner_min[3], const float inner_max[3])
+{
+	for (int axis = 0; axis < 3; axis++) {
+		const int below = outer_min[axis] > inner_min[axis];
+		const int above = outer_max[axis] < inner_max[axis];
+		if (below || above)
+			return 0;
+	}
+	return 1;
+}
diff --git a/source/blender/render/intern/raytrace/rayobject_rtbuild.h b/source/blender/render/intern/raytrace/rayobject_rtbuild.h
new file mode 100644
index 00000000000..fc42bc36d92
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_rtbuild.h
@@ -0,0 +1,125 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_rtbuild.h
+ *  \ingroup render
+ */
+
+#ifndef __RAYOBJECT_RTBUILD_H__
+#define __RAYOBJECT_RTBUILD_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rayobject.h"
+
+
+/*
+ * Ray Tree Builder
+ *	this struct helps building any type of tree.
+ *	It contains several methods to organize/split nodes,
+ *	allowing a given tree to be created on the fly.
+ *
+ * The idea is that other trees (BVH, BIH) can use this code to
+ * generate their layout with simple calls, and then convert it to
+ * their own specific structure on the fly.
+ */
+#define RTBUILD_MAX_CHILDS     32
+#define RTBUILD_MAX_SAH_DEPTH  256
+
+
+typedef struct RTBuilder {
+	struct Object {
+		RayObject *obj; /* the ray object this entry stands for */
+		float cost; /* filled from RE_rayobject_cost() in rtbuild_add() */
+		float bb[6]; /* bounding box: min in 0..2, max in 3..5 */
+		int selected; /* scratch flag written by the heuristic split to partition the sorted lists */
+	};
+
+	/* storage of all primitives added in this tree: [begin, end) is in use, maxsize is the capacity */
+	struct {
+		Object *begin, *end;
+		int maxsize;
+	} primitives;
+
+	/* per axis a list of pointers to the objects, sorted along that axis by rtbuild_done() */
+	struct Object **sorted_begin[3], **sorted_end[3];
+
+	/* axis used (if any) on the split method, -1 after init */
+	int split_axis;
+
+	/* child partitions calculated during splitting: child i covers [child_offset[i], child_offset[i + 1]) */
+	int child_offset[RTBUILD_MAX_CHILDS + 1];
+
+//	int child_sorted_axis; /* -1 if not sorted */
+	/* bounding box of the builder: min in 0..2, max in 3..5, computed on demand by rtbuild_merge_bb() */
+	float bb[6];
+
+	/* current depth: 0 after init, parent depth + 1 for builders made by rtbuild_get_child() */
+	int depth;
+} RTBuilder;
+
+/* used during creation */
+RTBuilder *rtbuild_create(int size);
+void rtbuild_free(RTBuilder *b);
+void rtbuild_add(RTBuilder *b, RayObject *o);
+void rtbuild_done(RTBuilder *b, RayObjectControl *c);
+void rtbuild_merge_bb(RTBuilder *b, float min[3], float max[3]);
+int rtbuild_size(RTBuilder *b);
+
+RayObject *rtbuild_get_primitive(RTBuilder *b, int offset);
+
+/* used during tree reorganization */
+RTBuilder *rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp);
+
+/* Calculates child partitions and returns the number of effectively needed partitions */
+int rtbuild_get_largest_axis(RTBuilder *b);
+
+//Object partition
+int rtbuild_mean_split(RTBuilder *b, int nchilds, int axis);
+int rtbuild_mean_split_largest_axis(RTBuilder *b, int nchilds);
+
+int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds);
+
+//Space partition
+int rtbuild_median_split(RTBuilder *b, float *separators, int nchilds, int axis);
+int rtbuild_median_split_largest_axis(RTBuilder *b, int nchilds);
+
+
+/* bb utils */
+float bb_area(const float min[3], const float max[3]);
+float bb_volume(const float min[3], const float max[3]);
+int bb_largest_axis(const float min[3], const float max[3]);
+int bb_fits_inside(const float  outer_min[3], const float  outer_max[3],
+                   const float  inner_min[3], const float  inner_max[3]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __RAYOBJECT_RTBUILD_H__ */
diff --git a/source/blender/render/intern/raytrace/rayobject_svbvh.cpp b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp
new file mode 100644
index 00000000000..fcd692fac02
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_svbvh.cpp
@@ -0,0 +1,192 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_svbvh.cpp
+ *  \ingroup render
+ */
+
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_utildefines.h"
+
+#include "vbvh.h"
+#include "svbvh.h"
+#include "reorganize.h"
+
+#ifdef __SSE__
+
+#define DFS_STACK_SIZE  256
+
+struct SVBVHTree {
+	RayObject rayobj;  /* embedded ray object; bvh_done() hands rayobj.control to the builder */
+	/* results of bvh_done(): root of the packed tree and the arena its nodes were allocated from */
+	SVBVHNode *root;
+	MemArena *node_arena;
+	/* bvh_done() sets cost to 1.0, then frees the builder and resets it to NULL */
+	float cost;
+	RTBuilder *builder;
+};
+
+/* Cost to test N childs: one per complete group of four, plus the remainder
+ * (a remainder of three is charged as a single test, like a complete group). */
+struct PackCost {
+	float operator()(int n)
+	{
+		const int rest = n % 4;
+		return (n / 4) + (rest > 2 ? 1 : rest);
+	}
+};
+
+
+template<>
+void bvh_done<SVBVHTree>(SVBVHTree *obj)
+{
+	rtbuild_done(obj->builder, &obj->rayobj.control);
+	/* Finishes the tree: a temporary tree is built in arena1, then packed into SVBVH nodes allocated from arena2. */
+	//TODO find a way to exactly calculate the needed memory
+	MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena");
+	BLI_memarena_use_malloc(arena1);
+	/* arena2 outlives this function (it becomes obj->node_arena) and is set up for 16 byte alignment */
+	MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "svbvh arena2");
+	BLI_memarena_use_malloc(arena2);
+	BLI_memarena_use_align(arena2, 16);
+
+	//Build and optimize the tree
+	if (0) {  /* disabled alternative pipeline, kept for reference */
+		VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder);
+
+		if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) {
+			BLI_memarena_free(arena1);
+			BLI_memarena_free(arena2);
+			return;
+		}
+		/* NOTE(review): root is used here without the NULL check the enabled branch performs */
+		reorganize(root);
+		remove_useless(root, &root);
+		bvh_refit(root);
+
+		pushup(root);
+		pushdown(root);
+		pushup_simd<VBVHNode, 4>(root);
+
+		obj->root = Reorganize_SVBVH<VBVHNode>(arena2).transform(root);
+	}
+	else {
+		//Finds the optimal packing of this tree using a given cost model
+		//TODO this uses quite a lot of memory, find ways to reduce memory usage during building
+		OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder);
+		/* when the control reports a break both arenas are released; obj->root, node_arena and builder are not touched */
+		if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) {
+			BLI_memarena_free(arena1);
+			BLI_memarena_free(arena2);
+			return;
+		}
+		/* the builder may yield no root; the tree is then left empty (NULL root) */
+		if (root) {
+			VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root);
+			obj->root = Reorganize_SVBVH<OVBVHNode>(arena2).transform(root);
+		}
+		else
+			obj->root = NULL;
+	}
+
+	//Free data
+	BLI_memarena_free(arena1);
+
+	obj->node_arena = arena2;
+	obj->cost = 1.0;
+
+	rtbuild_free(obj->builder);
+	obj->builder = NULL;
+}
+
+template<int StackSize>
+static int intersect(SVBVHTree *obj, Isect *isec)
+{
+	/* TODO: re-enable hint support */
+	SVBVHNode *const root = obj->root;
+
+	/* a root that is not an aligned node pointer is intersected as a plain ray object */
+	if (!RE_rayobject_isAligned(root))
+		return RE_rayobject_intersect((RayObject *)root, isec);
+
+	return (isec->mode == RE_RAY_SHADOW) ? svbvh_node_stack_raycast<StackSize, true>(root, isec) :
+	                                       svbvh_node_stack_raycast<StackSize, false>(root, isec);
+}
+
+/* Hint callback: with hint support disabled the hint is always just the tree root. */
+template<class Tree>
+static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max))
+{
+	/* TODO: re-enable hint support (the bounding box arguments are unused until then) */
+	RayObject *const whole_tree = (RayObject *)tree->root;
+	hint->stack[0] = whole_tree;
+	hint->size = 1;
+}
+/* Builds the RayObjectAPI callback table for Tree (STACK_SIZE is forwarded to intersect<>); the cast to pointer function is needed to work around gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */
+template<class Tree, int STACK_SIZE>
+static RayObjectAPI make_api()
+{
+	static RayObjectAPI api =
+	{
+		(RE_rayobject_raycast_callback) ((int   (*)(Tree *, Isect *)) & intersect<STACK_SIZE>),
+		(RE_rayobject_add_callback)     ((void  (*)(Tree *, RayObject *)) & bvh_add<Tree>),
+		(RE_rayobject_done_callback)    ((void  (*)(Tree *))       & bvh_done<Tree>),
+		(RE_rayobject_free_callback)    ((void  (*)(Tree *))       & bvh_free<Tree>),
+		(RE_rayobject_merge_bb_callback)((void  (*)(Tree *, float *, float *)) & bvh_bb<Tree>),
+		(RE_rayobject_cost_callback)    ((float (*)(Tree *))      & bvh_cost<Tree>),
+		(RE_rayobject_hint_bb_callback) ((void  (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>)
+	};
+	/* the table is returned by value, so each caller gets its own copy */
+	return api;
+}
+
+/* Returns the callback table for Tree; one table built for a 1024-entry stack serves every supported request. */
+template<class Tree>
+static RayObjectAPI *bvh_get_api(int maxstacksize)
+{
+	static RayObjectAPI bvh_api1024 = make_api<Tree, 1024>();
+	if (maxstacksize <= 1024) return &bvh_api1024;
+	assert(maxstacksize <= 1024);  /* larger stacks are unsupported: trap in debug builds, NULL otherwise */
+	return NULL;
+}
+
+RayObject *RE_rayobject_svbvh_create(int size)
+{
+	return bvh_create_tree<SVBVHTree, DFS_STACK_SIZE>(size);  /* SSE build; size is forwarded unchanged (presumably the expected primitive count - TODO confirm) */
+}
+
+#else
+
+RayObject *RE_rayobject_svbvh_create(int UNUSED(size))
+{
+	puts("WARNING: SSE disabled at compile time");  /* puts() appends the newline itself */
+	return NULL;  /* no SVBVH without SSE support: the caller gets no tree */
+}
+
+#endif
diff --git a/source/blender/render/intern/raytrace/rayobject_vbvh.cpp b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp
new file mode 100644
index 00000000000..b63a11047dd
--- /dev/null
+++ b/source/blender/render/intern/raytrace/rayobject_vbvh.cpp
@@ -0,0 +1,206 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/rayobject_vbvh.cpp
+ *  \ingroup render
+ */
+
+
+int tot_pushup   = 0;
+int tot_pushdown = 0;
+int tot_hints    = 0;
+
+#include <assert.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_memarena.h"
+#include "BLI_utildefines.h"
+
+#include "BKE_global.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "rayobject_rtbuild.h"
+
+#include "reorganize.h"
+#include "bvh.h"
+#include "vbvh.h"
+
+#include <queue>
+#include <algorithm>
+
+#define DFS_STACK_SIZE  256
+
+struct VBVHTree {
+	RayObject rayobj;      /* embedded ray object; bvh_done() hands rayobj.control to the builder */
+	VBVHNode *root;        /* tree root, set by bvh_done() (NULL when the builder produced no tree) */
+	MemArena *node_arena;  /* arena the nodes were allocated from, set by bvh_done() */
+	float cost;            /* set to 1.0 by bvh_done() */
+	RTBuilder *builder;    /* freed and reset to NULL by bvh_done() */
+};
+
+/*
+ * Cost model for testing N childs: linear, every child costs exactly one test
+ */
+struct PackCost {
+	float operator()(int n)
+	{
+		return static_cast<float>(n);
+	}
+};
+
+template<>
+void bvh_done<VBVHTree>(VBVHTree *obj)
+{
+	rtbuild_done(obj->builder, &obj->rayobj.control);
+	/* Finishes the tree: nodes are built in arena1, which ends up owned by obj->node_arena. */
+	//TODO find a way to exactly calculate the needed memory
+	MemArena *arena1 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vbvh arena");
+	BLI_memarena_use_malloc(arena1);
+
+	//Build and optimize the tree
+	if (1) {
+		VBVHNode *root = BuildBinaryVBVH<VBVHNode>(arena1, &obj->rayobj.control).transform(obj->builder);
+		if (RE_rayobjectcontrol_test_break(&obj->rayobj.control)) {
+			BLI_memarena_free(arena1);
+			return;
+		}
+		/* the early return above leaves obj->root, node_arena and builder untouched; a missing root means an empty tree */
+		if (root) {
+			reorganize(root);
+			remove_useless(root, &root);
+			bvh_refit(root);
+
+			pushup(root);
+			pushdown(root);
+			obj->root = root;
+		}
+		else
+			obj->root = NULL;
+	}
+	else {
+		/* TODO */
+#if 0
+		MemArena *arena2 = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vbvh arena2");
+		BLI_memarena_use_malloc(arena2);
+
+		//Finds the optimal packing of this tree using a given cost model
+		//TODO this uses quite a lot of memory, find ways to reduce memory usage during building
+		OVBVHNode *root = BuildBinaryVBVH<OVBVHNode>(arena2).transform(obj->builder);
+		VBVH_optimalPackSIMD<OVBVHNode, PackCost>(PackCost()).transform(root);
+		obj->root = Reorganize_VBVH<OVBVHNode>(arena1).transform(root);
+
+		BLI_memarena_free(arena2);
+#endif
+	}
+
+	//Cleanup
+	rtbuild_free(obj->builder);
+	obj->builder = NULL;
+
+	obj->node_arena = arena1;
+	obj->cost = 1.0;
+}
+
+template<int StackSize>
+static int intersect(VBVHTree *obj, Isect *isec)
+{
+	/* TODO: re-enable hint support */
+	VBVHNode *const root = obj->root;
+
+	/* a root that is not an aligned node pointer is intersected as a plain ray object */
+	if (!RE_rayobject_isAligned(root))
+		return RE_rayobject_intersect((RayObject *)root, isec);
+
+	return (isec->mode == RE_RAY_SHADOW) ? bvh_node_stack_raycast<VBVHNode, StackSize, false, true>(root, isec) :
+	                                       bvh_node_stack_raycast<VBVHNode, StackSize, false, false>(root, isec);
+}
+
+/* Hint callback: with hint support disabled the hint is always just the tree root. */
+template<class Tree>
+static void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *UNUSED(min), float *UNUSED(max))
+{
+	/* TODO: re-enable hint support (the bounding box arguments are unused until then) */
+	RayObject *const whole_tree = (RayObject *)tree->root;
+	hint->stack[0] = whole_tree;
+	hint->size = 1;
+}
+
+#if 0  /* UNUSED: variant of bvh_free() that first prints (in debug mode) and resets the tot_* counters */
+static void bfree(VBVHTree *tree)
+{
+	if (tot_pushup + tot_pushdown + tot_hints + tot_moves) {  /* NOTE(review): tot_moves is not among the counters defined at the top of rayobject_vbvh.cpp - confirm before re-enabling */
+		if (G.debug & G_DEBUG) {
+			printf("tot pushups: %d\n", tot_pushup);
+			printf("tot pushdowns: %d\n", tot_pushdown);
+			printf("tot moves: %d\n", tot_moves);
+			printf("tot hints created: %d\n", tot_hints);
+		}
+		/* the counters are reset whether or not they were printed */
+		tot_pushup = 0;
+		tot_pushdown = 0;
+		tot_hints = 0;
+		tot_moves = 0;
+	}
+	bvh_free(tree);
+}
+#endif
+
+/* Builds the RayObjectAPI callback table for Tree (STACK_SIZE is forwarded to intersect<>); the cast to pointer function is needed to work around gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */
+template<class Tree, int STACK_SIZE>
+static RayObjectAPI make_api()
+{
+	static RayObjectAPI api =
+	{
+		(RE_rayobject_raycast_callback) ((int   (*)(Tree *, Isect *)) & intersect<STACK_SIZE>),
+		(RE_rayobject_add_callback)     ((void  (*)(Tree *, RayObject *)) & bvh_add<Tree>),
+		(RE_rayobject_done_callback)    ((void  (*)(Tree *))       & bvh_done<Tree>),
+		(RE_rayobject_free_callback)    ((void  (*)(Tree *))       & bvh_free<Tree>),
+		(RE_rayobject_merge_bb_callback)((void  (*)(Tree *, float *, float *)) & bvh_bb<Tree>),
+		(RE_rayobject_cost_callback)    ((float (*)(Tree *))      & bvh_cost<Tree>),
+		(RE_rayobject_hint_bb_callback) ((void  (*)(Tree *, LCTSHint *, float *, float *)) & bvh_hint_bb<Tree>)
+	};
+	/* the table is returned by value, so each caller gets its own copy */
+	return api;
+}
+
+/* Returns the callback table for Tree; one table built for a 1024-entry stack serves every supported request. */
+template<class Tree>
+RayObjectAPI *bvh_get_api(int maxstacksize)
+{
+	static RayObjectAPI bvh_api1024 = make_api<Tree, 1024>();
+	if (maxstacksize <= 1024) return &bvh_api1024;
+	assert(maxstacksize <= 1024);  /* larger stacks are unsupported: trap in debug builds, NULL otherwise */
+	return NULL;
+}
+
+RayObject *RE_rayobject_vbvh_create(int size)
+{
+	return bvh_create_tree<VBVHTree, DFS_STACK_SIZE>(size);  /* size is forwarded unchanged (presumably the expected primitive count - TODO confirm) */
+}
diff --git a/source/blender/render/intern/raytrace/reorganize.h b/source/blender/render/intern/raytrace/reorganize.h
new file mode 100644
index 00000000000..3fdd3363edb
--- /dev/null
+++ b/source/blender/render/intern/raytrace/reorganize.h
@@ -0,0 +1,513 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/reorganize.h
+ *  \ingroup render
+ */
+
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <queue>
+#include <vector>
+
+#include "BKE_global.h"
+
+#ifdef _WIN32
+#  ifdef INFINITY
+#    undef INFINITY
+#  endif
+#  define INFINITY FLT_MAX // in mingw math.h: (1.0F/0.0F). This generates compile error, though.
+#endif
+
+extern int tot_pushup;
+extern int tot_pushdown;
+
+#if !defined(INFINITY) && defined(HUGE_VAL)
+#define INFINITY HUGE_VAL
+#endif
+
+template<class Node>
+static bool node_fits_inside(Node *a, Node *b)
+{
+	return bb_fits_inside(b->bb, b->bb + 3, a->bb, a->bb + 3);  /* b supplies the outer box, a the inner one: asks whether a fits inside b */
+}
+
+/* Breadth-first search from `tree` for the smallest-area node able to hold `node`; the best (area, node) pair is kept in `cost`. */
+template<class Node>
+static void reorganize_find_fittest_parent(Node *tree, Node *node, std::pair<float, Node *> &cost)
+{
+	std::queue<Node *> pending;
+
+	for (pending.push(tree); !pending.empty(); pending.pop()) {
+		Node *candidate = pending.front();
+
+		/* a node is never its own parent; subtrees that cannot hold `node`, or whose child is not a node, are pruned */
+		if (candidate == node) continue;
+		if (!node_fits_inside(node, candidate) || !RE_rayobject_isAligned(candidate->child)) continue;
+
+		const std::pair<float, Node *> offer(bb_area(candidate->bb, candidate->bb + 3), candidate);
+		if (offer < cost) cost = offer;
+		for (Node *child = candidate->child; child; child = child->sibling)
+			pending.push(child);
+	}
+}
+
+/*
+ * Moves every node found in a child list under the fittest parent reported by reorganize_find_fittest_parent().
+ */
+template<class Node>
+static void reorganize(Node *root)
+{
+	std::queue<Node *> todo;
+
+	for (todo.push(root); !todo.empty(); todo.pop()) {
+		Node *node = todo.front();
+
+		/* a node whose child is not an aligned node pointer has no child list to walk */
+		if (!RE_rayobject_isAligned(node->child))
+			continue;
+
+		Node **link = &node->child;
+		while (*link) {
+			Node *current = *link;
+			assert(RE_rayobject_isAligned(current));
+			todo.push(current);
+
+			std::pair<float, Node *> best(FLT_MAX, root);
+			reorganize_find_fittest_parent(root, current, best);
+
+			if (best.second == node) {
+				/* already inside the fittest BB: step to the next sibling */
+				link = &current->sibling;
+			}
+			else {
+				/* unlink here and insert at the head of the new parent's child list */
+				*link = current->sibling;
+				current->sibling = best.second->child;
+				best.second->child = current;
+			}
+		}
+	}
+}
+
+/*
+ * Prunes useless nodes from trees:
+ *  erases nodes with total amount of primitives = 0
+ *  prunes nodes with only one child (except if that child is a primitive)
+ * The replacement for `node`, if any, is written to *new_node; otherwise *new_node is left as it was.
+ */
+template<class Node>
+static void remove_useless(Node *node, Node **new_node)
+{
+	if (RE_rayobject_isAligned(node->child)) {
+		Node **link = &node->child;
+		while (*link) {
+			Node *next = (*link)->sibling;  /* saved first: the recursion may overwrite *link */
+			remove_useless(*link, link);
+			if (*link) {
+				(*link)->sibling = next;
+				link = &(*link)->sibling;
+			}
+			else {
+				*link = next;
+			}
+		}
+	}
+
+	if (node->child == NULL)
+		*new_node = NULL;
+	else if (RE_rayobject_isAligned(node->child) && node->child->sibling == 0)
+		*new_node = node->child;
+}
+
+/*
+ * Minimizes expected number of BBtest by collapsing nodes
+ * it uses surface area heuristic for determining whether a node should be collapsed
+ */
+template<class Node>
+static void pushup(Node *parent)
+{
+	if (is_leaf(parent)) return;
+	/* prev always addresses the link that should point at the child under inspection */
+	float p_area = bb_area(parent->bb, parent->bb + 3);
+	Node **prev = &parent->child;
+	for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) {
+		const float c_area = bb_area(child->bb, child->bb + 3);
+		const int nchilds = count_childs(child);
+		float original_cost = ((p_area != 0.0f) ? (c_area / p_area) * nchilds : 1.0f) + 1;  /* keep child: its own test plus area ratio times its childs */
+		float flatten_cost = nchilds;  /* collapse child: each of its childs is tested directly */
+		if (flatten_cost < original_cost && nchilds >= 2) {
+			append_sibling(child, child->child);
+			child = child->sibling;
+			*prev = child;
+			/* child has been unlinked from parent's list (prev now skips it); its former
+			 * childs were handed to append_sibling() first, which presumably chains them
+			 * behind child's siblings - helper not visible here, TODO confirm.
+			 * prev is left alone so the node that took child's place is examined next */
+			tot_pushup++;
+		}
+		else {
+			*prev = child;
+			prev = &(*prev)->sibling;
+			child = *prev;
+		}
+	}
+	/* then apply the same collapsing to every remaining child subtree */
+	for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling)
+		pushup(child);
+}
+
+/*
+ * try to optimize number of childs to be a multiple of SSize
+ */
+template<class Node, int SSize>
+static void pushup_simd(Node *parent)
+{
+	if (is_leaf(parent)) return;
+	/* n tracks the number of childs parent has as nodes get collapsed into it */
+	int n = count_childs(parent);
+	/* prev always addresses the link that should point at the child under inspection */
+	Node **prev = &parent->child;
+	for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) {
+		int cn = count_childs(child);
+		if (cn - 1 <= (SSize - (n % SSize) ) % SSize && RE_rayobject_isAligned(child->child) ) {  /* replacing child by its cn childs adds cn - 1 entries: only done when that fits below the next multiple of SSize */
+			n += (cn - 1);
+			append_sibling(child, child->child);
+			child = child->sibling;
+			*prev = child;
+		}
+		else {
+			*prev = child;
+			prev = &(*prev)->sibling;
+			child = *prev;
+		}
+	}
+	/* then repeat for every remaining child subtree */
+	for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling)
+		pushup_simd<Node, SSize>(child);
+}
+
+
+/*
+ * Pushdown
+ *	makes sure no child fits inside any of its siblings
+ */
+template<class Node>
+static void pushdown(Node *parent)
+{
+	Node **s_child = &parent->child;
+	Node *child = parent->child;
+	/* s_child addresses the link that currently points at child */
+	while (child && RE_rayobject_isAligned(child)) {
+		Node *next = child->sibling;
+		Node **next_s_child = &child->sibling;
+		/* next and next_s_child are captured before child is possibly moved below a sibling */
+		//assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
+		/* look for the first sibling i (with a node child list) whose box contains child */
+		for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling)
+			if (child != i && bb_fits_inside(i->bb, i->bb + 3, child->bb, child->bb + 3) && RE_rayobject_isAligned(i->child)) {
+//			todo optimize (should the one with the smallest area?)
+//			float ia = bb_area(i->bb, i->bb+3)
+//			if (child->i)
+				*s_child = child->sibling;
+				child->sibling = i->child;
+				i->child = child;
+				next_s_child = s_child;
+				/* child now heads i's child list; the link that pointed at it is reused for the next sibling */
+				tot_pushdown++;
+				break;
+			}
+		child = next;
+		s_child = next_s_child;
+	}
+	/* finally repeat for every child that stayed directly below parent */
+	for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) {
+		pushdown(i);
+	}
+}
+
+
+/*
+ * BVH refit
+ * readjust nodes BB (useful if nodes childs were modified)
+ * returns the summed decrease in bounding box area over the refitted subtree
+ */
+template<class Node>
+static float bvh_refit(Node *node)
+{
+	/* leaves, and nodes whose child is a leaf, keep their box */
+	if (is_leaf(node) || is_leaf(node->child)) return 0;
+
+	/* childs first, so this node is rebuilt from up-to-date boxes */
+	float shrink = 0;
+	for (Node *c = node->child; c; c = c->sibling)
+		shrink += bvh_refit(c);
+
+	const float area_before = bb_area(node->bb, node->bb + 3);
+	INIT_MINMAX(node->bb, node->bb + 3);
+	for (Node *c = node->child; c; c = c->sibling) {
+		DO_MIN(c->bb, node->bb);
+		DO_MAX(c->bb + 3, node->bb + 3);
+	}
+	return shrink + (area_before - bb_area(node->bb, node->bb + 3));
+}
+
+
+/*
+ * This finds the best way to pack a tree according to a given test cost function,
+ * with the purpose of reducing the expected cost (e.g. number of BB tests).
+ */
+#include <vector>
+#define MAX_CUT_SIZE         4               /* svbvh assumes max 4 children! */
+#define MAX_OPTIMIZE_CHILDS  MAX_CUT_SIZE
+
+#define CUT_SIZE_IS_VALID(cut_size) ((cut_size) < MAX_CUT_SIZE && (cut_size) >= 0)
+#define CUT_SIZE_INVALID -1
+
+
+struct OVBVHNode {  /* tree node carrying the per-cut-size tables used by VBVH_optimalPackSIMD */
+	float bb[6];
+	/* child heads the child list (it is only walked while it is an aligned node pointer); sibling chains that list */
+	OVBVHNode *child;
+	OVBVHNode *sibling;
+
+	/*
+	 * Returns min cost to represent the subtree starting at the given node,
+	 * allowing it to have a given cutsize
+	 */
+	float cut_cost[MAX_CUT_SIZE];
+	float get_cost(int cutsize)
+	{
+		assert(CUT_SIZE_IS_VALID(cutsize - 1));
+		return cut_cost[cutsize - 1];  /* the tables are indexed by cut size minus one */
+	}
+
+	/*
+	 * This saves the cut size of this child, when parent is reaching
+	 * its minimum cut with the given cut size
+	 */
+	int cut_size[MAX_CUT_SIZE];
+	int get_cut_size(int parent_cut_size)
+	{
+		assert(CUT_SIZE_IS_VALID(parent_cut_size - 1));
+		return cut_size[parent_cut_size - 1];
+	}
+
+	/*
+	 * Reorganize the node based on calculated cut costs
+	 */
+	int best_cutsize;
+	void set_cut(int cutsize, OVBVHNode ***cut)
+	{
+		if (cutsize == 1) {
+			**cut = this;  /* this node is a member of the cut: store it in the current link ... */
+			*cut = &(**cut)->sibling;  /* ... and make its sibling field the next link to fill */
+		}
+		else {
+			if (cutsize > MAX_CUT_SIZE) {
+				for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
+					child->set_cut(1, cut);
+					cutsize--;
+				}
+				assert(cutsize == 0);  /* such a cut size has to equal the number of childs */
+			}
+			else {
+				for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
+					child->set_cut(child->get_cut_size(cutsize), cut);
+				}
+			}
+		}
+	}
+	/* Rebuilds the child list from the chosen cut (when one was found), then recurses into the new childs. */
+	void optimize()
+	{
+		if (RE_rayobject_isAligned(this->child)) {
+			//Calc new childs
+			if (this->best_cutsize != CUT_SIZE_INVALID) {
+				OVBVHNode **cut = &(this->child);
+				set_cut(this->best_cutsize, &cut);
+				*cut = NULL;  /* terminate the rebuilt sibling list */
+			}
+
+			//Optimize new childs
+			for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling)
+				child->optimize();
+		}
+	}
+};
+
+/*
+ * Calculates an optimal SIMD packing
+ *  (calc_costs() fills the per-node cost tables bottom-up, Node::optimize() then applies the chosen cuts)
+ */
+template<class Node, class TestCost>
+struct VBVH_optimalPackSIMD {
+	TestCost testcost;  /* functor giving the cost of testing a given number of childs */
+
+	VBVH_optimalPackSIMD(TestCost testcost)
+	{
+		this->testcost = testcost;
+	}
+
+	/*
+	 * calc best cut on a node (all the work happens in the constructor)
+	 */
+	struct calc_best {
+		Node *child[MAX_OPTIMIZE_CHILDS];
+		float child_hit_prob[MAX_OPTIMIZE_CHILDS];  /* child bb area relative to the parent's (1.0 when the parent area is 0) */
+
+		calc_best(Node *node)
+		{
+			int nchilds = 0;
+			//Fetch childs and needed data
+			{
+				float parent_area = bb_area(node->bb, node->bb + 3);
+				for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
+					this->child[nchilds] = child;
+					this->child_hit_prob[nchilds] = (parent_area != 0.0f) ? bb_area(child->bb, child->bb + 3) / parent_area : 1.0f;
+					nchilds++;
+				}
+
+				assert(nchilds >= 2 && nchilds <= MAX_OPTIMIZE_CHILDS);
+			}
+
+			/* cost[i][k]: min cost of covering the first i childs with a cut of total size k; bt[i][k]: cut size given to child i */
+			//Build DP table to find minimum cost to represent this node with a given cutsize
+			int     bt[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1];     //backtrace table
+			float cost[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //cost table (can be reduced to float[2][MAX_CUT_COST])
+
+			for (int i = 0; i <= nchilds; i++) {
+				for (int j = 0; j <= MAX_CUT_SIZE; j++) {
+					cost[i][j] = INFINITY;
+				}
+			}
+			/* seed of the DP: zero childs with an empty cut cost nothing */
+			cost[0][0] = 0;
+
+			for (int i = 1; i <= nchilds; i++) {
+				for (int size = i - 1; size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; size++) {
+					for (int cut = 1; cut + size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; cut++) {
+						float new_cost = cost[i - 1][size] + child_hit_prob[i - 1] * child[i - 1]->get_cost(cut);
+						if (new_cost < cost[i][size + cut]) {
+							cost[i][size + cut] = new_cost;
+							bt[i][size + cut] = cut;
+						}
+					}
+				}
+			}
+
+			/* Save the ways to achieve the minimum cost with a given cutsize */
+			for (int i = nchilds; i <= MAX_CUT_SIZE; i++) {
+				node->cut_cost[i - 1] = cost[nchilds][i];
+				if (cost[nchilds][i] < INFINITY) {  /* bt[][] is only read for reachable table entries */
+					int current_size = i;
+					for (int j = nchilds; j > 0; j--) {
+						child[j - 1]->cut_size[i - 1] = bt[j][current_size];
+						current_size -= bt[j][current_size];
+					}
+				}
+			}
+		}
+	};
+
+	void calc_costs(Node *node)
+	{
+		/* Post-order: childs are costed first, then this node's cut_cost[] and best_cutsize. */
+		if (RE_rayobject_isAligned(node->child) ) {
+			int nchilds = 0;
+			for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
+				calc_costs(child);
+				nchilds++;
+			}
+
+			for (int i = 0; i < MAX_CUT_SIZE; i++)
+				node->cut_cost[i] = INFINITY;
+
+			//We are not allowed to look on nodes with so many childs
+			if (nchilds > MAX_CUT_SIZE) {
+				float cost = 0;
+
+				float parent_area = bb_area(node->bb, node->bb + 3);
+				for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
+					cost += ((parent_area != 0.0f) ? (bb_area(child->bb, child->bb + 3) / parent_area) : 1.0f) * child->get_cost(1);
+				}
+
+				cost += testcost(nchilds);
+				node->cut_cost[0] = cost;
+				node->best_cutsize = nchilds;
+			}
+			else {
+				calc_best calc(node);
+
+				//calc expected cost if we optimally pack this node
+				for (int cutsize = nchilds; cutsize <= MAX_CUT_SIZE; cutsize++) {
+					float m = node->get_cost(cutsize) + testcost(cutsize);
+					if (m < node->cut_cost[0]) {
+						node->cut_cost[0] = m;
+						node->best_cutsize = cutsize;
+					}
+				}
+			}
+			/* no finite cost was found: mark the node so that optimize() keeps its child list as it is */
+			if (node->cut_cost[0] == INFINITY) {
+				node->best_cutsize = CUT_SIZE_INVALID;
+			}
+		}
+		else {
+			node->cut_cost[0] = 1.0f;
+			for (int i = 1; i < MAX_CUT_SIZE; i++)
+				node->cut_cost[i] = INFINITY;
+
+			/* node->best_cutsize can remain unset here */
+		}
+	}
+	/* Entry point: costs the whole tree, applies the chosen cuts in place and returns the same root. */
+	Node *transform(Node *node)
+	{
+		if (RE_rayobject_isAligned(node->child)) {
+#ifdef DEBUG
+			static int num = 0;
+			bool first = false;
+			if (num == 0) { num++; first = true; }
+#endif
+
+			calc_costs(node);
+
+#ifdef DEBUG
+			if (first && G.debug) {
+				printf("expected cost = %f (%d)\n", node->cut_cost[0], node->best_cutsize);
+			}
+#endif
+			node->optimize();
+		}
+		return node;
+	}
+};
diff --git a/source/blender/render/intern/raytrace/svbvh.h b/source/blender/render/intern/raytrace/svbvh.h
new file mode 100644
index 00000000000..0a5690deb46
--- /dev/null
+++ b/source/blender/render/intern/raytrace/svbvh.h
@@ -0,0 +1,317 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/svbvh.h
+ *  \ingroup render
+ */
+
+#ifndef __SVBVH_H__
+#define __SVBVH_H__
+
+#ifdef __SSE__
+
+#include "bvh.h"
+#include "BLI_memarena.h"
+#include <algorithm>
+
+struct SVBVHNode {
+	float child_bb[24];   /* boxes of up to 4 childs, 6 floats each; complete groups of 4 are transposed by Reorganize_SVBVH::prepare_for_simd() */
+	SVBVHNode *child[4];  /* child pointers; entries that are not aligned node pointers are treated as leaves by the raycast */
+	int nchilds;          /* number of used slots, including padding entries added by Reorganize_SVBVH::transform() */
+};
+
+static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group)
+{
+	const __m128 tmin0 = _mm_setzero_ps();
+	const __m128 tmax0 = _mm_set_ps1(isec->dist);
+	/* Tests one ray against four boxes at once: every bb_group[] entry holds one box coordinate for four boxes, the ray interval starts as [0, isec->dist]. */
+	const __m128 start0 = _mm_set_ps1(isec->start[0]);
+	const __m128 start1 = _mm_set_ps1(isec->start[1]);
+	const __m128 start2 = _mm_set_ps1(isec->start[2]);
+	const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0);  /* isec->bv_index[] picks which coordinate row is used for each of the six planes */
+	const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0);
+	const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1);
+	const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1);
+	const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2);
+	const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2);
+	const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]);
+	const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]);
+	const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]);
+	const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0);
+	const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0);
+	const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1);
+	const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1);
+	const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2);
+	const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2);
+	const __m128 tmin1 = _mm_max_ps(tmin0, mul0);  /* even planes raise the lower bound, odd planes lower the upper bound */
+	const __m128 tmax1 = _mm_min_ps(tmax0, mul1);
+	const __m128 tmin2 = _mm_max_ps(tmin1, mul2);
+	const __m128 tmax2 = _mm_min_ps(tmax1, mul3);
+	const __m128 tmin3 = _mm_max_ps(tmin2, mul4);
+	const __m128 tmax3 = _mm_min_ps(tmax2, mul5);
+	/* bit c of the result is set when lane c still has tmax >= tmin, i.e. box c is hit */
+	return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
+}
+
+static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb)
+{
+	const float *bb = _bb;
+	/* Scalar test of the ray against one box of 6 floats: returns 1 on a hit, 0 otherwise. */
+	float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
+	float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
+	float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
+	float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
+	float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
+	float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
+	/* every call counts as a test, only the ones reaching the end count as a hit */
+	RE_RC_COUNT(isec->raycounter->bb.test);
+	/* reject when the per-axis intervals do not overlap, end before 0, or start beyond isec->dist */
+	if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
+	if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0;
+	if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
+
+	RE_RC_COUNT(isec->raycounter->bb.hit);
+
+	return 1;
+}
+
+static bool svbvh_node_is_leaf(const SVBVHNode *node)
+{
+	return !RE_rayobject_isAligned(node);  /* a pointer that is not aligned is not a tree node: the raycast passes it to RE_rayobject_intersect() */
+}
+
+/* Stack based traversal of an SVBVH: returns non-zero once any primitive reports a hit (SHADOW rays stop at the first one). */
+template<int MAX_STACK_SIZE, bool SHADOW>
+static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
+{
+	SVBVHNode *stack[MAX_STACK_SIZE], *node;
+	int hit = 0, stack_pos = 0;
+
+	/* an empty tree has a NULL root, which must not be dereferenced as a node */
+	if (root == NULL) return 0;
+	stack[stack_pos++] = root;
+
+	while (stack_pos) {
+		node = stack[--stack_pos];
+		if (!svbvh_node_is_leaf(node)) {
+			int nchilds = node->nchilds;
+			/* conservative capacity check: every child of this node may get pushed below */
+			assert(stack_pos + nchilds <= MAX_STACK_SIZE);
+			if (nchilds == 4) {
+				float *child_bb = node->child_bb;
+				int res = svbvh_bb_intersect_test_simd4(isec, ((__m128 *) (child_bb)));
+				SVBVHNode **child = node->child;
+				RE_RC_COUNT(isec->raycounter->simd_bb.test);
+				if (res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if (res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if (res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+				if (res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
+			}
+			else {
+				float *child_bb = node->child_bb;
+				SVBVHNode **child = node->child;
+				int i;
+				for (i = 0; i < nchilds; i++) {
+					if (svbvh_bb_intersect_test(isec, (float *)child_bb + 6 * i)) {
+						stack[stack_pos++] = child[i];
+					}
+				}
+			}
+		}
+		else {
+			hit |= RE_rayobject_intersect((RayObject *)node, isec);
+			if (SHADOW && hit) break;
+		}
+	}
+
+	return hit;
+}
+
+
+template<>
+inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float min[3], float max[3])
+{
+	if (is_leaf(node)) {
+		RE_rayobject_merge_bb((RayObject *)node, min, max);
+	}
+	else {
+		int i;
+		for (i = 0; i + 4 <= node->nchilds; i += 4) {  /* complete groups of 4 are read in the transposed layout written by prepare_for_simd() */
+			float *res = node->child_bb + 6 * i;
+			for (int j = 0; j < 3; j++) {  /* rows 0..2 feed min[] */
+				min[j] = min_ff(min[j],
+				                min_ffff(res[4 * j + 0],
+				                         res[4 * j + 1],
+				                         res[4 * j + 2],
+				                         res[4 * j + 3]));
+			}
+			for (int j = 0; j < 3; j++) {  /* rows 3..5 feed max[] */
+				max[j] = max_ff(max[j],
+				                max_ffff(res[4 * (j + 3) + 0],
+				                         res[4 * (j + 3) + 1],
+				                         res[4 * (j + 3) + 2],
+				                         res[4 * (j + 3) + 3]));
+			}
+		}
+		/* childs outside a complete group keep one box of 6 floats per child */
+		for (; i < node->nchilds; i++) {
+			DO_MIN(node->child_bb + 6 * i, min);
+			DO_MAX(node->child_bb + 3 + 6 * i, max);
+		}
+	}
+}
+
+
+
+/*
+ * Builds a SVBVH tree from a VBVHTree
+ */
+template<class OldNode>
+struct Reorganize_SVBVH {
+	MemArena *arena;
+	/* statistics gathered by transform(); only reported by the disabled printf block in the destructor */
+	float childs_per_node;
+	int nodes_with_childs[16];
+	int useless_bb;
+	int nodes;
+
+	Reorganize_SVBVH(MemArena *a)
+	{
+		arena = a;
+		nodes = 0;
+		childs_per_node = 0;
+		useless_bb = 0;
+
+		for (int i = 0; i < 16; i++) {
+			nodes_with_childs[i] = 0;
+		}
+	}
+
+	~Reorganize_SVBVH()
+	{
+#if 0
+		{
+			printf("%f childs per node\n", childs_per_node / nodes);
+			printf("%d childs BB are useless\n", useless_bb);
+			for (int i = 0; i < 16; i++) {
+				printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i] / float(nodes));
+			}
+		}
+#endif
+	}
+	/* Allocates one node from the arena; only nchilds is initialized here. */
+	SVBVHNode *create_node(int nchilds)
+	{
+		SVBVHNode *node = (SVBVHNode *)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
+		node->nchilds = nchilds;
+
+		return node;
+	}
+
+	void copy_bb(float bb[6], const float old_bb[6])
+	{
+		std::copy(old_bb, old_bb + 6, bb);
+	}
+	/* Transposes every complete group of 4 boxes from 4 x 6 floats (one box after the other) to 6 rows of 4 floats. */
+	void prepare_for_simd(SVBVHNode *node)
+	{
+		int i = 0;
+		while (i + 4 <= node->nchilds) {
+			float vec_tmp[4 * 6];
+			float *res = node->child_bb + 6 * i;
+			std::copy(res, res + 6 * 4, vec_tmp);
+
+			for (int j = 0; j < 6; j++) {
+				res[4 * j + 0] = vec_tmp[6 * 0 + j];
+				res[4 * j + 1] = vec_tmp[6 * 1 + j];
+				res[4 * j + 2] = vec_tmp[6 * 2 + j];
+				res[4 * j + 3] = vec_tmp[6 * 3 + j];
+			}
+
+			i += 4;
+		}
+	}
+
+	/* amt must be power of two */
+	inline int padup(int num, int amt)
+	{
+		return ((num + (amt - 1)) & ~(amt - 1));
+	}
+	/* Converts the subtree below `old` into SVBVH nodes allocated from the arena and returns its new root. */
+	SVBVHNode *transform(OldNode *old)
+	{
+		if (is_leaf(old))
+			return (SVBVHNode *)old;
+		if (is_leaf(old->child))
+			return (SVBVHNode *)old->child;
+		/* a child count with a remainder of 3 is padded up to the next multiple of 4 */
+		int nchilds = count_childs(old);
+		int alloc_childs = nchilds;
+		if (nchilds % 4 > 2)
+			alloc_childs = padup(nchilds, 4);
+
+		SVBVHNode *node = create_node(alloc_childs);
+
+		childs_per_node += nchilds;
+		nodes++;
+		if (nchilds < 16)
+			nodes_with_childs[nchilds]++;
+		/* padding slots get a NULL child and an inverted box (min = FLT_MAX, max = -FLT_MAX) */
+		useless_bb += alloc_childs - nchilds;
+		while (alloc_childs > nchilds) {
+			const static float def_bb[6] = {FLT_MAX,  FLT_MAX,  FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX};
+			alloc_childs--;
+			node->child[alloc_childs] = NULL;
+			copy_bb(node->child_bb + alloc_childs * 6, def_bb);
+		}
+		/* slots are filled back to front: the first old child lands in slot nchilds - 1 */
+		int i = nchilds;
+		for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) {
+			i--;
+			node->child[i] = transform(o_child);
+			if (is_leaf(o_child)) {
+				float bb[6];
+				INIT_MINMAX(bb, bb + 3);
+				RE_rayobject_merge_bb((RayObject *)o_child, bb, bb + 3);
+				copy_bb(node->child_bb + i * 6, bb);
+				break;  /* NOTE(review): presumably because a leaf has no sibling field to follow - confirm */
+			}
+			else {
+				copy_bb(node->child_bb + i * 6, o_child->bb);
+			}
+		}
+		assert(i == 0);
+
+		prepare_for_simd(node);
+
+		return node;
+	}
+};
+
+#endif  /* __SSE__ */
+
+#endif  /* __SVBVH_H__ */
diff --git a/source/blender/render/intern/raytrace/vbvh.h b/source/blender/render/intern/raytrace/vbvh.h
new file mode 100644
index 00000000000..0b0bbd19116
--- /dev/null
+++ b/source/blender/render/intern/raytrace/vbvh.h
@@ -0,0 +1,238 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/raytrace/vbvh.h
+ *  \ingroup render
+ */
+
+
+#include <assert.h>
+#include <algorithm>
+
+#include "BLI_memarena.h"
+
+#include "rayobject_rtbuild.h"
+
+/*
+ * VBVHNode represents a BVH node with support for a variable number of
+ * children. The children form a singly linked list: `child` is the first
+ * child and each child's `sibling` is the next one.
+ */
+struct VBVHNode {
+	/* bounding box: minimum corner in bb[0..2], maximum corner in bb[3..5] */
+	float bb[6];
+
+	VBVHNode *child;
+	VBVHNode *sibling;
+};
+
+
+/*
+ * Push the children of `node` on the traversal stack (used on dfs).
+ * When the first child is a leaf only that child is pushed, otherwise every
+ * child in the sibling list is pushed; `stack_pos` is advanced accordingly.
+ */
+template<class Node>
+inline static void bvh_node_push_childs(Node *node, Isect *UNUSED(isec), Node **stack, int &stack_pos)
+{
+	Node *first = node->child;
+
+	if (is_leaf(first)) {
+		stack[stack_pos] = first;
+		stack_pos++;
+		return;
+	}
+
+	/* every child is pushed as-is; bounding box tests on primitives are
+	 * not skipped here */
+	for (Node *cur = first; cur; cur = cur->sibling) {
+		stack[stack_pos] = cur;
+		stack_pos++;
+	}
+}
+
+
+/*
+ * Count the children of `parent` by walking its sibling list.
+ * Counting stops right after the first leaf that is encountered.
+ */
+template<class Node>
+static int count_childs(Node *parent)
+{
+	Node *cur = parent->child;
+	int total = 0;
+
+	while (cur) {
+		total++;
+		if (is_leaf(cur)) {
+			break;
+		}
+		cur = cur->sibling;
+	}
+
+	return total;
+}
+
+
+/* Attach `sibling` after the last node of the sibling list starting at `node`. */
+template<class Node>
+static void append_sibling(Node *node, Node *sibling)
+{
+	Node *tail = node;
+
+	for (; tail->sibling; tail = tail->sibling) {
+		/* walk to the end of the list */
+	}
+
+	tail->sibling = sibling;
+}
+
+
+/*
+ * Builds a binary VBVH from a rtbuild.
+ *
+ * Nodes are allocated from `arena`. `control` is polled through test_break()
+ * so a build can be aborted; the abort is signalled with a C++ exception that
+ * transform() catches, in which case transform() returns NULL.
+ */
+template<class Node>
+struct BuildBinaryVBVH {
+	MemArena *arena;
+	RayObjectControl *control;
+
+	/* throws when the control object reports a break request */
+	void test_break()
+	{
+		if (RE_rayobjectcontrol_test_break(control))
+			throw "Stop";
+	}
+
+	BuildBinaryVBVH(MemArena *a, RayObjectControl *c)
+	{
+		arena = a;
+		control = c;
+	}
+
+	/* allocate a node from the arena with no child and no sibling;
+	 * the bounding box is left for the caller to fill in */
+	Node *create_node()
+	{
+		Node *node = (Node *)BLI_memarena_alloc(arena, sizeof(Node) );
+		assert(RE_rayobject_isAligned(node));
+
+		node->sibling = NULL;
+		node->child   = NULL;
+
+		return node;
+	}
+
+	/* split the builder in two parts using the object split heuristic */
+	int rtbuild_split(RTBuilder *builder)
+	{
+		return ::rtbuild_heuristic_object_split(builder, 2);
+	}
+
+	/* entry point: returns the root node, or NULL when the builder is empty
+	 * or when the build threw (e.g. the break request from test_break()) */
+	Node *transform(RTBuilder *builder)
+	{
+		try
+		{
+			return _transform(builder);
+
+		} catch (...)
+		{
+		}
+		return NULL;
+	}
+
+	/* recursive worker: 0 primitives -> NULL, 1 primitive -> a node whose
+	 * `child` is the primitive itself, otherwise an inner node with the
+	 * recursively built parts linked as siblings */
+	Node *_transform(RTBuilder *builder)
+	{
+		int size = rtbuild_size(builder);
+
+		if (size == 0) {
+			return NULL;
+		}
+		else if (size == 1) {
+			Node *node = create_node();
+			INIT_MINMAX(node->bb, node->bb + 3);
+			rtbuild_merge_bb(builder, node->bb, node->bb + 3);
+			node->child = (Node *) rtbuild_get_primitive(builder, 0);
+			return node;
+		}
+		else {
+			test_break();
+
+			Node *node = create_node();
+
+			/* points at the link (child or sibling pointer) to fill next */
+			Node **child = &node->child;
+
+			int nc = rtbuild_split(builder);
+			INIT_MINMAX(node->bb, node->bb + 3);
+
+			assert(nc == 2);
+			for (int i = 0; i < nc; i++) {
+				RTBuilder tmp;
+				rtbuild_get_child(builder, i, &tmp);
+
+				/* NOTE(review): the result is dereferenced without a NULL
+				 * check; this assumes the split never yields an empty
+				 * part - confirm against rtbuild_heuristic_object_split */
+				*child = _transform(&tmp);
+				/* grow the parent box so it encloses the child box */
+				DO_MIN((*child)->bb, node->bb);
+				DO_MAX((*child)->bb + 3, node->bb + 3);
+				child = &((*child)->sibling);
+			}
+
+			/* terminate the sibling list */
+			*child = NULL;
+			return node;
+		}
+	}
+};
+
+/* NOTE: everything up to the matching #endif is compiled out. */
+#if 0
+/* Copies a tree of OldNode into VBVHNode nodes allocated from
+ * tree->node_arena; leaves are reused as-is. */
+template<class Tree, class OldNode>
+struct Reorganize_VBVH {
+	Tree *tree;
+
+	Reorganize_VBVH(Tree *t)
+	{
+		tree = t;
+	}
+
+	VBVHNode *create_node()
+	{
+		VBVHNode *node = (VBVHNode *)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode));
+		return node;
+	}
+
+	void copy_bb(VBVHNode *node, OldNode *old)
+	{
+		std::copy(old->bb, old->bb + 6, node->bb);
+	}
+
+	VBVHNode *transform(OldNode *old)
+	{
+		if (is_leaf(old))
+			return (VBVHNode *)old;
+
+		VBVHNode *node = create_node();
+		VBVHNode **child_ptr = &node->child;
+		node->sibling = 0;
+
+		copy_bb(node, old);
+
+		for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
+		{
+			VBVHNode *n_child = transform(o_child);
+			*child_ptr = n_child;
+			/* a leaf child ends the list: return without touching its sibling */
+			if (is_leaf(n_child)) return node;
+			child_ptr = &n_child->sibling;
+		}
+		*child_ptr = 0;
+
+		return node;
+	}
+};
+#endif
diff --git a/source/blender/render/intern/source/bake.c b/source/blender/render/intern/source/bake.c
new file mode 100644
index 00000000000..4a7962b1776
--- /dev/null
+++ b/source/blender/render/intern/source/bake.c
@@ -0,0 +1,1342 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Contributors: 2004/2005/2006 Blender Foundation, full recode
+ * Contributors: Vertex color baking, Copyright 2011 AutoCRC
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/bake.c
+ *  \ingroup render
+ */
+
+
+/* system includes */
+#include <stdio.h>
+#include <string.h>
+
+/* External modules: */
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_rand.h"
+#include "BLI_threads.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_image_types.h"
+#include "DNA_material_types.h"
+#include "DNA_mesh_types.h"
+#include "DNA_meshdata_types.h"
+
+#include "BKE_customdata.h"
+#include "BKE_global.h"
+#include "BKE_image.h"
+#include "BKE_main.h"
+#include "BKE_node.h"
+#include "BKE_scene.h"
+#include "BKE_library.h"
+
+#include "IMB_imbuf_types.h"
+#include "IMB_imbuf.h"
+#include "IMB_colormanagement.h"
+
+/* local include */
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "shading.h"
+#include "zbuf.h"
+
+#include "PIL_time.h"
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+
+/* ************************* bake ************************ */
+
+
+/* Per-thread state of a bake job: the face currently being baked plus the
+ * output target (image buffers or vertex colors) the samples are written to. */
+typedef struct BakeShade {
+	/* index passed to BLI_thread_srandom() for this worker */
+	int thread;
+
+	ShadeSample ssamp;
+	/* face currently being baked, set by get_next_bake_face() */
+	ObjectInstanceRen *obi;
+	VlakRen *vlr;
+
+	ZSpan *zspan;
+	/* image (and its acquired buffer) the zspan was allocated for */
+	Image *ima;
+	ImBuf *ibuf;
+
+	/* rectx/recty: size of the target buffer; quad: non-zero while the second
+	 * triangle (v1, v3, v4) of a quad is processed; type: RE_BAKE_* mode;
+	 * vdone: number of faces handed out to this thread */
+	int rectx, recty, quad, type, vdone;
+	/* set by the worker thread when it has finished */
+	bool ready;
+
+	float dir[3];
+	/* non-NULL when baking selected to active */
+	Object *actob;
+
+	/* Output: vertex color or image data. If vcol is not NULL, rect and
+	 * rect_float should be NULL. */
+	MPoly *mpoly;
+	MLoop *mloop;
+	MLoopCol *vcol;
+
+	unsigned int *rect;
+	float *rect_float;
+
+	/* displacement buffer used for normalization with unknown maximal distance */
+	bool use_displacement_buffer;
+	float *displacement_buffer;
+	float displacement_min, displacement_max;
+
+	bool use_mask;
+	char *rect_mask; /* bake pixel mask */
+
+	/* derivatives of the render coordinate with respect to the pixel
+	 * coordinates, see bake_set_vlr_dxyco() */
+	float dxco[3], dyco[3];
+
+	/* optional flag that is raised whenever a sample was written */
+	short *do_update;
+
+	struct ColorSpace *rect_colorspace;
+} BakeShade;
+
+/* Prepare `shi` for shading face `vlr` of `obi` at barycentric coordinates
+ * (u, v) and pixel (x, y). `quad` selects the second triangle of a quad. */
+static void bake_set_shade_input(ObjectInstanceRen *obi, VlakRen *vlr, ShadeInput *shi, int quad, int UNUSED(isect), int x, int y, float u, float v)
+{
+	/* triangle 0-2-3 for the second half of a quad, 0-1-2 otherwise */
+	if (!quad) {
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
+	}
+	else {
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3);
+	}
+
+	/* cache for shadow */
+	shi->samplenr = R.shadowsamplenr[shi->thread]++;
+
+	/* all samples */
+	shi->mask = 0xFFFF;
+
+	shi->xs = x;
+	shi->ys = y;
+	shi->u = -u;
+	shi->v = -v;
+
+	shade_input_set_uv(shi);
+	shade_input_set_normals(shi);
+
+	/* no normal flip */
+	if (shi->flippednor) {
+		shade_input_flip_normals(shi);
+	}
+
+	/* the view vector looks straight at the surface (the normal is negated
+	 * in the renderer, so no negation is needed here) */
+	copy_v3_v3(shi->view, shi->vn);
+}
+
+/*
+ * Shade one bake sample and write the result to the bake target.
+ *
+ * handle: the BakeShade of the calling thread.
+ * ob: object whose imat_ren is used for object space normal baking.
+ * shi: shade input, already set up by bake_set_shade_input().
+ * x, y: target pixel.
+ * tvn, ttang: optional normal and tangent (4 floats, the 4th is used as the
+ *             bitangent sign) for the tangent space matrix; when either is
+ *             NULL shi->nmapnorm and shi->nmaptang are used instead.
+ *
+ * The result goes to bs->rect_float when it is set and no vertex color target
+ * is active, otherwise to bs->vcol or to the byte buffer bs->rect.
+ */
+static void bake_shade(void *handle, Object *ob, ShadeInput *shi, int UNUSED(quad), int x, int y, float UNUSED(u), float UNUSED(v), float *tvn, float *ttang)
+{
+	BakeShade *bs = handle;
+	ShadeSample *ssamp = &bs->ssamp;
+	ShadeResult shr;
+	VlakRen *vlr = shi->vlr;
+
+	shade_input_init_material(shi);
+
+	if (bs->type == RE_BAKE_AO) {
+		ambient_occlusion(shi);
+
+		if (R.r.bake_flag & R_BAKE_NORMALIZE) {
+			copy_v3_v3(shr.combined, shi->ao);
+		}
+		else {
+			zero_v3(shr.combined);
+			environment_lighting_apply(shi, &shr);
+		}
+	}
+	else {
+		if (bs->type == RE_BAKE_SHADOW) /* Why do shadows set the color anyhow?, ignore material color for baking */
+			shi->r = shi->g = shi->b = 1.0f;
+
+		shade_input_set_shade_texco(shi);
+
+		/* only do AO for a full bake (and obviously AO bakes)
+		 * AO for light bakes is a leftover and might not be needed */
+		if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_AO, RE_BAKE_LIGHT))
+			shade_samples_do_AO(ssamp);
+
+		if (shi->mat->nodetree && shi->mat->use_nodes) {
+			ntreeShaderExecTree(shi->mat->nodetree, shi, &shr);
+			shi->mat = vlr->mat;  /* shi->mat is being set in nodetree */
+		}
+		else
+			shade_material_loop(shi, &shr);
+
+		/* the bake types below replace the shaded result with one input */
+		if (bs->type == RE_BAKE_NORMALS) {
+			float nor[3];
+
+			copy_v3_v3(nor, shi->vn);
+
+			if (R.r.bake_normal_space == R_BAKE_SPACE_CAMERA) {
+				/* pass */
+			}
+			else if (R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) {
+				float mat[3][3], imat[3][3];
+
+				/* bitangent */
+				if (tvn && ttang) {
+					copy_v3_v3(mat[0], ttang);
+					cross_v3_v3v3(mat[1], tvn, ttang);
+					mul_v3_fl(mat[1], ttang[3]);
+					copy_v3_v3(mat[2], tvn);
+				}
+				else {
+					copy_v3_v3(mat[0], shi->nmaptang);
+					cross_v3_v3v3(mat[1], shi->nmapnorm, shi->nmaptang);
+					mul_v3_fl(mat[1], shi->nmaptang[3]);
+					copy_v3_v3(mat[2], shi->nmapnorm);
+				}
+
+				invert_m3_m3(imat, mat);
+				mul_m3_v3(imat, nor);
+			}
+			else if (R.r.bake_normal_space == R_BAKE_SPACE_OBJECT)
+				mul_mat3_m4_v3(ob->imat_ren, nor);  /* ob->imat_ren includes viewinv! */
+			else if (R.r.bake_normal_space == R_BAKE_SPACE_WORLD)
+				mul_mat3_m4_v3(R.viewinv, nor);
+
+			normalize_v3(nor); /* in case object has scaling */
+
+			/* The invert of the red channel is to make
+			 * the normal map compliant with the outside world.
+			 * It needs to be done because in Blender
+			 * the normal used in the renderer points inward. It is generated
+			 * this way in calc_vertexnormals(). Should this ever change
+			 * this negate must be removed.
+			 *
+			 * there is also a small 1e-5f bias for precision issues. otherwise
+			 * we randomly get 127 or 128 for neutral colors. we choose 128
+			 * because it is the convention flat color. * */
+			shr.combined[0] = (-nor[0]) / 2.0f + 0.5f + 1e-5f;
+			shr.combined[1] = nor[1]    / 2.0f + 0.5f + 1e-5f;
+			shr.combined[2] = nor[2]    / 2.0f + 0.5f + 1e-5f;
+		}
+		else if (bs->type == RE_BAKE_TEXTURE) {
+			copy_v3_v3(shr.combined, &shi->r);
+			shr.alpha = shi->alpha;
+		}
+		else if (bs->type == RE_BAKE_SHADOW) {
+			copy_v3_v3(shr.combined, shr.shad);
+			shr.alpha = shi->alpha;
+		}
+		else if (bs->type == RE_BAKE_SPEC_COLOR) {
+			copy_v3_v3(shr.combined, &shi->specr);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_SPEC_INTENSITY) {
+			copy_v3_fl(shr.combined, shi->spec);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_MIRROR_COLOR) {
+			copy_v3_v3(shr.combined, &shi->mirr);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_MIRROR_INTENSITY) {
+			copy_v3_fl(shr.combined, shi->ray_mirror);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_ALPHA) {
+			copy_v3_fl(shr.combined, shi->alpha);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_EMIT) {
+			copy_v3_fl(shr.combined, shi->emit);
+			shr.alpha = 1.0f;
+		}
+		else if (bs->type == RE_BAKE_VERTEX_COLORS) {
+			copy_v3_v3(shr.combined, shi->vcol);
+			shr.alpha = shi->vcol[3];
+		}
+	}
+
+	if (bs->rect_float && !bs->vcol) {
+		/* Target is float, 4 channels per pixel. */
+		float *col = bs->rect_float + 4 * (bs->rectx * y + x);
+		copy_v3_v3(col, shr.combined);
+		/* only these bake types carry a meaningful alpha */
+		if (bs->type == RE_BAKE_ALL || bs->type == RE_BAKE_TEXTURE || bs->type == RE_BAKE_VERTEX_COLORS) {
+			col[3] = shr.alpha;
+		}
+		else {
+			col[3] = 1.0;
+		}
+	}
+	else {
+		/* Target is char (LDR). */
+		unsigned char col[4];
+
+		if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_TEXTURE)) {
+			float rgb[3];
+
+			copy_v3_v3(rgb, shr.combined);
+			if (R.scene_color_manage) {
+				/* Vertex colors have no way to specify color space, so they
+				 * default to sRGB. */
+				if (!bs->vcol)
+					IMB_colormanagement_scene_linear_to_colorspace_v3(rgb, bs->rect_colorspace);
+				else
+					linearrgb_to_srgb_v3_v3(rgb, rgb);
+			}
+			rgb_float_to_uchar(col, rgb);
+		}
+		else {
+			rgb_float_to_uchar(col, shr.combined);
+		}
+
+		if (ELEM(bs->type, RE_BAKE_ALL, RE_BAKE_TEXTURE, RE_BAKE_VERTEX_COLORS)) {
+			col[3] = unit_float_to_uchar_clamp(shr.alpha);
+		}
+		else {
+			col[3] = 255;
+		}
+
+		if (bs->vcol) {
+			/* Vertex color baking. Vcol has no useful alpha channel (it exists
+			 * but is used only for vertex painting). */
+			bs->vcol->r = col[0];
+			bs->vcol->g = col[1];
+			bs->vcol->b = col[2];
+		}
+		else {
+			unsigned char *imcol = (unsigned char *)(bs->rect + bs->rectx * y + x);
+			copy_v4_v4_uchar(imcol, col);
+		}
+
+	}
+
+	/* tag the pixel as written */
+	if (bs->rect_mask) {
+		bs->rect_mask[bs->rectx * y + x] = FILTER_MASK_USED;
+	}
+
+	if (bs->do_update) {
+		*bs->do_update = true;
+	}
+}
+
+/* Convert the signed distance `dist` to a displacement value and write it to
+ * the bake target at pixel (x, y); the pixel is tagged in the mask as well. */
+static void bake_displacement(void *handle, ShadeInput *UNUSED(shi), float dist, int x, int y)
+{
+	BakeShade *bs = handle;
+	float disp;
+
+	if ((R.r.bake_flag & R_BAKE_NORMALIZE) == 0) {
+		/* alter the range from [-0.5, 0.5] to [0, 1] */
+		disp = 0.5f + dist;
+	}
+	else if (R.r.bake_maxdist) {
+		/* alter the range from [-bake_maxdist, bake_maxdist] to [0, 1] */
+		disp = (dist + R.r.bake_maxdist) / (R.r.bake_maxdist * 2);
+	}
+	else {
+		/* no maximum distance given: keep the raw distance */
+		disp = dist;
+	}
+
+	if (bs->displacement_buffer) {
+		bs->displacement_buffer[bs->rectx * y + x] = disp;
+		bs->displacement_min = min_ff(bs->displacement_min, disp);
+		bs->displacement_max = max_ff(bs->displacement_max, disp);
+	}
+
+	if (bs->rect_float && !bs->vcol) {
+		/* Target is float. */
+		float *pixel = &bs->rect_float[4 * (bs->rectx * y + x)];
+
+		pixel[0] = disp;
+		pixel[1] = disp;
+		pixel[2] = disp;
+		pixel[3] = 1.0f;
+	}
+	else {
+		/* Target is char (LDR). */
+		unsigned char grey[4];
+
+		grey[0] = grey[1] = grey[2] = unit_float_to_uchar_clamp(disp);
+		grey[3] = 255;
+
+		if (!bs->vcol) {
+			copy_v4_v4_uchar((unsigned char *)(bs->rect + bs->rectx * y + x), grey);
+		}
+		else {
+			/* Vertex color baking: the alpha channel of vcol is left alone
+			 * (it is only used for vertex painting). */
+			bs->vcol->r = grey[0];
+			bs->vcol->g = grey[1];
+			bs->vcol->b = grey[2];
+		}
+	}
+
+	if (bs->rect_mask) {
+		bs->rect_mask[bs->rectx * y + x] = FILTER_MASK_USED;
+	}
+}
+
+/* Cast a ray from `start` (pushed back by the bake bias) along `dir * sign`.
+ * On a hit the hit position is stored in `hitco` and the hit distance in
+ * `dist`. Returns the result of RE_rayobject_raycast(). */
+static int bake_intersect_tree(RayObject *raytree, Isect *isect, float *start, float *dir, float sign, float *hitco, float *dist)
+{
+	int hit;
+
+	/* 'dir' is always normalized */
+	madd_v3_v3v3fl(isect->start, start, dir, -R.r.bake_biasdist);
+	mul_v3_v3fl(isect->dir, dir, sign);
+
+	/* might be useful to make a user setting for maxsize */
+	if (R.r.bake_maxdist > 0.0f) {
+		isect->dist = R.r.bake_maxdist;
+	}
+	else {
+		isect->dist = RE_RAYTRACE_MAXDIST + R.r.bake_biasdist;
+	}
+
+	hit = RE_rayobject_raycast(raytree, isect);
+	if (!hit) {
+		return hit;
+	}
+
+	madd_v3_v3v3fl(hitco, isect->start, isect->dir, isect->dist);
+	*dist = isect->dist;
+
+	return hit;
+}
+
+/* Compute bs->dxco and bs->dyco, the change of the render coordinate per
+ * pixel step in x and y, for the current triangle with pixel space UVs
+ * uv1, uv2 and uv3. */
+static void bake_set_vlr_dxyco(BakeShade *bs, float *uv1, float *uv2, float *uv3)
+{
+	VlakRen *vlr = bs->vlr;
+	/* second triangle of a quad uses v1, v3, v4 */
+	const float *co1 = vlr->v1->co;
+	const float *co2 = bs->quad ? vlr->v3->co : vlr->v2->co;
+	const float *co3 = bs->quad ? vlr->v4->co : vlr->v3->co;
+	float area;
+
+	/* formula derived from barycentric coordinates:
+	 * (uvArea1*v1 + uvArea2*v2 + uvArea3*v3)/uvArea
+	 * then taking u and v partial derivatives to get dxco and dyco */
+	area = (uv2[0] - uv1[0]) * (uv3[1] - uv1[1]) - (uv3[0] - uv1[0]) * (uv2[1] - uv1[1]);
+
+	if (fabsf(area) > FLT_EPSILON) {
+		const float scale = 0.5f / area;
+		const float dx1 = uv2[1] - uv3[1];
+		const float dx2 = uv3[1] - uv1[1];
+		const float dx3 = uv1[1] - uv2[1];
+		const float dy1 = uv3[0] - uv2[0];
+		const float dy2 = uv1[0] - uv3[0];
+		const float dy3 = uv2[0] - uv1[0];
+		int axis;
+
+		for (axis = 0; axis < 3; axis++) {
+			bs->dxco[axis] = (co1[axis] * dx1 + co2[axis] * dx2 + co3[axis] * dx3) * scale;
+			bs->dyco[axis] = (co1[axis] * dy1 + co2[axis] * dy2 + co3[axis] * dy3) * scale;
+		}
+	}
+	else {
+		/* degenerate UV triangle */
+		zero_v3(bs->dxco);
+		zero_v3(bs->dyco);
+	}
+
+	if (bs->obi->flag & R_TRANSFORMED) {
+		mul_m3_v3(bs->obi->nmat, bs->dxco);
+		mul_m3_v3(bs->obi->nmat, bs->dyco);
+	}
+}
+
+/*
+ * Bake one sample of the current face (bs->obi / bs->vlr).
+ *
+ * handle: the BakeShade of the calling thread.
+ * x, y: target pixel.
+ * u, v: barycentric coordinates of the sample on the current triangle.
+ *
+ * When baking selected to active (bs->actob set) a ray is cast along the
+ * normal in both directions and the closest hit is shaded instead; for
+ * displacement and derivative bakes only the hit distance is written.
+ */
+static void do_bake_shade(void *handle, int x, int y, float u, float v)
+{
+	BakeShade *bs = handle;
+	VlakRen *vlr = bs->vlr;
+	ObjectInstanceRen *obi = bs->obi;
+	Object *ob = obi->obr->ob;
+	float l, *v1, *v2, *v3, tvn[3], ttang[4];
+	int quad;
+	ShadeSample *ssamp = &bs->ssamp;
+	ShadeInput *shi = ssamp->shi;
+
+	/* fast threadsafe break test */
+	if (R.test_break(R.tbh))
+		return;
+
+	/* setup render coordinates */
+	if (bs->quad) {
+		v1 = vlr->v1->co;
+		v2 = vlr->v3->co;
+		v3 = vlr->v4->co;
+	}
+	else {
+		v1 = vlr->v1->co;
+		v2 = vlr->v2->co;
+		v3 = vlr->v3->co;
+	}
+
+	/* third barycentric weight */
+	l = 1.0f - u - v;
+
+	/* shrink barycentric coordinates inwards slightly to avoid some issues
+	 * where baking selected to active might just miss the other face at the
+	 * near the edge of a face */
+	if (bs->actob) {
+		const float eps = 1.0f - 1e-4f;
+		float invsum;
+
+		u = (u - 0.5f) * eps + 0.5f;
+		v = (v - 0.5f) * eps + 0.5f;
+		l = (l - 0.5f) * eps + 0.5f;
+
+		/* renormalize so the three weights sum to one again */
+		invsum = 1.0f / (u + v + l);
+
+		u *= invsum;
+		v *= invsum;
+		l *= invsum;
+	}
+
+	/* renderco */
+	shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0];
+	shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1];
+	shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2];
+
+	/* avoid self shadow with vertex bake from adjacent faces [#33729] */
+	if ((bs->vcol != NULL) && (bs->actob == NULL)) {
+		madd_v3_v3fl(shi->co, vlr->n, 0.0001f);
+	}
+
+	if (obi->flag & R_TRANSFORMED)
+		mul_m4_v3(obi->mat, shi->co);
+
+	copy_v3_v3(shi->dxco, bs->dxco);
+	copy_v3_v3(shi->dyco, bs->dyco);
+
+	quad = bs->quad;
+	bake_set_shade_input(obi, vlr, shi, quad, 0, x, y, u, v);
+
+	/* keep the normal and tangent of the starting face: shi may be set up
+	 * again for the hit face below */
+	if (bs->type == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) {
+		shade_input_set_shade_texco(shi);
+		copy_v3_v3(tvn, shi->nmapnorm);
+		copy_v4_v4(ttang, shi->nmaptang);
+	}
+
+	/* if we are doing selected to active baking, find point on other face */
+	if (bs->actob) {
+		Isect isec, minisec;
+		float co[3], minco[3], dist, mindist = 0.0f;
+		int hit, sign, dir = 1;
+
+		/* intersect with ray going forward and backward*/
+		hit = 0;
+		memset(&minisec, 0, sizeof(minisec));
+		minco[0] = minco[1] = minco[2] = 0.0f;
+
+		copy_v3_v3(bs->dir, shi->vn);
+
+		/* sign takes the values -1 and 1 */
+		for (sign = -1; sign <= 1; sign += 2) {
+			memset(&isec, 0, sizeof(isec));
+			isec.mode = RE_RAY_MIRROR;
+
+			isec.orig.ob   = obi;
+			isec.orig.face = vlr;
+			isec.userdata = bs->actob;
+			isec.check = RE_CHECK_VLR_BAKE;
+			isec.skip = RE_SKIP_VLR_NEIGHBOUR;
+
+			if (bake_intersect_tree(R.raytree, &isec, shi->co, shi->vn, sign, co, &dist)) {
+				/* keep the hit closest to the sample position */
+				if (!hit || len_squared_v3v3(shi->co, co) < len_squared_v3v3(shi->co, minco)) {
+					minisec = isec;
+					mindist = dist;
+					copy_v3_v3(minco, co);
+					hit = 1;
+					dir = sign;
+				}
+			}
+		}
+
+		if (ELEM(bs->type, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE)) {
+			/* the sign of the stored distance depends on the ray direction */
+			if (hit)
+				bake_displacement(handle, shi, (dir == -1) ? mindist : -mindist, x, y);
+			else
+				bake_displacement(handle, shi, 0.0f, x, y);
+			return;
+		}
+
+		/* if hit, we shade from the new point, otherwise from point one starting face */
+		if (hit) {
+			obi = (ObjectInstanceRen *)minisec.hit.ob;
+			vlr = (VlakRen *)minisec.hit.face;
+			quad = (minisec.isect == 2);
+			copy_v3_v3(shi->co, minco);
+
+			u = -minisec.u;
+			v = -minisec.v;
+			bake_set_shade_input(obi, vlr, shi, quad, 1, x, y, u, v);
+		}
+	}
+
+	/* tvn and ttang are only initialized for tangent space normal bakes */
+	if (bs->type == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT)
+		bake_shade(handle, ob, shi, quad, x, y, u, v, tvn, ttang);
+	else
+		bake_shade(handle, ob, shi, quad, x, y, u, v, NULL, NULL);
+}
+
+/*
+ * Hand out the next face to bake.
+ *
+ * The iteration position (object instance and face index) is kept in static
+ * variables, so all worker threads advance through one shared sequence; the
+ * scan is done while holding LOCK_CUSTOM1.
+ *
+ * Calling with bs == NULL resets the iteration to the first instance of
+ * R.instancetable and returns 0.
+ *
+ * Returns 1 with bs->obi and bs->vlr set (plus bs->mpoly, bs->vcol and
+ * bs->mloop for vertex color bakes), or 0 when no faces are left.
+ */
+static int get_next_bake_face(BakeShade *bs)
+{
+	ObjectRen *obr;
+	VlakRen *vlr;
+	MTFace *tface;
+	/* NOTE(review): the static vdone is only ever reset in this function;
+	 * the counter that is incremented below is bs->vdone */
+	static int v = 0, vdone = false;
+	static ObjectInstanceRen *obi = NULL;
+
+	if (bs == NULL) {
+		vlr = NULL;
+		v = vdone = false;
+		obi = R.instancetable.first;
+		return 0;
+	}
+
+	BLI_thread_lock(LOCK_CUSTOM1);
+
+	/* resume at the stored instance and face; the face index restarts at 0
+	 * whenever the scan moves on to the next instance */
+	for (; obi; obi = obi->next, v = 0) {
+		obr = obi->obr;
+
+		/* only allow non instances here */
+		if (obr->flag & R_INSTANCEABLE)
+			continue;
+
+		for (; v < obr->totvlak; v++) {
+			vlr = RE_findOrAddVlak(obr, v);
+
+			/* selected to active: only faces of the active object,
+			 * otherwise: faces of every selected object */
+			if ((bs->actob && bs->actob == obr->ob) || (!bs->actob && (obr->ob->flag & SELECT))) {
+				if (R.r.bake_flag & R_BAKE_VCOL) {
+					/* Gather face data for vertex color bake */
+					Mesh *me;
+					int *origindex, vcollayer;
+					CustomDataLayer *cdl;
+
+					if (obr->ob->type != OB_MESH)
+						continue;
+					me = obr->ob->data;
+
+					origindex = RE_vlakren_get_origindex(obr, vlr, 0);
+					if (origindex == NULL)
+						continue;
+					if (*origindex >= me->totpoly) {
+						/* Small hack for Array modifier, which gives false
+						 * original indices - z0r */
+						continue;
+					}
+#if 0
+					/* Only shade selected faces. */
+					if ((me->mface[*origindex].flag & ME_FACE_SEL) == 0)
+						continue;
+#endif
+
+					vcollayer = CustomData_get_render_layer_index(&me->ldata, CD_MLOOPCOL);
+					if (vcollayer == -1)
+						continue;
+
+					/* point at the loops and loop colors of the original polygon */
+					cdl = &me->ldata.layers[vcollayer];
+					bs->mpoly = me->mpoly + *origindex;
+					bs->vcol = ((MLoopCol *)cdl->data) + bs->mpoly->loopstart;
+					bs->mloop = me->mloop + bs->mpoly->loopstart;
+
+					/* Tag mesh for reevaluation. */
+					me->id.tag |= LIB_TAG_DOIT;
+				}
+				else {
+					Image *ima = NULL;
+					ImBuf *ibuf = NULL;
+					/* clear colors, with and without alpha */
+					const float vec_alpha[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+					const float vec_solid[4] = {0.0f, 0.0f, 0.0f, 1.0f};
+					const float nor_alpha[4] = {0.5f, 0.5f, 1.0f, 0.0f};
+					const float nor_solid[4] = {0.5f, 0.5f, 1.0f, 1.0f};
+					const float disp_alpha[4] = {0.5f, 0.5f, 0.5f, 0.0f};
+					const float disp_solid[4] = {0.5f, 0.5f, 0.5f, 1.0f};
+
+					tface = RE_vlakren_get_tface(obr, vlr, obr->bakemtface, NULL, 0);
+
+					if (!tface || !tface->tpage)
+						continue;
+
+					ima = tface->tpage;
+					ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL);
+
+					if (ibuf == NULL)
+						continue;
+
+					/* every skip path below releases the buffer acquired above */
+					if (ibuf->rect == NULL && ibuf->rect_float == NULL) {
+						BKE_image_release_ibuf(ima, ibuf, NULL);
+						continue;
+					}
+
+					if (ibuf->rect_float && !(ibuf->channels == 0 || ibuf->channels == 4)) {
+						BKE_image_release_ibuf(ima, ibuf, NULL);
+						continue;
+					}
+
+					if (ima->flag & IMA_USED_FOR_RENDER) {
+						ima->id.tag &= ~LIB_TAG_DOIT;
+						BKE_image_release_ibuf(ima, ibuf, NULL);
+						continue;
+					}
+
+					/* find the image for the first time? */
+					if (ima->id.tag & LIB_TAG_DOIT) {
+						ima->id.tag &= ~LIB_TAG_DOIT;
+
+						/* we either fill in float or char, this ensures things go fine */
+						if (ibuf->rect_float)
+							imb_freerectImBuf(ibuf);
+						/* clear image */
+						if (R.r.bake_flag & R_BAKE_CLEAR) {
+							if (R.r.bake_mode == RE_BAKE_NORMALS && R.r.bake_normal_space == R_BAKE_SPACE_TANGENT)
+								IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? nor_alpha : nor_solid);
+							else if (ELEM(R.r.bake_mode, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE))
+								IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? disp_alpha : disp_solid);
+							else
+								IMB_rectfill(ibuf, (ibuf->planes == R_IMF_PLANES_RGBA) ? vec_alpha : vec_solid);
+						}
+						/* might be read by UI to set active image for display */
+						R.bakebuf = ima;
+					}
+
+					/* Tag image for redraw. */
+					ibuf->userflags |= IB_DISPLAY_BUFFER_INVALID;
+					BKE_image_release_ibuf(ima, ibuf, NULL);
+				}
+
+				bs->obi = obi;
+				bs->vlr = vlr;
+				bs->vdone++;  /* only for error message if nothing was rendered */
+				/* the next call continues with the following face */
+				v++;
+				BLI_thread_unlock(LOCK_CUSTOM1);
+				return 1;
+			}
+		}
+	}
+
+	BLI_thread_unlock(LOCK_CUSTOM1);
+	return 0;
+}
+
+/* Bake the loop color of the current polygon that belongs to render vertex
+ * `vert`, shading at barycentric coordinates (u, v). Nothing is baked when
+ * the vertex has no original index or no loop of the polygon uses it. */
+static void bake_single_vertex(BakeShade *bs, VertRen *vert, float u, float v)
+{
+	int *orig_vert;
+	int loop;
+
+	/* per vertex fixed seed */
+	BLI_thread_srandom(bs->thread, vert->index);
+
+	orig_vert = RE_vertren_get_origindex(bs->obi->obr, vert, 0);
+	if (orig_vert == NULL || *orig_vert == ORIGINDEX_NONE) {
+		return;
+	}
+
+	/* Find the first loop that uses this vertex and shade into its color. */
+	for (loop = 0; loop < bs->mpoly->totloop; loop++) {
+		if (bs->mloop[loop].v == *orig_vert) {
+			MLoopCol *poly_vcol = bs->vcol;
+
+			/* temporarily aim the output at this loop's color */
+			bs->vcol = poly_vcol + loop;
+			do_bake_shade(bs, 0, 0, u, v);
+			bs->vcol = poly_vcol;
+			return;
+		}
+	}
+}
+
+/* Bake the vertex colors of the current face. The work is done face by face
+ * and every vertex of each face is shaded, since vertex colors are stored per
+ * loop rather than per vertex. */
+static void shade_verts(BakeShade *bs)
+{
+	VlakRen *vlr = bs->vlr;
+
+	/* Image output is switched off; results go to vcol, whose pointer is
+	 * set in bake_single_vertex. */
+	bs->ima = NULL;
+	bs->rect = NULL;
+	bs->rect_float = NULL;
+	bs->displacement_buffer = NULL;
+	bs->displacement_min = FLT_MAX;
+	bs->displacement_max = -FLT_MAX;
+
+	bs->quad = 0;
+
+	/* No anti-aliasing for vertices. */
+	zero_v3(bs->dxco);
+	zero_v3(bs->dyco);
+
+	/* u and v are barycentric coordinates; at a vertex they are 0 or 1. */
+	if (vlr->flag & R_FACE_SPLIT) {
+		/* Second half of a split quad: a single vertex is left. */
+		if (vlr->flag & R_DIVIDE_24) {
+			bake_single_vertex(bs, vlr->v2, 0.0f, 1.0f);
+		}
+		else {
+			bake_single_vertex(bs, vlr->v3, 0.0f, 0.0f);
+		}
+		return;
+	}
+
+	/* Triangle, whole quad, or first half of a split quad. */
+	bake_single_vertex(bs, vlr->v1, 1.0f, 0.0f);
+	bake_single_vertex(bs, vlr->v2, 0.0f, 1.0f);
+	bake_single_vertex(bs, vlr->v3, 0.0f, 0.0f);
+
+	if (vlr->v4) {
+		bs->quad = 1;
+		bake_single_vertex(bs, vlr->v4, 0.0f, 0.0f);
+	}
+}
+
+/* Bake the current face (bs->obi / bs->vlr) into the image assigned to its
+ * bake UV layer: the face is scan converted in pixel space and
+ * do_bake_shade() is called for every covered pixel.
+ *
+ * already have tested for tface and ima and zspan */
+static void shade_tface(BakeShade *bs)
+{
+	VlakRen *vlr = bs->vlr;
+	ObjectInstanceRen *obi = bs->obi;
+	ObjectRen *obr = obi->obr;
+	MTFace *tface = RE_vlakren_get_tface(obr, vlr, obr->bakemtface, NULL, 0);
+	Image *ima = tface->tpage;
+	float vec[4][2];
+	int a, i1, i2, i3;
+
+	/* per face fixed seed */
+	BLI_thread_srandom(bs->thread, vlr->index);
+
+	/* check valid zspan */
+	if (ima != bs->ima) {
+		/* switching image: drop the previous buffer and acquire the new one */
+		BKE_image_release_ibuf(bs->ima, bs->ibuf, NULL);
+
+		bs->ima = ima;
+		bs->ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL);
+		/* note, these calls only free/fill contents of zspan struct, not zspan itself */
+		zbuf_free_span(bs->zspan);
+		zbuf_alloc_span(bs->zspan, bs->ibuf->x, bs->ibuf->y, R.clipcrop);
+	}
+
+	/* point the output at the image buffers */
+	bs->rectx = bs->ibuf->x;
+	bs->recty = bs->ibuf->y;
+	bs->rect = bs->ibuf->rect;
+	bs->rect_colorspace = bs->ibuf->rect_colorspace;
+	bs->rect_float = bs->ibuf->rect_float;
+	bs->vcol = NULL;
+	bs->quad = 0;
+	bs->rect_mask = NULL;
+	bs->displacement_buffer = NULL;
+
+	if (bs->use_mask || bs->use_displacement_buffer) {
+		/* the mask and displacement buffers are stored on the image buffer
+		 * and created on first use */
+		BakeImBufuserData *userdata = bs->ibuf->userdata;
+		if (userdata == NULL) {
+			BLI_thread_lock(LOCK_CUSTOM1);
+			userdata = bs->ibuf->userdata;
+			if (userdata == NULL) /* since the thread was locked, its possible another thread alloced the value */
+				userdata = MEM_callocN(sizeof(BakeImBufuserData), "BakeImBufuserData");
+
+			if (bs->use_mask) {
+				if (userdata->mask_buffer == NULL) {
+					userdata->mask_buffer = MEM_callocN(sizeof(char) * bs->rectx * bs->recty, "BakeMask");
+				}
+			}
+
+			if (bs->use_displacement_buffer) {
+				if (userdata->displacement_buffer == NULL) {
+					userdata->displacement_buffer = MEM_callocN(sizeof(float) * bs->rectx * bs->recty, "BakeDisp");
+				}
+			}
+
+			bs->ibuf->userdata = userdata;
+
+			BLI_thread_unlock(LOCK_CUSTOM1);
+		}
+
+		bs->rect_mask = userdata->mask_buffer;
+		bs->displacement_buffer = userdata->displacement_buffer;
+	}
+
+	/* get pixel level vertex coordinates */
+	for (a = 0; a < 4; a++) {
+		/* Note, workaround for pixel aligned UVs which are common and can screw up our intersection tests
+		 * where a pixel gets in between 2 faces or the middle of a quad,
+		 * camera aligned quads also have this problem but they are less common.
+		 * Add a small offset to the UVs, fixes bug #18685 - Campbell */
+		vec[a][0] = tface->uv[a][0] * (float)bs->rectx - (0.5f + 0.001f);
+		vec[a][1] = tface->uv[a][1] * (float)bs->recty - (0.5f + 0.002f);
+	}
+
+	/* UV indices have to be corrected for possible quad->tria splits */
+	i1 = 0; i2 = 1; i3 = 2;
+	vlr_set_uv_indices(vlr, &i1, &i2, &i3);
+	bake_set_vlr_dxyco(bs, vec[i1], vec[i2], vec[i3]);
+	zspan_scanconvert(bs->zspan, bs, vec[i1], vec[i2], vec[i3], do_bake_shade);
+
+	/* second triangle (corners 0, 2, 3) of a quad */
+	if (vlr->v4) {
+		bs->quad = 1;
+		bake_set_vlr_dxyco(bs, vec[0], vec[2], vec[3]);
+		zspan_scanconvert(bs->zspan, bs, vec[0], vec[2], vec[3], do_bake_shade);
+	}
+}
+
+/* Worker thread entry point: keeps fetching faces with get_next_bake_face()
+ * and bakes them until no face is left or a break is requested. `bs_v` is the
+ * BakeShade of this thread. Always returns NULL. */
+static void *do_bake_thread(void *bs_v)
+{
+	BakeShade *bs = bs_v;
+
+	for (;;) {
+		if (!get_next_bake_face(bs)) {
+			break;
+		}
+
+		if ((R.r.bake_flag & R_BAKE_VCOL) == 0) {
+			shade_tface(bs);
+		}
+		else {
+			shade_verts(bs);
+		}
+
+		/* fast threadsafe break test */
+		if (R.test_break(R.tbh)) {
+			break;
+		}
+
+		/* access is not threadsafe but since its just true/false probably ok
+		 * only used for interactive baking */
+		if (bs->do_update != NULL) {
+			*bs->do_update = true;
+		}
+	}
+
+	bs->ready = true;
+
+	/* drop the image buffer that is still held */
+	BKE_image_release_ibuf(bs->ima, bs->ibuf, NULL);
+
+	return NULL;
+}
+
+/* Post-process a baked image buffer: extend the baked pixels by `filter`
+ * (the margin, skipped when 0) using `mask`, then update or restore the
+ * alpha state of the buffer. */
+void RE_bake_ibuf_filter(ImBuf *ibuf, char *mask, const int filter)
+{
+	/* must check before filtering */
+	const bool gained_alpha = (ibuf->planes != R_IMF_PLANES_RGBA) && BKE_imbuf_alpha_test(ibuf);
+
+	/* Margin */
+	if (filter) {
+		IMB_filter_extend(ibuf, mask, filter);
+	}
+
+	if (gained_alpha) {
+		/* the bake resulted in new alpha: change the image setting */
+		ibuf->planes = R_IMF_PLANES_RGBA;
+	}
+	else if (filter && ibuf->planes != R_IMF_PLANES_RGBA) {
+		/* clear alpha added by filtering */
+		IMB_rectfill_alpha(ibuf, 1.0f);
+	}
+}
+
+/*
+ * Rewrite the baked pixels of `ibuf` with displacement values normalized to
+ * the range that was actually baked.
+ *
+ * ibuf: image buffer to update; its float and/or byte buffer is written.
+ * displacement: one raw displacement value per pixel.
+ * mask: one byte per pixel; only pixels equal to FILTER_MASK_USED are touched.
+ * displacement_min, displacement_max: extremes of the baked displacement.
+ *
+ * The largest absolute extreme is mapped to the ends of [0, 1]; when it is
+ * (nearly) zero every baked pixel becomes 0.5.
+ */
+void RE_bake_ibuf_normalize_displacement(ImBuf *ibuf, float *displacement, char *mask, float displacement_min, float displacement_max)
+{
+	/* pixel count and offsets are computed in size_t: the products
+	 * x * y and i * 4 can exceed INT_MAX for very large images */
+	const size_t totpixel = (size_t)ibuf->x * (size_t)ibuf->y;
+	const float max_distance = max_ff(fabsf(displacement_min), fabsf(displacement_max));
+	/* loop invariant: is there a usable range to normalize against? */
+	const bool has_range = (max_distance > 1e-5f);
+	size_t i;
+
+	for (i = 0; i < totpixel; i++) {
+		float normalized_displacement;
+
+		if (mask[i] != FILTER_MASK_USED)
+			continue;
+
+		if (has_range)
+			normalized_displacement = (displacement[i] + max_distance) / (max_distance * 2);
+		else
+			normalized_displacement = 0.5f;
+
+		if (ibuf->rect_float) {
+			/* currently baking happens to RGBA only */
+			float *fp = ibuf->rect_float + i * 4;
+			fp[0] = fp[1] = fp[2] = normalized_displacement;
+			fp[3] = 1.0f;
+		}
+
+		if (ibuf->rect) {
+			unsigned char *cp = (unsigned char *) (ibuf->rect + i);
+			cp[0] = cp[1] = cp[2] = unit_float_to_uchar_clamp(normalized_displacement);
+			cp[3] = 255;
+		}
+	}
+}
+
+/* Using object selection tags, bake the faces with UV maps (or to vertex colors); the render
+ * should have been set up. Returns a BAKE_RESULT_* code (NO_OBJECTS if nothing was handled). */
+int RE_bake_shade_all_selected(Render *re, int type, Object *actob, short *do_update, float *progress)
+{
+	BakeShade *handles;
+	ListBase threads;
+	Image *ima;
+	int a, vdone = 0, result = BAKE_RESULT_OK;
+	bool use_mask = false;
+	bool use_displacement_buffer = false;
+	bool do_manage = false;
+
+	if (ELEM(type, RE_BAKE_ALL, RE_BAKE_TEXTURE)) {
+		do_manage = BKE_scene_check_color_management_enabled(re->scene);
+	}
+
+	re->scene_color_manage = BKE_scene_check_color_management_enabled(re->scene);
+
+	/* initialize render global */
+	R = *re;
+	R.bakebuf = NULL;
+
+	/* initialize static vars */
+	get_next_bake_face(NULL);
+
+	/* do we need a mask? */
+	if (re->r.bake_filter)
+		use_mask = true;
+
+	/* do we need buffer to store displacements  */
+	if (ELEM(type, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE)) {
+		if (((R.r.bake_flag & R_BAKE_NORMALIZE) && R.r.bake_maxdist == 0.0f) ||
+		    (type == RE_BAKE_DERIVATIVE))
+		{
+			use_displacement_buffer = true;
+			use_mask = true;
+		}
+	}
+
+	/* baker uses this flag to detect if image was initialized */
+	if ((R.r.bake_flag & R_BAKE_VCOL) == 0) {
+		for (ima = G.main->image.first; ima; ima = ima->id.next) {
+			ImBuf *ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL);
+			ima->id.tag |= LIB_TAG_DOIT;
+			ima->flag &= ~IMA_USED_FOR_RENDER;
+			if (ibuf) {
+				ibuf->userdata = NULL; /* use for masking if needed */
+			}
+			BKE_image_release_ibuf(ima, ibuf, NULL);
+		}
+	}
+
+	if (R.r.bake_flag & R_BAKE_VCOL) {
+		/* untag all meshes */
+		BKE_main_id_tag_listbase(&G.main->mesh, LIB_TAG_DOIT, false);
+	}
+
+	BLI_threadpool_init(&threads, do_bake_thread, re->r.threads);
+
+	handles = MEM_callocN(sizeof(BakeShade) * re->r.threads, "BakeShade");
+
+	/* get the threads running */
+	for (a = 0; a < re->r.threads; a++) {
+		handles[a].thread = a;
+
+		/* set defaults in handles */
+		handles[a].ssamp.shi[0].lay = re->lay;
+
+		if (type == RE_BAKE_SHADOW) {
+			handles[a].ssamp.shi[0].passflag = SCE_PASS_SHADOW;
+		}
+		else {
+			handles[a].ssamp.shi[0].passflag = SCE_PASS_COMBINED;
+		}
+		handles[a].ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC);
+		handles[a].ssamp.shi[0].thread = a;
+		handles[a].ssamp.shi[0].do_manage = do_manage;
+		handles[a].ssamp.tot = 1;
+
+		handles[a].type = type;
+		handles[a].actob = actob;
+		if (R.r.bake_flag & R_BAKE_VCOL)
+			handles[a].zspan = NULL;
+		else
+			handles[a].zspan = MEM_callocN(sizeof(ZSpan), "zspan for bake");
+
+		handles[a].use_mask = use_mask;
+		handles[a].use_displacement_buffer = use_displacement_buffer;
+
+		handles[a].do_update = do_update; /* use to tell the view to update */
+
+		handles[a].displacement_min = FLT_MAX;
+		handles[a].displacement_max = -FLT_MAX;
+
+		BLI_threadpool_insert(&threads, &handles[a]);
+	}
+
+	/* wait for everything to be done */
+	a = 0;
+	while (a != re->r.threads) {
+		PIL_sleep_ms(50);
+
+		/* calculate progress */
+		for (vdone = 0, a = 0; a < re->r.threads; a++)
+			vdone += handles[a].vdone;
+		if (progress && re->totvlak > 0)
+			*progress = (float)(vdone / (float)re->totvlak);
+
+		for (a = 0; a < re->r.threads; a++) {
+			if (handles[a].ready == false) {
+				break;
+			}
+		}
+	}
+
+	/* join the workers before reading, and later freeing, the data they use */
+	BLI_threadpool_end(&threads);
+
+	/* filter and refresh images */
+	if ((R.r.bake_flag & R_BAKE_VCOL) == 0) {
+		float displacement_min = FLT_MAX, displacement_max = -FLT_MAX;
+
+		if (use_displacement_buffer) {
+			for (a = 0; a < re->r.threads; a++) {
+				displacement_min = min_ff(displacement_min, handles[a].displacement_min);
+				displacement_max = max_ff(displacement_max, handles[a].displacement_max);
+			}
+		}
+
+		for (ima = G.main->image.first; ima; ima = ima->id.next) {
+			if ((ima->id.tag & LIB_TAG_DOIT) == 0) {
+				ImBuf *ibuf = BKE_image_acquire_ibuf(ima, NULL, NULL);
+				BakeImBufuserData *userdata;
+
+				if (ima->flag & IMA_USED_FOR_RENDER)
+					result = BAKE_RESULT_FEEDBACK_LOOP;
+
+				if (!ibuf)
+					continue;
+
+				userdata = (BakeImBufuserData *)ibuf->userdata;
+				if (userdata) {
+					if (use_displacement_buffer) {
+						if (type == RE_BAKE_DERIVATIVE) {
+							float user_scale = (R.r.bake_flag & R_BAKE_USERSCALE) ? R.r.bake_user_scale : -1.0f;
+							RE_bake_make_derivative(ibuf, userdata->displacement_buffer, userdata->mask_buffer,
+							                        displacement_min, displacement_max, user_scale);
+						}
+						else {
+							RE_bake_ibuf_normalize_displacement(ibuf, userdata->displacement_buffer, userdata->mask_buffer,
+							                                    displacement_min, displacement_max);
+						}
+					}
+
+					RE_bake_ibuf_filter(ibuf, userdata->mask_buffer, re->r.bake_filter);
+				}
+
+				ibuf->userflags |= IB_BITMAPDIRTY;
+				BKE_image_release_ibuf(ima, ibuf, NULL);
+			}
+		}
+
+		/* free the per-thread span buffers (only allocated when baking to images) */
+		for (a = 0; a < re->r.threads; a++) {
+			zbuf_free_span(handles[a].zspan);
+			MEM_freeN(handles[a].zspan);
+		}
+	}
+
+	MEM_freeN(handles);
+
+	if (vdone == 0) {
+		result = BAKE_RESULT_NO_OBJECTS;
+	}
+
+	return result;
+}
+
+struct Image *RE_bake_shade_get_image(void)
+{
+	return R.bakebuf;  /* field of the global render copy; reset to NULL when a bake starts */
+}
+
+/* **************** Derivative Maps Baker **************** */
+
+/* Fill each unbaked pixel with the weighted average of its baked 3x3 neighbours (one-pixel margin). */
+static void add_single_heights_margin(const ImBuf *ibuf, const char *mask, float *heights_buffer)
+{
+	const int width = ibuf->x, height = ibuf->y;
+	int px, py;
+
+	for (py = 0; py < height; py++) {
+		for (px = 0; px < width; px++) {
+			const int center = width * py + px;
+			float weighted_sum = 0;
+			int weight_total = 0;
+			int dx, dy;
+
+			/* Baked pixels keep their own height. */
+			if (mask[center] == FILTER_MASK_USED)
+				continue;
+
+			for (dy = -1; dy <= 1; dy++) {
+				for (dx = -1; dx <= 1; dx++) {
+					/* Edge neighbours weigh 2, diagonal neighbours 1. */
+					const int weight = (dx == 0 ? 1 : 0) + (dy == 0 ? 1 : 0) + 1;
+					int nx = px + dx, ny = py + dy, neighbour;
+
+					if (dx == 0 && dy == 0)
+						continue;
+
+					/* Neighbours outside the image are clamped onto the border. */
+					CLAMP(nx, 0, width - 1);
+					CLAMP(ny, 0, height - 1);
+					neighbour = width * ny + nx;
+					if (mask[neighbour] == FILTER_MASK_USED) {
+						weighted_sum += weight * heights_buffer[neighbour];
+						weight_total += weight;
+					}
+				}
+			}
+
+			/* Pixels without any baked neighbour are left untouched. */
+			if (weight_total > 0)
+				heights_buffer[center] = weighted_sum / weight_total;
+		}
+	}
+}
+
+/* Turn the baked heights into a derivative map: a Sobel filter of 'heights_buffer' is written
+ * to 'ibuf' (x derivative in red, y derivative in green) for every pixel flagged in 'mask'.
+ * With fmult <= 0 the range is fitted automatically. Returns user-scale (for rendering). */
+float RE_bake_make_derivative(ImBuf *ibuf, float *heights_buffer, const char *mask,
+                              const float height_min, const float height_max,
+                              const float fmult)
+{
+	const float delta_height = height_max - height_min;
+	const float denom = delta_height > 0.0f ? (8 * delta_height) : 1.0f;
+	bool auto_range_fit = fmult <= 0.0f;
+	float max_num_deriv = -1.0f;
+	int x, y, index;
+
+	/* Need a single margin to calculate good derivatives. */
+	add_single_heights_margin(ibuf, mask, heights_buffer);
+
+	if (auto_range_fit) {
+		/* If automatic range fitting is enabled. */
+		for (y = 0; y < ibuf->y; y++) {
+			const int Yu = y == (ibuf->y - 1) ? (ibuf->y - 1) : (y + 1);
+			const int Yc = y;
+			const int Yd = y == 0 ? 0 : (y - 1);
+
+			for (x = 0; x < ibuf->x; x++) {
+				const int Xl = x == 0 ? 0 : (x - 1);
+				const int Xc = x;
+				const int Xr = x == (ibuf->x - 1) ? (ibuf->x - 1) : (x + 1);
+
+				const float Hcy = heights_buffer[Yc * ibuf->x + Xr] - heights_buffer[Yc * ibuf->x + Xl];
+				const float Hu  = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yu * ibuf->x + Xl];
+				const float Hd  = heights_buffer[Yd * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xl];
+
+				const float Hl  = heights_buffer[Yu * ibuf->x + Xl] - heights_buffer[Yd * ibuf->x + Xl];
+				const float Hcx = heights_buffer[Yu * ibuf->x + Xc] - heights_buffer[Yd * ibuf->x + Xc];
+				const float Hr  = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xr];
+
+				/* This corresponds to using the sobel kernel on the heights buffer
+				 * to obtain the derivative multiplied by 8.
+				 */
+				const float deriv_x = Hu + 2 * Hcy + Hd;
+				const float deriv_y = Hr + 2 * Hcx + Hl;
+
+				/* early out */
+				index = ibuf->x * y + x;
+				if (mask[index] != FILTER_MASK_USED) {
+					continue;
+				}
+
+				/* Widen bound. */
+				if (fabsf(deriv_x) > max_num_deriv) {
+					max_num_deriv = fabsf(deriv_x);
+				}
+
+				if (fabsf(deriv_y) > max_num_deriv) {
+					max_num_deriv = fabsf(deriv_y);
+				}
+			}
+		}
+	}
+
+	/* Output derivatives. */
+	auto_range_fit &= (max_num_deriv > 0);
+	for (y = 0; y < ibuf->y; y++) {
+		const int Yu = y == (ibuf->y - 1) ? (ibuf->y - 1) : (y + 1);
+		const int Yc = y;
+		const int Yd = y == 0 ? 0 : (y - 1);
+
+		for (x = 0; x < ibuf->x; x++) {
+			const int Xl = x == 0 ? 0 : (x - 1);
+			const int Xc = x;
+			const int Xr = x == (ibuf->x - 1) ? (ibuf->x - 1) : (x + 1);
+
+			const float Hcy = heights_buffer[Yc * ibuf->x + Xr] - heights_buffer[Yc * ibuf->x + Xl];
+			const float Hu  = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yu * ibuf->x + Xl];
+			const float Hd  = heights_buffer[Yd * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xl];
+
+			const float Hl  = heights_buffer[Yu * ibuf->x + Xl] - heights_buffer[Yd * ibuf->x + Xl];
+			const float Hcx = heights_buffer[Yu * ibuf->x + Xc] - heights_buffer[Yd * ibuf->x + Xc];
+			const float Hr  = heights_buffer[Yu * ibuf->x + Xr] - heights_buffer[Yd * ibuf->x + Xr];
+
+			/* Sobel kernel on the heights buffer: the derivative multiplied by 8. */
+			float deriv_x = Hu + 2 * Hcy + Hd;
+			float deriv_y = Hr + 2 * Hcx + Hl;
+
+			/* Early out. */
+			index = ibuf->x * y + x;
+			if (mask[index] != FILTER_MASK_USED) {
+				continue;
+			}
+
+			if (auto_range_fit) {
+				deriv_x /= max_num_deriv;
+				deriv_y /= max_num_deriv;
+			}
+			else {
+				deriv_x *= (fmult / denom);
+				deriv_y *= (fmult / denom);
+			}
+
+			deriv_x = deriv_x * 0.5f + 0.5f;
+			deriv_y = deriv_y * 0.5f + 0.5f;
+
+			/* Clamp. */
+			CLAMP(deriv_x, 0.0f, 1.0f);
+			CLAMP(deriv_y, 0.0f, 1.0f);
+
+			/* Write out derivatives. */
+			if (ibuf->rect_float) {
+				float *rrgbf = ibuf->rect_float + index * 4;
+
+				rrgbf[0] = deriv_x;
+				rrgbf[1] = deriv_y;
+				rrgbf[2] = 0.0f;
+				rrgbf[3] = 1.0f;
+			}
+			else if (ibuf->rect) {
+				unsigned char *rrgb = (unsigned char *)ibuf->rect + index * 4;
+
+				rrgb[0] = unit_float_to_uchar_clamp(deriv_x);
+				rrgb[1] = unit_float_to_uchar_clamp(deriv_y);
+				rrgb[2] = 0;
+				rrgb[3] = 255;
+			}
+		}
+	}
+
+	/* Return user-scale (for rendering). */
+	return auto_range_fit ? (max_num_deriv / denom) : (fmult > 0.0f ? (1.0f / fmult) : 0.0f);
+}
diff --git a/source/blender/render/intern/source/convertblender.c b/source/blender/render/intern/source/convertblender.c
new file mode 100644
index 00000000000..8675ffec313
--- /dev/null
+++ b/source/blender/render/intern/source/convertblender.c
@@ -0,0 +1,6014 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributors: 2004/2005/2006 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/convertblender.c
+ *  \ingroup render
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+#include "BLI_rand.h"
+#include "BLI_memarena.h"
+#ifdef WITH_FREESTYLE
+#  include "BLI_edgehash.h"
+#endif
+
+#include "BLT_translation.h"
+
+#include "DNA_material_types.h"
+#include "DNA_curve_types.h"
+#include "DNA_group_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_image_types.h"
+#include "DNA_mesh_types.h"
+#include "DNA_meshdata_types.h"
+#include "DNA_modifier_types.h"
+#include "DNA_node_types.h"
+#include "DNA_object_types.h"
+#include "DNA_object_fluidsim_types.h"
+#include "DNA_particle_types.h"
+#include "DNA_scene_types.h"
+#include "DNA_texture_types.h"
+
+#include "BKE_anim.h"
+#include "BKE_curve.h"
+#include "BKE_customdata.h"
+#include "BKE_colortools.h"
+#include "BKE_displist.h"
+#include "BKE_depsgraph.h"
+#include "BKE_DerivedMesh.h"
+#include "BKE_global.h"
+#include "BKE_key.h"
+#include "BKE_image.h"
+#include "BKE_lattice.h"
+#include "BKE_material.h"
+#include "BKE_main.h"
+#include "BKE_mball.h"
+#include "BKE_mesh.h"
+#include "BKE_modifier.h"
+#include "BKE_node.h"
+#include "BKE_object.h"
+#include "BKE_particle.h"
+#include "BKE_scene.h"
+
+#include "PIL_time.h"
+
+#include "envmap.h"
+#include "occlusion.h"
+#include "pointdensity.h"
+#include "voxeldata.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "renderpipeline.h"
+#include "shadbuf.h"
+#include "shading.h"
+#include "strand.h"
+#include "texture.h"
+#include "volume_precache.h"
+#include "sss.h"
+#include "zbuf.h"
+#include "sunsky.h"
+
+/* 10 times larger than normal epsilon, test it on default nurbs sphere with ray_transp (for quad detection) */
+/* or for checking vertex normal flips */
+#define FLT_EPSILON10 1.19209290e-06F
+
+/* could enable at some point but for now there are far too many conversions */
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wdouble-promotion"
+#endif
+
+/* ------------------------------------------------------------------------- */
+/* tool functions/defines for ad hoc simplification and possible future
+ * cleanup      */
+/* ------------------------------------------------------------------------- */
+
+#define UVTOINDEX(u, v) (startvlak + (u) * sizev + (v))
+/*
+ *
+ * NOTE THAT U/V COORDINATES ARE SOMETIMES SWAPPED !!
+ *
+ * ^   ()----p4----p3----()
+ * |   |     |     |     |
+ * u   |     |  F1 |  F2 |
+ *     |     |     |     |
+ *     ()----p1----p2----()
+ *            v ->
+ */
+
+/* ------------------------------------------------------------------------- */
+
+#define CD_MASK_RENDER_INTERNAL \
+    (CD_MASK_BAREMESH | CD_MASK_MFACE | CD_MASK_MTFACE | CD_MASK_MCOL)
+
+/**
+ * Split a row of render faces from its neighbours along the v direction.
+ *
+ * For every face of row \a uIndex the v2 vertex is replaced by a fresh copy,
+ * and that copy becomes v1 of the following face in the row.
+ *
+ * \param startvlak: index of the first face of the surface.
+ * \param vsize: a row holds (vsize - 1) faces, plus one when the row is cyclic.
+ * \param uIndex: row to process.
+ * \param cyclv: non-zero when the row is cyclic (the last face feeds the first).
+ */
+static void split_v_renderfaces(ObjectRen *obr, int startvlak, int UNUSED(startvert), int UNUSED(usize), int vsize, int uIndex, int UNUSED(cyclu), int cyclv)
+{
+	const int row_len = vsize - 1 + (!!cyclv);
+	const int row_start = startvlak + row_len * uIndex;
+	int col;
+
+	for (col = 0; col < row_len; col++) {
+		VlakRen *face = RE_findOrAddVlak(obr, row_start + col);
+		VertRen *dup = RE_vertren_copy(obr, face->v2);
+		const bool is_last = (col == row_len - 1);
+
+		face->v2 = dup;
+
+		/* The next face in the row shares the duplicate; a cyclic row wraps to its first face. */
+		if (!is_last || cyclv) {
+			VlakRen *next_face = RE_findOrAddVlak(obr, row_start + (is_last ? 0 : col + 1));
+			next_face->v1 = dup;
+		}
+
+		/* An open row also gets the v1 of its first face duplicated. */
+		if (!cyclv && col == 0) {
+			face->v1 = RE_vertren_copy(obr, face->v1);
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Stress, tangents and normals                                              */
+/* ------------------------------------------------------------------------- */
+
+/* Add the stretch ratio of edge v1-v2 to the (sum, count) accumulator pair of both vertices. */
+static void calc_edge_stress_add(float *accum, VertRen *v1, VertRen *v2)
+{
+	/* Current edge length divided by its length in original (orco) coordinates. */
+	const float ratio = len_v3v3(v1->co, v2->co) / len_v3v3(v1->orco, v2->orco);
+	VertRen *ends[2] = {v1, v2};
+	int i;
+	for (i = 0; i < 2; i++) {
+		float *pair = accum + 2 * ends[i]->index;
+		pair[0] += ratio;
+		pair[1] += 1.0f;
+	}
+}
+
+/* Store a per-vertex stress value: the mean ratio of current edge length to orco edge length.
+ * Orcos are temporarily mapped through the mesh texture space and restored afterwards. */
+static void calc_edge_stress(Render *UNUSED(re), ObjectRen *obr, Mesh *me)
+{
+	float loc[3], size[3], *accum;
+	int a, axis;
+
+	if (obr->totvert == 0) return;
+
+	BKE_mesh_texspace_get(me, loc, NULL, size);
+	/* Two floats per vertex: summed ratio, edge count. */
+	accum = MEM_callocN(2 * sizeof(float) * obr->totvert, "temp accum for stress");
+
+	/* de-normalize orco */
+	for (a = 0; a < obr->totvert; a++) {
+		VertRen *ver = RE_findOrAddVert(obr, a);
+		if (ver->orco == NULL)
+			continue;
+		for (axis = 0; axis < 3; axis++)
+			ver->orco[axis] = ver->orco[axis] * size[axis] + loc[axis];
+	}
+
+	/* add stress values */
+	for (a = 0; a < obr->totvlak; a++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, a);
+
+		/* NOTE(review): only quads pass this test, so triangles never contribute - confirm intent. */
+		if (vlr->v1->orco == NULL || vlr->v4 == NULL)
+			continue;
+
+		calc_edge_stress_add(accum, vlr->v1, vlr->v2);
+		calc_edge_stress_add(accum, vlr->v2, vlr->v3);
+		calc_edge_stress_add(accum, vlr->v3, vlr->v1);
+		calc_edge_stress_add(accum, vlr->v3, vlr->v4);
+		calc_edge_stress_add(accum, vlr->v4, vlr->v1);
+		calc_edge_stress_add(accum, vlr->v2, vlr->v4);
+	}
+
+	for (a = 0; a < obr->totvert; a++) {
+		VertRen *ver = RE_findOrAddVert(obr, a);
+		float *acc, *stress;
+
+		if (ver->orco == NULL)
+			continue;
+		/* find stress value */
+		acc = accum + 2 * ver->index;
+		if (acc[1] != 0.0f)
+			acc[0] /= acc[1];
+		stress = RE_vertren_get_stress(obr, ver, 1);
+		*stress = *acc;
+		/* restore orcos */
+		for (axis = 0; axis < 3; axis++)
+			ver->orco[axis] = (ver->orco[axis] - loc[axis]) / size[axis];
+	}
+
+	MEM_freeN(accum);
+}
+
+/**
+ * Accumulate a face's tangent into the tangent of each of its vertices.
+ *
+ * UVs come from the layer selected by obr->actmtface when the face has one, otherwise from
+ * the vertex orcos mapped onto a sphere; without either nothing is done.
+ * A quad is handled as two triangles: (v1, v2, v3) and (v1, v3, v4).
+ *
+ * \param do_tangent: when zero the tangents are computed but not stored.
+ */
+static void calc_tangent_vector(ObjectRen *obr, VlakRen *vlr, int do_tangent)
+{
+	MTFace *tface = RE_vlakren_get_tface(obr, vlr, obr->actmtface, NULL, 0);
+	VertRen *verts[4] = {vlr->v1, vlr->v2, vlr->v3, vlr->v4};
+	/* Corner triples: the first triangle, then the second half of a quad. */
+	const int corners[2][3] = {{0, 1, 2}, {0, 2, 3}};
+	const int tottri = verts[3] ? 2 : 1;
+	float sphere_uv[4][2], tang[3];
+	float *uvs[4];
+	int t, c;
+
+	if (tface) {
+		for (c = 0; c < 4; c++)
+			uvs[c] = tface->uv[c];
+	}
+	else if (verts[0]->orco) {
+		for (c = 0; c < 4; c++) {
+			uvs[c] = sphere_uv[c];
+			if (verts[c]) {
+				const float *orco = verts[c]->orco;
+				map_to_sphere(&uvs[c][0], &uvs[c][1], orco[0], orco[1], orco[2]);
+			}
+		}
+	}
+	else {
+		return;
+	}
+
+	for (t = 0; t < tottri; t++) {
+		const int *tri = corners[t];
+
+		tangent_from_uv_v3(uvs[tri[0]], uvs[tri[1]], uvs[tri[2]],
+		                   verts[tri[0]]->co, verts[tri[1]]->co, verts[tri[2]]->co, vlr->n, tang);
+
+		/* Add the triangle's tangent to each of its three vertices. */
+		for (c = 0; do_tangent && c < 3; c++) {
+			add_v3_v3(RE_vertren_get_tangent(obr, verts[tri[c]], 1), tang);
+		}
+	}
+}
+
+
+
+/****************************************************************
+ ************ tangent space generation interface ****************
+ ****************************************************************/
+
+typedef struct {
+	ObjectRen *obr;  /* render object whose faces the mikktspace callbacks below read */
+	int mtface_index;  /* layer index passed to the tface / nmap-tangent lookups */
+} SRenderMeshToTangent;
+
+/* interface */
+#include "mikktspace.h"
+
+static int GetNumFaces(const SMikkTSpaceContext *pContext)
+{
+	/* mikktspace callback: the face count is the object's render face count. */
+	return ((const SRenderMeshToTangent *)pContext->m_pUserData)->obr->totvlak;
+}
+
+static int GetNumVertsOfFace(const SMikkTSpaceContext *pContext, const int face_num)
+{
+	/* mikktspace callback: a render face is a quad only when it has a fourth vertex. */
+	const SRenderMeshToTangent *mesh = pContext->m_pUserData;
+	return (RE_findOrAddVlak(mesh->obr, face_num)->v4 == NULL) ? 3 : 4;
+}
+
+static void GetPosition(const SMikkTSpaceContext *pContext, float r_co[3], const int face_num, const int vert_index)
+{
+	/* mikktspace callback; v1..v4 are indexed as an array, so vert_index is expected in 0..3. */
+	const SRenderMeshToTangent *mesh = pContext->m_pUserData;
+	VlakRen *face = RE_findOrAddVlak(mesh->obr, face_num);
+	VertRen *vert = (&face->v1)[vert_index];
+	copy_v3_v3(r_co, vert->co);
+}
+
+static void GetTextureCoordinate(const SMikkTSpaceContext *pContext, float r_uv[2], const int face_num, const int vert_index)
+{
+	/* mikktspace callback; vert_index is expected to be in 0..3. */
+	const SRenderMeshToTangent *mesh = pContext->m_pUserData;
+	VlakRen *face = RE_findOrAddVlak(mesh->obr, face_num);
+	MTFace *tface = RE_vlakren_get_tface(mesh->obr, face, mesh->mtface_index, NULL, 0);
+	const float *orco;
+
+	if (tface != NULL) {
+		copy_v2_v2(r_uv, tface->uv[vert_index]);
+		return;
+	}
+	/* No UV layer: fall back to orcos projected on a sphere, else to zero. */
+	orco = (&face->v1)[vert_index]->orco;
+	if (orco != NULL)
+		map_to_sphere(&r_uv[0], &r_uv[1], orco[0], orco[1], orco[2]);
+	else
+		zero_v2(r_uv);
+}
+
+static void GetNormal(const SMikkTSpaceContext *pContext, float r_no[3], const int face_num, const int vert_index)
+{
+	/* mikktspace callback; vert_index is expected to be in 0..3. */
+	const SRenderMeshToTangent *mesh = pContext->m_pUserData;
+	VlakRen *face = RE_findOrAddVlak(mesh->obr, face_num);
+
+	if ((face->flag & ME_SMOOTH) == 0) {
+		/* Flat face: the negated face normal is used for every corner. */
+		negate_v3_v3(r_no, face->n);
+		return;
+	}
+
+	copy_v3_v3(r_no, (&face->v1)[vert_index]->n);
+}
+static void SetTSpace(const SMikkTSpaceContext *pContext, const float fvTangent[3], const float fSign, const int face_num, const int iVert)
+{
+	const SRenderMeshToTangent *mesh = pContext->m_pUserData;
+	VlakRen *face = RE_findOrAddVlak(mesh->obr, face_num);
+	float *corner = RE_vlakren_get_nmap_tangent(mesh->obr, face, mesh->mtface_index, true);
+	if (corner != NULL) {
+		corner += iVert * 4; /* mikktspace callback: each corner stores tangent xyz, then the sign */
+		copy_v3_v3(corner, fvTangent);
+		corner[3] = fSign;
+	}
+}
+
+/* Compute vertex normals (do_vertex_normal), per-vertex tangents (do_tangent) and mikktspace
+ * tangents for every UV layer flagged in obr->tangent_mask (do_nmap_tangent). */
+static void calc_vertexnormals(Render *UNUSED(re), ObjectRen *obr, bool do_vertex_normal, bool do_tangent, bool do_nmap_tangent)
+{
+	int a, i;
+
+	/* clear all vertex normals */
+	if (do_vertex_normal) {
+		for (a = 0; a < obr->totvert; a++)
+			zero_v3(RE_findOrAddVert(obr, a)->n);
+	}
+
+	/* calculate cos of angles and point-masses, use as weight factor to
+	 * add face normal to vertex */
+	for (a = 0; a < obr->totvlak; a++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, a);
+
+		if (do_vertex_normal && (vlr->flag & ME_SMOOTH)) {
+			VertRen *v4 = vlr->v4;
+			accumulate_vertex_normals_v3(
+			        vlr->v1->n, vlr->v2->n, vlr->v3->n, v4 ? v4->n : NULL,
+			        vlr->n, vlr->v1->co, vlr->v2->co, vlr->v3->co, v4 ? v4->co : NULL);
+		}
+
+		/* tangents still need to be calculated for flat faces too;
+		 * weighting removed, they are not vertexnormals */
+		if (do_tangent)
+			calc_tangent_vector(obr, vlr, do_tangent);
+	}
+
+	/* do solid faces: vertex normals that are still zero take the face normal */
+	for (a = 0; a < obr->totvlak; a++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, a);
+		VertRen *corners[4] = {vlr->v1, vlr->v2, vlr->v3, vlr->v4};
+
+		if (!do_vertex_normal || (vlr->flag & ME_SMOOTH))
+			continue;
+		for (i = 0; i < 4; i++) {
+			if (corners[i] && is_zero_v3(corners[i]->n))
+				copy_v3_v3(corners[i]->n, vlr->n);
+		}
+	}
+
+	/* normalize vertex normals */
+	for (a = 0; a < obr->totvert; a++) {
+		VertRen *ver = RE_findOrAddVert(obr, a);
+		float *tav;
+
+		normalize_v3(ver->n);
+		if (!do_tangent)
+			continue;
+		tav = RE_vertren_get_tangent(obr, ver, 0);
+		if (tav) {
+			/* orthonorm. */
+			const float tdn = dot_v3v3(tav, ver->n);
+			for (i = 0; i < 3; i++)
+				tav[i] -= ver->n[i] * tdn;
+			normalize_v3(tav);
+		}
+	}
+
+	/* normal mapping tangent with mikktspace */
+	if (do_nmap_tangent) {
+		SRenderMeshToTangent mesh2tangent;
+		SMikkTSpaceInterface sInterface;
+		SMikkTSpaceContext sContext;
+		memset(&mesh2tangent, 0, sizeof(mesh2tangent));
+		memset(&sInterface, 0, sizeof(sInterface));
+		memset(&sContext, 0, sizeof(sContext));
+		mesh2tangent.obr = obr;
+		sInterface.m_getNumFaces = GetNumFaces;
+		sInterface.m_getNumVerticesOfFace = GetNumVertsOfFace;
+		sInterface.m_getPosition = GetPosition;
+		sInterface.m_getTexCoord = GetTextureCoordinate;
+		sInterface.m_getNormal = GetNormal;
+		sInterface.m_setTSpaceBasic = SetTSpace;
+		sContext.m_pUserData = &mesh2tangent;
+		sContext.m_pInterface = &sInterface;
+
+		for (a = 0; a < MAX_MTFACE; a++) {
+			if (obr->tangent_mask & (1 << a)) {
+				mesh2tangent.mtface_index = a;
+				genTangSpaceDefault(&sContext);
+			}
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Autosmoothing:                                                            */
+/* ------------------------------------------------------------------------- */
+
+typedef struct ASvert {
+	int totface;  /* number of faces registered for this vertex (incremented by as_addvert) */
+	ListBase faces;  /* list of ASface chunks */
+} ASvert;
+
+typedef struct ASface {
+	struct ASface *next, *prev;
+	VlakRen *vlr[4];  /* up to four faces using the vertex; NULL marks a free slot */
+	VertRen *nver[4];  /* per slot: the vertex copy made for that face, if any */
+} ASface;
+
+/* Register 'vlr' as a user of the vertex tracked by 'asv'.
+ * Returns the slot used inside the last ASface chunk, or -1 when 'v1' is NULL. */
+static int as_addvert(ASvert *asv, VertRen *v1, VlakRen *vlr)
+{
+	ASface *chunk;
+	int slot = 0;
+
+	if (v1 == NULL)
+		return -1;
+
+	/* Look for the first free slot in the most recent chunk. */
+	chunk = asv->faces.last;
+	if (chunk != NULL) {
+		while (slot < 4 && chunk->vlr[slot] != NULL)
+			slot++;
+	}
+
+	/* No chunk yet, or the last one is full: start a new one. */
+	if (chunk == NULL || slot == 4) {
+		chunk = MEM_callocN(sizeof(ASface), "asface");
+		BLI_addtail(&asv->faces, chunk);
+		slot = 0;
+	}
+
+	chunk->vlr[slot] = vlr;
+	asv->totface++;
+
+	return slot;
+}
+
+/* Find a vertex usable by 'vlr' for loop normal 'lnor': either 'ver' itself or a copy that
+ * another face already made with the same normal. Returns NULL when a new copy is needed. */
+static VertRen *as_findvertex_lnor(VlakRen *vlr, VertRen *ver, ASvert *asv, const float lnor[3])
+{
+	ASface *chunk;
+	int slot;
+
+	/* First face, we can use existing vert and assign it current lnor! */
+	if (asv->totface == 1) {
+		copy_v3_v3(ver->n, lnor);
+		return ver;
+	}
+
+	/* In case existing ver has same normal as current lnor, we can simply use it! */
+	if (equals_v3v3(lnor, ver->n)) {
+		return ver;
+	}
+
+	/* Otherwise search the copies made for the other faces around this vertex. */
+	for (chunk = asv->faces.first; chunk; chunk = chunk->next) {
+		for (slot = 0; slot < 4; slot++) {
+			VertRen *dup = chunk->nver[slot];
+
+			if (chunk->vlr[slot] == NULL || chunk->vlr[slot] == vlr) {
+				continue;
+			}
+			if (dup && equals_v3v3(lnor, dup->n)) {
+				return dup;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/* Register 'vlr' at vertex 'ver' and make the face use a vertex whose normal is the given
+ * (short encoded) loop normal, copying 'ver' when no matching vertex exists yet. */
+static void as_addvert_lnor(ObjectRen *obr, ASvert *asv, VertRen *ver, VlakRen *vlr, const short _lnor[3])
+{
+	VertRen **corners[4] = {&vlr->v1, &vlr->v2, &vlr->v3, &vlr->v4};
+	VertRen *match;
+	float lnor[3];
+	int slot, c;
+
+	normal_short_to_float_v3(lnor, _lnor);
+	slot = as_addvert(asv, ver, vlr);
+	if (slot < 0)
+		return;
+
+	/* already made a new vertex within threshold? */
+	match = as_findvertex_lnor(vlr, ver, asv, lnor);
+	if (match == NULL) {
+		/* make a new vertex */
+		match = RE_vertren_copy(obr, ver);
+		copy_v3_v3(match->n, lnor);
+	}
+	if (match == ver)
+		return;
+
+	/* Remember the copy in the slot just filled and swap it into every corner that used 'ver'. */
+	((ASface *)asv->faces.last)->nver[slot] = match;
+	for (c = 0; c < 4; c++)
+		if (*corners[c] == ver)
+			*corners[c] = match;
+}
+
+/* Split vertices so every face corner gets a vertex carrying its (loop) normal, then move the
+ * vertices through 'mat' and recompute the face normals.
+ * note; autosmooth happens in object space still, after applying autosmooth we rotate */
+static void autosmooth(Render *UNUSED(re), ObjectRen *obr, float mat[4][4], short (*lnors)[4][3])
+{
+	const bool use_lnors = (lnors != NULL);
+	ASvert *asverts;
+	float rot[3][3];
+	int a, totvert;
+
+	/* Note: For normals, we only want rotation, not scaling component.
+	 *       Negative scales (aka mirroring) give wrong results, see T44102. */
+	if (use_lnors) {
+		float mat3[3][3], size[3];
+
+		copy_m3_m4(mat3, mat);
+		mat3_to_rot_size(rot, size, mat3);
+	}
+
+	if (obr->totvert == 0)
+		return;
+
+	totvert = obr->totvert;
+	asverts = MEM_callocN(sizeof(ASvert) * totvert, "all smooth verts");
+
+	/* We construct listbase of all vertices and pointers to faces, and add new verts when needed
+	 * (i.e. when existing ones do not share the same (loop)normal). */
+	for (a = 0; use_lnors && a < obr->totvlak; a++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, a);
+		short (*corner_nors)[3] = lnors[a];
+
+		/* skip wire faces */
+		if (vlr->v2 == vlr->v3)
+			continue;
+
+		as_addvert_lnor(obr, asverts + vlr->v1->index, vlr->v1, vlr, corner_nors[0]);
+		as_addvert_lnor(obr, asverts + vlr->v2->index, vlr->v2, vlr, corner_nors[1]);
+		as_addvert_lnor(obr, asverts + vlr->v3->index, vlr->v3, vlr, corner_nors[2]);
+		if (vlr->v4)
+			as_addvert_lnor(obr, asverts + vlr->v4->index, vlr->v4, vlr, corner_nors[3]);
+	}
+
+	/* free */
+	for (a = 0; a < totvert; a++)
+		BLI_freelistN(&asverts[a].faces);
+	MEM_freeN(asverts);
+
+	/* rotate vertices and calculate normal of faces */
+	for (a = 0; a < obr->totvert; a++) {
+		VertRen *ver = RE_findOrAddVert(obr, a);
+
+		mul_m4_v3(mat, ver->co);
+		if (use_lnors) {
+			mul_m3_v3(rot, ver->n);
+			negate_v3(ver->n);
+		}
+	}
+	for (a = 0; a < obr->totvlak; a++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, a);
+
+		/* skip wire faces */
+		if (vlr->v2 == vlr->v3)
+			continue;
+
+		if (vlr->v4)
+			normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+		else
+			normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Orco hash and Materials                                                   */
+/* ------------------------------------------------------------------------- */
+
+static float *get_object_orco(Render *re, void *ob)
+{
+	/* Without a hash nothing has been stored yet (set_object_orco creates it on first use). */
+	if (re->orco_hash == NULL)
+		return NULL;
+
+	return BLI_ghash_lookup(re->orco_hash, ob);
+}
+
+static void set_object_orco(Render *re, void *ob, float *orco)
+{
+	/* Create the object -> orco hash on first use. */
+	if (re->orco_hash == NULL)
+		re->orco_hash = BLI_ghash_ptr_new("set_object_orco gh");
+	BLI_ghash_insert(re->orco_hash, ob, orco);
+}
+
+static void free_mesh_orco_hash(Render *re)
+{
+	if (re->orco_hash == NULL)
+		return;
+	BLI_ghash_free(re->orco_hash, NULL, MEM_freeN); /* values freed with MEM_freeN, keys left alone */
+	re->orco_hash = NULL;
+}
+
+/* Cache in ma->mapto_textured which inputs are actually textured, so callers need not walk
+ * every texture slot each time a property that may be textured is accessed. */
+static void check_material_mapto(Material *ma)
+{
+	/* currently used only in volume render, so we'll check for those flags */
+	static const int volume_flags[] = {
+		MAP_DENSITY, MAP_EMISSION, MAP_EMISSION_COL, MAP_SCATTERING,
+		MAP_TRANSMISSION_COL, MAP_REFLECTION, MAP_REFLECTION_COL,
+	};
+	int slot, f;
+
+	ma->mapto_textured = 0;
+	for (slot = 0; slot < MAX_MTEX; slot++) {
+		const MTex *mtex = ma->mtex[slot];
+		if (mtex == NULL || mtex->tex == NULL)
+			continue;
+		for (f = 0; f < (int)(sizeof(volume_flags) / sizeof(volume_flags[0])); f++) {
+			if (mtex->mapto & volume_flags[f])
+				ma->mapto_textured |= volume_flags[f];
+		}
+	}
+}
+/* Mark every material referenced by the nodes of `ntree` as used, recursing
+ * into group nodes; a Z-transparent material also sets R_ZTRA on the render. */
+static void flag_render_node_material(Render *re, bNodeTree *ntree)
+{
+	bNode *node = ntree->nodes.first;
+
+	for (; node != NULL; node = node->next) {
+		if (node->id == NULL)
+			continue;
+		if (GS(node->id->name) == ID_MA) {
+			Material *node_ma = (Material *)node->id;
+			if ((node_ma->mode & MA_TRANSP) && (node_ma->mode & MA_ZTRANSP))
+				re->flag |= R_ZTRA;
+			node_ma->flag |= MA_IS_USED;
+		}
+		else if (node->type == NODE_GROUP)
+			flag_render_node_material(re, (bNodeTree *)node->id);
+	}
+}
+
+/* Fetch material slot `nr` of `ob` for rendering (the default material when the
+ * slot is empty) and update the render-time flags on it and on `re`. */
+static Material *give_render_material(Render *re, Object *ob, short nr)
+{
+	extern Material defmaterial;	/* material.c */
+	Material *ma = give_current_material(ob, nr);
+
+	if (ma == NULL)
+		ma = &defmaterial;
+
+	if (re->r.mode & R_SPEED)
+		ma->texco |= NEED_UV;
+
+	/* volume materials: force MA_TRANSP on and MA_SHADBUF off */
+	if (ma->material_type == MA_TYPE_VOLUME)
+		ma->mode = (ma->mode | MA_TRANSP) & ~MA_SHADBUF;
+
+	if ((ma->mode & MA_TRANSP) && (ma->mode & MA_ZTRANSP))
+		re->flag |= R_ZTRA;
+
+	ma->flag |= MA_IS_USED;  /* for light groups and SSS */
+
+	if (ma->nodetree && ma->use_nodes)
+		flag_render_node_material(re, ma->nodetree);
+
+	check_material_mapto(ma);
+	return ma;
+}
+
+/* ------------------------------------------------------------------------- */
+/* Particles                                                                 */
+/* ------------------------------------------------------------------------- */
+typedef struct ParticleStrandData {
+	struct MCol *mcol;  /* one color per color layer (totcol entries) */
+	float *orco, *uvco, *surfnor;  /* uvco: 2 floats per UV layer; surfnor may be NULL */
+	float time, adapt_angle, adapt_pix, size;  /* time: fraction along the strand */
+	int totuv, totcol;  /* number of UV / color layers */
+	int first, line, adapt, override_uv;  /* override_uv: UV layer to overwrite, < 0 for none */
+}
+ParticleStrandData;
+/* NOTE: keeps state in function-local statics, a future thread problem */
+/* Add geometry for one strand segment: a standalone quad (sd->line), the first
+ * two vertices of a strip (sd->first), or the next quad of that strip.
+ * The strip state lives in function-local statics, so this is not thread-safe. */
+static void static_particle_strand(Render *re, ObjectRen *obr, Material *ma, ParticleStrandData *sd, const float vec[3], const float vec1[3])
+{
+	static VertRen *v1= NULL, *v2= NULL;
+	VlakRen *vlr= NULL;
+	float nor[3], cross[3], crosslen, w, dx, dy, width;
+	static float anor[3], avec[3];
+	int flag, i;
+	static int second=0;
+
+	sub_v3_v3v3(nor, vec, vec1);
+	normalize_v3(nor);  /* nor needed as tangent */
+	cross_v3_v3v3(cross, vec, nor);
+
+	/* turn cross in pixelsize */
+	w= vec[2]*re->winmat[2][3] + re->winmat[3][3];
+	dx= re->winx*cross[0]*re->winmat[0][0];
+	dy= re->winy*cross[1]*re->winmat[1][1];
+	w = sqrtf(dx * dx + dy * dy) / w;
+
+	if (w!=0.0f) {
+		float fac;
+		if (ma->strand_ease!=0.0f) {
+			if (ma->strand_ease<0.0f)
+				fac= pow(sd->time, 1.0f+ma->strand_ease);
+			else
+				fac= pow(sd->time, 1.0f/(1.0f-ma->strand_ease));
+		}
+		else fac= sd->time;
+
+		width= ((1.0f-fac)*ma->strand_sta + (fac)*ma->strand_end);
+
+		/* use actual Blender units for strand width and fall back to minimum width */
+		if (ma->mode & MA_STR_B_UNITS) {
+			crosslen= len_v3(cross);
+			w= 2.0f*crosslen*ma->strand_min/w;
+
+			if (width < w)
+				width= w;
+
+			/*cross is the radius of the strand so we want it to be half of full width */
+			mul_v3_fl(cross, 0.5f/crosslen);
+		}
+		else
+			width/=w;
+
+		mul_v3_fl(cross, width);
+	}
+
+	if (ma->mode & MA_TANGENT_STR)
+		flag= R_SMOOTH|R_TANGENT;
+	else
+		flag= R_SMOOTH;
+
+	/* only 1 pixel wide strands filled in as quads now, otherwise zbuf errors */
+	if (ma->strand_sta==1.0f)
+		flag |= R_STRAND;
+
+	/* single face line */
+	if (sd->line) {
+		vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+		vlr->flag= flag;
+		vlr->v1= RE_findOrAddVert(obr, obr->totvert++);
+		vlr->v2= RE_findOrAddVert(obr, obr->totvert++);
+		vlr->v3= RE_findOrAddVert(obr, obr->totvert++);
+		vlr->v4= RE_findOrAddVert(obr, obr->totvert++);
+
+		copy_v3_v3(vlr->v1->co, vec);
+		add_v3_v3(vlr->v1->co, cross);
+		copy_v3_v3(vlr->v1->n, nor);
+		vlr->v1->orco= sd->orco;
+		vlr->v1->accum = -1.0f;  /* accum abuse for strand texco */
+
+		copy_v3_v3(vlr->v2->co, vec);
+		sub_v3_v3v3(vlr->v2->co, vlr->v2->co, cross);
+		copy_v3_v3(vlr->v2->n, nor);
+		vlr->v2->orco= sd->orco;
+		vlr->v2->accum= vlr->v1->accum;
+
+		copy_v3_v3(vlr->v4->co, vec1);
+		add_v3_v3(vlr->v4->co, cross);
+		copy_v3_v3(vlr->v4->n, nor);
+		vlr->v4->orco= sd->orco;
+		vlr->v4->accum = 1.0f;  /* accum abuse for strand texco */
+
+		copy_v3_v3(vlr->v3->co, vec1);
+		sub_v3_v3v3(vlr->v3->co, vlr->v3->co, cross);
+		copy_v3_v3(vlr->v3->n, nor);
+		vlr->v3->orco= sd->orco;
+		vlr->v3->accum= vlr->v4->accum;
+
+		normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+
+		vlr->mat= ma;
+		vlr->ec= ME_V2V3;
+
+		if (sd->surfnor) {
+			float *snor= RE_vlakren_get_surfnor(obr, vlr, 1);
+			copy_v3_v3(snor, sd->surfnor);
+		}
+
+		if (sd->uvco) {
+			for (i=0; i<sd->totuv; i++) {
+				MTFace *mtf;
+				mtf=RE_vlakren_get_tface(obr, vlr, i, NULL, 1);
+				mtf->uv[0][0]=mtf->uv[1][0]=
+				mtf->uv[2][0]=mtf->uv[3][0]=(sd->uvco+2*i)[0];
+				mtf->uv[0][1]=mtf->uv[1][1]=
+				mtf->uv[2][1]=mtf->uv[3][1]=(sd->uvco+2*i)[1];
+			}
+			if (sd->override_uv>=0) {
+				MTFace *mtf;
+				mtf=RE_vlakren_get_tface(obr, vlr, sd->override_uv, NULL, 0);
+
+				mtf->uv[0][0]=mtf->uv[3][0]=0.0f;
+				mtf->uv[1][0]=mtf->uv[2][0]=1.0f;
+
+				mtf->uv[0][1]=mtf->uv[1][1]=0.0f;
+				mtf->uv[2][1]=mtf->uv[3][1]=1.0f;
+			}
+		}
+		if (sd->mcol) {
+			for (i=0; i<sd->totcol; i++) {
+				MCol *mc;
+				mc=RE_vlakren_get_mcol(obr, vlr, i, NULL, 1);
+				mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i];
+			}
+		}
+	}
+	/* first two vertices of a strand */
+	else if (sd->first) {
+		if (sd->adapt) {
+			copy_v3_v3(anor, nor);
+			copy_v3_v3(avec, vec);
+			second=1;
+		}
+
+		v1= RE_findOrAddVert(obr, obr->totvert++);
+		v2= RE_findOrAddVert(obr, obr->totvert++);
+
+		copy_v3_v3(v1->co, vec);
+		add_v3_v3(v1->co, cross);
+		copy_v3_v3(v1->n, nor);
+		v1->orco= sd->orco;
+		v1->accum = -1.0f;  /* accum abuse for strand texco */
+
+		copy_v3_v3(v2->co, vec);
+		sub_v3_v3v3(v2->co, v2->co, cross);
+		copy_v3_v3(v2->n, nor);
+		v2->orco= sd->orco;
+		v2->accum= v1->accum;
+	}
+	/* more vertices & faces to strand */
+	else {
+		if (sd->adapt==0 || second) {
+			vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+			vlr->flag= flag;
+			vlr->v1= v1;
+			vlr->v2= v2;
+			vlr->v3= RE_findOrAddVert(obr, obr->totvert++);
+			vlr->v4= RE_findOrAddVert(obr, obr->totvert++);
+
+			v1= vlr->v4; /* cycle */
+			v2= vlr->v3; /* cycle */
+
+			if (sd->adapt) {
+				second=0;
+				copy_v3_v3(anor, nor);
+				copy_v3_v3(avec, vec);
+			}
+
+		}
+		else if (sd->adapt) {
+			float dvec[3], pvec[3];
+			sub_v3_v3v3(dvec, avec, vec);
+			project_v3_v3v3(pvec, dvec, vec);
+			sub_v3_v3v3(dvec, dvec, pvec);
+
+			w= vec[2]*re->winmat[2][3] + re->winmat[3][3];
+			dx= re->winx*dvec[0]*re->winmat[0][0]/w;
+			dy= re->winy*dvec[1]*re->winmat[1][1]/w;
+			w = sqrtf(dx * dx + dy * dy);
+			if (dot_v3v3(anor, nor)<sd->adapt_angle && w>sd->adapt_pix) {
+				vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+				vlr->flag= flag;
+				vlr->v1= v1;
+				vlr->v2= v2;
+				vlr->v3= RE_findOrAddVert(obr, obr->totvert++);
+				vlr->v4= RE_findOrAddVert(obr, obr->totvert++);
+
+				v1= vlr->v4; /* cycle */
+				v2= vlr->v3; /* cycle */
+
+				copy_v3_v3(anor, nor);
+				copy_v3_v3(avec, vec);
+			}
+			else {
+				vlr= RE_findOrAddVlak(obr, obr->totvlak-1);  /* re-use last face, its end is moved below */
+			}
+		}
+
+		copy_v3_v3(vlr->v4->co, vec);
+		add_v3_v3(vlr->v4->co, cross);
+		copy_v3_v3(vlr->v4->n, nor);
+		vlr->v4->orco= sd->orco;
+		vlr->v4->accum= -1.0f + 2.0f * sd->time;  /* accum abuse for strand texco */
+
+		copy_v3_v3(vlr->v3->co, vec);
+		sub_v3_v3v3(vlr->v3->co, vlr->v3->co, cross);
+		copy_v3_v3(vlr->v3->n, nor);
+		vlr->v3->orco= sd->orco;
+		vlr->v3->accum= vlr->v4->accum;
+
+		normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+
+		vlr->mat= ma;
+		vlr->ec= ME_V2V3;
+
+		if (sd->surfnor) {
+			float *snor= RE_vlakren_get_surfnor(obr, vlr, 1);
+			copy_v3_v3(snor, sd->surfnor);
+		}
+
+		if (sd->uvco) {
+			for (i=0; i<sd->totuv; i++) {
+				MTFace *mtf;
+				mtf=RE_vlakren_get_tface(obr, vlr, i, NULL, 1);
+				mtf->uv[0][0]=mtf->uv[1][0]=
+				mtf->uv[2][0]=mtf->uv[3][0]=(sd->uvco+2*i)[0];
+				mtf->uv[0][1]=mtf->uv[1][1]=
+				mtf->uv[2][1]=mtf->uv[3][1]=(sd->uvco+2*i)[1];
+			}
+			if (sd->override_uv>=0) {
+				MTFace *mtf;
+				mtf=RE_vlakren_get_tface(obr, vlr, sd->override_uv, NULL, 0);
+
+				mtf->uv[0][0]=mtf->uv[3][0]=0.0f;
+				mtf->uv[1][0]=mtf->uv[2][0]=1.0f;
+
+				mtf->uv[0][1]=mtf->uv[1][1]=(vlr->v1->accum+1.0f)/2.0f;
+				mtf->uv[2][1]=mtf->uv[3][1]=(vlr->v3->accum+1.0f)/2.0f;
+			}
+		}
+		if (sd->mcol) {
+			for (i=0; i<sd->totcol; i++) {
+				MCol *mc;
+				mc=RE_vlakren_get_mcol(obr, vlr, i, NULL, 1);
+				mc[0]=mc[1]=mc[2]=mc[3]=sd->mcol[i];
+			}
+		}
+	}
+}
+
+/* Emit wire faces for a particle path: one standalone segment when `line` is
+ * set, otherwise a chain in which each call continues from the previous vertex. */
+static void static_particle_wire(ObjectRen *obr, Material *ma, const float vec[3], const float vec1[3], int first, int line)
+{
+	/* last vertex of the chain; kept across calls, so not thread-safe */
+	static VertRen *v1;
+	VlakRen *vlr;
+
+	if (!line && first) {
+		/* chain start: only place the first vertex, no face yet */
+		v1= RE_findOrAddVert(obr, obr->totvert++);
+		copy_v3_v3(v1->co, vec);
+		return;
+	}
+
+	vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+
+	if (line) {
+		/* standalone segment vec..vec1 built from two fresh vertices */
+		vlr->v1= RE_findOrAddVert(obr, obr->totvert++);
+		vlr->v2= RE_findOrAddVert(obr, obr->totvert++);
+		copy_v3_v3(vlr->v1->co, vec);
+		copy_v3_v3(vlr->v2->co, vec1);
+	}
+	else {
+		/* continue the chain: previous end vertex to a new one at vec */
+		vlr->v1= v1;
+		vlr->v2= RE_findOrAddVert(obr, obr->totvert++);
+		v1= vlr->v2; /* cycle */
+		copy_v3_v3(v1->co, vec);
+	}
+
+	/* a wire face repeats v2 as v3 and has no v4 */
+	vlr->v3= vlr->v2;
+	vlr->v4= NULL;
+
+	sub_v3_v3v3(vlr->n, vec, vec1);
+	normalize_v3(vlr->n);
+
+	/* the chained case leaves the shared start vertex's normal untouched */
+	if (line)
+		copy_v3_v3(vlr->v1->n, vlr->n);
+	copy_v3_v3(vlr->v2->n, vlr->n);
+
+	vlr->mat= ma;
+	vlr->ec= ME_V1V2;
+}
+
+/* Add one path segment loc..loc1, as wire, halo or strand per material type. */
+static void particle_curve(Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma, ParticleStrandData *sd,
+                           const float loc[3], const float loc1[3], int seed, float *pa_co)
+{
+	if (ma->material_type == MA_TYPE_HALO) {
+		HaloRen *halo = RE_inithalo_particle(re, obr, dm, ma, loc, loc1, sd->orco, sd->uvco, sd->size, 1.0, seed, pa_co);
+		if (halo != NULL)
+			halo->lay = obr->ob->lay;
+	}
+	else if (ma->material_type == MA_TYPE_WIRE)
+		static_particle_wire(obr, ma, loc, loc1, sd->first, sd->line);
+	else
+		static_particle_strand(re, obr, ma, sd, loc, loc1);
+}
+/* Add one billboard quad: corners come from psys_make_billboard's center and
+ * axes, are transformed by re->viewmat, then the UV layers in bb->uv[] are filled. */
+static void particle_billboard(Render *re, ObjectRen *obr, Material *ma, ParticleBillboardData *bb)
+{
+	VlakRen *vlr;
+	MTFace *mtf;
+	float xvec[3], yvec[3], zvec[3], bb_center[3];
+	/* Number of tiles */
+	int totsplit = bb->uv_split * bb->uv_split;
+	int tile, x, y;
+	/* Tile offsets */
+	float uvx = 0.0f, uvy = 0.0f, uvdx = 1.0f, uvdy = 1.0f, time = 0.0f;
+
+	vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+	vlr->v1= RE_findOrAddVert(obr, obr->totvert++);
+	vlr->v2= RE_findOrAddVert(obr, obr->totvert++);
+	vlr->v3= RE_findOrAddVert(obr, obr->totvert++);
+	vlr->v4= RE_findOrAddVert(obr, obr->totvert++);
+
+	psys_make_billboard(bb, xvec, yvec, zvec, bb_center);
+
+	add_v3_v3v3(vlr->v1->co, bb_center, xvec);  /* v1 = center + x + y */
+	add_v3_v3(vlr->v1->co, yvec);
+	mul_m4_v3(re->viewmat, vlr->v1->co);
+
+	sub_v3_v3v3(vlr->v2->co, bb_center, xvec);  /* v2 = center - x + y */
+	add_v3_v3(vlr->v2->co, yvec);
+	mul_m4_v3(re->viewmat, vlr->v2->co);
+
+	sub_v3_v3v3(vlr->v3->co, bb_center, xvec);  /* v3 = center - x - y */
+	sub_v3_v3v3(vlr->v3->co, vlr->v3->co, yvec);
+	mul_m4_v3(re->viewmat, vlr->v3->co);
+
+	add_v3_v3v3(vlr->v4->co, bb_center, xvec);  /* v4 = center + x - y */
+	sub_v3_v3(vlr->v4->co, yvec);
+	mul_m4_v3(re->viewmat, vlr->v4->co);
+
+	/* flat quad: every vertex gets the face normal */
+	normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+	copy_v3_v3(vlr->v1->n, vlr->n);
+	copy_v3_v3(vlr->v2->n, vlr->n);
+	copy_v3_v3(vlr->v3->n, vlr->n);
+	copy_v3_v3(vlr->v4->n, vlr->n);
+
+	vlr->mat= ma;
+	vlr->ec= ME_V2V3;
+
+	/* with a split texture, `time` picks which of the totsplit tiles to show */
+	if (bb->uv_split > 1) {
+		uvdx = uvdy = 1.0f / (float)bb->uv_split;
+		if (ELEM(bb->anim, PART_BB_ANIM_AGE, PART_BB_ANIM_FRAME)) {
+			if (bb->anim == PART_BB_ANIM_FRAME)
+				time = ((int)(bb->time * bb->lifetime) % totsplit)/(float)totsplit;
+			else
+				time = bb->time;
+		}
+		else if (bb->anim == PART_BB_ANIM_ANGLE) {
+			if (bb->align == PART_BB_VIEW) {
+				time = (float)fmod((bb->tilt + 1.0f) / 2.0f, 1.0);
+			}
+			else {
+				float axis1[3] = {0.0f, 0.0f, 0.0f};
+				float axis2[3] = {0.0f, 0.0f, 0.0f};
+
+				/* the two axes other than the alignment axis */
+				axis1[(bb->align + 1) % 3] = 1.0f;
+				axis2[(bb->align + 2) % 3] = 1.0f;
+
+				if (bb->lock == 0) {
+					zvec[bb->align] = 0.0f;
+					normalize_v3(zvec);
+				}
+
+				time = saacos(dot_v3v3(zvec, axis1)) / (float)M_PI;
+				if (dot_v3v3(zvec, axis2) < 0.0f)
+					time = 1.0f - time / 2.0f;
+				else
+					time /= 2.0f;
+			}
+		}
+
+		if (bb->split_offset == PART_BB_OFF_LINEAR)
+			time = (float)fmod(time + (float)bb->num / (float)totsplit, 1.0f);
+		else if (bb->split_offset==PART_BB_OFF_RANDOM)
+			time = (float)fmod(time + bb->random, 1.0f);
+
+		/* Find the coordinates in tile space (integer), then convert to UV
+		 * space (float). Note that Y is flipped. */
+		tile = (int)((time + FLT_EPSILON10) * totsplit);
+		x = tile % bb->uv_split;
+		y = tile / bb->uv_split;
+		y = (bb->uv_split - 1) - y;
+		uvx = uvdx * x;
+		uvy = uvdy * y;
+	}
+
+	/* normal UVs */
+	if (bb->uv[0] >= 0) {
+		mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[0], NULL, 1);
+		mtf->uv[0][0] = 1.0f;
+		mtf->uv[0][1] = 1.0f;
+		mtf->uv[1][0] = 0.0f;
+		mtf->uv[1][1] = 1.0f;
+		mtf->uv[2][0] = 0.0f;
+		mtf->uv[2][1] = 0.0f;
+		mtf->uv[3][0] = 1.0f;
+		mtf->uv[3][1] = 0.0f;
+	}
+
+	/* time-index UVs */
+	if (bb->uv[1] >= 0) {
+		mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[1], NULL, 1);
+		mtf->uv[0][0] = mtf->uv[1][0] = mtf->uv[2][0] = mtf->uv[3][0] = bb->time;
+		mtf->uv[0][1] = mtf->uv[1][1] = mtf->uv[2][1] = mtf->uv[3][1] = (float)bb->num/(float)bb->totnum;
+	}
+
+	/* split UVs */
+	if (bb->uv_split > 1 && bb->uv[2] >= 0) {
+		mtf = RE_vlakren_get_tface(obr, vlr, bb->uv[2], NULL, 1);
+		mtf->uv[0][0] = uvx + uvdx;
+		mtf->uv[0][1] = uvy + uvdy;
+		mtf->uv[1][0] = uvx;
+		mtf->uv[1][1] = uvy + uvdy;
+		mtf->uv[2][0] = uvx;
+		mtf->uv[2][1] = uvy;
+		mtf->uv[3][0] = uvx + uvdx;
+		mtf->uv[3][1] = uvy;
+	}
+}
+/* Emit the render primitive for one particle state: a line segment along the
+ * velocity, a billboard quad, or (default) a halo. */
+static void particle_normal_ren(short ren_as, ParticleSettings *part, Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma, ParticleStrandData *sd, ParticleBillboardData *bb, ParticleKey *state, int seed, float hasize, float *pa_co)
+{
+	float loc[3];
+
+	copy_v3_v3(loc, state->co);
+
+	/* billboards get the untransformed location, particle_billboard applies viewmat itself */
+	if (ren_as != PART_DRAW_BB)
+		mul_m4_v3(re->viewmat, loc);
+
+	switch (ren_as) {
+		case PART_DRAW_LINE:
+		{
+			float loc0[3], loc1[3], vel[3];
+
+			sd->line = 1;
+			sd->time = 0.0f;
+			sd->size = hasize;
+
+			/* unit direction after viewmat's 3x3 part, optionally scaled by the speed */
+			mul_v3_mat3_m4v3(vel, re->viewmat, state->vel);
+			normalize_v3(vel);
+			if (part->draw & PART_DRAW_VEL_LENGTH)
+				mul_v3_fl(vel, len_v3(state->vel));
+
+			/* segment ends: loc - draw_line[0] * vel and loc + draw_line[1] * vel */
+			madd_v3_v3v3fl(loc0, loc, vel, -part->draw_line[0]);
+			madd_v3_v3v3fl(loc1, loc, vel, part->draw_line[1]);
+
+			particle_curve(re, obr, dm, ma, sd, loc0, loc1, seed, pa_co);
+			break;
+		}
+		case PART_DRAW_BB:
+			copy_v3_v3(bb->vec, loc);
+			copy_v3_v3(bb->vel, state->vel);
+			particle_billboard(re, obr, ma, bb);
+			break;
+
+		default:
+		{
+			HaloRen *halo = RE_inithalo_particle(re, obr, dm, ma, loc, NULL, sd->orco, sd->uvco, hasize, 0.0, seed, pa_co);
+			if (halo != NULL)
+				halo->lay = obr->ob->lay;
+			break;
+		}
+	}
+}
+/* Fill sd->uvco / sd->mcol with the emitter's UVs and colors interpolated at
+ * `fuv` on tessface `num`; zeroed if num is DMCACHE_NOTFOUND or DMCACHE_ISCHILD. */
+static void get_particle_uvco_mcol(short from, DerivedMesh *dm, float *fuv, int num, ParticleStrandData *sd)
+{
+	const int has_face = !ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD);
+	int layer;
+
+	/* nothing is written unless the particle comes from a face or volume */
+	if (!ELEM(from, PART_FROM_FACE, PART_FROM_VOLUME))
+		return;
+	/* get uvco: two floats per UV layer */
+	if (sd->uvco) {
+		for (layer = 0; layer < sd->totuv; layer++) {
+			if (has_face) {
+				MFace *mface = dm->getTessFaceData(dm, num, CD_MFACE);
+				MTFace *mtface = (MTFace *)CustomData_get_layer_n(&dm->faceData, CD_MTFACE, layer);
+				psys_interpolate_uvs(mtface + num, mface->v4, fuv, sd->uvco + 2 * layer);
+			}
+			else
+				sd->uvco[2 * layer] = sd->uvco[2 * layer + 1] = 0.0f;
+		}
+	}
+
+	/* get mcol: one MCol out per color layer, the layer holds 4 per face */
+	if (sd->mcol) {
+		for (layer = 0; layer < sd->totcol; layer++) {
+			if (has_face) {
+				MFace *mface = dm->getTessFaceData(dm, num, CD_MFACE);
+				MCol *mc = (MCol *)CustomData_get_layer_n(&dm->faceData, CD_MCOL, layer);
+				psys_interpolate_mcol(mc + num * 4, mface->v4, fuv, sd->mcol + layer);
+			}
+			else
+				memset(&sd->mcol[layer], 0, sizeof(MCol));
+		}
+	}
+}
+static int render_new_particle_system(Render *re, ObjectRen *obr, ParticleSystem *psys, int timeoffset)
+{
+	Object *ob= obr->ob;
+//	Object *tob=0;
+	Material *ma = NULL;
+	ParticleSystemModifierData *psmd;
+	ParticleSystem *tpsys = NULL;
+	ParticleSettings *part, *tpart = NULL;
+	ParticleData *pars, *pa = NULL, *tpa = NULL;
+	ParticleKey *states = NULL;
+	ParticleKey state;
+	ParticleCacheKey *cache = NULL;
+	ParticleBillboardData bb;
+	ParticleSimulationData sim = {NULL};
+	ParticleStrandData sd;
+	StrandBuffer *strandbuf = NULL;
+	StrandVert *svert = NULL;
+	StrandBound *sbound = NULL;
+	StrandRen *strand = NULL;
+	RNG *rng = NULL;
+	float loc[3], loc1[3], loc0[3], mat[4][4], nmat[3][3], co[3], nor[3], duplimat[4][4];
+	float strandlen=0.0f, curlen=0.0f;
+	float hasize, pa_size, r_tilt, r_length;
+	float pa_time, pa_birthtime, pa_dietime;
+	float random, simplify[2], pa_co[3];
+	const float cfra= BKE_scene_frame_get(re->scene);
+	int i, a, k, max_k=0, totpart;
+	bool do_simplify = false, do_surfacecache = false, use_duplimat = false;
+	int totchild=0, step_nbr;
+	int seed, path_nbr=0, orco1=0, num;
+	int totface;
+
+	const int *index_mf_to_mpoly = NULL;
+	const int *index_mp_to_orig = NULL;
+
+/* 1. check that everything is ok & updated */
+	if (psys==NULL)
+		return 0;
+
+	part=psys->part;
+	pars=psys->particles;
+
+	if (part==NULL || pars==NULL || !psys_check_enabled(ob, psys, G.is_rendering))
+		return 0;
+
+	if (part->ren_as==PART_DRAW_OB || part->ren_as==PART_DRAW_GR || part->ren_as==PART_DRAW_NOT)
+		return 1;
+
+	if ((re->r.scemode & R_VIEWPORT_PREVIEW) && (ob->mode & OB_MODE_PARTICLE_EDIT))
+		return 0;
+
+	if (part->ren_as == PART_DRAW_BB && part->bb_ob == NULL && RE_GetCamera(re) == NULL)
+		return 0;
+
+/* 2. start initializing things */
+
+	/* last possibility to bail out! */
+	psmd = psys_get_modifier(ob, psys);
+	if (!(psmd->modifier.mode & eModifierMode_Render))
+		return 0;
+
+	sim.scene= re->scene;
+	sim.ob= ob;
+	sim.psys= psys;
+	sim.psmd= psmd;
+
+	if (part->phystype==PART_PHYS_KEYED)
+		psys_count_keyed_targets(&sim);
+
+	totchild=psys->totchild;
+
+	/* can happen for disconnected/global hair */
+	if (part->type==PART_HAIR && !psys->childcache)
+		totchild= 0;
+
+	if (re->r.scemode & R_VIEWPORT_PREVIEW) { /* preview render */
+		totchild = (int)((float)totchild * (float)part->disp / 100.0f);
+		step_nbr = 1 << part->draw_step;
+	}
+	else {
+		step_nbr = 1 << part->ren_step;
+	}
+	if (ELEM(part->kink, PART_KINK_SPIRAL))
+		step_nbr += part->kink_extra_steps;
+
+	psys->flag |= PSYS_DRAWING;
+
+	rng= BLI_rng_new(psys->seed);
+
+	totpart=psys->totpart;
+
+	memset(&sd, 0, sizeof(ParticleStrandData));
+	sd.override_uv = -1;
+
+/* 2.1 setup material stuff */
+	ma= give_render_material(re, ob, part->omat);
+
+#if 0  /* XXX old animation system */
+	if (ma->ipo) {
+		calc_ipo(ma->ipo, cfra);
+		execute_ipo((ID *)ma, ma->ipo);
+	}
+#endif  /* XXX old animation system */
+
+	hasize = ma->hasize;
+	seed = ma->seed1;
+
+	re->flag |= R_HALO;
+
+	RE_set_customdata_names(obr, &psmd->dm_final->faceData);
+	sd.totuv = CustomData_number_of_layers(&psmd->dm_final->faceData, CD_MTFACE);
+	sd.totcol = CustomData_number_of_layers(&psmd->dm_final->faceData, CD_MCOL);
+
+	if (ma->texco & TEXCO_UV && sd.totuv) {
+		sd.uvco = MEM_callocN(sd.totuv * 2 * sizeof(float), "particle_uvs");
+
+		if (ma->strand_uvname[0]) {
+			sd.override_uv = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, ma->strand_uvname);
+			sd.override_uv -= CustomData_get_layer_index(&psmd->dm_final->faceData, CD_MTFACE);
+		}
+	}
+	else
+		sd.uvco = NULL;
+
+	if (sd.totcol)
+		sd.mcol = MEM_callocN(sd.totcol * sizeof(MCol), "particle_mcols");
+
+/* 2.2 setup billboards */
+	if (part->ren_as == PART_DRAW_BB) {
+		int first_uv = CustomData_get_layer_index(&psmd->dm_final->faceData, CD_MTFACE);
+
+		bb.uv[0] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[0]);
+		if (bb.uv[0] < 0)
+			bb.uv[0] = CustomData_get_active_layer_index(&psmd->dm_final->faceData, CD_MTFACE);
+
+		bb.uv[1] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[1]);
+
+		bb.uv[2] = CustomData_get_named_layer_index(&psmd->dm_final->faceData, CD_MTFACE, psys->bb_uvname[2]);
+
+		if (first_uv >= 0) {
+			bb.uv[0] -= first_uv;
+			bb.uv[1] -= first_uv;
+			bb.uv[2] -= first_uv;
+		}
+
+		bb.align = part->bb_align;
+		bb.anim = part->bb_anim;
+		bb.lock = part->draw & PART_DRAW_BB_LOCK;
+		bb.ob = (part->bb_ob ? part->bb_ob : RE_GetCamera(re));
+		bb.split_offset = part->bb_split_offset;
+		bb.totnum = totpart+totchild;
+		bb.uv_split = part->bb_uv_split;
+	}
+
+/* 2.5 setup matrices */
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);	/* need to be that way, for imat texture */
+	transpose_m3_m4(nmat, ob->imat);
+
+	if (psys->flag & PSYS_USE_IMAT) {
+		/* psys->imat is the original emitter's inverse matrix, ob->obmat is the duplicated object's matrix */
+		mul_m4_m4m4(duplimat, ob->obmat, psys->imat);
+		use_duplimat = true;
+	}
+
+/* 2.6 setup strand rendering */
+	if (part->ren_as == PART_DRAW_PATH && psys->pathcache) {
+		path_nbr = step_nbr;
+
+		if (path_nbr) {
+			if (!ELEM(ma->material_type, MA_TYPE_HALO, MA_TYPE_WIRE)) {
+				sd.orco = get_object_orco(re, psys);
+				if (!sd.orco) {
+					sd.orco = MEM_mallocN(3*sizeof(float)*(totpart+totchild), "particle orcos");
+					set_object_orco(re, psys, sd.orco);
+				}
+			}
+		}
+
+		if (part->draw & PART_DRAW_REN_ADAPT) {
+			sd.adapt = 1;
+			sd.adapt_pix = (float)part->adapt_pix;
+			sd.adapt_angle = cosf(DEG2RADF((float)part->adapt_angle));
+		}
+
+		if (part->draw & PART_DRAW_REN_STRAND) {
+			strandbuf= RE_addStrandBuffer(obr, (totpart+totchild)*(path_nbr+1));
+			strandbuf->ma= ma;
+			strandbuf->lay= ob->lay;
+			copy_m4_m4(strandbuf->winmat, re->winmat);
+			strandbuf->winx= re->winx;
+			strandbuf->winy= re->winy;
+			strandbuf->maxdepth= 2;
+			strandbuf->adaptcos= cosf(DEG2RADF((float)part->adapt_angle));
+			strandbuf->overrideuv= sd.override_uv;
+			strandbuf->minwidth= ma->strand_min;
+
+			if (ma->strand_widthfade == 0.0f)
+				strandbuf->widthfade= -1.0f;
+			else if (ma->strand_widthfade >= 1.0f)
+				strandbuf->widthfade= 2.0f - ma->strand_widthfade;
+			else
+				strandbuf->widthfade= 1.0f/MAX2(ma->strand_widthfade, 1e-5f);
+
+			if (part->flag & PART_HAIR_BSPLINE)
+				strandbuf->flag |= R_STRAND_BSPLINE;
+			if (ma->mode & MA_STR_B_UNITS)
+				strandbuf->flag |= R_STRAND_B_UNITS;
+
+			svert= strandbuf->vert;
+
+			if (re->r.mode & R_SPEED)
+				do_surfacecache = true;
+			else if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && (re->wrld.ao_gather_method == WO_AOGATHER_APPROX))
+				if (ma->amb != 0.0f)
+					do_surfacecache = true;
+
+			totface= psmd->dm_final->getNumTessFaces(psmd->dm_final);
+			index_mf_to_mpoly = psmd->dm_final->getTessFaceDataArray(psmd->dm_final, CD_ORIGINDEX);
+			index_mp_to_orig = psmd->dm_final->getPolyDataArray(psmd->dm_final, CD_ORIGINDEX);
+			if (index_mf_to_mpoly == NULL) {
+				index_mp_to_orig = NULL;
+			}
+			for (a=0; a<totface; a++)
+				strandbuf->totbound = max_ii(strandbuf->totbound, (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a): a);
+
+			strandbuf->totbound++;
+			strandbuf->bound= MEM_callocN(sizeof(StrandBound)*strandbuf->totbound, "StrandBound");
+			sbound= strandbuf->bound;
+			sbound->start= sbound->end= 0;
+		}
+	}
+
+	if (sd.orco == NULL) {
+		sd.orco = MEM_mallocN(3 * sizeof(float), "particle orco");
+		orco1 = 1;
+	}
+
+	if (path_nbr == 0)
+		psys->lattice_deform_data = psys_create_lattice_deform_data(&sim);
+
+/* 3. start creating renderable things */
+	for (a=0, pa=pars; a<totpart+totchild; a++, pa++, seed++) {
+		random = BLI_rng_get_float(rng);
+		/* setup per particle individual stuff */
+		if (a<totpart) {
+			if (pa->flag & PARS_UNEXIST) continue;
+
+			pa_time=(cfra-pa->time)/pa->lifetime;
+			pa_birthtime = pa->time;
+			pa_dietime = pa->dietime;
+
+			hasize = ma->hasize;
+
+			/* XXX 'tpsys' is always NULL, this code won't run! */
+			/* get orco */
+			if (tpsys && part->phystype == PART_PHYS_NO) {
+				tpa = tpsys->particles + pa->num;
+				psys_particle_on_emitter(
+				        psmd,
+				        tpart->from, tpa->num, pa->num_dmcache, tpa->fuv,
+				        tpa->foffset, co, nor, NULL, NULL, sd.orco, NULL);
+			}
+			else {
+				psys_particle_on_emitter(
+				        psmd,
+				        part->from, pa->num, pa->num_dmcache,
+				        pa->fuv, pa->foffset, co, nor, NULL, NULL, sd.orco, NULL);
+			}
+
+			/* get uvco & mcol */
+			num= pa->num_dmcache;
+
+			if (num == DMCACHE_NOTFOUND)
+				if (pa->num < psmd->dm_final->getNumTessFaces(psmd->dm_final))
+					num= pa->num;
+
+			get_particle_uvco_mcol(part->from, psmd->dm_final, pa->fuv, num, &sd);
+
+			pa_size = pa->size;
+
+			r_tilt = 2.0f*(psys_frand(psys, a) - 0.5f);
+			r_length = psys_frand(psys, a+1);
+
+			if (path_nbr) {
+				cache = psys->pathcache[a];
+				max_k = (int)cache->segments;
+			}
+
+			if (totchild && (part->draw&PART_DRAW_PARENT)==0) continue;
+		}
+		else {
+			ChildParticle *cpa= psys->child+a-totpart;
+
+			if (path_nbr) {
+				cache = psys->childcache[a-totpart];
+
+				if (cache->segments < 0)
+					continue;
+
+				max_k = (int)cache->segments;
+			}
+
+			pa_time = psys_get_child_time(psys, cpa, cfra, &pa_birthtime, &pa_dietime);
+			pa_size = psys_get_child_size(psys, cpa, cfra, &pa_time);
+
+			r_tilt = 2.0f*(psys_frand(psys, a + 21) - 0.5f);
+			r_length = psys_frand(psys, a + 22);
+
+			num = cpa->num;
+
+			/* get orco */
+			if (part->childtype == PART_CHILD_FACES) {
+				psys_particle_on_emitter(
+				        psmd,
+				        PART_FROM_FACE, cpa->num, DMCACHE_ISCHILD,
+				        cpa->fuv, cpa->foffset, co, nor, NULL, NULL, sd.orco, NULL);
+			}
+			else {
+				ParticleData *par = psys->particles + cpa->parent;
+				psys_particle_on_emitter(
+				        psmd,
+				        part->from, par->num, DMCACHE_ISCHILD, par->fuv,
+				        par->foffset, co, nor, NULL, NULL, sd.orco, NULL);
+			}
+
+			/* get uvco & mcol */
+			if (part->childtype==PART_CHILD_FACES) {
+				get_particle_uvco_mcol(PART_FROM_FACE, psmd->dm_final, cpa->fuv, cpa->num, &sd);
+			}
+			else {
+				ParticleData *parent = psys->particles + cpa->parent;
+				num = parent->num_dmcache;
+
+				if (num == DMCACHE_NOTFOUND)
+					if (parent->num < psmd->dm_final->getNumTessFaces(psmd->dm_final))
+						num = parent->num;
+
+				get_particle_uvco_mcol(part->from, psmd->dm_final, parent->fuv, num, &sd);
+			}
+
+			do_simplify = psys_render_simplify_params(psys, cpa, simplify);
+
+			if (strandbuf) {
+				int orignum = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, cpa->num) : cpa->num;
+
+				if ((orignum > sbound - strandbuf->bound) &&
+				    (orignum < strandbuf->totbound))
+				{
+					sbound = &strandbuf->bound[orignum];
+					sbound->start = sbound->end = obr->totstrand;
+				}
+			}
+		}
+
+		/* TEXCO_PARTICLE */
+		pa_co[0] = pa_time;
+		pa_co[1] = 0.f;
+		pa_co[2] = 0.f;
+
+		/* surface normal shading setup */
+		if (ma->mode_l & MA_STR_SURFDIFF) {
+			mul_m3_v3(nmat, nor);
+			sd.surfnor= nor;
+		}
+		else
+			sd.surfnor= NULL;
+
+		/* strand render setup */
+		if (strandbuf) {
+			strand= RE_findOrAddStrand(obr, obr->totstrand++);
+			strand->buffer= strandbuf;
+			strand->vert= svert;
+			copy_v3_v3(strand->orco, sd.orco);
+
+			if (do_simplify) {
+				float *ssimplify= RE_strandren_get_simplify(obr, strand, 1);
+				ssimplify[0]= simplify[0];
+				ssimplify[1]= simplify[1];
+			}
+
+			if (sd.surfnor) {
+				float *snor= RE_strandren_get_surfnor(obr, strand, 1);
+				copy_v3_v3(snor, sd.surfnor);
+			}
+
+			if (do_surfacecache && num >= 0) {
+				int *facenum= RE_strandren_get_face(obr, strand, 1);
+				*facenum= num;
+			}
+
+			if (sd.uvco) {
+				for (i=0; i<sd.totuv; i++) {
+					if (i != sd.override_uv) {
+						float *uv= RE_strandren_get_uv(obr, strand, i, NULL, 1);
+
+						uv[0]= sd.uvco[2*i];
+						uv[1]= sd.uvco[2*i+1];
+					}
+				}
+			}
+			if (sd.mcol) {
+				for (i=0; i<sd.totcol; i++) {
+					MCol *mc= RE_strandren_get_mcol(obr, strand, i, NULL, 1);
+					*mc = sd.mcol[i];
+				}
+			}
+
+			sbound->end++;
+		}
+
+		/* strandco computation setup */
+		if (path_nbr) {
+			strandlen= 0.0f;
+			curlen= 0.0f;
+			for (k=1; k<=path_nbr; k++)
+				if (k<=max_k)
+					strandlen += len_v3v3((cache+k-1)->co, (cache+k)->co);
+		}
+
+		if (path_nbr) {
+			/* render strands */
+			for (k=0; k<=path_nbr; k++) {
+				float time;
+
+				if (k<=max_k) {
+					copy_v3_v3(state.co, (cache+k)->co);
+					copy_v3_v3(state.vel, (cache+k)->vel);
+				}
+				else
+					continue;
+
+				if (k > 0)
+					curlen += len_v3v3((cache+k-1)->co, (cache+k)->co);
+				time= curlen/strandlen;
+
+				copy_v3_v3(loc, state.co);
+				mul_m4_v3(re->viewmat, loc);
+
+				if (strandbuf) {
+					copy_v3_v3(svert->co, loc);
+					svert->strandco= -1.0f + 2.0f*time;
+					svert++;
+					strand->totvert++;
+				}
+				else {
+					sd.size = hasize;
+
+					if (k==1) {
+						sd.first = 1;
+						sd.time = 0.0f;
+						sub_v3_v3v3(loc0, loc1, loc);
+						add_v3_v3v3(loc0, loc1, loc0);
+
+						particle_curve(re, obr, psmd->dm_final, ma, &sd, loc1, loc0, seed, pa_co);
+					}
+
+					sd.first = 0;
+					sd.time = time;
+
+					if (k)
+						particle_curve(re, obr, psmd->dm_final, ma, &sd, loc, loc1, seed, pa_co);
+
+					copy_v3_v3(loc1, loc);
+				}
+			}
+
+		}
+		else {
+			/* render normal particles */
+			if (part->trail_count > 1) {
+				float length = part->path_end * (1.0f - part->randlength * r_length);
+				int trail_count = part->trail_count * (1.0f - part->randlength * r_length);
+				float ct = (part->draw & PART_ABS_PATH_TIME) ? cfra : pa_time;
+				float dt = length / (trail_count ? (float)trail_count : 1.0f);
+
+				/* make sure we have pointcache in memory before getting particle on path */
+				psys_make_temp_pointcache(ob, psys);
+
+				for (i=0; i < trail_count; i++, ct -= dt) {
+					if (part->draw & PART_ABS_PATH_TIME) {
+						if (ct < pa_birthtime || ct > pa_dietime)
+							continue;
+					}
+					else if (ct < 0.0f || ct > 1.0f)
+						continue;
+
+					state.time = (part->draw & PART_ABS_PATH_TIME) ? -ct : ct;
+					psys_get_particle_on_path(&sim, a, &state, 1);
+
+					if (psys->parent)
+						mul_m4_v3(psys->parent->obmat, state.co);
+
+					if (use_duplimat)
+						mul_m4_v4(duplimat, state.co);
+
+					if (part->ren_as == PART_DRAW_BB) {
+						bb.random = random;
+						bb.offset[0] = part->bb_offset[0];
+						bb.offset[1] = part->bb_offset[1];
+						bb.size[0] = part->bb_size[0] * pa_size;
+						if (part->bb_align==PART_BB_VEL) {
+							float pa_vel = len_v3(state.vel);
+							float head = part->bb_vel_head*pa_vel;
+							float tail = part->bb_vel_tail*pa_vel;
+							bb.size[1] = part->bb_size[1]*pa_size + head + tail;
+							/* use offset to adjust the particle center. this is relative to size, so need to divide! */
+							if (bb.size[1] > 0.0f)
+								bb.offset[1] += (head-tail) / bb.size[1];
+						}
+						else
+							bb.size[1] = part->bb_size[1] * pa_size;
+						bb.tilt = part->bb_tilt * (1.0f - part->bb_rand_tilt * r_tilt);
+						bb.time = ct;
+						bb.num = a;
+					}
+
+					pa_co[0] = (part->draw & PART_ABS_PATH_TIME) ? (ct-pa_birthtime)/(pa_dietime-pa_birthtime) : ct;
+					pa_co[1] = (float)i/(float)(trail_count-1);
+
+					particle_normal_ren(part->ren_as, part, re, obr, psmd->dm_final, ma, &sd, &bb, &state, seed, hasize, pa_co);
+				}
+			}
+			else {
+				state.time=cfra;
+				if (psys_get_particle_state(&sim, a, &state, 0)==0)
+					continue;
+
+				if (psys->parent)
+					mul_m4_v3(psys->parent->obmat, state.co);
+
+				if (use_duplimat)
+					mul_m4_v3(duplimat, state.co);
+
+				if (part->ren_as == PART_DRAW_BB) {
+					bb.random = random;
+					bb.offset[0] = part->bb_offset[0];
+					bb.offset[1] = part->bb_offset[1];
+					bb.size[0] = part->bb_size[0] * pa_size;
+					if (part->bb_align==PART_BB_VEL) {
+						float pa_vel = len_v3(state.vel);
+						float head = part->bb_vel_head*pa_vel;
+						float tail = part->bb_vel_tail*pa_vel;
+						bb.size[1] = part->bb_size[1]*pa_size + head + tail;
+						/* use offset to adjust the particle center. this is relative to size, so need to divide! */
+						if (bb.size[1] > 0.0f)
+							bb.offset[1] += (head-tail) / bb.size[1];
+					}
+					else
+						bb.size[1] = part->bb_size[1] * pa_size;
+					bb.tilt = part->bb_tilt * (1.0f - part->bb_rand_tilt * r_tilt);
+					bb.time = pa_time;
+					bb.num = a;
+					bb.lifetime = pa_dietime-pa_birthtime;
+				}
+
+				particle_normal_ren(part->ren_as, part, re, obr, psmd->dm_final, ma, &sd, &bb, &state, seed, hasize, pa_co);
+			}
+		}
+
+		if (orco1==0)
+			sd.orco+=3;
+
+		if (re->test_break(re->tbh))
+			break;
+	}
+
+	if (do_surfacecache)
+		strandbuf->surface= cache_strand_surface(re, obr, psmd->dm_final, mat, timeoffset);
+
+/* 4. clean up */
+#if 0  /* XXX old animation system */
+	if (ma) do_mat_ipo(re->scene, ma);
+#endif  /* XXX old animation system */
+
+	if (orco1)
+		MEM_freeN(sd.orco);
+
+	if (sd.uvco)
+		MEM_freeN(sd.uvco);
+
+	if (sd.mcol)
+		MEM_freeN(sd.mcol);
+
+	if (states)
+		MEM_freeN(states);
+
+	BLI_rng_free(rng);
+
+	psys->flag &= ~PSYS_DRAWING;
+
+	if (psys->lattice_deform_data) {
+		end_latt_deform(psys->lattice_deform_data);
+		psys->lattice_deform_data = NULL;
+	}
+
+	if (path_nbr && (ma->mode_l & MA_TANGENT_STR)==0)
+		calc_vertexnormals(re, obr, 1, 0, 0);
+
+	return 1;
+}
+
+/* ------------------------------------------------------------------------- */
+/* Halos                                                                     */
+/* ------------------------------------------------------------------------- */
+
+/* Adds one render halo per vertex of the given vertex array.
+ *
+ * re:      render context; R_HALO is set on re->flag.
+ * obr:     render object the halos are registered on.
+ * totvert: number of entries in mvert.
+ * mvert:   vertex array; each location is transformed by viewmat * obmat.
+ * ma:      material supplying halo size, mode flags and the starting seed.
+ * orco:    optional, 3 floats per vertex handed to RE_inithalo(); when NULL
+ *          the untransformed vertex location is handed over instead.
+ *
+ * With MA_HALOPUNO set, the halo size is scaled by the 4th power of the dot
+ * product between vertex normal and view vector, and set to zero when that
+ * dot product is not negative. */
+static void make_render_halos(Render *re, ObjectRen *obr, Mesh *UNUSED(me), int totvert, MVert *mvert, Material *ma, float *orco)
+{
+	Object *ob = obr->ob;
+	float mat[4][4], imat[3][3];
+	int seed = ma->seed1;
+	int a;
+
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	copy_m3_m4(imat, ob->imat);
+
+	re->flag |= R_HALO;
+
+	/* every vertex consumes one seed value, whether or not a halo results */
+	for (a = 0; a < totvert; a++, mvert++, seed++) {
+		HaloRen *har;
+		float vec[3];
+		float hasize = ma->hasize;
+
+		copy_v3_v3(vec, mvert->co);
+		mul_m4_v3(mat, vec);
+
+		if (ma->mode & MA_HALOPUNO) {
+			float nor[3], view[3], facing;
+			const float xn = mvert->no[0];
+			const float yn = mvert->no[1];
+			const float zn = mvert->no[2];
+
+			/* transpose ! */
+			nor[0] = imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn;
+			nor[1] = imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn;
+			nor[2] = imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn;
+			normalize_v3(nor);
+
+			copy_v3_v3(view, vec);
+			normalize_v3(view);
+
+			facing = dot_v3v3(nor, view);
+			if (facing >= 0.0f) {
+				hasize = 0.0f;
+			}
+			else {
+				hasize *= facing*facing*facing*facing;
+			}
+		}
+
+		har = RE_inithalo(re, obr, ma, vec, NULL, orco ? orco : mvert->co, hasize, 0.0, seed);
+		if (har) {
+			har->lay = ob->lay;
+		}
+
+		if (orco) {
+			orco += 3;
+		}
+	}
+}
+
+/* qsort() comparator over an array of HaloRen pointers.
+ * Orders entries by descending 'zs' (larger zs sorts first). */
+static int verghalo(const void *a1, const void *a2)
+{
+	const HaloRen *left = *(const HaloRen *const *)a1;
+	const HaloRen *right = *(const HaloRen *const *)a2;
+
+	if (left->zs > right->zs) {
+		return -1;
+	}
+	if (left->zs < right->zs) {
+		return 1;
+	}
+	return 0;
+}
+
+/* Collects the halos of all render objects into re->sortedhalos and sorts
+ * that pointer array with verghalo().
+ *
+ * The array is allocated for re->tothalo entries, while 'totsort' entries are
+ * sorted.
+ * NOTE(review): nothing here checks that totsort <= re->tothalo or that the
+ * per-object halo counts add up to re->tothalo - confirm against the callers. */
+static void sort_halos(Render *re, int totsort)
+{
+	ObjectRen *obr;
+	HaloRen **dst;
+
+	if (re->tothalo == 0) {
+		return;
+	}
+
+	re->sortedhalos = MEM_callocN(sizeof(HaloRen*)*re->tothalo, "sorthalos");
+	dst = re->sortedhalos;
+
+	for (obr = re->objecttable.first; obr; obr = obr->next) {
+		HaloRen *har = NULL;
+		int a;
+
+		for (a = 0; a < obr->tothalo; a++) {
+			/* every 256th halo starts a new entry of bloha[], the others follow
+			 * contiguously behind it */
+			har = ((a & 255) == 0) ? obr->bloha[a>>8] : har + 1;
+			*dst++ = har;
+		}
+	}
+
+	qsort(re->sortedhalos, totsort, sizeof(HaloRen*), verghalo);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Displacement Mapping														 */
+/* ------------------------------------------------------------------------- */
+
+/* Returns 1 when any render material of the object (slots 1..ob->totcol) has
+ * MAP_DISPLACE set in its 'mapto' flags, 0 otherwise. */
+static short test_for_displace(Render *re, Object *ob)
+{
+	int slot = 1;
+
+	while (slot <= ob->totcol) {
+		Material *ma = give_render_material(re, ob, slot);
+
+		/* ma->mapto is ORed total of all mapto channels */
+		if (ma != NULL && (ma->mapto & MAP_DISPLACE) != 0) {
+			return 1;
+		}
+		slot++;
+	}
+
+	return 0;
+}
+
+/* Displaces a single render vertex by the material texture stack.
+ *
+ * shi:    shade input prepared by the caller (material and face set); its
+ *         coordinate fields are filled in here before do_material_tex() runs.
+ * vr:     vertex to move; vr->co is offset in place, vr->flag gets bit 1 set
+ *         and vr->accum receives the squared length of the unscaled
+ *         displacement.
+ * vindex: corner of the face (0..3) used to pick the UV of this vertex.
+ * scale:  per-axis factor applied to the displacement before it is added. */
+static void displace_render_vert(Render *re, ObjectRen *obr, ShadeInput *shi, VertRen *vr, int vindex, float *scale)
+{
+	const short texco = shi->mat->texco;
+	float sample = 0;
+	int axis;
+
+	/* shi->co is current render coord, just make sure at least some vector is here */
+	copy_v3_v3(shi->co, vr->co);
+	/* vertex normal is used for textures type 'col' and 'var' */
+	copy_v3_v3(shi->vn, vr->n);
+
+	if (texco & TEXCO_UV) {
+		MTFace *tface;
+		char *name;
+		int layer = 0;
+
+		shi->totuv = 0;
+		shi->actuv = obr->actmtface;
+
+		/* walk the UV layers of the face until the lookup returns NULL */
+		while ((tface = RE_vlakren_get_tface(obr, shi->vlr, layer, &name, 0))) {
+			ShadeInputUV *suv = &shi->uv[layer];
+
+			/* shi.uv needs scale correction from tface uv */
+			suv->uv[0] = 2*tface->uv[vindex][0]-1.0f;
+			suv->uv[1] = 2*tface->uv[vindex][1]-1.0f;
+			suv->uv[2] = 0.0f;
+			suv->name = name;
+			shi->totuv++;
+			layer++;
+		}
+	}
+
+	/* set all rendercoords, 'texco' is an ORed value for all textures needed */
+	if ((texco & TEXCO_ORCO) && (vr->orco)) {
+		copy_v3_v3(shi->lo, vr->orco);
+	}
+	if (texco & TEXCO_GLOB) {
+		copy_v3_v3(shi->gl, shi->co);
+		mul_m4_v3(re->viewinv, shi->gl);
+	}
+	if (texco & TEXCO_NORM) {
+		copy_v3_v3(shi->orn, shi->vn);
+	}
+	if (texco & TEXCO_REFL) {
+		/* not (yet?) */
+	}
+	if (texco & TEXCO_STRESS) {
+		const float *s = RE_vertren_get_stress(obr, vr, 0);
+
+		if (s == NULL) {
+			shi->stress = 0.0f;
+		}
+		else {
+			shi->stress = *s;
+			if (shi->stress<1.0f) {
+				shi->stress -= 1.0f;
+			}
+			else {
+				shi->stress = (shi->stress-1.0f)/shi->stress;
+			}
+		}
+	}
+
+	shi->displace[0] = 0.0f;
+	shi->displace[1] = 0.0f;
+	shi->displace[2] = 0.0f;
+
+	do_material_tex(shi, re);
+
+	/* we just don't do this vertex again, bad luck for other face using same vertex with
+	 * different material... */
+	vr->flag |= 1;
+
+	for (axis = 0; axis < 3; axis++) {
+		const float offset = shi->displace[axis] * scale[axis];
+
+		vr->co[axis] += offset;
+
+		/* squared length of the unscaled displacement; only used for "which is
+		 * bigger" comparisons, so the sqrt is skipped */
+		sample += shi->displace[axis]*shi->displace[axis];
+	}
+
+	/* Pass sample back so displace_face can decide which way to split the quad */
+	vr->accum = sample;
+}
+
+/* Displaces the not-yet-processed vertices of one render face, then refreshes
+ * the face normal. For quads the R_DIVIDE_24 flag is also set or cleared,
+ * depending on which diagonal has the larger difference in 'accum'.
+ *
+ * A vertex counts as processed when its 'flag' is non-zero, which
+ * displace_render_vert() sets. */
+static void displace_render_face(Render *re, ObjectRen *obr, VlakRen *vlr, float *scale)
+{
+	ShadeInput shi;
+	VertRen *corner[4];
+	const int totcorner = vlr->v4 ? 4 : 3;
+	int i;
+
+	/* The whole struct is zeroed because not every member gets initialized
+	 * otherwise (shade_input_initialize() is not used here). This also leaves
+	 * shi.osatex at 0: no dx[]/dy[] texture AA vectors. */
+	memset(&shi, 0, sizeof(ShadeInput));
+
+	/* set up shadeinput struct for multitex() */
+	shi.obr = obr;
+	shi.vlr = vlr;          /* current render face */
+	shi.mat = vlr->mat;     /* current input material */
+	shi.thread = 0;
+
+	/* TODO, assign these, displacement with new bumpmap is skipped without - campbell */
+#if 0
+	/* order is not known ? */
+	shi.v1= vlr->v1;
+	shi.v2= vlr->v2;
+	shi.v3= vlr->v3;
+#endif
+
+	corner[0] = vlr->v1;
+	corner[1] = vlr->v2;
+	corner[2] = vlr->v3;
+	corner[3] = vlr->v4;
+
+	/* Displace the verts, flag is set when done */
+	for (i = 0; i < totcorner; i++) {
+		if (!corner[i]->flag) {
+			displace_render_vert(re, obr, &shi, corner[i], i, scale);
+		}
+	}
+
+	if (vlr->v4) {
+		const float diff_13 = fabsf(vlr->v1->accum - vlr->v3->accum);
+		const float diff_24 = fabsf(vlr->v2->accum - vlr->v4->accum);
+
+		/* closest in displace value.  This will help smooth edges. */
+		if (diff_13 > diff_24) {
+			vlr->flag |= R_DIVIDE_24;
+		}
+		else {
+			vlr->flag &= ~R_DIVIDE_24;
+		}
+
+		/* Recalculate the face normal  - if flipped before, flip now */
+		normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+	}
+	else {
+		normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+	}
+}
+
+/* Applies displacement to all faces of a render object.
+ *
+ * The displacement is scaled per axis by the product of size * dscale of the
+ * object and of every parent up the chain. Vertex flags are cleared first so
+ * that each vertex gets displaced once, and vertex normals are recalculated
+ * at the end. */
+static void displace(Render *re, ObjectRen *obr)
+{
+	float scale[3] = {1.0f, 1.0f, 1.0f};
+	Object *obt;
+	int i;
+
+	/* Object Size with parenting */
+	for (obt = obr->ob; obt; obt = obt->parent) {
+		float temp[3];
+
+		mul_v3_v3v3(temp, obt->size, obt->dscale);
+		scale[0] *= temp[0];
+		scale[1] *= temp[1];
+		scale[2] *= temp[2];
+	}
+
+	/* Clear all flags */
+	for (i = 0; i < obr->totvert; i++) {
+		VertRen *vr = RE_findOrAddVert(obr, i);
+		vr->flag = 0;
+	}
+
+	for (i = 0; i < obr->totvlak; i++) {
+		VlakRen *vlr = RE_findOrAddVlak(obr, i);
+		displace_render_face(re, obr, vlr, scale);
+	}
+
+	/* Recalc vertex normals */
+	calc_vertexnormals(re, obr, 1, 0, 0);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Metaball   																 */
+/* ------------------------------------------------------------------------- */
+
+/* Converts the display list of a metaball object into render vertices and
+ * triangles on 'obr'.
+ *
+ * Only the object returned by BKE_mball_basis_find() produces geometry; for
+ * any other object this returns right away. All faces get the material of
+ * slot 1 and are flagged ME_SMOOTH.
+ *
+ * Side effect: ob->imat is overwritten with the inverse of viewmat * obmat. */
+static void init_render_mball(Render *re, ObjectRen *obr)
+{
+	Object *ob= obr->ob;
+	DispList *dl;
+	VertRen *ver;
+	VlakRen *vlr, *vlr1;
+	Material *ma;
+	float *data, *nors, *orco=NULL, mat[4][4], imat[3][3], xn, yn, zn;
+	int a, need_orco, vlakindex, *index, negative_scale;
+	ListBase dispbase= {NULL, NULL};
+
+	if (ob!=BKE_mball_basis_find(re->eval_ctx, re->scene, ob))
+		return;
+
+	/* object -> view space transform, its inverse, and the winding test */
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);
+	copy_m3_m4(imat, ob->imat);
+	negative_scale = is_negative_m4(mat);
+
+	/* NOTE(review): 'ma' is dereferenced without a NULL check; presumably
+	 * give_render_material() always returns a material - confirm. */
+	ma= give_render_material(re, ob, 1);
+
+	need_orco= 0;
+	if (ma->texco & TEXCO_ORCO) {
+		need_orco= 1;
+	}
+
+	BKE_displist_make_mball_forRender(re->eval_ctx, re->scene, ob, &dispbase);
+	dl= dispbase.first;
+	if (dl == NULL) return;
+
+	data= dl->verts;
+	nors= dl->nors;
+	if (need_orco) {
+		orco= get_object_orco(re, ob);
+
+		if (!orco) {
+			/* orco hasn't been found in cache - create new one and add to cache */
+			orco= BKE_mball_make_orco(ob, &dispbase);
+			set_object_orco(re, ob, orco);
+		}
+	}
+
+	/* vertices: dl->nr entries of 3 floats, with a matching normal each */
+	for (a=0; a<dl->nr; a++, data+=3, nors+=3) {
+
+		ver= RE_findOrAddVert(obr, obr->totvert++);
+		copy_v3_v3(ver->co, data);
+		mul_m4_v3(mat, ver->co);
+
+		/* render normals are inverted */
+		xn= -nors[0];
+		yn= -nors[1];
+		zn= -nors[2];
+
+		/* transpose ! */
+		ver->n[0]= imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn;
+		ver->n[1]= imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn;
+		ver->n[2]= imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn;
+		normalize_v3(ver->n);
+		//if (ob->transflag & OB_NEG_SCALE) negate_v3(ver->n);
+
+		if (need_orco) {
+			ver->orco= orco;
+			orco+=3;
+		}
+	}
+
+	/* faces: dl->parts entries of 4 vertex indices each.
+	 * NOTE(review): the indices are used without adding a start offset, so
+	 * this assumes obr held no vertices before this call - confirm. */
+	index= dl->index;
+	for (a=0; a<dl->parts; a++, index+=4) {
+
+		vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+		vlr->v1= RE_findOrAddVert(obr, index[0]);
+		vlr->v2= RE_findOrAddVert(obr, index[1]);
+		vlr->v3= RE_findOrAddVert(obr, index[2]);
+		vlr->v4 = NULL;
+
+		/* the winding used for the normal depends on the sign of the transform */
+		if (negative_scale)
+			normal_tri_v3(vlr->n, vlr->v1->co, vlr->v2->co, vlr->v3->co);
+		else
+			normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+
+		vlr->mat= ma;
+		vlr->flag= ME_SMOOTH;
+		vlr->ec= 0;
+
+		/* mball -too bad- always has triangles, because quads can be non-planar */
+		/* a fourth index that is non-zero and differs from the third adds a
+		 * second triangle (v1, v3, v4) */
+		if (index[3] && index[3]!=index[2]) {
+			vlr1= RE_findOrAddVlak(obr, obr->totvlak++);
+			/* copy the first triangle, but keep the new face's own index */
+			vlakindex= vlr1->index;
+			*vlr1= *vlr;
+			vlr1->index= vlakindex;
+			vlr1->v2= vlr1->v3;
+			vlr1->v3= RE_findOrAddVert(obr, index[3]);
+			if (negative_scale)
+				normal_tri_v3(vlr1->n, vlr1->v1->co, vlr1->v2->co, vlr1->v3->co);
+			else
+				normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co);
+		}
+	}
+
+	/* enforce display lists remade */
+	BKE_displist_free(&dispbase);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Surfaces and Curves														 */
+/* ------------------------------------------------------------------------- */
+
+/* Converts one surface display list into render vertices and quads on 'obr'.
+ *
+ * obr:   render object receiving the geometry.
+ * dl:    display list holding a grid of dl->parts rows of dl->nr points.
+ * matar: material array, indexed with dl->col.
+ * orco:  optional texture coordinates, 3 floats per created vertex (this
+ *        includes the duplicated vertices added for cyclic surfaces); may be
+ *        NULL.
+ * mat:   matrix applied to every vertex location.
+ *
+ * Returns the number of vertices that had an orco assigned (0 when 'orco' is
+ * NULL), so the caller can advance its orco pointer by 3 times that amount.
+ *
+ * NOTE: flag names and local names are crossed here: DL_CYCL_U adds a vertex
+ * to the inner 'v' loop, DL_CYCL_V adds a row to the outer 'u' loop. */
+static int dl_surf_to_renderdata(ObjectRen *obr, DispList *dl, Material **matar, float *orco, float mat[4][4])
+{
+	VertRen *v1, *v2, *v3, *v4, *ver;
+	VlakRen *vlr, *vlr1, *vlr2, *vlr3;
+	float *data, n1[3];
+	int u, v, orcoret= 0;
+	int p1, p2, p3, p4, a;
+	int sizeu, nsizeu, sizev, nsizev;
+	int startvert, startvlak;
+
+	startvert= obr->totvert;
+	nsizeu = sizeu = dl->parts; nsizev = sizev = dl->nr;
+
+	data= dl->verts;
+	for (u = 0; u < sizeu; u++) {
+		v1 = RE_findOrAddVert(obr, obr->totvert++); /* save this for possible V wrapping */
+		copy_v3_v3(v1->co, data); data += 3;
+		if (orco) {
+			v1->orco= orco; orco+= 3; orcoret++;
+		}
+		mul_m4_v3(mat, v1->co);
+
+		for (v = 1; v < sizev; v++) {
+			ver= RE_findOrAddVert(obr, obr->totvert++);
+			copy_v3_v3(ver->co, data); data += 3;
+			if (orco) {
+				ver->orco= orco; orco+= 3; orcoret++;
+			}
+			mul_m4_v3(mat, ver->co);
+		}
+		/* if V-cyclic, add extra vertices at end of the row */
+		/* the copy is taken from the already transformed first vertex of the
+		 * row, so 'mat' is not applied again */
+		if (dl->flag & DL_CYCL_U) {
+			ver= RE_findOrAddVert(obr, obr->totvert++);
+			copy_v3_v3(ver->co, v1->co);
+			if (orco) {
+				ver->orco= orco; orco+=3; orcoret++; //orcobase + 3*(u*sizev + 0);
+			}
+		}
+	}
+
+	/* Done before next loop to get corner vert */
+	if (dl->flag & DL_CYCL_U) nsizev++;
+	if (dl->flag & DL_CYCL_V) nsizeu++;
+
+	/* if U cyclic, add extra row at end of column */
+	/* this row duplicates the first row, including its extra end vertex */
+	if (dl->flag & DL_CYCL_V) {
+		for (v = 0; v < nsizev; v++) {
+			v1= RE_findOrAddVert(obr, startvert + v);
+			ver= RE_findOrAddVert(obr, obr->totvert++);
+			copy_v3_v3(ver->co, v1->co);
+			if (orco) {
+				ver->orco= orco; orco+=3; orcoret++; //ver->orco= orcobase + 3*(0*sizev + v);
+			}
+		}
+	}
+
+	/* from here on the sizes include the duplicated vertices */
+	sizeu = nsizeu;
+	sizev = nsizev;
+
+	/* NOTE(review): 'startvlak' is not referenced directly below; presumably
+	 * the UVTOINDEX() macro (defined outside this block) uses it together
+	 * with 'sizev' - confirm. */
+	startvlak= obr->totvlak;
+
+	/* one quad per grid cell; the face normal is accumulated on its 4 corners */
+	for (u = 0; u < sizeu - 1; u++) {
+		p1 = startvert + u * sizev; /* walk through face list */
+		p2 = p1 + 1;
+		p3 = p2 + sizev;
+		p4 = p3 - 1;
+
+		for (v = 0; v < sizev - 1; v++) {
+			v1= RE_findOrAddVert(obr, p1);
+			v2= RE_findOrAddVert(obr, p2);
+			v3= RE_findOrAddVert(obr, p3);
+			v4= RE_findOrAddVert(obr, p4);
+
+			vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+			vlr->v1= v1; vlr->v2= v2; vlr->v3= v3; vlr->v4= v4;
+
+			normal_quad_v3(n1, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+
+			copy_v3_v3(vlr->n, n1);
+
+			vlr->mat= matar[ dl->col];
+			vlr->ec= ME_V1V2+ME_V2V3;
+			vlr->flag= dl->rt;
+
+			add_v3_v3(v1->n, n1);
+			add_v3_v3(v2->n, n1);
+			add_v3_v3(v3->n, n1);
+			add_v3_v3(v4->n, n1);
+
+			p1++; p2++; p3++; p4++;
+		}
+	}
+	/* fix normals for U resp. V cyclic faces */
+	/* the duplicated seam vertices only received normals from faces on their
+	 * own side; add the normals of the faces on the opposite side as well */
+	sizeu--; sizev--;  /* dec size for face array */
+	if (dl->flag & DL_CYCL_V) {
+
+		for (v = 0; v < sizev; v++) {
+			/* optimize! :*/
+			vlr= RE_findOrAddVlak(obr, UVTOINDEX(sizeu - 1, v));
+			vlr1= RE_findOrAddVlak(obr, UVTOINDEX(0, v));
+			add_v3_v3(vlr1->v1->n, vlr->n);
+			add_v3_v3(vlr1->v2->n, vlr->n);
+			add_v3_v3(vlr->v3->n, vlr1->n);
+			add_v3_v3(vlr->v4->n, vlr1->n);
+		}
+	}
+	if (dl->flag & DL_CYCL_U) {
+
+		for (u = 0; u < sizeu; u++) {
+			/* optimize! :*/
+			vlr= RE_findOrAddVlak(obr, UVTOINDEX(u, 0));
+			vlr1= RE_findOrAddVlak(obr, UVTOINDEX(u, sizev-1));
+			add_v3_v3(vlr1->v2->n, vlr->n);
+			add_v3_v3(vlr1->v3->n, vlr->n);
+			add_v3_v3(vlr->v1->n, vlr1->n);
+			add_v3_v3(vlr->v4->n, vlr1->n);
+		}
+	}
+
+	/* last vertex is an extra case:
+	 *
+	 *     ^     ()----()----()----()
+	 *     |     |     |     ||     |
+	 *     u     |     |(0,n)||(0,0)|
+	 *     |     |     |     ||     |
+	 *     ()====()====[]====()
+	 *     |     |     |     ||     |
+	 *     |     |     |(m,n)||(m,0)|
+	 *     |     |     |     ||     |
+	 *     ()----()----()----()
+	 *     v ->
+	 *
+	 *  vertex [] is no longer shared, therefore distribute
+	 *  normals of the surrounding faces to all of the duplicates of []
+	 */
+
+	if ((dl->flag & DL_CYCL_V) && (dl->flag & DL_CYCL_U)) {
+		vlr= RE_findOrAddVlak(obr, UVTOINDEX(sizeu - 1, sizev - 1)); /* (m, n) */
+		vlr1= RE_findOrAddVlak(obr, UVTOINDEX(0, 0));  /* (0, 0) */
+		add_v3_v3v3(n1, vlr->n, vlr1->n);
+		vlr2= RE_findOrAddVlak(obr, UVTOINDEX(0, sizev-1)); /* (0, n) */
+		add_v3_v3(n1, vlr2->n);
+		vlr3= RE_findOrAddVlak(obr, UVTOINDEX(sizeu-1, 0)); /* (m, 0) */
+		add_v3_v3(n1, vlr3->n);
+		copy_v3_v3(vlr->v3->n, n1);
+		copy_v3_v3(vlr1->v1->n, n1);
+		copy_v3_v3(vlr2->v2->n, n1);
+		copy_v3_v3(vlr3->v4->n, n1);
+	}
+	/* turn the accumulated face normals into unit vertex normals */
+	for (a = startvert; a < obr->totvert; a++) {
+		ver= RE_findOrAddVert(obr, a);
+		normalize_v3(ver->n);
+	}
+
+
+	return orcoret;
+}
+
+/* Converts a DerivedMesh into render vertices and faces on 'obr'.
+ *
+ * dm:         mesh to convert (not released here).
+ * timeoffset: when non-zero only the vertices are created; faces, customdata
+ *             names and vertex normals are skipped.
+ * orco:       optional texture coordinates, 3 floats per vertex; may be NULL.
+ * mat:        matrix applied to every vertex location.
+ *
+ * Faces are added grouped by material slot. Faces for which the normal
+ * calculation returns a zero length are dropped again. */
+static void init_render_dm(DerivedMesh *dm, Render *re, ObjectRen *obr,
+	int timeoffset, float *orco, float mat[4][4])
+{
+	Object *ob= obr->ob;
+	int a, end, totvert, vertofs;
+	short mat_iter;
+	VertRen *ver;
+	VlakRen *vlr;
+	MVert *mvert = NULL;
+	MFace *mface;
+	Material *ma;
+#ifdef WITH_FREESTYLE
+	const int *index_mf_to_mpoly = NULL;
+	const int *index_mp_to_orig = NULL;
+	FreestyleFace *ffa = NULL;
+#endif
+	/* Curve *cu= ELEM(ob->type, OB_FONT, OB_CURVE) ? ob->data : NULL; */
+
+	mvert= dm->getVertArray(dm);
+	totvert= dm->getNumVerts(dm);
+
+	for (a=0; a<totvert; a++, mvert++) {
+		ver= RE_findOrAddVert(obr, obr->totvert++);
+		copy_v3_v3(ver->co, mvert->co);
+		mul_m4_v3(mat, ver->co);
+
+		if (orco) {
+			ver->orco= orco;
+			orco+=3;
+		}
+	}
+
+	if (!timeoffset) {
+		/* store customdata names, because DerivedMesh is freed */
+		RE_set_customdata_names(obr, &dm->faceData);
+
+		/* still to do for keys: the correct local texture coordinate */
+
+		/* faces in order of color blocks */
+		/* index of the first vertex added above; face vertex indices are
+		 * relative to it */
+		vertofs= obr->totvert - totvert;
+		/* runs once per material slot, and exactly once when there are no slots */
+		for (mat_iter= 0; (mat_iter < ob->totcol || (mat_iter==0 && ob->totcol==0)); mat_iter++) {
+
+			ma= give_render_material(re, ob, mat_iter+1);
+			end= dm->getNumTessFaces(dm);
+			mface= dm->getTessFaceArray(dm);
+
+#ifdef WITH_FREESTYLE
+			if (ob->type == OB_MESH) {
+				Mesh *me= ob->data;
+				index_mf_to_mpoly= dm->getTessFaceDataArray(dm, CD_ORIGINDEX);
+				index_mp_to_orig= dm->getPolyDataArray(dm, CD_ORIGINDEX);
+				ffa= CustomData_get_layer(&me->pdata, CD_FREESTYLE_FACE);
+			}
+#endif
+
+			/* every pass walks all faces and picks those of the current slot */
+			for (a=0; a<end; a++, mface++) {
+				int v1, v2, v3, v4, flag;
+
+				if (mface->mat_nr == mat_iter) {
+					float len;
+
+					v1= mface->v1;
+					v2= mface->v2;
+					v3= mface->v3;
+					v4= mface->v4;
+					flag= mface->flag & ME_SMOOTH;
+
+					vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+					vlr->v1= RE_findOrAddVert(obr, vertofs+v1);
+					vlr->v2= RE_findOrAddVert(obr, vertofs+v2);
+					vlr->v3= RE_findOrAddVert(obr, vertofs+v3);
+					/* a fourth index of zero marks a triangle */
+					if (v4) vlr->v4= RE_findOrAddVert(obr, vertofs+v4);
+					else vlr->v4 = NULL;
+
+					/* render normals are inverted in render */
+					if (vlr->v4)
+						len= normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+					else
+						len= normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+
+					vlr->mat= ma;
+					vlr->flag= flag;
+					vlr->ec= 0; /* mesh edges rendered separately */
+#ifdef WITH_FREESTYLE
+					if (ffa) {
+						int index = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a) : a;
+						vlr->freestyle_face_mark= (ffa[index].flag & FREESTYLE_FACE_MARK) ? 1 : 0;
+					}
+					else {
+						vlr->freestyle_face_mark= 0;
+					}
+#endif
+
+					/* zero length normal: give the face slot back, the next
+					 * face added reuses it */
+					if (len==0) obr->totvlak--;
+					else {
+						CustomDataLayer *layer;
+						MTFace *mtface, *mtf;
+						MCol *mcol, *mc;
+						int index, mtfn= 0, mcn= 0;
+						char *name;
+
+						/* copy the UV and vertex color layers of this face,
+						 * up to MAX_MTFACE and MAX_MCOL layers */
+						for (index=0; index<dm->faceData.totlayer; index++) {
+							layer= &dm->faceData.layers[index];
+							name= layer->name;
+
+							if (layer->type == CD_MTFACE && mtfn < MAX_MTFACE) {
+								mtf= RE_vlakren_get_tface(obr, vlr, mtfn++, &name, 1);
+								mtface= (MTFace*)layer->data;
+								*mtf= mtface[a];
+							}
+							else if (layer->type == CD_MCOL && mcn < MAX_MCOL) {
+								mc= RE_vlakren_get_mcol(obr, vlr, mcn++, &name, 1);
+								mcol= (MCol*)layer->data;
+								/* 4 colors per face */
+								memcpy(mc, &mcol[a*4], sizeof(MCol)*4);
+							}
+						}
+					}
+				}
+			}
+		}
+
+		/* Normals */
+		calc_vertexnormals(re, obr, 1, 0, 0);
+	}
+
+}
+
+/* Converts a surface object into render geometry on 'obr'.
+ *
+ * Returns without doing anything when the curve data has no nurbs. When
+ * BKE_displist_make_surf() hands back a DerivedMesh, that mesh is converted
+ * with init_render_dm() and released; otherwise every DL_SURF entry of the
+ * display list is converted with dl_surf_to_renderdata().
+ *
+ * Texture coordinates (orco) are fetched from the cache, or created and
+ * cached, when a material uses TEXCO_ORCO or the parent is a lattice.
+ *
+ * Side effect: ob->imat is overwritten with the inverse of viewmat * obmat. */
+static void init_render_surf(Render *re, ObjectRen *obr, int timeoffset)
+{
+	Object *ob = obr->ob;
+	Curve *cu = ob->data;
+	ListBase displist = {NULL, NULL};
+	DerivedMesh *dm = NULL;
+	DispList *dl;
+	Material **matar;
+	float *orco = NULL, mat[4][4];
+	bool need_orco = false;
+	int a, totmat;
+
+	if (cu->nurb.first == NULL) {
+		return;
+	}
+
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);
+
+	/* material array, one entry more than the object has slots */
+	totmat = ob->totcol+1;
+	matar = MEM_callocN(sizeof(Material*)*totmat, "init_render_surf matar");
+
+	for (a = 0; a < totmat; a++) {
+		Material *ma = give_render_material(re, ob, a+1);
+
+		matar[a] = ma;
+		if (ma && (ma->texco & TEXCO_ORCO)) {
+			need_orco = true;
+		}
+	}
+
+	if (ob->parent && (ob->parent->type==OB_LATTICE)) {
+		need_orco = true;
+	}
+
+	BKE_displist_make_surf(re->scene, ob, &displist, &dm, 1, 0, 1);
+
+	if (need_orco) {
+		orco = get_object_orco(re, ob);
+
+		if (!orco) {
+			if (dm) {
+				/* only cached when creation succeeded */
+				orco = BKE_displist_make_orco(re->scene, ob, dm, true, true);
+				if (orco) {
+					set_object_orco(re, ob, orco);
+				}
+			}
+			else {
+				orco = BKE_curve_surf_make_orco(ob);
+				set_object_orco(re, ob, orco);
+			}
+		}
+	}
+
+	if (dm) {
+		init_render_dm(dm, re, obr, timeoffset, orco, mat);
+		dm->release(dm);
+	}
+	else {
+		/* walk along displaylist and create rendervertices/-faces */
+		for (dl = displist.first; dl; dl = dl->next) {
+			/* watch out: u ^= y, v ^= x !! */
+			if (dl->type==DL_SURF) {
+				orco += 3*dl_surf_to_renderdata(obr, dl, matar, orco, mat);
+			}
+		}
+	}
+
+	BKE_displist_free(&displist);
+
+	MEM_freeN(matar);
+}
+
+/* Converts a curve or font object into render geometry on 'obr'.
+ *
+ * Returns early for a font without text, a curve without nurbs, or an empty
+ * display list. When BKE_displist_make_curveTypes_forRender() hands back a
+ * DerivedMesh it is converted with init_render_dm() and released; otherwise
+ * the display list is walked:
+ * - DL_INDEX3 entries become triangles sharing one averaged normal,
+ * - DL_SURF entries become quads, either through dl_surf_to_renderdata()
+ *   (DL_CYCL_U set) or through the local strip builder below.
+ * Entries whose material index exceeds ob->totcol are skipped.
+ *
+ * timeoffset: when non-zero, faces are only created for DL_SURF entries that
+ *             carry a bevel_split bitmap; DL_INDEX3 entries get vertices only.
+ *
+ * Side effect: ob->imat is overwritten with the inverse of viewmat * obmat.
+ *
+ * NOTE(review): the early return on an empty display list happens after 'dm'
+ * may have been assigned and without BKE_displist_free(); presumably both are
+ * empty in that case - confirm. */
+static void init_render_curve(Render *re, ObjectRen *obr, int timeoffset)
+{
+	Object *ob= obr->ob;
+	Curve *cu;
+	VertRen *ver;
+	VlakRen *vlr;
+	DispList *dl;
+	DerivedMesh *dm = NULL;
+	ListBase disp={NULL, NULL};
+	Material **matar;
+	float *data, *fp, *orco=NULL;
+	float n[3], mat[4][4], nmat[4][4];
+	int nr, startvert, a, b, negative_scale;
+	bool need_orco = false;
+	int totmat;
+
+	cu= ob->data;
+	if (ob->type==OB_FONT && cu->str==NULL) return;
+	else if (ob->type==OB_CURVE && cu->nurb.first==NULL) return;
+
+	BKE_displist_make_curveTypes_forRender(re->scene, ob, &disp, &dm, false, true);
+	dl= disp.first;
+	if (dl==NULL) return;
+
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);
+	negative_scale = is_negative_m4(mat);
+
+	/* local object -> world space transform for normals */
+	/* nmat is the inverse of the transposed 'mat' */
+	transpose_m4_m4(nmat, mat);
+	invert_m4(nmat);
+
+	/* material array */
+	/* one entry more than the object has slots; the allocation label still
+	 * reads "init_render_surf" */
+	totmat= ob->totcol+1;
+	matar= MEM_callocN(sizeof(Material*)*totmat, "init_render_surf matar");
+
+	for (a=0; a<totmat; a++) {
+		matar[a]= give_render_material(re, ob, a+1);
+
+		if (matar[a] && matar[a]->texco & TEXCO_ORCO)
+			need_orco= 1;
+	}
+
+	if (dm) {
+		if (need_orco) {
+			/* use the cached orco, or create and cache a new one */
+			orco = get_object_orco(re, ob);
+			if (!orco) {
+				orco = BKE_displist_make_orco(re->scene, ob, dm, true, true);
+				if (orco) {
+					set_object_orco(re, ob, orco);
+				}
+			}
+		}
+
+		init_render_dm(dm, re, obr, timeoffset, orco, mat);
+		dm->release(dm);
+	}
+	else {
+		if (need_orco) {
+			orco = get_object_orco(re, ob);
+			if (!orco) {
+				orco = BKE_curve_make_orco(re->scene, ob, NULL);
+				set_object_orco(re, ob, orco);
+			}
+		}
+
+		/* 'orco' advances by 3 floats for every vertex created below */
+		while (dl) {
+			if (dl->col > ob->totcol) {
+				/* pass */
+			}
+			else if (dl->type==DL_INDEX3) {
+				const int *index;
+
+				startvert= obr->totvert;
+				data= dl->verts;
+
+				for (a=0; a<dl->nr; a++, data+=3) {
+					ver= RE_findOrAddVert(obr, obr->totvert++);
+					copy_v3_v3(ver->co, data);
+
+					mul_m4_v3(mat, ver->co);
+
+					if (orco) {
+						ver->orco = orco;
+						orco += 3;
+					}
+				}
+
+				if (timeoffset==0) {
+					float tmp[3];
+					const int startvlak= obr->totvlak;
+
+					/* 'n' sums the triangle normals of this display list entry */
+					zero_v3(n);
+					index= dl->index;
+					for (a=0; a<dl->parts; a++, index+=3) {
+						/* second and third index are swapped on purpose here */
+						int v1 = index[0], v2 = index[2], v3 = index[1];
+						float *co1 = &dl->verts[v1 * 3],
+						      *co2 = &dl->verts[v2 * 3],
+						      *co3 = &dl->verts[v3 * 3];
+
+						vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+						vlr->v1= RE_findOrAddVert(obr, startvert + v1);
+						vlr->v2= RE_findOrAddVert(obr, startvert + v2);
+						vlr->v3= RE_findOrAddVert(obr, startvert + v3);
+						vlr->v4= NULL;
+
+						/* to prevent float accuracy issues, we calculate normal in local object space (not world) */
+						/* triangles with a normal length up to FLT_EPSILON do not contribute */
+						if (normal_tri_v3(tmp, co1, co2, co3) > FLT_EPSILON) {
+							if (negative_scale == false) {
+								add_v3_v3(n, tmp);
+							}
+							else {
+								sub_v3_v3(n, tmp);
+							}
+						}
+
+						vlr->mat= matar[ dl->col ];
+						vlr->flag= 0;
+						vlr->ec= 0;
+					}
+
+					/* transform normal to world space */
+					mul_m4_v3(nmat, n);
+					normalize_v3(n);
+
+					/* vertex normals */
+					/* every face of this entry gets the same normal 'n' */
+					for (a= startvlak; a<obr->totvlak; a++) {
+						vlr= RE_findOrAddVlak(obr, a);
+
+						copy_v3_v3(vlr->n, n);
+						add_v3_v3(vlr->v1->n, vlr->n);
+						add_v3_v3(vlr->v3->n, vlr->n);
+						add_v3_v3(vlr->v2->n, vlr->n);
+					}
+					for (a=startvert; a<obr->totvert; a++) {
+						ver= RE_findOrAddVert(obr, a);
+						normalize_v3(ver->n);
+					}
+				}
+			}
+			else if (dl->type==DL_SURF) {
+
+				/* cyclic U means an extruded full circular curve, we skip bevel splitting then */
+				if (dl->flag & DL_CYCL_U) {
+					orco+= 3*dl_surf_to_renderdata(obr, dl, matar, orco, mat);
+				}
+				else {
+					int p1, p2, p3, p4;
+
+					fp= dl->verts;
+					startvert= obr->totvert;
+					nr= dl->nr*dl->parts;
+
+					while (nr--) {
+						ver= RE_findOrAddVert(obr, obr->totvert++);
+
+						copy_v3_v3(ver->co, fp);
+						mul_m4_v3(mat, ver->co);
+						fp+= 3;
+
+						if (orco) {
+							ver->orco = orco;
+							orco += 3;
+						}
+					}
+
+					/* with orco on a V-cyclic surface, the first dl->nr
+					 * vertices are duplicated at the end with their own orco */
+					if (dl->flag & DL_CYCL_V && orco) {
+						fp = dl->verts;
+						nr = dl->nr;
+						while (nr--) {
+							ver = RE_findOrAddVert(obr, obr->totvert++);
+							copy_v3_v3(ver->co, fp);
+							mul_m4_v3(mat, ver->co);
+							ver->orco = orco;
+							fp += 3;
+							orco += 3;
+						}
+					}
+
+					if (dl->bevel_split || timeoffset == 0) {
+						const int startvlak= obr->totvlak;
+
+						for (a=0; a<dl->parts; a++) {
+
+							if (BKE_displist_surfindex_get(dl, a, &b, &p1, &p2, &p3, &p4)==0)
+								break;
+
+							p1+= startvert;
+							p2+= startvert;
+							p3+= startvert;
+							p4+= startvert;
+
+							/* the last part of a V-cyclic surface connects to
+							 * the duplicated vertices added above */
+							if (dl->flag & DL_CYCL_V && orco && a == dl->parts - 1) {
+								p3 = p1 + dl->nr;
+								p4 = p2 + dl->nr;
+							}
+
+							for (; b<dl->nr; b++) {
+								vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+								/* important 1 offset in order is kept [#24913] */
+								vlr->v1= RE_findOrAddVert(obr, p2);
+								vlr->v2= RE_findOrAddVert(obr, p1);
+								vlr->v3= RE_findOrAddVert(obr, p3);
+								vlr->v4= RE_findOrAddVert(obr, p4);
+								vlr->ec= ME_V2V3+ME_V3V4;
+								if (a==0) vlr->ec+= ME_V1V2;
+
+								vlr->flag= dl->rt;
+
+								normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+								vlr->mat= matar[ dl->col ];
+
+								/* step one quad further along the strip */
+								p4= p3;
+								p3++;
+								p2= p1;
+								p1++;
+							}
+						}
+
+						if (dl->bevel_split) {
+							/* the bitmap has one more entry when the surface is V-cyclic */
+							for (a = 0; a < dl->parts - 1 + !!(dl->flag & DL_CYCL_V); a++) {
+								if (BLI_BITMAP_TEST(dl->bevel_split, a)) {
+									split_v_renderfaces(
+									        obr, startvlak, startvert, dl->parts, dl->nr, a,
+									        /* intentionally swap (v, u) --> (u, v) */
+									        dl->flag & DL_CYCL_V, dl->flag & DL_CYCL_U);
+								}
+							}
+						}
+
+						/* vertex normals */
+						for (a= startvlak; a<obr->totvlak; a++) {
+							vlr= RE_findOrAddVlak(obr, a);
+
+							add_v3_v3(vlr->v1->n, vlr->n);
+							add_v3_v3(vlr->v3->n, vlr->n);
+							add_v3_v3(vlr->v2->n, vlr->n);
+							add_v3_v3(vlr->v4->n, vlr->n);
+						}
+						for (a=startvert; a<obr->totvert; a++) {
+							ver= RE_findOrAddVert(obr, a);
+							normalize_v3(ver->n);
+						}
+					}
+				}
+			}
+
+			dl= dl->next;
+		}
+	}
+
+	BKE_displist_free(&disp);
+
+	MEM_freeN(matar);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Mesh     																 */
+/* ------------------------------------------------------------------------- */
+
+/* One entry of the sorted edge -> face lookup table. */
+struct edgesort {
+	/* vertex indices of the edge, v1 <= v2 (this is the sort key) */
+	unsigned int v1, v2;
+	/* index of the face the edge was taken from */
+	int f;
+	/* corner numbers inside that face, belonging to v1 and v2 respectively */
+	unsigned int i1, i2;
+};
+
+/* Fills one edge lookup entry.
+ *
+ * Entries are stored with the lowest vertex index first so they can be
+ * sorted and searched; when v1 > v2 both the vertex pair and the matching
+ * corner pair (i1, i2) are exchanged. 'f' is stored unchanged. */
+static void to_edgesort(struct edgesort *ed,
+                        unsigned int i1, unsigned int i2,
+                        unsigned int v1, unsigned int v2, int f)
+{
+	const bool flip = (v1 > v2);
+
+	ed->v1 = flip ? v2 : v1;
+	ed->v2 = flip ? v1 : v2;
+	ed->i1 = flip ? i2 : i1;
+	ed->i2 = flip ? i1 : i2;
+	ed->f = f;
+}
+
+/* qsort()/bsearch() comparator for struct edgesort: ascending by v1, ties
+ * broken by ascending v2. The other members are not looked at. */
+static int vergedgesort(const void *v1, const void *v2)
+{
+	const struct edgesort *lhs = v1;
+	const struct edgesort *rhs = v2;
+
+	if (lhs->v1 != rhs->v1) {
+		return (lhs->v1 > rhs->v1) ? 1 : -1;
+	}
+	if (lhs->v2 != rhs->v2) {
+		return (lhs->v2 > rhs->v2) ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/* Builds a table of all face edges of a DerivedMesh, sorted by vertex pair,
+ * so the face (and the corners within it) belonging to an edge can be found
+ * with bsearch() and vergedgesort().
+ *
+ * dm:          mesh to read tessellated faces from.
+ * totedgesort: receives the number of table entries; it is only written when
+ *              a table is returned.
+ *
+ * Returns a MEM_callocN() allocated array owned by the caller, or NULL when
+ * the mesh has neither a CD_MTFACE nor a CD_MCOL layer or has no faces. */
+static struct edgesort *make_mesh_edge_lookup(DerivedMesh *dm, int *totedgesort)
+{
+	MFace *mf, *mface;
+	MTFace *tface=NULL;
+	struct edgesort *edsort, *ed;
+	unsigned int *mcol=NULL;
+	int a, totedge=0, totface;
+
+	mface= dm->getTessFaceArray(dm);
+	totface= dm->getNumTessFaces(dm);
+	tface= dm->getTessFaceDataArray(dm, CD_MTFACE);
+	mcol= dm->getTessFaceDataArray(dm, CD_MCOL);
+
+	/* the table is only used to copy UVs and colors to edges */
+	if (mcol==NULL && tface==NULL) return NULL;
+
+	/* make sorted table with edges and face indices in it */
+	for (a= totface, mf= mface; a>0; a--, mf++) {
+		totedge += mf->v4 ? 4 : 3;
+	}
+
+	if (totedge==0)
+		return NULL;
+
+	ed= edsort= MEM_callocN(totedge*sizeof(struct edgesort), "edgesort");
+
+	for (a=0, mf=mface; a<totface; a++, mf++) {
+		to_edgesort(ed++, 0, 1, mf->v1, mf->v2, a);
+		to_edgesort(ed++, 1, 2, mf->v2, mf->v3, a);
+		if (mf->v4) {
+			to_edgesort(ed++, 2, 3, mf->v3, mf->v4, a);
+			to_edgesort(ed++, 3, 0, mf->v4, mf->v1, a);
+		}
+		else {
+			/* closing edge of a triangle runs from corner 2 back to corner 0;
+			 * a triangle has no corner 3, so using that index made the lookup
+			 * read the unused fourth UV/color of the face */
+			to_edgesort(ed++, 2, 0, mf->v3, mf->v1, a);
+		}
+	}
+
+	qsort(edsort, totedge, sizeof(struct edgesort), vergedgesort);
+
+	*totedgesort= totedge;
+
+	return edsort;
+}
+
+/* Copies UV and vertex color data onto an edge render face.
+ *
+ * The edge 'medge' is looked up in 'edgetable' (built by
+ * make_mesh_edge_lookup(), 'totedge' entries). When found, every CD_MTFACE
+ * and CD_MCOL layer of the face stored in the table entry is duplicated onto
+ * 'vlr', up to MAX_MTFACE and MAX_MCOL layers, with the two edge corners
+ * placed first. Nothing happens when the edge is not in the table.
+ *
+ * NOTE(review): uv[2] and uv[3] are filled from corner 1 of the source face,
+ * while the colors 1..3 are all filled from the second edge corner - confirm
+ * whether the UV side is intended. */
+static void use_mesh_edge_lookup(ObjectRen *obr, DerivedMesh *dm, MEdge *medge, VlakRen *vlr, struct edgesort *edgetable, int totedge)
+{
+	struct edgesort key, *hit;
+	const bool ordered = (medge->v1 < medge->v2);
+	int layer_nr, uv_nr = 0, col_nr = 0;
+
+	/* the table stores every edge with its lowest vertex index first */
+	key.v1 = ordered ? medge->v1 : medge->v2;
+	key.v2 = ordered ? medge->v2 : medge->v1;
+
+	hit = bsearch(&key, edgetable, totedge, sizeof(struct edgesort), vergedgesort);
+	if (hit == NULL) {
+		return;
+	}
+
+	/* since edges have different index ordering, we have to duplicate mcol and tface */
+	for (layer_nr = 0; layer_nr < dm->faceData.totlayer; layer_nr++) {
+		CustomDataLayer *layer = &dm->faceData.layers[layer_nr];
+		char *name = layer->name;
+
+		if (layer->type == CD_MTFACE && uv_nr < MAX_MTFACE) {
+			MTFace *src = &((MTFace*)layer->data)[hit->f];
+			MTFace *dst = RE_vlakren_get_tface(obr, vlr, uv_nr++, &name, 1);
+
+			*dst = *src;
+
+			memcpy(dst->uv[0], src->uv[hit->i1], sizeof(float)*2);
+			memcpy(dst->uv[1], src->uv[hit->i2], sizeof(float)*2);
+			memcpy(dst->uv[2], src->uv[1], sizeof(float)*2);
+			memcpy(dst->uv[3], src->uv[1], sizeof(float)*2);
+		}
+		else if (layer->type == CD_MCOL && col_nr < MAX_MCOL) {
+			/* 4 colors per face */
+			MCol *src = &((MCol*)layer->data)[hit->f*4];
+			MCol *dst = RE_vlakren_get_mcol(obr, vlr, col_nr++, &name, 1);
+
+			dst[0] = src[hit->i1];
+			dst[1] = dst[2] = dst[3] = src[hit->i2];
+		}
+	}
+}
+
+static void free_camera_inside_volumes(Render *re)
+{
+	BLI_freelistN(&re->render_volumes_inside);	/* the list is filled by init_camera_inside_volumes() */
+}
+
+static void init_camera_inside_volumes(Render *re)
+{
+	/* Records in re->render_volumes_inside one MatInside for every instance of
+	 * a volume object for which the inside test on 'co' succeeds. */
+	VolumeOb *volume;
+	ObjectInstanceRen *instance;
+	MatInside *inside;
+	/* coordinates are all in camera space, so camera coordinate is zero. we also
+	 * add an offset for the clip start, however note that with clip start it's
+	 * actually impossible to do a single 'inside' test, since there will not be
+	 * a single point where all camera rays start from, though for small clip start
+	 * they will be close together. */
+	float co[3] = {0.f, 0.f, -re->clipsta};
+
+	for (volume = re->volumes.first; volume; volume = volume->next) {
+		for (instance = re->instancetable.first; instance; instance = instance->next) {
+			if (instance->obr != volume->obr)
+				continue;
+			if (!point_inside_volume_objectinstance(re, instance, co))
+				continue;
+
+			inside = MEM_mallocN(sizeof(MatInside), "camera inside material");
+			inside->ma = volume->ma;
+			inside->obi = instance;
+			BLI_addtail(&(re->render_volumes_inside), inside);
+		}
+	}
+
+#if 0 /* debug */
+	{
+		MatInside *m;
+		for (m = re->render_volumes_inside.first; m; m = m->next) {
+			printf("matinside: ma: %s\n", m->ma->id.name + 2);
+		}
+	}
+#endif
+}
+
+static void add_volume(Render *re, ObjectRen *obr, Material *ma)
+{
+	/* Appends a new VolumeOb, pairing render object 'obr' with material 'ma',
+	 * to the tail of re->volumes. */
+	struct VolumeOb *entry = MEM_mallocN(sizeof(VolumeOb), "volume object");
+
+	entry->obr = obr;
+	entry->ma = ma;
+
+	BLI_addtail(&re->volumes, entry);
+}
+
+#ifdef WITH_FREESTYLE
+static EdgeHash *make_freestyle_edge_mark_hash(DerivedMesh *dm)
+{
+	/* Hash of every edge flagged FREESTYLE_EDGE_MARK, inserted under its two
+	 * vertex indices; NULL when the mesh returns no CD_FREESTYLE_EDGE data. */
+	MEdge *edges = dm->getEdgeArray(dm);
+	const int num_edges = dm->getNumEdges(dm);
+	FreestyleEdge *marks = dm->getEdgeDataArray(dm, CD_FREESTYLE_EDGE);
+	EdgeHash *edge_hash;
+	int i;
+
+	if (marks == NULL)
+		return NULL;
+	edge_hash = BLI_edgehash_new(__func__);
+	for (i = 0; i < num_edges; i++) {
+		if (marks[i].flag & FREESTYLE_EDGE_MARK)
+			BLI_edgehash_insert(edge_hash, edges[i].v1, edges[i].v2, &edges[i]);
+	}
+	return edge_hash;
+}
+
+static bool has_freestyle_edge_mark(EdgeHash *edge_hash, int v1, int v2)
+{
+	/* an edge counts as marked exactly when the hash returns an entry for (v1, v2) */
+	return BLI_edgehash_lookup(edge_hash, v1, v2) != NULL;
+}
+#endif
+
+static void init_render_mesh(Render *re, ObjectRen *obr, int timeoffset)
+{
+	Object *ob= obr->ob;
+	Mesh *me;
+	MVert *mvert = NULL;
+	MFace *mface;
+	VlakRen *vlr; //, *vlr1;
+	VertRen *ver;
+	Material *ma;
+	DerivedMesh *dm;
+	CustomDataMask mask;
+	float xn, yn, zn,  imat[3][3], mat[4][4];  //nor[3],
+	float *orco = NULL;
+	short (*loop_nors)[4][3] = NULL;
+	bool need_orco = false, need_stress = false, need_tangent = false, need_origindex = false;
+	bool need_nmap_tangent_concrete = false;
+	int a, a1, ok, vertofs;
+	int end, totvert = 0;
+	bool do_autosmooth = false, do_displace = false;
+	bool use_original_normals = false;
+	int recalc_normals = 0;	/* false by default */
+	int negative_scale;
+#ifdef WITH_FREESTYLE
+	FreestyleFace *ffa;
+#endif
+	/* Converts the mesh of obr->ob into render vertices (VertRen) and render faces (VlakRen) stored in obr. */
+	me= ob->data;
+
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);
+	copy_m3_m4(imat, ob->imat);
+	negative_scale= is_negative_m4(mat);
+	/* scan the material slots to find out which optional data has to be prepared */
+	need_orco= 0;
+	for (a=1; a<=ob->totcol; a++) {
+		ma= give_render_material(re, ob, a);
+		if (ma) {
+			if (ma->texco & (TEXCO_ORCO|TEXCO_STRESS))
+				need_orco= 1;
+			if (ma->texco & TEXCO_STRESS)
+				need_stress= 1;
+			/* normalmaps, test if tangents needed, separated from shading */
+			if (ma->mode_l & MA_TANGENT_V) {
+				need_tangent= 1;
+				if (me->mtpoly==NULL)
+					need_orco= 1;
+			}
+			if (ma->mode_l & MA_NORMAP_TANG) {
+				if (me->mtpoly==NULL) {
+					need_orco= 1;
+				}
+				need_tangent= 1;
+			}
+			if (ma->mode2_l & MA_TANGENT_CONCRETE) {
+				need_nmap_tangent_concrete = true;
+			}
+		}
+	}
+
+	if (re->flag & R_NEED_TANGENT) {
+		/* exception for tangent space baking */
+		if (me->mtpoly==NULL) {
+			need_orco= 1;
+		}
+		need_tangent= 1;
+	}
+
+	/* check autosmooth and displacement, we then have to skip only-verts optimize
+	 * Note: not sure what we want to give higher priority, currently do_displace
+	 *       takes precedence over do_autosmooth.
+	 */
+	do_displace = test_for_displace(re, ob);
+	do_autosmooth = ((me->flag & ME_AUTOSMOOTH) != 0) && !do_displace;
+	if (do_autosmooth || do_displace)
+		timeoffset = 0;
+
+	/* origindex currently used when using autosmooth, or baking to vertex colors. */
+	need_origindex = (do_autosmooth || ((re->flag & R_BAKING) && (re->r.bake_flag & R_BAKE_VCOL)));
+
+	mask = CD_MASK_RENDER_INTERNAL;
+	if (!timeoffset)
+		if (need_orco)
+			mask |= CD_MASK_ORCO;
+
+#ifdef WITH_FREESTYLE
+	mask |= CD_MASK_ORIGINDEX | CD_MASK_FREESTYLE_EDGE | CD_MASK_FREESTYLE_FACE;
+#endif
+
+	if (re->r.scemode & R_VIEWPORT_PREVIEW)
+		dm= mesh_create_derived_view(re->scene, ob, mask);
+	else
+		dm= mesh_create_derived_render(re->scene, ob, mask);
+	if (dm==NULL) return;	/* in case duplicated object fails? */
+
+	mvert= dm->getVertArray(dm);
+	totvert= dm->getNumVerts(dm);
+
+	if (totvert == 0) {
+		dm->release(dm);
+		return;
+	}
+
+	if (mask & CD_MASK_ORCO) {
+		orco = get_object_orco(re, ob);
+		if (!orco) {
+			orco= dm->getVertDataArray(dm, CD_ORCO);
+			if (orco) {
+				orco= MEM_dupallocN(orco);
+				set_object_orco(re, ob, orco);
+			}
+		}
+	}
+
+	/* attempt to autosmooth on original mesh, only without subsurf */
+	if (do_autosmooth && me->totvert==totvert && me->totface==dm->getNumTessFaces(dm))
+		use_original_normals= true;
+
+	ma= give_render_material(re, ob, 1);
+
+	/* material slot 1 alone decides whether the mesh is converted to halos or to faces */
+	if (ma->material_type == MA_TYPE_HALO) {
+		make_render_halos(re, obr, me, totvert, mvert, ma, orco);
+	}
+	else {
+		const int *index_vert_orig = NULL;
+		const int *index_mf_to_mpoly = NULL;
+		const int *index_mp_to_orig = NULL;
+		if (need_origindex) {
+			index_vert_orig = dm->getVertDataArray(dm, CD_ORIGINDEX);
+			/* double lookup for faces -> polys */
+#ifdef WITH_FREESTYLE
+			index_mf_to_mpoly = dm->getTessFaceDataArray(dm, CD_ORIGINDEX);
+			index_mp_to_orig = dm->getPolyDataArray(dm, CD_ORIGINDEX);
+#endif
+		}
+
+		for (a=0; a<totvert; a++, mvert++) {
+			ver= RE_findOrAddVert(obr, obr->totvert++);
+			copy_v3_v3(ver->co, mvert->co);
+			if (do_autosmooth == false) {	/* autosmooth on original unrotated data to prevent differences between frames */
+				normal_short_to_float_v3(ver->n, mvert->no);
+				mul_m4_v3(mat, ver->co);
+				mul_transposed_m3_v3(imat, ver->n);
+				normalize_v3(ver->n);
+				negate_v3(ver->n);
+			}
+
+			if (orco) {
+				ver->orco= orco;
+				orco+=3;
+			}
+
+			if (need_origindex) {
+				int *origindex;
+				origindex = RE_vertren_get_origindex(obr, ver, 1);
+
+				/* Use orig index array if it's available (e.g. in the presence
+				 * of modifiers). */
+				if (index_vert_orig)
+					*origindex = index_vert_orig[a];
+				else
+					*origindex = a;
+			}
+		}
+		/* faces, tangents and wire edges are only converted when timeoffset is zero */
+		if (!timeoffset) {
+			short (*lnp)[4][3] = NULL;
+#ifdef WITH_FREESTYLE
+			EdgeHash *edge_hash;
+
+			/* create a hash table of Freestyle edge marks */
+			edge_hash = make_freestyle_edge_mark_hash(dm);
+#endif
+
+			/* store customdata names, because DerivedMesh is freed */
+			RE_set_customdata_names(obr, &dm->faceData);
+
+			/* add tangent layers if we need */
+			if ((ma->nmap_tangent_names_count && need_nmap_tangent_concrete) || need_tangent) {
+				dm->calcLoopTangents(
+				        dm, need_tangent,
+				        (const char (*)[MAX_NAME])ma->nmap_tangent_names, ma->nmap_tangent_names_count);
+				obr->tangent_mask = dm->tangent_mask;
+				DM_generate_tangent_tessface_data(dm, need_nmap_tangent_concrete || need_tangent);
+			}
+
+			/* still to do for keys: the correct local texture coordinate */
+
+			/* faces in order of color blocks */
+			vertofs= obr->totvert - totvert;
+			for (a1=0; (a1<ob->totcol || (a1==0 && ob->totcol==0)); a1++) {
+
+				ma= give_render_material(re, ob, a1+1);
+
+				/* test for 100% transparent */
+				ok = 1;
+				if ((ma->alpha == 0.0f) &&
+				    (ma->spectra == 0.0f) &&
+				    /* No need to test filter here, it's only active with MA_RAYTRANSP and we check against it below. */
+				    /* (ma->filter == 0.0f) && */
+				    (ma->mode & MA_TRANSP) &&
+				    (ma->mode & (MA_RAYTRANSP | MA_RAYMIRROR)) == 0)
+				{
+					ok = 0;
+					/* texture on transparency? */
+					for (a=0; a<MAX_MTEX; a++) {
+						if (ma->mtex[a] && ma->mtex[a]->tex) {
+							if (ma->mtex[a]->mapto & MAP_ALPHA) ok= 1;
+						}
+					}
+				}
+
+				/* if wire material, and we got edges, don't do the faces */
+				if (ma->material_type == MA_TYPE_WIRE) {
+					end= dm->getNumEdges(dm);
+					if (end) ok= 0;
+				}
+
+				if (ok) {
+					end= dm->getNumTessFaces(dm);
+					mface= dm->getTessFaceArray(dm);
+					if (!loop_nors && do_autosmooth &&
+					    (dm->getTessFaceDataArray(dm, CD_TESSLOOPNORMAL) != NULL))
+					{
+						lnp = loop_nors = MEM_mallocN(sizeof(*loop_nors) * end, __func__);
+					}
+#ifdef WITH_FREESTYLE
+					index_mf_to_mpoly= dm->getTessFaceDataArray(dm, CD_ORIGINDEX);
+					index_mp_to_orig= dm->getPolyDataArray(dm, CD_ORIGINDEX);
+					ffa= CustomData_get_layer(&me->pdata, CD_FREESTYLE_FACE);
+#endif
+
+					for (a=0; a<end; a++, mface++) {
+						int v1, v2, v3, v4, flag;
+
+						if ( mface->mat_nr==a1 ) {
+							float len;
+							bool reverse_verts = (negative_scale != 0 && do_autosmooth == false);
+							int rev_tab[] = {reverse_verts==0 ? 0 : 2, 1, reverse_verts==0 ? 2 : 0, 3};
+							v1= reverse_verts==0 ? mface->v1 : mface->v3;
+							v2= mface->v2;
+							v3= reverse_verts==0 ? mface->v3 : mface->v1;
+							v4= mface->v4;
+							flag = do_autosmooth ? ME_SMOOTH : mface->flag & ME_SMOOTH;
+
+							vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+							vlr->v1= RE_findOrAddVert(obr, vertofs+v1);
+							vlr->v2= RE_findOrAddVert(obr, vertofs+v2);
+							vlr->v3= RE_findOrAddVert(obr, vertofs+v3);
+							if (v4) vlr->v4 = RE_findOrAddVert(obr, vertofs+v4);
+							else vlr->v4 = NULL;
+
+#ifdef WITH_FREESTYLE
+							/* Freestyle edge/face marks */
+							if (edge_hash) {
+								int edge_mark = 0;
+
+								if (has_freestyle_edge_mark(edge_hash, v1, v2)) edge_mark |= R_EDGE_V1V2;
+								if (has_freestyle_edge_mark(edge_hash, v2, v3)) edge_mark |= R_EDGE_V2V3;
+								if (!v4) {
+									if (has_freestyle_edge_mark(edge_hash, v3, v1)) edge_mark |= R_EDGE_V3V1;
+								}
+								else {
+									if (has_freestyle_edge_mark(edge_hash, v3, v4)) edge_mark |= R_EDGE_V3V4;
+									if (has_freestyle_edge_mark(edge_hash, v4, v1)) edge_mark |= R_EDGE_V4V1;
+								}
+								vlr->freestyle_edge_mark= edge_mark;
+							}
+							if (ffa) {
+								int index = (index_mf_to_mpoly) ? DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a) : a;
+								vlr->freestyle_face_mark= (ffa[index].flag & FREESTYLE_FACE_MARK) ? 1 : 0;
+							}
+							else {
+								vlr->freestyle_face_mark= 0;
+							}
+#endif
+
+							/* render normals are inverted in render */
+							if (use_original_normals) {
+								MFace *mf= me->mface+a;
+								MVert *mv= me->mvert;
+
+								if (vlr->v4)
+									len= normal_quad_v3(vlr->n, mv[mf->v4].co, mv[mf->v3].co, mv[mf->v2].co, mv[mf->v1].co);
+								else
+									len= normal_tri_v3(vlr->n, mv[mf->v3].co, mv[mf->v2].co, mv[mf->v1].co);
+							}
+							else {
+								if (vlr->v4)
+									len= normal_quad_v3(vlr->n, vlr->v4->co, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+								else
+									len= normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+							}
+
+							vlr->mat= ma;
+							vlr->flag= flag;
+							vlr->ec= 0; /* mesh edges rendered separately */
+
+							if (len==0) obr->totvlak--;
+							else {
+								CustomDataLayer *layer;
+								MTFace *mtface, *mtf;
+								MCol *mcol, *mc;
+								int index, mtfn= 0, mcn= 0, mln = 0, vindex;
+								char *name;
+								int nr_verts = v4!=0 ? 4 : 3;
+
+								for (index=0; index<dm->faceData.totlayer; index++) {
+									layer= &dm->faceData.layers[index];
+									name= layer->name;
+
+									if (layer->type == CD_MTFACE && mtfn < MAX_MTFACE) {
+										int t;
+										mtf= RE_vlakren_get_tface(obr, vlr, mtfn++, &name, 1);
+										mtface= (MTFace*)layer->data;
+										*mtf = mtface[a];  /* copy face info */
+										for (vindex=0; vindex<nr_verts; vindex++)
+											for (t=0; t<2; t++)
+												mtf->uv[vindex][t]=mtface[a].uv[rev_tab[vindex]][t];
+									}
+									else if (layer->type == CD_MCOL && mcn < MAX_MCOL) {
+										mc= RE_vlakren_get_mcol(obr, vlr, mcn++, &name, 1);
+										mcol= (MCol*)layer->data;
+										for (vindex=0; vindex<nr_verts; vindex++)
+											mc[vindex]=mcol[a*4+rev_tab[vindex]];
+									}
+									else if (layer->type == CD_TANGENT) {
+										if (need_nmap_tangent_concrete || need_tangent) {
+											int uv_start = CustomData_get_layer_index(&dm->faceData, CD_MTFACE);
+											int uv_index = CustomData_get_named_layer_index(&dm->faceData, CD_MTFACE, layer->name);
+
+											/* if there are no UVs, orco tangents are in first slot */
+											int n = (uv_start >= 0 && uv_index >= 0) ? uv_index - uv_start : 0;
+
+											const float *tangent = (const float *) layer->data;
+											float *ftang = RE_vlakren_get_nmap_tangent(obr, vlr, n, true);
+
+											for (vindex=0; vindex<nr_verts; vindex++) {
+												copy_v4_v4(ftang+vindex*4, tangent+a*16+rev_tab[vindex]*4);
+												mul_mat3_m4_v3(mat, ftang+vindex*4);
+												normalize_v3(ftang+vindex*4);
+											}
+										}
+									}
+									else if (layer->type == CD_TESSLOOPNORMAL && mln < 1) {
+										if (loop_nors) {
+											const short (*lnors)[4][3] = (const short (*)[4][3])layer->data;
+											for (vindex = 0; vindex < 4; vindex++) {
+												//print_v3("lnors[a][rev_tab[vindex]]", lnors[a][rev_tab[vindex]]);
+												copy_v3_v3_short((short *)lnp[0][vindex], lnors[a][rev_tab[vindex]]);
+												/* If we copy loop normals, we are doing autosmooth, so we are still
+												 * in object space, no need to multiply with mat!
+												 */
+											}
+											lnp++;
+										}
+										mln++;
+									}
+								}
+
+								if (need_origindex) {
+									/* Find original index of mpoly for this tessface. Options:
+									 * - Modified mesh; two-step look up from tessface -> modified mpoly -> original mpoly
+									 * - OR Tesselated mesh; look up from tessface -> mpoly
+									 * - OR Failsafe; tessface == mpoly. Could probably assert(false) in this case? */
+									int *origindex;
+									origindex = RE_vlakren_get_origindex(obr, vlr, 1);
+									if (index_mf_to_mpoly && index_mp_to_orig)
+										*origindex = DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig, a);
+									else if (index_mf_to_mpoly)
+										*origindex = index_mf_to_mpoly[a];
+									else
+										*origindex = a;
+								}
+							}
+						}
+					}
+				}
+			}
+
+#ifdef WITH_FREESTYLE
+			/* release the hash table of Freestyle edge marks */
+			if (edge_hash)
+				BLI_edgehash_free(edge_hash, NULL);
+#endif
+
+			/* exception... we do edges for wire mode. potential conflict when faces exist... */
+			end= dm->getNumEdges(dm);
+			mvert= dm->getVertArray(dm);
+			ma= give_render_material(re, ob, 1);
+			if (end && (ma->material_type == MA_TYPE_WIRE)) {
+				MEdge *medge;
+				struct edgesort *edgetable;
+				int totedge= 0;
+				recalc_normals= 1;
+
+				medge= dm->getEdgeArray(dm);
+
+				/* we want edges to have UV and vcol too... */
+				edgetable= make_mesh_edge_lookup(dm, &totedge);
+
+				for (a1=0; a1<end; a1++, medge++) {
+					if (medge->flag&ME_EDGERENDER) {
+						MVert *v0 = &mvert[medge->v1];
+						MVert *v1 = &mvert[medge->v2];
+
+						vlr= RE_findOrAddVlak(obr, obr->totvlak++);
+						vlr->v1= RE_findOrAddVert(obr, vertofs+medge->v1);
+						vlr->v2= RE_findOrAddVert(obr, vertofs+medge->v2);
+						vlr->v3= vlr->v2;
+						vlr->v4= NULL;
+
+						if (edgetable)
+							use_mesh_edge_lookup(obr, dm, medge, vlr, edgetable, totedge);
+
+						xn= -(v0->no[0]+v1->no[0]);
+						yn= -(v0->no[1]+v1->no[1]);
+						zn= -(v0->no[2]+v1->no[2]);
+						/* transpose ! */
+						vlr->n[0]= imat[0][0]*xn+imat[0][1]*yn+imat[0][2]*zn;
+						vlr->n[1]= imat[1][0]*xn+imat[1][1]*yn+imat[1][2]*zn;
+						vlr->n[2]= imat[2][0]*xn+imat[2][1]*yn+imat[2][2]*zn;
+						normalize_v3(vlr->n);
+
+						vlr->mat= ma;
+						vlr->flag= 0;
+						vlr->ec= ME_V1V2;
+					}
+				}
+				if (edgetable)
+					MEM_freeN(edgetable);
+			}
+		}
+	}
+
+	if (!timeoffset) {
+		if (need_stress)
+			calc_edge_stress(re, obr, me);
+
+		if (do_displace) {
+			calc_vertexnormals(re, obr, 1, 0, 0);
+			displace(re, obr);
+			recalc_normals = 0;  /* Already computed by displace! */
+		}
+		else if (do_autosmooth) {
+			recalc_normals = (loop_nors == NULL);  /* Should never happen, but better be safe than sorry. */
+			autosmooth(re, obr, mat, loop_nors);
+		}
+
+		if (recalc_normals!=0 || need_tangent!=0)
+			calc_vertexnormals(re, obr, recalc_normals, need_tangent, need_nmap_tangent_concrete);
+	}
+	/* loop_nors is only non-NULL when the autosmooth face loop above allocated it */
+	MEM_SAFE_FREE(loop_nors);
+
+	dm->release(dm);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Lamps and Shadowbuffers													 */
+/* ------------------------------------------------------------------------- */
+
+static void initshadowbuf(Render *re, LampRen *lar, float mat[4][4])
+{
+	/* Creates the shadow buffer description of a lamp and stores it in lar->shb
+	 * (which stays NULL when the allocation fails).
+	 * Note that 'mat' is normalized in place. */
+	struct ShadBuf *buf;
+	float viewinv[4][4];
+
+	buf= (struct ShadBuf *)MEM_callocN(sizeof(struct ShadBuf), "initshadbuf");
+	lar->shb= buf;
+	if (buf==NULL)
+		return;
+
+	/* plain settings taken over from the lamp */
+	VECCOPY(buf->co, lar->co); /* int copy */
+	buf->samp= lar->samp;
+	buf->soft= lar->soft;
+	buf->shadhalostep= lar->shadhalostep;
+	buf->compressthresh= lar->compressthresh;
+
+	/* projection */
+	buf->d= lar->clipsta;
+	buf->clipend= lar->clipend;
+
+	/* percentage render: scaled sizes below 512 are raised to 512, larger
+	 * ones are capped at the lamp's own buffer size */
+	buf->size= (lar->bufsize*re->r.size)/100;
+	if (buf->size<512)
+		buf->size= 512;
+	else if (buf->size > lar->bufsize)
+		buf->size= lar->bufsize;
+	buf->size &= ~15;	/* make sure its multiples of 16 */
+
+	normalize_m4(mat);
+	invert_m4_m4(buf->winmat, mat);	/* winmat is temp */
+
+	/* matrix: combination of inverse view and lampmat */
+	/* calculate again: the ortho-render has no correct viewinv */
+	invert_m4_m4(viewinv, re->viewmat);
+	mul_m4_m4m4(buf->viewmat, buf->winmat, viewinv);
+
+	/* bias is percentage, made 2x larger because of correction for angle of incidence */
+	/* when a ray is closer to parallel of a face, bias value is increased during render */
+	buf->bias= (0.02f*lar->bias)*0x7FFFFFFF;
+
+	/* halfway method (average of first and 2nd z) reduces bias issues */
+	if (ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP))
+		buf->bias= 0.1f*buf->bias;
+}
+
+void area_lamp_vectors(LampRen *lar)
+{
+	/* Fills lar->area[0..3] with the corner points of the lamp rectangle that
+	 * rows 0 and 1 of lar->mat span around lar->co, then sets lar->areasize. */
+	float half_x, half_y, shrink;
+	int axis;
+
+	/* make it smaller, so area light can be multisampled */
+	/* NOTE(review): a ray_totsamp of zero makes this a division by zero;
+	 * presumably callers always set it first - confirm */
+	shrink= 1.0f/sqrtf((float)lar->ray_totsamp);
+	half_x= 0.5f*lar->area_size;
+	half_y= 0.5f*lar->area_sizey;
+	half_x *= shrink;
+	half_y *= shrink;
+
+	/* corner vectors: every pass of the loop handles one coordinate axis of
+	 * all four corners */
+	for (axis= 0; axis<3; axis++) {
+		const float along_x= half_x*lar->mat[0][axis];
+		const float along_y= half_y*lar->mat[1][axis];
+
+		lar->area[0][axis]= lar->co[axis] - along_x - along_y;
+		lar->area[1][axis]= lar->co[axis] - along_x + along_y;
+		lar->area[2][axis]= lar->co[axis] + along_x + along_y;
+		lar->area[3][axis]= lar->co[axis] + along_x - along_y;
+	}
+
+	/* only for correction button size, matrix size works on energy */
+	lar->areasize= lar->dist*lar->dist/(4.0f*half_x*half_y);
+}
+
+/* If lar takes more lamp data, the decoupling will be better. */
+static GroupObject *add_render_lamp(Render *re, Object *ob)
+{
+	Lamp *la= ob->data;
+	LampRen *lar;
+	GroupObject *go;
+	float mat[4][4], angle, xn, yn;
+	float vec[3];
+	int c;
+	/* Creates the LampRen for lamp object 'ob'; returns the GroupObject added to re->lights, or NULL when no lamp is made. */
+	/* previewrender sets this to zero... prevent accidents */
+	if (la==NULL) return NULL;
+
+	/* prevent only shadow from rendering light */
+	if (la->mode & LA_ONLYSHADOW)
+		if ((re->r.mode & R_SHADOW)==0)
+			return NULL;
+
+	re->totlamp++;
+
+	/* groups is used to unify support for lightgroups, this is the global lightgroup */
+	go= MEM_callocN(sizeof(GroupObject), "groupobject");
+	BLI_addtail(&re->lights, go);
+	go->ob= ob;
+	/* lamprens are in own list, for freeing */
+	lar= (LampRen *)MEM_callocN(sizeof(LampRen), "lampren");
+	BLI_addtail(&re->lampren, lar);
+	go->lampren= lar;
+
+	mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+	invert_m4_m4(ob->imat, mat);
+
+	copy_m4_m4(lar->lampmat, ob->obmat);
+	copy_m3_m4(lar->mat, mat);
+	copy_m3_m4(lar->imat, ob->imat);
+
+	lar->bufsize = la->bufsize;
+	lar->samp = la->samp;
+	lar->buffers= la->buffers;
+	if (lar->buffers==0) lar->buffers= 1;
+	lar->buftype= la->buftype;
+	lar->filtertype= la->filtertype;
+	lar->soft = la->soft;
+	lar->shadhalostep = la->shadhalostep;
+	lar->clipsta = la->clipsta;
+	lar->clipend = la->clipend;
+
+	lar->bias = la->bias;
+	lar->compressthresh = la->compressthresh;
+
+	lar->type= la->type;
+	lar->mode= la->mode;
+
+	lar->energy= la->energy;
+	if (la->mode & LA_NEG) lar->energy= -lar->energy;
+
+	lar->vec[0]= -mat[2][0];
+	lar->vec[1]= -mat[2][1];
+	lar->vec[2]= -mat[2][2];
+	normalize_v3(lar->vec);
+	lar->co[0]= mat[3][0];
+	lar->co[1]= mat[3][1];
+	lar->co[2]= mat[3][2];
+	lar->dist= la->dist;
+	lar->haint= la->haint;
+	lar->distkw= lar->dist*lar->dist;
+	lar->r= lar->energy*la->r;
+	lar->g= lar->energy*la->g;
+	lar->b= lar->energy*la->b;
+	lar->shdwr= la->shdwr;
+	lar->shdwg= la->shdwg;
+	lar->shdwb= la->shdwb;
+	lar->k= la->k;
+
+	/* area */
+	lar->ray_samp= la->ray_samp;
+	lar->ray_sampy= la->ray_sampy;
+	lar->ray_sampz= la->ray_sampz;
+
+	lar->area_size= la->area_size;
+	lar->area_sizey= la->area_sizey;
+	lar->area_sizez= la->area_sizez;
+
+	lar->area_shape= la->area_shape;
+
+	/* Annoying, lamp UI does this, but the UI might not have been used? - add here too.
+	 * make sure this matches buttons_shading.c's logic */
+	if (ELEM(la->type, LA_AREA, LA_SPOT, LA_SUN, LA_LOCAL) && (la->mode & LA_SHAD_RAY))
+		if (ELEM(la->type, LA_SPOT, LA_SUN, LA_LOCAL))
+			if (la->ray_samp_method == LA_SAMP_CONSTANT) la->ray_samp_method = LA_SAMP_HALTON;
+
+	lar->ray_samp_method= la->ray_samp_method;
+	lar->ray_samp_type= la->ray_samp_type;
+
+	lar->adapt_thresh= la->adapt_thresh;
+	lar->sunsky = NULL;
+
+	if ( ELEM(lar->type, LA_SPOT, LA_LOCAL)) {
+		lar->ray_totsamp= lar->ray_samp*lar->ray_samp;
+		lar->area_shape = LA_AREA_SQUARE;
+		lar->area_sizey= lar->area_size;
+	}
+	else if (lar->type==LA_AREA) {
+		switch (lar->area_shape) {
+		case LA_AREA_SQUARE:
+			lar->ray_totsamp= lar->ray_samp*lar->ray_samp;
+			lar->ray_sampy= lar->ray_samp;
+			lar->area_sizey= lar->area_size;
+			break;
+		case LA_AREA_RECT:
+			lar->ray_totsamp= lar->ray_samp*lar->ray_sampy;
+			break;
+		case LA_AREA_CUBE:
+			lar->ray_totsamp= lar->ray_samp*lar->ray_samp*lar->ray_samp;
+			lar->ray_sampy= lar->ray_samp;
+			lar->ray_sampz= lar->ray_samp;
+			lar->area_sizey= lar->area_size;
+			lar->area_sizez= lar->area_size;
+			break;
+		case LA_AREA_BOX:
+			lar->ray_totsamp= lar->ray_samp*lar->ray_sampy*lar->ray_sampz;
+			break;
+		}
+
+		area_lamp_vectors(lar);
+		init_jitter_plane(lar);	 /* subsamples */
+	}
+	else if (lar->type==LA_SUN) {
+		lar->ray_totsamp= lar->ray_samp*lar->ray_samp;
+		lar->area_shape = LA_AREA_SQUARE;
+		lar->area_sizey= lar->area_size;
+
+		if ((la->sun_effect_type & LA_SUN_EFFECT_SKY) ||
+		    (la->sun_effect_type & LA_SUN_EFFECT_AP))
+		{
+			lar->sunsky = (struct SunSky*)MEM_callocN(sizeof(struct SunSky), "sunskyren");
+			lar->sunsky->effect_type = la->sun_effect_type;
+
+			copy_v3_v3(vec, ob->obmat[2]);
+			normalize_v3(vec);
+
+			InitSunSky(
+			        lar->sunsky, la->atm_turbidity, vec, la->horizon_brightness,
+			        la->spread, la->sun_brightness, la->sun_size, la->backscattered_light,
+			        la->skyblendfac, la->skyblendtype, la->sky_exposure, la->sky_colorspace);
+			InitAtmosphere(
+			        lar->sunsky, la->sun_intensity, 1.0, 1.0, la->atm_inscattering_factor, la->atm_extinction_factor,
+			        la->atm_distance_factor);
+		}
+	}
+	else lar->ray_totsamp= 0;
+
+	lar->spotsi= la->spotsize;
+	if (lar->mode & LA_HALO) {
+		if (lar->spotsi > DEG2RADF(170.0f)) lar->spotsi = DEG2RADF(170.0f);
+	}
+	lar->spotsi= cosf(lar->spotsi * 0.5f);
+	lar->spotbl= (1.0f-lar->spotsi)*la->spotblend;
+
+	memcpy(lar->mtex, la->mtex, MAX_MTEX*sizeof(void *));
+
+	lar->lay = ob->lay & 0xFFFFFF;  /* higher 8 bits are localview layers */
+
+	lar->falloff_type = la->falloff_type;
+	lar->ld1= la->att1;
+	lar->ld2= la->att2;
+	lar->coeff_const= la->coeff_const;
+	lar->coeff_lin= la->coeff_lin;
+	lar->coeff_quad= la->coeff_quad;
+	lar->curfalloff = curvemapping_copy(la->curfalloff);
+
+	if (lar->curfalloff) {
+		/* so threads don't conflict on init */
+		curvemapping_initialize(lar->curfalloff);
+	}
+
+	if (lar->type==LA_SPOT) {
+
+		normalize_v3(lar->imat[0]);
+		normalize_v3(lar->imat[1]);
+		normalize_v3(lar->imat[2]);
+
+		xn = saacos(lar->spotsi);
+		xn = sinf(xn) / cosf(xn);
+		lar->spottexfac= 1.0f/(xn);
+
+		if (lar->mode & LA_ONLYSHADOW) {
+			if ((lar->mode & (LA_SHAD_BUF|LA_SHAD_RAY))==0) lar->mode -= LA_ONLYSHADOW;
+		}
+
+	}
+
+	/* set flag for spothalo and initvars */
+	if ((la->type == LA_SPOT) && (la->mode & LA_HALO) &&
+	    (!(la->mode & LA_SHAD_BUF) || la->buftype != LA_SHADBUF_DEEP))
+	{
+		if (la->haint>0.0f) {
+			re->flag |= R_LAMPHALO;
+
+			/* camera position (0, 0, 0) rotate around lamp */
+			lar->sh_invcampos[0]= -lar->co[0];
+			lar->sh_invcampos[1]= -lar->co[1];
+			lar->sh_invcampos[2]= -lar->co[2];
+			mul_m3_v3(lar->imat, lar->sh_invcampos);
+
+			/* z factor, for a normalized volume */
+			angle= saacos(lar->spotsi);
+			xn= lar->spotsi;
+			yn = sinf(angle);
+			lar->sh_zfac= yn/xn;
+			/* pre-scale */
+			lar->sh_invcampos[2]*= lar->sh_zfac;
+
+			/* halfway shadow buffer doesn't work for volumetric effects */
+			if (ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP))
+				lar->buftype = LA_SHADBUF_REGULAR;
+
+		}
+	}
+	else if (la->type==LA_HEMI) {
+		lar->mode &= ~(LA_SHAD_RAY|LA_SHAD_BUF);
+	}
+
+	for (c=0; c<MAX_MTEX; c++) {
+		if (la->mtex[c] && la->mtex[c]->tex) {
+			if (la->mtex[c]->mapto & LAMAP_COL)
+				lar->mode |= LA_TEXTURE;
+			if (la->mtex[c]->mapto & LAMAP_SHAD)
+				lar->mode |= LA_SHAD_TEX;
+
+			if (G.is_rendering) {
+				if (re->osa) {
+					if (la->mtex[c]->tex->type==TEX_IMAGE) lar->mode |= LA_OSATEX;
+				}
+			}
+		}
+	}
+
+	/* old code checked for internal render (aka not yafray) */
+	{
+		/* to make sure we can check ray shadow easily in the render code */
+		if (lar->mode & LA_SHAD_RAY) {
+			if ( (re->r.mode & R_RAYTRACE)==0)
+				lar->mode &= ~LA_SHAD_RAY;
+		}
+
+		/* per-lamp shadow setup, only when shadows are enabled for this render */
+		if (re->r.mode & R_SHADOW) {
+
+			if (la->type==LA_AREA && (lar->mode & LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_CONSTANT)) {
+				init_jitter_plane(lar);
+			}
+			else if (la->type==LA_SPOT && (lar->mode & LA_SHAD_BUF) ) {
+				/* Per lamp, one shadow buffer is made. */
+				lar->bufflag= la->bufflag;
+				copy_m4_m4(mat, ob->obmat);
+				initshadowbuf(re, lar, mat);  /* mat is altered */
+			}
+
+
+			/* this is the way used all over to check for shadow */
+			if (lar->shb || (lar->mode & LA_SHAD_RAY)) {
+				LampShadowSample *ls;
+				LampShadowSubSample *lss;
+				int a, b;
+
+				memset(re->shadowsamplenr, 0, sizeof(re->shadowsamplenr));
+
+				lar->shadsamp= MEM_mallocN(re->r.threads*sizeof(LampShadowSample), "lamp shadow sample");
+				ls= lar->shadsamp;
+
+				/* shadfacs actually mean light, let's put them to 1 to prevent uninitialized accidents */
+				for (a=0; a<re->r.threads; a++, ls++) {
+					lss= ls->s;
+					for (b=0; b<re->r.osa; b++, lss++) {
+						lss->samplenr= -1;	/* used to detect whether we store or read */
+						lss->shadfac[0]= 1.0f;
+						lss->shadfac[1]= 1.0f;
+						lss->shadfac[2]= 1.0f;
+						lss->shadfac[3]= 1.0f;
+					}
+				}
+			}
+		}
+	}
+	/* the caller receives the entry of re->lights; its lampren field points at lar */
+	return go;
+}
+
+static bool is_object_restricted(Render *re, Object *ob)
+{
+	/* Viewport previews test the view restriction flag, every other render
+	 * tests the render restriction flag. */
+	const bool is_preview = (re->r.scemode & R_VIEWPORT_PREVIEW) != 0;
+	return (ob->restrictflag & (is_preview ? OB_RESTRICT_VIEW : OB_RESTRICT_RENDER)) != 0;
+}
+
+static bool is_object_hidden(Render *re, Object *ob)
+{
+	/* True when 'ob' has to be left out of this render. */
+	ModifierData *smoke_md;
+
+	if (is_object_restricted(re, ob))
+		return true;
+
+	/* outside of viewport previews only the restriction flags hide an object */
+	if ((re->r.scemode & R_VIEWPORT_PREVIEW) == 0)
+		return false;
+
+	/* Mesh deform cages and so on mess up the preview. To avoid the problem,
+	 * the viewport skips objects whose draw type is bounding box or wireframe,
+	 * unless they carry a smoke modifier that is enabled for
+	 * eModifierMode_Realtime. */
+	smoke_md = modifiers_findByType(ob, eModifierType_Smoke);
+	if (smoke_md && modifier_isEnabled(re->scene, smoke_md, eModifierMode_Realtime))
+		return false;
+
+	return ELEM(ob->dt, OB_BOUNDBOX, OB_WIRE);
+}
+
+/* layflag: allows material group to ignore layerflag */
+static void add_lightgroup(Render *re, Group *group, int exclusive)
+{
+	/* Looks up each visible lamp of 'group' in re->lights to fill go->lampren,
+	 * and adds a render lamp for the ones that are not found there. */
+	GroupObject *go, *gol;
+
+	group->id.tag &= ~LIB_TAG_DOIT;
+
+	/* it's a bit too many loops in loops... but will survive */
+	/* note that 'exclusive' will remove it from the global list */
+	for (go= group->gobject.first; go; go= go->next) {
+		go->lampren= NULL;
+		/* test the object pointer before anything dereferences it */
+		if (go->ob == NULL || is_object_hidden(re, go->ob))
+			continue;
+
+		if ((go->ob->lay & re->lay) && go->ob->type==OB_LAMP) {
+			for (gol= re->lights.first; gol; gol= gol->next) {
+				if (gol->ob==go->ob) {
+					go->lampren= gol->lampren;
+					break;
+				}
+			}
+			if (go->lampren==NULL)
+				gol= add_render_lamp(re, go->ob);
+			if (gol && exclusive) {
+				BLI_remlink(&re->lights, gol);
+				MEM_freeN(gol);
+			}
+		}
+	}
+}
+
+static void set_material_lightgroups(Render *re)
+{
+	/* Calls add_lightgroup() once for each group that a material refers to. */
+	Group *grp;
+	Material *mat;
+
+	/* not for preview render */
+	if (re->scene->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))
+		return;
+
+	/* tag every group; add_lightgroup() clears the tag of a group it handled */
+	for (grp= re->main->group.first; grp; grp= grp->id.next)
+		grp->id.tag |= LIB_TAG_DOIT;
+	for (mat= re->main->mat.first; mat; mat= mat->id.next) {
+		if (mat->group == NULL || (mat->group->id.tag & LIB_TAG_DOIT) == 0)
+			continue;
+		add_lightgroup(re, mat->group, mat->mode & MA_GROUP_NOLAY);
+	}
+}
+
+static void set_renderlayer_lightgroups(Render *re, Scene *sce)
+{
+	/* Registers the light override group of every render layer that has one. */
+	SceneRenderLayer *layer = sce->r.layers.first;
+	while (layer) {
+		if (layer->light_override) add_lightgroup(re, layer->light_override, 0);
+		layer = layer->next;
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* World																	 */
+/* ------------------------------------------------------------------------- */
+
+void init_render_world(Render *re)
+{	/* fill re->wrld from the scene world (or with defaults when there is none) and derive linfac/logfac */
+	void *wrld_prev[2] = {	/* saved first: the struct copy / memset below overwrites both pointers */
+	    re->wrld.aotables,
+	    re->wrld.aosphere,
+	};
+
+	int a;
+
+	if (re->scene && re->scene->world) {
+		re->wrld = *(re->scene->world);
+
+		copy_v3_v3(re->grvec, re->viewmat[2]);
+		normalize_v3(re->grvec);
+		copy_m3_m4(re->imat, re->viewinv);
+
+		for (a=0; a<MAX_MTEX; a++)  /* any texture slot with a texture assigned enables WO_SKYTEX */
+			if (re->wrld.mtex[a] && re->wrld.mtex[a]->tex) re->wrld.skytype |= WO_SKYTEX;
+
+		/* AO samples should be OSA minimum: raise aosamp until aosamp*aosamp >= osa */
+		if (re->osa)
+			while (re->wrld.aosamp*re->wrld.aosamp < re->osa)
+				re->wrld.aosamp++;
+		if (!(re->r.mode & R_RAYTRACE) && (re->wrld.ao_gather_method == WO_AOGATHER_RAYTRACE))  /* raytraced gathering without raytracing: AO/env/indirect off */
+			re->wrld.mode &= ~(WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT);
+	}
+	else {
+		memset(&re->wrld, 0, sizeof(World));
+		re->wrld.exp= 0.0f;
+		re->wrld.range= 1.0f;
+
+		/* for mist pass: mist spans the camera clipping range */
+		re->wrld.miststa= re->clipsta;
+		re->wrld.mistdist= re->clipend-re->clipsta;
+		re->wrld.misi= 1.0f;
+	}
+
+	re->wrld.linfac= 1.0f + powf((2.0f*re->wrld.exp + 0.5f), -10);
+	re->wrld.logfac= logf((re->wrld.linfac-1.0f)/re->wrld.linfac) / re->wrld.range;  /* NOTE(review): divides by wrld.range; a zero range from the scene world is not guarded here */
+
+	/* restore runtime vars, needed for viewport rendering [#36005] */
+	re->wrld.aotables = wrld_prev[0];
+	re->wrld.aosphere = wrld_prev[1];
+}
+
+
+
+/* ------------------------------------------------------------------------- */
+/* Object Finalization														 */
+/* ------------------------------------------------------------------------- */
+
+/* prevent phong interpolation for giving ray shadow errors (terminator problem): derives obr->ob->smoothresh */
+static void set_phong_threshold(ObjectRen *obr)
+{
+	/* thresh sums the |dot(face normal, vertex normal)| values above 0.9, tot counts them */
+	VlakRen *vlr;
+	float thresh= 0.0, dot;
+	int tot=0, i;
+
+	/* Added check for 'pointy' situations, only dotproducts of 0.9 and larger
+	 * are taken into account. This threshold is meant to work on smooth geometry, not
+	 * for extreme cases (ton) */
+
+	for (i=0; i<obr->totvlak; i++) {
+		vlr= RE_findOrAddVlak(obr, i);
+		if ((vlr->flag & R_SMOOTH) && (vlr->flag & R_STRAND)==0) {  /* smooth, non-strand faces only */
+			dot= dot_v3v3(vlr->n, vlr->v1->n);
+			dot= ABS(dot);
+			if (dot>0.9f) {
+				thresh+= dot; tot++;
+			}
+			dot= dot_v3v3(vlr->n, vlr->v2->n);
+			dot= ABS(dot);
+			if (dot>0.9f) {
+				thresh+= dot; tot++;
+			}
+
+			dot= dot_v3v3(vlr->n, vlr->v3->n);
+			dot= ABS(dot);
+			if (dot>0.9f) {
+				thresh+= dot; tot++;
+			}
+
+			if (vlr->v4) {  /* fourth vertex exists for quads only */
+				dot= dot_v3v3(vlr->n, vlr->v4->n);
+				dot= ABS(dot);
+				if (dot>0.9f) {
+					thresh+= dot; tot++;
+				}
+			}
+		}
+	}
+
+	if (tot) {  /* smoothresh is left untouched when no sample qualified */
+		thresh/= (float)tot;
+		obr->ob->smoothresh= cosf(0.5f*(float)M_PI-saacos(thresh));
+	}
+}
+
+/* per face check if all samples should be taken: sets R_TRACEBLE and R_FULL_OSA in vlr->flag.
+ * R_FULL_OSA needs re->osa, plus MA_FULL_OSA or a raytraced mirror/ray-transp material with both gloss values 1.0 */
+static void set_fullsample_trace_flag(Render *re, ObjectRen *obr)
+{
+	VlakRen *vlr;
+	int a, trace, mode, osa;
+
+	osa= re->osa;
+	trace= re->r.mode & R_RAYTRACE;
+
+	for (a=obr->totvlak-1; a>=0; a--) {
+		vlr= RE_findOrAddVlak(obr, a);
+		mode= vlr->mat->mode;
+
+		if (trace && (mode & MA_TRACEBLE))
+			vlr->flag |= R_TRACEBLE;
+
+		if (osa) {
+			if (mode & MA_FULL_OSA) {
+				vlr->flag |= R_FULL_OSA;
+			}
+			else if (trace) {
+				if (mode & MA_SHLESS) {
+					/* pass: shadeless materials never get R_FULL_OSA here */
+				}
+				else if (vlr->mat->material_type == MA_TYPE_VOLUME) {
+					/* pass: volume materials never get R_FULL_OSA here */
+				}
+				else if ((mode & MA_RAYMIRROR) || ((mode & MA_TRANSP) && (mode & MA_RAYTRANSP))) {
+					/* for blurry reflect/refract, better to take more samples
+					 * inside the raytrace than as OSA samples */
+					if ((vlr->mat->gloss_mir == 1.0f) && (vlr->mat->gloss_tra == 1.0f))
+						vlr->flag |= R_FULL_OSA;
+				}
+			}
+		}
+	}
+}
+
+/* split quads for predictable baking; any dir other than 2 takes the first split
+ * dir 1 == (0, 1, 2) (0, 2, 3),  2 == (1, 3, 0) (1, 2, 3)
+ */
+static void split_quads(ObjectRen *obr, int dir)
+{
+	VlakRen *vlr, *vlr1;
+	int a;
+
+	for (a=obr->totvlak-1; a>=0; a--) {
+		vlr= RE_findOrAddVlak(obr, a);
+
+		/* test if rendering as a quad or triangle, skip strands and wire */
+		if ((vlr->flag & R_STRAND)==0 && (vlr->mat->material_type != MA_TYPE_WIRE)) {
+
+			if (vlr->v4) {
+
+				vlr1= RE_vlakren_copy(obr, vlr);  /* vlr1 becomes the second triangle, vlr keeps the first */
+				vlr1->flag |= R_FACE_SPLIT;
+
+				if ( dir==2 ) vlr->flag |= R_DIVIDE_24;
+				else vlr->flag &= ~R_DIVIDE_24;
+
+				/* new vertex pointers */
+				if (vlr->flag & R_DIVIDE_24) {
+					vlr1->v1= vlr->v2;
+					vlr1->v2= vlr->v3;
+					vlr1->v3= vlr->v4;
+
+					vlr->v3 = vlr->v4;
+
+					vlr1->flag |= R_DIVIDE_24;
+				}
+				else {
+					vlr1->v1= vlr->v1;
+					vlr1->v2= vlr->v3;
+					vlr1->v3= vlr->v4;
+
+					vlr1->flag &= ~R_DIVIDE_24;
+				}
+				vlr->v4 = vlr1->v4 = NULL;
+
+#ifdef WITH_FREESTYLE
+				/* Freestyle edge marks: remap the quad's edge bits onto the two triangles; vlr1 is set first since it reads vlr's old marks */
+				if (vlr->flag & R_DIVIDE_24) {
+					vlr1->freestyle_edge_mark=
+						((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V1V2 : 0) |
+						((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0);
+					vlr->freestyle_edge_mark=
+						((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) |
+						((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0);
+				}
+				else {
+					vlr1->freestyle_edge_mark=
+						((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0) |
+						((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0);
+					vlr->freestyle_edge_mark=
+						((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) |
+						((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V2V3 : 0);
+				}
+#endif
+
+				/* new normals, computed with reversed vertex order (v3, v2, v1) */
+				normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+				normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co);
+			}
+			/* clear the flag when not divided */
+			else vlr->flag &= ~R_DIVIDE_24;
+		}
+	}
+}
+
+static void check_non_flat_quads(ObjectRen *obr)
+{	/* turn quads with a collapsed edge into triangles, and split quads that are not flat into two triangles */
+	VlakRen *vlr, *vlr1;
+	VertRen *v1, *v2, *v3, *v4;
+	float nor[3], xn, flen;
+	int a;
+
+	for (a=obr->totvlak-1; a>=0; a--) {
+		vlr= RE_findOrAddVlak(obr, a);
+
+		/* test if rendering as a quad or triangle, skip strands and wire */
+		if (vlr->v4 && (vlr->flag & R_STRAND)==0 && (vlr->mat->material_type != MA_TYPE_WIRE)) {
+
+			/* check if quad is actually triangle: test each edge in turn for (near) zero length */
+			v1= vlr->v1;
+			v2= vlr->v2;
+			v3= vlr->v3;
+			v4= vlr->v4;
+			sub_v3_v3v3(nor, v1->co, v2->co);
+			if ( ABS(nor[0])<FLT_EPSILON10 &&  ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) {
+				vlr->v1= v2;
+				vlr->v2= v3;
+				vlr->v3= v4;
+				vlr->v4= NULL;
+				vlr->flag |= (R_DIVIDE_24 | R_FACE_SPLIT);
+			}
+			else {
+				sub_v3_v3v3(nor, v2->co, v3->co);
+				if ( ABS(nor[0])<FLT_EPSILON10 &&  ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) {
+					vlr->v2= v3;
+					vlr->v3= v4;
+					vlr->v4= NULL;
+					vlr->flag |= R_FACE_SPLIT;
+				}
+				else {
+					sub_v3_v3v3(nor, v3->co, v4->co);
+					if ( ABS(nor[0])<FLT_EPSILON10 &&  ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) {
+						vlr->v4= NULL;
+					}
+					else {
+						sub_v3_v3v3(nor, v4->co, v1->co);
+						if ( ABS(nor[0])<FLT_EPSILON10 &&  ABS(nor[1])<FLT_EPSILON10 && ABS(nor[2])<FLT_EPSILON10 ) {
+							vlr->v4= NULL;
+						}
+					}
+				}
+			}
+
+			if (vlr->v4) {  /* still a real quad: test flatness */
+
+				/* Face is divided along edge with the least gradient 		*/
+				/* Flagged with R_DIVIDE_24 if divide is from vert 2 to 4 	*/
+				/* 		4---3		4---3 */
+				/*		|\ 1|	or  |1 /| */
+				/*		|0\ |		|/ 0| */
+				/*		1---2		1---2 	0 = orig face, 1 = new face */
+
+				/* render normals are inverted in render! we calculate normal of single tria here */
+				flen= normal_tri_v3(nor, vlr->v4->co, vlr->v3->co, vlr->v1->co);
+				if (flen==0.0f) normal_tri_v3(nor, vlr->v4->co, vlr->v2->co, vlr->v1->co);
+
+				xn = dot_v3v3(nor, vlr->n);
+
+				if (ABS(xn) < 0.999995f ) {  /* checked on noisy fractal grid */
+
+					float d1, d2;
+
+					vlr1= RE_vlakren_copy(obr, vlr);
+					vlr1->flag |= R_FACE_SPLIT;
+
+					/* split direction based on vnorms: compare triangle normals against vertex normals */
+					normal_tri_v3(nor, vlr->v1->co, vlr->v2->co, vlr->v3->co);
+					d1 = dot_v3v3(nor, vlr->v1->n);
+
+					normal_tri_v3(nor, vlr->v2->co, vlr->v3->co, vlr->v4->co);
+					d2 = dot_v3v3(nor, vlr->v2->n);
+
+					if (fabsf(d1) < fabsf(d2) ) vlr->flag |=  R_DIVIDE_24;
+					else                        vlr->flag &= ~R_DIVIDE_24;
+
+					/* new vertex pointers */
+					if (vlr->flag & R_DIVIDE_24) {
+						vlr1->v1= vlr->v2;
+						vlr1->v2= vlr->v3;
+						vlr1->v3= vlr->v4;
+
+						vlr->v3 = vlr->v4;
+
+						vlr1->flag |= R_DIVIDE_24;
+					}
+					else {
+						vlr1->v1= vlr->v1;
+						vlr1->v2= vlr->v3;
+						vlr1->v3= vlr->v4;
+
+						vlr1->flag &= ~R_DIVIDE_24;
+					}
+					vlr->v4 = vlr1->v4 = NULL;
+
+					/* new normals, computed with reversed vertex order (v3, v2, v1) */
+					normal_tri_v3(vlr->n, vlr->v3->co, vlr->v2->co, vlr->v1->co);
+					normal_tri_v3(vlr1->n, vlr1->v3->co, vlr1->v2->co, vlr1->v1->co);
+
+#ifdef WITH_FREESTYLE
+					/* Freestyle edge marks: remap the quad's edge bits onto the two triangles; vlr1 is set first since it reads vlr's old marks */
+					if (vlr->flag & R_DIVIDE_24) {
+						vlr1->freestyle_edge_mark=
+							((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V1V2 : 0) |
+							((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0);
+						vlr->freestyle_edge_mark=
+							((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) |
+							((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0);
+					}
+					else {
+						vlr1->freestyle_edge_mark=
+							((vlr->freestyle_edge_mark & R_EDGE_V3V4) ? R_EDGE_V2V3 : 0) |
+							((vlr->freestyle_edge_mark & R_EDGE_V4V1) ? R_EDGE_V3V1 : 0);
+						vlr->freestyle_edge_mark=
+							((vlr->freestyle_edge_mark & R_EDGE_V1V2) ? R_EDGE_V1V2 : 0) |
+							((vlr->freestyle_edge_mark & R_EDGE_V2V3) ? R_EDGE_V2V3 : 0);
+					}
+#endif
+				}
+				/* clear the flag when not divided */
+				else vlr->flag &= ~R_DIVIDE_24;
+			}
+		}
+	}
+}
+
+static void finalize_render_object(Render *re, ObjectRen *obr, int timeoffset)
+{	/* post-process a filled ObjectRen; everything except displacement only runs when timeoffset is 0 */
+	Object *ob= obr->ob;
+	VertRen *ver= NULL;
+	StrandRen *strand= NULL;
+	StrandBound *sbound= NULL;
+	float min[3], max[3], smin[3], smax[3];
+	int a, b;
+
+	if (obr->totvert || obr->totvlak || obr->tothalo || obr->totstrand) {
+		/* the exception below is because displace code now is in init_render_mesh call,
+		 * I will look at means to have autosmooth enabled for all object types
+		 * and have it as general postprocess, like displace */
+		if (ob->type!=OB_MESH && test_for_displace(re, ob))
+			displace(re, obr);
+
+		if (!timeoffset) {
+			/* phong normal interpolation can cause error in tracing
+			 * (terminator problem) */
+			ob->smoothresh= 0.0;
+			if ((re->r.mode & R_RAYTRACE) && (re->r.mode & R_SHADOW))
+				set_phong_threshold(obr);
+
+			if (re->flag & R_BAKING && re->r.bake_quad_split != 0) {
+				/* Baking lets us define a quad split order */
+				split_quads(obr, re->r.bake_quad_split);
+			}
+			else if (BKE_object_is_animated(re->scene, ob))  /* animated objects always get the fixed 'dir 1' split */
+				split_quads(obr, 1);
+			else {
+				if ((re->r.mode & R_SIMPLIFY && re->r.simplify_flag & R_SIMPLE_NO_TRIANGULATE) == 0)  /* skipped only when simplify is on with 'no triangulate' */
+					check_non_flat_quads(obr);
+			}
+
+			set_fullsample_trace_flag(re, obr);
+
+			/* compute bounding boxes for clipping */
+			INIT_MINMAX(min, max);
+			for (a=0; a<obr->totvert; a++) {
+				if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert;  /* vertices are stored in nodes of 256 */
+				else ver++;
+
+				minmax_v3v3_v3(min, max, ver->co);
+			}
+
+			if (obr->strandbuf) {
+				float width;
+
+				/* maximum strand width: larger of the material's strand_sta/strand_end with R_STRAND_B_UNITS, else 0 */
+				if (obr->strandbuf->flag & R_STRAND_B_UNITS)
+					obr->strandbuf->maxwidth = max_ff(obr->strandbuf->ma->strand_sta, obr->strandbuf->ma->strand_end);
+				else
+					obr->strandbuf->maxwidth= 0.0f;
+
+				width= obr->strandbuf->maxwidth;
+				sbound= obr->strandbuf->bound;
+				for (b=0; b<obr->strandbuf->totbound; b++, sbound++) {  /* one bounding box per strand bound */
+
+					INIT_MINMAX(smin, smax);
+
+					for (a=sbound->start; a<sbound->end; a++) {
+						strand= RE_findOrAddStrand(obr, a);
+						strand_minmax(strand, smin, smax, width);
+					}
+
+					copy_v3_v3(sbound->boundbox[0], smin);
+					copy_v3_v3(sbound->boundbox[1], smax);
+
+					minmax_v3v3_v3(min, max, smin);  /* strand bounds also grow the object bounds */
+					minmax_v3v3_v3(min, max, smax);
+				}
+			}
+
+			copy_v3_v3(obr->boundbox[0], min);
+			copy_v3_v3(obr->boundbox[1], max);
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Database																	 */
+/* ------------------------------------------------------------------------- */
+
+static int render_object_type(short type)
+{	/* non-zero when OB_TYPE_SUPPORT_MATERIAL() accepts this object type */
+	return OB_TYPE_SUPPORT_MATERIAL(type);
+}
+
+static void find_dupli_instances(Render *re, ObjectRen *obr, DupliObject *dob)
+{	/* attach 'obr' to every pending instance (obi->obr == NULL) of the same object and psysindex */
+	ObjectInstanceRen *obi;
+	float imat[4][4], obmat[4][4], obimat[4][4], nmat[3][3];
+	int first = 1;
+
+	mul_m4_m4m4(obmat, re->viewmat, obr->obmat);
+	invert_m4_m4(imat, obmat);
+
+	/* for objects instanced by dupliverts/faces/particles, we go over the
+	 * list of instances to find ones that instance obr, and setup their
+	 * matrices and obr pointer */
+	for (obi=re->instancetable.last; obi; obi=obi->prev) {
+		if (!obi->obr && obi->ob == obr->ob && obi->psysindex == obr->psysindex) {
+			obi->obr= obr;
+
+			/* compute difference between object matrix and
+			 * object matrix with dupli transform, in viewspace */
+			copy_m4_m4(obimat, obi->mat);
+			mul_m4_m4m4(obi->mat, obimat, imat);
+
+			copy_m3_m4(nmat, obi->mat);  /* obi->nmat = transposed inverse of the 3x3 part of obi->mat */
+			invert_m3_m3(obi->nmat, nmat);
+			transpose_m3(obi->nmat);
+
+			if (dob) {
+				copy_v3_v3(obi->dupliorco, dob->orco);
+				obi->dupliuv[0]= dob->uv[0];
+				obi->dupliuv[1]= dob->uv[1];
+			}
+
+			if (!first) {  /* totals are added for every matched instance except the first one */
+				re->totvert += obr->totvert;
+				re->totvlak += obr->totvlak;
+				re->tothalo += obr->tothalo;
+				re->totstrand += obr->totstrand;
+			}
+			else
+				first= 0;
+		}
+	}
+}
+
+static void assign_dupligroup_dupli(Render *re, ObjectInstanceRen *obi, ObjectRen *obr, DupliObject *dob)
+{	/* point the single instance 'obi' at 'obr', fix up its matrices and add obr's counts to the render totals */
+	float imat[4][4], obmat[4][4], obimat[4][4], nmat[3][3];
+
+	mul_m4_m4m4(obmat, re->viewmat, obr->obmat);
+	invert_m4_m4(imat, obmat);
+
+	obi->obr= obr;
+
+	/* compute difference between object matrix and
+	 * object matrix with dupli transform, in viewspace */
+	copy_m4_m4(obimat, obi->mat);
+	mul_m4_m4m4(obi->mat, obimat, imat);
+
+	copy_m3_m4(nmat, obi->mat);  /* obi->nmat = transposed inverse of the 3x3 part of obi->mat */
+	invert_m3_m3(obi->nmat, nmat);
+	transpose_m3(obi->nmat);
+
+	if (dob) {
+		copy_v3_v3(obi->dupliorco, dob->orco);
+		obi->dupliuv[0]= dob->uv[0];
+		obi->dupliuv[1]= dob->uv[1];
+	}
+
+	re->totvert += obr->totvert;
+	re->totvlak += obr->totvlak;
+	re->tothalo += obr->tothalo;
+	re->totstrand += obr->totstrand;
+}
+
+static ObjectRen *find_dupligroup_dupli(Render *re, Object *ob, int psysindex)
+{	/* returns an existing R_INSTANCEABLE ObjectRen for (ob, psysindex), or NULL when none can be reused */
+	ObjectRen *obr;
+
+	/* if the object is itself instanced, we don't want to create an instance
+	 * for it */
+	if (ob->transflag & OB_RENDER_DUPLI)
+		return NULL;
+
+	/* try to find an object that was already created so we can reuse it
+	 * and save memory */
+	for (obr=re->objecttable.first; obr; obr=obr->next)
+		if (obr->ob == ob && obr->psysindex == psysindex && (obr->flag & R_INSTANCEABLE))
+			return obr;
+
+	return NULL;
+}
+
+static void set_dupli_tex_mat(Render *re, ObjectInstanceRen *obi, DupliObject *dob, float omat[4][4])
+{
+	/* For duplis we need to have a matrix that transform the coordinate back
+	 * to its original position, without the dupli transforms. We also check
+	 * the matrix is actually needed, to save memory on lots of dupliverts for
+	 * example. The result of that check is cached in the statics below per dob->ob. */
+	static Object *lastob= NULL;
+	static int needtexmat= 0;
+
+	/* init: calling with re == NULL only resets the cached state */
+	if (!re) {
+		lastob= NULL;
+		needtexmat= 0;
+		return;
+	}
+
+	/* check if we actually need it: only when a material of the object uses TEXCO_OBJECT */
+	if (lastob != dob->ob) {
+		Material ***material;
+		short a, *totmaterial;
+
+		lastob= dob->ob;
+		needtexmat= 0;
+
+		totmaterial= give_totcolp(dob->ob);
+		material= give_matarar(dob->ob);
+
+		if (totmaterial && material)
+			for (a= 0; a<*totmaterial; a++)
+				if ((*material)[a] && (*material)[a]->texco & TEXCO_OBJECT)
+					needtexmat= 1;
+	}
+
+	if (needtexmat) {
+		float imat[4][4];
+
+		obi->duplitexmat= BLI_memarena_alloc(re->memArena, sizeof(float)*4*4);  /* allocated from the render's memarena */
+		invert_m4_m4(imat, dob->mat);
+		mul_m4_series(obi->duplitexmat, re->viewmat, omat, imat, re->viewinv);
+	}
+
+	copy_v3_v3(obi->dupliorco, dob->orco);  /* orco and uv are copied whether or not the matrix is needed */
+	copy_v2_v2(obi->dupliuv, dob->uv);
+}
+
+static void init_render_object_data(Render *re, ObjectRen *obr, int timeoffset)
+{	/* fill 'obr' with geometry (particle system or object data), finalize it and add its counts to the totals */
+	Object *ob= obr->ob;
+	ParticleSystem *psys;
+	int i;
+
+	if (obr->psysindex) {  /* non-zero psysindex: obr holds the psysindex-th particle system (1-based) */
+		if ((!obr->prev || obr->prev->ob != ob || (obr->prev->flag & R_INSTANCEABLE)==0) && ob->type==OB_MESH) {
+			/* the emitter mesh wasn't rendered so the modifier stack wasn't
+			 * evaluated with render settings; the derived mesh is created and released right away */
+			DerivedMesh *dm;
+			const CustomDataMask mask = CD_MASK_RENDER_INTERNAL;
+
+			if (re->r.scemode & R_VIEWPORT_PREVIEW)
+				dm = mesh_create_derived_view(re->scene, ob, mask);
+			else
+				dm = mesh_create_derived_render(re->scene, ob, mask);
+			dm->release(dm);
+		}
+
+		for (psys=ob->particlesystem.first, i=0; i<obr->psysindex-1; i++)  /* walk to the psysindex-th list entry */
+			psys= psys->next;
+
+		render_new_particle_system(re, obr, psys, timeoffset);
+	}
+	else {
+		if (ELEM(ob->type, OB_FONT, OB_CURVE))
+			init_render_curve(re, obr, timeoffset);
+		else if (ob->type==OB_SURF)
+			init_render_surf(re, obr, timeoffset);
+		else if (ob->type==OB_MESH)
+			init_render_mesh(re, obr, timeoffset);
+		else if (ob->type==OB_MBALL)
+			init_render_mball(re, obr);
+	}
+
+	finalize_render_object(re, obr, timeoffset);
+
+	re->totvert += obr->totvert;
+	re->totvlak += obr->totvlak;
+	re->tothalo += obr->tothalo;
+	re->totstrand += obr->totstrand;
+}
+
+static void add_render_object(Render *re, Object *ob, Object *par, DupliObject *dob, float omat[4][4], int timeoffset)
+{	/* create the ObjectRen(s) for 'ob': one for its own data (if allowed) and one per enabled particle system */
+	ObjectRen *obr;
+	ObjectInstanceRen *obi;
+	ParticleSystem *psys;
+	int show_emitter, allow_render= 1, index, psysindex, i;
+
+	index= (dob)? dob->persistent_id[0]: 0;
+
+	/* It seems that we may generate psys->renderdata recursively in some nasty intricate cases of
+	 * several levels of dupliobject (see T51524).
+	 * For now, basic rule is, do not restore psys if it was already in 'render state'.
+	 * Another, more robust solution could be to add some reference counting to that renderdata... */
+	bool psys_has_renderdata = false;
+
+	/* the emitter has to be processed first (render levels of modifiers) */
+	/* so here we only check if the emitter should be rendered */
+	if (ob->particlesystem.first) {
+		show_emitter= 0;
+		for (psys=ob->particlesystem.first; psys; psys=psys->next) {
+			show_emitter += psys->part->draw & PART_DRAW_EMITTER;
+			if (!(re->r.scemode & R_VIEWPORT_PREVIEW)) {
+				psys_has_renderdata |= (psys->renderdata != NULL);  /* true if any system was already in render state */
+				psys_render_set(ob, psys, re->viewmat, re->winmat, re->winx, re->winy, timeoffset);
+			}
+		}
+
+		/* if no psys has "show emitter" selected don't render emitter */
+		if (show_emitter == 0)
+			allow_render= 0;
+	}
+
+	/* one render object for the data itself */
+	if (allow_render) {
+		obr= RE_addRenderObject(re, ob, par, index, 0, ob->lay);
+		if ((dob && !dob->animated) || (ob->transflag & OB_RENDER_DUPLI)) {
+			obr->flag |= R_INSTANCEABLE;
+			copy_m4_m4(obr->obmat, ob->obmat);
+		}
+		init_render_object_data(re, obr, timeoffset);
+
+		/* only add instance for objects that have not been used for dupli */
+		if (!(ob->transflag & OB_RENDER_DUPLI)) {
+			obi = RE_addRenderInstance(re, obr, ob, par, index, 0, NULL, ob->lay, dob);
+			if (dob) set_dupli_tex_mat(re, obi, dob, omat);
+		}
+		else
+			find_dupli_instances(re, obr, dob);
+
+		for (i=1; i<=ob->totcol; i++) {  /* material slots are 1-based here */
+			Material* ma = give_render_material(re, ob, i);
+			if (ma && ma->material_type == MA_TYPE_VOLUME)
+				add_volume(re, obr, ma);
+		}
+	}
+
+	/* and one render object per particle system */
+	if (ob->particlesystem.first) {
+		psysindex= 1;
+		for (psys=ob->particlesystem.first; psys; psys=psys->next, psysindex++) {
+			if (!psys_check_enabled(ob, psys, G.is_rendering))
+				continue;  /* NOTE(review): skips psys_render_restore() although psys_render_set() ran above for every psys -- confirm */
+
+			obr= RE_addRenderObject(re, ob, par, index, psysindex, ob->lay);
+			if ((dob && !dob->animated) || (ob->transflag & OB_RENDER_DUPLI)) {
+				obr->flag |= R_INSTANCEABLE;
+				copy_m4_m4(obr->obmat, ob->obmat);
+			}
+			if (dob)
+				psys->flag |= PSYS_USE_IMAT;  /* set only for the duration of the conversion, cleared again below */
+			init_render_object_data(re, obr, timeoffset);
+			if (!(re->r.scemode & R_VIEWPORT_PREVIEW) && !psys_has_renderdata) {
+				psys_render_restore(ob, psys);
+			}
+			psys->flag &= ~PSYS_USE_IMAT;
+
+			/* only add instance for objects that have not been used for dupli */
+			if (!(ob->transflag & OB_RENDER_DUPLI)) {
+				obi = RE_addRenderInstance(re, obr, ob, par, index, psysindex, NULL, ob->lay, dob);
+				if (dob) set_dupli_tex_mat(re, obi, dob, omat);
+			}
+			else
+				find_dupli_instances(re, obr, dob);
+		}
+	}
+}
+
+/* par = pointer to duplicator parent, needed for object lookup table */
+/* dob, omat = dupli entry and matrix, forwarded to add_render_object(); some callers pass NULL for both */
+static void init_render_object(Render *re, Object *ob, Object *par, DupliObject *dob, float omat[4][4], int timeoffset)
+{
+	static double lasttime= 0.0;  /* time of the last stats_draw call, kept across calls */
+	double time;
+	float mat[4][4];
+
+	if (ob->type==OB_LAMP)
+		add_render_lamp(re, ob);
+	else if (render_object_type(ob->type))
+		add_render_object(re, ob, par, dob, omat, timeoffset);
+	else {  /* other object types only get ob->imat set, the inverse of viewmat * obmat */
+		mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+		invert_m4_m4(ob->imat, mat);
+	}
+
+	time= PIL_check_seconds_timer();
+	if (time - lasttime > 1.0) {  /* refresh the displayed statistics at most about once per second */
+		lasttime= time;
+		/* clumsy copying still */
+		re->i.totvert= re->totvert;
+		re->i.totface= re->totvlak;
+		re->i.totstrand= re->totstrand;
+		re->i.tothalo= re->tothalo;
+		re->i.totlamp= re->totlamp;
+		re->stats_draw(re->sdh, &re->i);
+	}
+
+	ob->flag |= OB_DONE;
+}
+
+void RE_Database_Free(Render *re)
+{	/* free the converted render database of 're' and reset its counters; does nothing unless re->i.convertdone is set */
+	LampRen *lar;
+
+	/* will crash if we try to free empty database */
+	if (!re->i.convertdone)
+		return;
+
+	/* statistics for debugging render memory usage (debug mode, while rendering, not for previews) */
+	if ((G.debug & G_DEBUG) && (G.is_rendering)) {
+		if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0) {
+			BKE_image_print_memlist();
+			MEM_printmemlist_stats();
+		}
+	}
+
+	/* FREE */
+
+	for (lar= re->lampren.first; lar; lar= lar->next) {  /* per-lamp data first, the list itself is freed below */
+		freeshadowbuf(lar);
+		if (lar->jitter) MEM_freeN(lar->jitter);
+		if (lar->shadsamp) MEM_freeN(lar->shadsamp);
+		if (lar->sunsky) MEM_freeN(lar->sunsky);
+		curvemapping_free(lar->curfalloff);
+	}
+
+	free_volume_precache(re);
+
+	BLI_freelistN(&re->lampren);
+	BLI_freelistN(&re->lights);
+
+	free_renderdata_tables(re);
+
+	/* free orco */
+	free_mesh_orco_hash(re);
+
+	if (re->main) {
+		end_render_materials(re->main);
+		end_render_textures(re);
+		free_pointdensities(re);
+	}
+
+	free_camera_inside_volumes(re);
+
+	if (re->wrld.aosphere) {  /* the scene world's pointer is cleared too */
+		MEM_freeN(re->wrld.aosphere);
+		re->wrld.aosphere= NULL;
+		if (re->scene && re->scene->world)
+			re->scene->world->aosphere= NULL;
+	}
+	if (re->wrld.aotables) {  /* same handling as aosphere above */
+		MEM_freeN(re->wrld.aotables);
+		re->wrld.aotables= NULL;
+		if (re->scene && re->scene->world)
+			re->scene->world->aotables= NULL;
+	}
+	if (re->r.mode & R_RAYTRACE)
+		free_render_qmcsampler(re);
+
+	if (re->r.mode & R_RAYTRACE) freeraytree(re);
+
+	free_sss(re);
+	free_occ(re);
+	free_strand_surface(re);
+
+	re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0;
+	re->i.convertdone = false;
+
+	re->bakebuf= NULL;
+
+	if (re->scene)  /* with R_FREE_IMAGE set and outside preview renders, free all texture images as well */
+		if (re->scene->r.scemode & R_FREE_IMAGE)
+			if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0)
+				BKE_image_free_all_textures();
+
+	if (re->memArena) {
+		BLI_memarena_free(re->memArena);
+		re->memArena = NULL;
+	}
+}
+
+static int allow_render_object(Render *re, Object *ob, int nolamps, int onlyselected, Object *actob)
+{	/* returns 1 when 'ob' should be converted for rendering, 0 when one of the filters below rejects it */
+	if (is_object_hidden(re, ob))
+		return 0;
+
+	/* Only handle dupli-hiding here if there is no particle systems. Else, let those handle show/noshow. */
+	if (!ob->particlesystem.first) {
+		if ((ob->transflag & OB_DUPLI) && !(ob->transflag & OB_DUPLIFRAMES)) {
+			return 0;
+		}
+	}
+
+	/* don't add non-basic meta objects, ends up having renderobjects with no geometry */
+	if (ob->type == OB_MBALL && ob!=BKE_mball_basis_find(re->eval_ctx, re->scene, ob))
+		return 0;
+
+	if (nolamps && (ob->type==OB_LAMP))
+		return 0;
+
+	if (onlyselected && (ob!=actob && !(ob->flag & SELECT)))  /* 'actob' passes even when it is not selected */
+		return 0;
+
+	return 1;
+}
+
+static int allow_render_dupli_instance(Render *UNUSED(re), DupliObject *dob, Object *obd)
+{	/* returns non-zero when the dupli 'dob' of object 'obd' may be rendered as an instance */
+	ParticleSystem *psys;
+	Material *ma;
+	short a, *totmaterial;
+
+	/* don't allow objects with halos. we need to have
+	 * all halo's to sort them globally in advance */
+	totmaterial= give_totcolp(obd);
+
+	if (totmaterial) {
+		for (a= 0; a<*totmaterial; a++) {
+			ma= give_current_material(obd, a + 1);  /* material index is 1-based */
+			if (ma && (ma->material_type == MA_TYPE_HALO))
+				return 0;
+		}
+	}
+
+	for (psys=obd->particlesystem.first; psys; psys=psys->next)  /* every particle system must render as one of the listed types */
+		if (!ELEM(psys->part->ren_as, PART_DRAW_BB, PART_DRAW_LINE, PART_DRAW_PATH, PART_DRAW_OB, PART_DRAW_GR))
+			return 0;
+
+	/* only object types accepted by render_object_type(); dupligroup duplis must also be non-animated */
+	return (render_object_type(obd->type) &&
+			(!(dob->type == OB_DUPLIGROUP) || !dob->animated));
+}
+
+static void dupli_render_particle_set(Render *re, Object *ob, int timeoffset, int level, int enable)
+{
+	/* ugly function, but we need to set particle systems to their render
+	 * settings before calling object_duplilist, to get render level duplis.
+	 * enable != 0 sets render state, enable == 0 restores it; recurses into ob->dup_group. */
+	Group *group;
+	GroupObject *go;
+	ParticleSystem *psys;
+	DerivedMesh *dm;
+
+	if (re->r.scemode & R_VIEWPORT_PREVIEW)
+		return;
+
+	if (level >= MAX_DUPLI_RECUR)  /* recursion guard for nested dupli groups */
+		return;
+
+	if (ob->transflag & OB_DUPLIPARTS) {
+		for (psys=ob->particlesystem.first; psys; psys=psys->next) {
+			if (ELEM(psys->part->ren_as, PART_DRAW_OB, PART_DRAW_GR)) {  /* only systems rendered as object or group */
+				if (enable)
+					psys_render_set(ob, psys, re->viewmat, re->winmat, re->winx, re->winy, timeoffset);
+				else
+					psys_render_restore(ob, psys);
+			}
+		}
+
+		if (enable) {
+			/* this is to make sure we get render level duplis in groups:
+			 * the derivedmesh must be created before init_render_mesh,
+			 * since object_duplilist does dupliparticles before that */
+			dm = mesh_create_derived_render(re->scene, ob, CD_MASK_RENDER_INTERNAL);
+			dm->release(dm);
+
+			for (psys=ob->particlesystem.first; psys; psys=psys->next)  /* clear the 'updated' flag on each system's modifier */
+				psys_get_modifier(ob, psys)->flag &= ~eParticleSystemFlag_psys_updated;
+		}
+	}
+
+	if (ob->dup_group==NULL) return;
+	group= ob->dup_group;
+
+	for (go= group->gobject.first; go; go= go->next)
+		dupli_render_particle_set(re, go->ob, timeoffset, level+1, enable);
+}
+
+static int get_vector_renderlayers(Scene *sce)
+{	/* returns the OR of srl->lay over all render layers of 'sce' that have SCE_PASS_VECTOR in their passflag */
+	SceneRenderLayer *srl;
+	unsigned int lay= 0;
+
+	for (srl= sce->r.layers.first; srl; srl= srl->next)
+		if (srl->passflag & SCE_PASS_VECTOR)
+			lay |= srl->lay;
+
+	return lay;  /* note: accumulated as unsigned int, returned as int */
+}
+
+static void add_group_render_dupli_obs(Render *re, Group *group, int nolamps, int onlyselected, Object *actob, int timeoffset, int level)
+{	/* convert group objects flagged OB_DONE + OB_RENDER_DUPLI that still lack their render object */
+	GroupObject *go;
+	Object *ob;
+
+	/* simple preventing of too deep nested groups */
+	if (level>MAX_DUPLI_RECUR) return;
+
+	/* recursively go into dupligroups to find objects with OB_RENDER_DUPLI
+	 * that were not created yet */
+	for (go= group->gobject.first; go; go= go->next) {
+		ob= go->ob;
+
+		if (ob->flag & OB_DONE) {
+			if (ob->transflag & OB_RENDER_DUPLI) {
+				if (allow_render_object(re, ob, nolamps, onlyselected, actob)) {
+					init_render_object(re, ob, NULL, NULL, NULL, timeoffset);
+					ob->transflag &= ~OB_RENDER_DUPLI;  /* cleared so the object is not converted a second time */
+
+					if (ob->dup_group)
+						add_group_render_dupli_obs(re, ob->dup_group, nolamps, onlyselected, actob, timeoffset, level+1);
+				}
+			}
+		}
+	}
+}
+
+static void database_init_objects(Render *re, unsigned int renderlay, int nolamps, int onlyselected, Object *actob, int timeoffset)
+{
+	Base *base;
+	Object *ob;
+	Group *group;
+	ObjectInstanceRen *obi;
+	Scene *sce_iter;
+	int lay, vectorlay;
+
+	/* for duplis we need the Object texture mapping to work as if
+	 * untransformed, set_dupli_tex_mat sets the matrix to allow that
+	 * NULL is just for init */
+	set_dupli_tex_mat(NULL, NULL, NULL, NULL);
+
+	/* loop over all objects rather then using SETLOOPER because we may
+	 * reference an mtex-mapped object which isn't rendered or is an
+	 * empty in a dupli group. We could scan all render material/lamp/world
+	 * mtex's for mapto objects but its easier just to set the
+	 * 'imat' / 'imat_ren' on all and unlikely to be a performance hit
+	 * See bug: [#28744] - campbell */
+	for (ob= re->main->object.first; ob; ob= ob->id.next) {
+		float mat[4][4];
+
+		/* imat objects has to be done here, since displace can have texture using Object map-input */
+		mul_m4_m4m4(mat, re->viewmat, ob->obmat);
+		invert_m4_m4(ob->imat_ren, mat);
+		copy_m4_m4(ob->imat, ob->imat_ren);
+		/* each object should only be rendered once */
+		ob->flag &= ~OB_DONE;
+		ob->transflag &= ~OB_RENDER_DUPLI;
+	}
+
+	for (SETLOOPER(re->scene, sce_iter, base)) {
+		ob= base->object;
+
+		/* in the prev/next pass for making speed vectors, avoid creating
+		 * objects that are not on a renderlayer with a vector pass, can
+		 * save a lot of time in complex scenes */
+		vectorlay= get_vector_renderlayers(re->scene);
+		lay= (timeoffset)? renderlay & vectorlay: renderlay;
+
+		/* if the object has been restricted from rendering in the outliner, ignore it */
+		if (is_object_restricted(re, ob)) continue;
+
+		/* OB_DONE means the object itself got duplicated, so was already converted */
+		if (ob->flag & OB_DONE) {
+			/* OB_RENDER_DUPLI means instances for it were already created, now
+			 * it still needs to create the ObjectRen containing the data */
+			if (ob->transflag & OB_RENDER_DUPLI) {
+				if (allow_render_object(re, ob, nolamps, onlyselected, actob)) {
+					init_render_object(re, ob, NULL, NULL, NULL, timeoffset);
+					ob->transflag &= ~OB_RENDER_DUPLI;
+				}
+			}
+		}
+		else if ((base->lay & lay) || (ob->type==OB_LAMP && (base->lay & re->lay)) ) {
+			if ((ob->transflag & OB_DUPLI) && (ob->type!=OB_MBALL)) {
+				DupliObject *dob;
+				ListBase *duplilist;
+				DupliApplyData *duplilist_apply_data = NULL;
+				int i;
+
+				/* create list of duplis generated by this object, particle
+				 * system need to have render settings set for dupli particles */
+				dupli_render_particle_set(re, ob, timeoffset, 0, 1);
+				duplilist = object_duplilist(re->eval_ctx, re->scene, ob);
+				duplilist_apply_data = duplilist_apply(ob, NULL, duplilist);
+				/* postpone 'dupli_render_particle_set', since RE_addRenderInstance reads
+				 * index values from 'dob->persistent_id[0]', referencing 'psys->child' which
+				 * may be smaller once the particle system is restored, see: T45563. */
+
+				for (dob= duplilist->first, i = 0; dob; dob= dob->next, ++i) {
+					DupliExtraData *dob_extra = &duplilist_apply_data->extra[i];
+					Object *obd= dob->ob;
+
+					copy_m4_m4(obd->obmat, dob->mat);
+
+					/* group duplis need to set ob matrices correct, for deform. so no_draw is part handled */
+					if (!(obd->transflag & OB_RENDER_DUPLI) && dob->no_draw)
+						continue;
+
+					if (is_object_hidden(re, obd))
+						continue;
+
+					if (obd->type==OB_MBALL)
+						continue;
+
+					if (!allow_render_object(re, obd, nolamps, onlyselected, actob))
+						continue;
+
+					if (allow_render_dupli_instance(re, dob, obd)) {
+						ParticleSystem *psys;
+						ObjectRen *obr = NULL;
+						int psysindex;
+						float mat[4][4];
+
+						obi=NULL;
+
+						/* instances instead of the actual object are added in two cases, either
+						 * this is a duplivert/face/particle, or it is a non-animated object in
+						 * a dupligroup that has already been created before */
+						if (dob->type != OB_DUPLIGROUP || (obr=find_dupligroup_dupli(re, obd, 0))) {
+							mul_m4_m4m4(mat, re->viewmat, dob->mat);
+														/* ob = particle system, use that layer */
+							obi = RE_addRenderInstance(re, NULL, obd, ob, dob->persistent_id[0], 0, mat, ob->lay, dob);
+
+							/* fill in instance variables for texturing */
+							set_dupli_tex_mat(re, obi, dob, dob_extra->obmat);
+							if (dob->type != OB_DUPLIGROUP) {
+								copy_v3_v3(obi->dupliorco, dob->orco);
+								obi->dupliuv[0]= dob->uv[0];
+								obi->dupliuv[1]= dob->uv[1];
+							}
+							else {
+								/* for the second case, setup instance to point to the already
+								 * created object, and possibly setup instances if this object
+								 * itself was duplicated. for the first case find_dupli_instances
+								 * will be called later. */
+								assign_dupligroup_dupli(re, obi, obr, dob);
+								if (obd->transflag & OB_RENDER_DUPLI)
+									find_dupli_instances(re, obr, dob);
+							}
+						}
+
+						/* same logic for particles, each particle system has it's own object, so
+						 * need to go over them separately */
+						psysindex= 1;
+						for (psys=obd->particlesystem.first; psys; psys=psys->next) {
+							if (dob->type != OB_DUPLIGROUP || (obr=find_dupligroup_dupli(re, obd, psysindex))) {
+								if (obi == NULL)
+									mul_m4_m4m4(mat, re->viewmat, dob->mat);
+								obi = RE_addRenderInstance(re, NULL, obd, ob, dob->persistent_id[0], psysindex++, mat, obd->lay, dob);
+
+								set_dupli_tex_mat(re, obi, dob, dob_extra->obmat);
+								if (dob->type != OB_DUPLIGROUP) {
+									copy_v3_v3(obi->dupliorco, dob->orco);
+									obi->dupliuv[0]= dob->uv[0];
+									obi->dupliuv[1]= dob->uv[1];
+								}
+								else {
+									assign_dupligroup_dupli(re, obi, obr, dob);
+									if (obd->transflag & OB_RENDER_DUPLI)
+										find_dupli_instances(re, obr, dob);
+								}
+							}
+						}
+
+						if (obi==NULL)
+							/* can't instance, just create the object */
+							init_render_object(re, obd, ob, dob, dob_extra->obmat, timeoffset);
+
+						if (dob->type != OB_DUPLIGROUP) {
+							obd->flag |= OB_DONE;
+							obd->transflag |= OB_RENDER_DUPLI;
+						}
+					}
+					else
+						init_render_object(re, obd, ob, dob, dob_extra->obmat, timeoffset);
+
+					if (re->test_break(re->tbh)) break;
+				}
+
+				/* restore particle system */
+				dupli_render_particle_set(re, ob, timeoffset, 0, false);
+
+				if (duplilist_apply_data) {
+					duplilist_restore(duplilist, duplilist_apply_data);
+					duplilist_free_apply_data(duplilist_apply_data);
+				}
+				free_object_duplilist(duplilist);
+
+				if (allow_render_object(re, ob, nolamps, onlyselected, actob))
+					init_render_object(re, ob, NULL, NULL, NULL, timeoffset);
+			}
+			else if (allow_render_object(re, ob, nolamps, onlyselected, actob))
+				init_render_object(re, ob, NULL, NULL, NULL, timeoffset);
+		}
+
+		if (re->test_break(re->tbh)) break;
+	}
+
+	/* objects in groups with OB_RENDER_DUPLI set still need to be created,
+	 * since they may not be part of the scene */
+	for (group= re->main->group.first; group; group=group->id.next)
+		add_group_render_dupli_obs(re, group, nolamps, onlyselected, actob, timeoffset, 0);
+
+	if (!re->test_break(re->tbh))
+		RE_makeRenderInstances(re);
+}
+
+/* Builds the render database for 'scene' (used to be 'rotate scene'): stores main/scene/lay in 're', sets up view, world, textures and materials, then converts the objects. */
+void RE_Database_FromScene(Render *re, Main *bmain, Scene *scene, unsigned int lay, int use_camera_view)
+{
+	Scene *sce;
+	Object *camera;
+	float mat[4][4];
+	float amb[3];
+
+	re->main= bmain;
+	re->scene= scene;
+	re->lay= lay;
+
+	if (re->r.scemode & R_VIEWPORT_PREVIEW)
+		re->scene_color_manage = BKE_scene_check_color_management_enabled(scene);
+
+	/* scene needs to be set to get camera (re->scene was assigned above) */
+	camera= RE_GetCamera(re);
+
+	/* stats info: printed per second, per object */
+	re->i.infostr= "Preparing Scene data";
+	re->i.cfra= scene->r.cfra;
+	BLI_strncpy(re->i.scene_name, scene->id.name + 2, sizeof(re->i.scene_name));
+
+	/* XXX add test if dbase was filled already? */
+
+	re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "render db arena");
+	re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0;
+	re->lights.first= re->lights.last= NULL;
+	re->lampren.first= re->lampren.last= NULL;
+
+	re->i.partsdone = false;	/* signal now in use for previewrender */
+
+	/* in localview, lamps are using normal layers, objects only local bits; only the local 'lay' is masked, re->lay keeps the value stored above */
+	if (re->lay & 0xFF000000)
+		lay &= 0xFF000000;
+
+	/* applies changes fully; skipped when any of R_NO_FRAME_UPDATE, R_BUTS_PREVIEW or R_VIEWPORT_PREVIEW is set in scemode */
+	if ((re->r.scemode & (R_NO_FRAME_UPDATE|R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))==0) {
+		BKE_scene_update_for_newframe(re->eval_ctx, re->main, re->scene, lay);
+		render_update_anim_renderdata(re, &re->scene->r);
+	}
+
+	/* if no camera (or use_camera_view is 0), viewmat should have been set by the caller! */
+	if (use_camera_view && camera) {
+		/* called before but need to call again in case of lens animation from the
+		 * above call to BKE_scene_update_for_newframe, fixes bug. [#22702].
+		 * following calls don't depend on 'RE_SetCamera' */
+		RE_SetCamera(re, camera);
+		RE_GetCameraModelMatrix(re, camera, mat);
+		invert_m4(mat);
+		RE_SetView(re, mat);
+
+		/* force correct matrix for scaled cameras */
+		DAG_id_tag_update_ex(re->main, &camera->id, OB_RECALC_OB);
+	}
+
+	/* store for incremental render, viewmat rotates dbase (read back in RE_DataBase_IncrementalView) */
+	copy_m4_m4(re->viewmat_orig, re->viewmat);
+
+	init_render_world(re);	/* do first, because of ambient. also requires re->osa set correct */
+	if (re->r.mode & R_RAYTRACE) {
+		init_render_qmcsampler(re);
+
+		if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT))
+			if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
+				init_ao_sphere(re, &re->wrld);
+	}
+
+	/* still bad... doing all */
+	init_render_textures(re);
+	copy_v3_v3(amb, &re->wrld.ambr);
+	init_render_materials(re->main, re->r.mode, amb, (re->r.scemode & R_BUTS_PREVIEW) == 0);
+	set_node_shader_lamp_loop(shade_material_loop);
+
+	/* MAKE RENDER DATA: no lamp/selection filtering, no active object, no time offset */
+	database_init_objects(re, lay, 0, 0, NULL, 0);
+
+	if (!re->test_break(re->tbh)) {
+		set_material_lightgroups(re);
+		for (sce= re->scene; sce; sce= sce->set)
+			set_renderlayer_lightgroups(re, sce);
+
+		/* for now some clumsy copying still: mirror the database totals into the stats struct, then draw the stats */
+		re->i.totvert= re->totvert;
+		re->i.totface= re->totvlak;
+		re->i.totstrand= re->totstrand;
+		re->i.tothalo= re->tothalo;
+		re->i.totlamp= re->totlamp;
+		re->stats_draw(re->sdh, &re->i);
+	}
+}
+/* Second stage after conversion: each build step below is skipped once test_break reports a cancel; a cancelled render gets its database freed again at the end. */
+void RE_Database_Preprocess(Render *re)
+{
+	if (!re->test_break(re->tbh)) {
+		int tothalo;
+
+		tothalo= re->tothalo;
+		sort_halos(re, tothalo);
+
+		init_camera_inside_volumes(re);
+
+		re->i.infostr = IFACE_("Creating Shadowbuffers");
+		re->stats_draw(re->sdh, &re->i);
+
+		/* SHADOW BUFFER */
+		threaded_makeshadowbufs(re);
+
+		/* old code checked for internal render (aka not yafray) */
+		{
+			/* raytree, only when raytracing is enabled */
+			if (!re->test_break(re->tbh)) {
+				if (re->r.mode & R_RAYTRACE) {
+					makeraytree(re);
+				}
+			}
+			/* ENVIRONMENT MAPS */
+			if (!re->test_break(re->tbh))
+				make_envmaps(re);
+
+			/* point density texture */
+			if (!re->test_break(re->tbh))
+				make_pointdensities(re);
+			/* voxel data texture */
+			if (!re->test_break(re->tbh))
+				make_voxeldata(re);
+		}
+
+		if (!re->test_break(re->tbh))
+			project_renderdata(re, projectverto, (re->r.mode & R_PANORAMA) != 0, 0, 1);
+
+		/* Occlusion tree: only for the approximate gather method and with R_SHADOW enabled */
+		if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && !re->test_break(re->tbh))
+			if (re->wrld.ao_gather_method == WO_AOGATHER_APPROX)
+				if (re->r.mode & R_SHADOW)
+					make_occ_tree(re);
+
+		/* SSS */
+		if ((re->r.mode & R_SSS) && !re->test_break(re->tbh))
+			make_sss_tree(re);
+
+		if (!re->test_break(re->tbh))
+			if (re->r.mode & R_RAYTRACE)
+				volume_precache(re);
+	}
+
+	re->i.convertdone = true;  /* set even when cancelled, before the free below */
+
+	if (re->test_break(re->tbh))
+		RE_Database_Free(re);
+
+	re->i.infostr = NULL;
+	re->stats_draw(re->sdh, &re->i);
+}
+
+/* exported call to recalculate hoco (homogeneous coordinates) for vertices, when winmat changed */
+void RE_DataBase_ApplyWindow(Render *re)
+{
+	project_renderdata(re, projectverto, 0, 0, 0);  /* NOTE(review): the other callers in this file pass a panorama flag as 3rd and 1 as last argument; both are 0 here */
+}
+
+/* exported call to rotate render data again, when viewmat changed: applies the delta between 'viewmat' and re->viewmat_orig (the view stored at conversion time) */
+void RE_DataBase_IncrementalView(Render *re, float viewmat[4][4], int restore)
+{
+	float oldviewinv[4][4], tmat[4][4];
+
+	invert_m4_m4(oldviewinv, re->viewmat_orig);
+
+	/* we have to correct for the already rotated vertexcoords: tmat = viewmat * inverse(viewmat_orig) */
+	mul_m4_m4m4(tmat, viewmat, oldviewinv);
+
+	copy_m4_m4(re->viewmat, viewmat);
+	invert_m4_m4(re->viewinv, re->viewmat);
+
+	init_camera_inside_volumes(re);
+
+	env_rotate_scene(re, tmat, !restore);  /* note the flag is passed inverted */
+
+	/* SSS points distribution depends on view, so the tree is rebuilt unless cancelled */
+	if ((re->r.mode & R_SSS) && !re->test_break(re->tbh))
+		make_sss_tree(re);
+}
+
+/* copies the view matrix currently stored in the render (re->viewmat) into 'mat' */
+void RE_DataBase_GetView(Render *re, float mat[4][4])
+{
+	copy_m4_m4(mat, re->viewmat);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Speed Vectors															 */
+/* ------------------------------------------------------------------------- */
+/* Builds a render database with scene->r.cfra shifted by 'timeoffset' and projects it; cfra is shifted back before returning. */
+static void database_fromscene_vectors(Render *re, Scene *scene, unsigned int lay, int timeoffset)
+{
+	Object *camera= RE_GetCamera(re);  /* NOTE(review): fetched before re->scene is assigned below, unlike RE_Database_FromScene - confirm re->scene is already valid here */
+	float mat[4][4];
+
+	re->scene= scene;
+	re->lay= lay;
+
+	/* XXX add test if dbase was filled already? */
+
+	re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "vector render db arena");
+	re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0;
+	re->i.totface=re->i.totvert=re->i.totstrand=re->i.totlamp=re->i.tothalo= 0;
+	re->lights.first= re->lights.last= NULL;  /* NOTE(review): re->lampren is not reset here, unlike RE_Database_FromScene - confirm intended */
+
+	/* in localview, lamps are using normal layers, objects only local bits; only the local 'lay' is masked */
+	if (re->lay & 0xFF000000)
+		lay &= 0xFF000000;
+
+	/* applies changes fully, at the shifted frame (undone at the end of this function) */
+	scene->r.cfra += timeoffset;
+	BKE_scene_update_for_newframe(re->eval_ctx, re->main, re->scene, lay);
+
+	/* if no camera, viewmat should have been set! */
+	if (camera) {
+		RE_GetCameraModelMatrix(re, camera, mat);
+		normalize_m4(mat);
+		invert_m4(mat);
+		RE_SetView(re, mat);
+	}
+
+	/* MAKE RENDER DATA: no lamp/selection filtering, no active object, same time offset */
+	database_init_objects(re, lay, 0, 0, NULL, timeoffset);
+
+	if (!re->test_break(re->tbh))
+		project_renderdata(re, projectverto, (re->r.mode & R_PANORAMA) != 0, 0, 1);
+
+	/* do this in end, particles for example need cfra */
+	scene->r.cfra -= timeoffset;
+}
+
+/* choose to use static, to prevent giving too many args to this call: with 're' set it only caches the projection settings and returns, with 're' NULL it maps co/ho to screenspace 'zco' using the cached values */
+static void speedvector_project(Render *re, float zco[2], const float co[3], const float ho[4])
+{
+	static float pixelphix=0.0f, pixelphiy=0.0f, zmulx=0.0f, zmuly=0.0f;
+	static int pano= 0;
+	float div;
+
+	/* initialize: zco, co and ho are not touched in this mode */
+	if (re) {
+		pano= re->r.mode & R_PANORAMA;
+
+		/* precalculate amount of radians 1 pixel rotates */
+		if (pano) {
+			/* size of 1 pixel mapped to viewplane coords */
+			float psize;
+
+			psize = BLI_rctf_size_x(&re->viewplane) / (float)re->winx;
+			/* x angle of a pixel */
+			pixelphix = atan(psize / re->clipsta);
+
+			psize = BLI_rctf_size_y(&re->viewplane) / (float)re->winy;
+			/* y angle of a pixel */
+			pixelphiy = atan(psize / re->clipsta);
+		}
+		zmulx= re->winx/2;  /* NOTE(review): the division happens before the conversion to float; if winx/winy are ints this truncates for odd sizes */
+		zmuly= re->winy/2;
+
+		return;
+	}
+
+	/* now map hocos to screenspace, uses very primitive clip still: w below 0.1 is treated as 0.1 */
+	if (ho[3]<0.1f) div= 10.0f;
+	else div= 1.0f/ho[3];
+
+	/* use cylinder projection: pixel position is derived from angles of 'co', 'ho' is not used */
+	if (pano) {
+		float vec[3], ang;
+		/* angle between (0, 0, -1) and (co) */
+		copy_v3_v3(vec, co);
+
+		ang= saacos(-vec[2]/sqrtf(vec[0]*vec[0] + vec[2]*vec[2]));
+		if (vec[0]<0.0f) ang= -ang;
+		zco[0]= ang/pixelphix + zmulx;
+
+		ang= 0.5f*(float)M_PI - saacos(vec[1] / len_v3(vec));
+		zco[1]= ang/pixelphiy + zmuly;
+
+	}
+	else {
+		zco[0]= zmulx*(1.0f+ho[0]*div);
+		zco[1]= zmuly*(1.0f+ho[1]*div);
+	}
+}
+/* Screen-space motion of one point: 'vectors' (position on the other frame, already projected) minus the projection of co/ho; step 0 writes speed[0..1], any other step writes the negated result to speed[2..3]. */
+static void calculate_speedvector(const float vectors[2], int step, float winsq, float winroot, const float co[3], const float ho[4], float speed[4])
+{
+	float cur[2], delta[2], lensq;
+
+	/* where the point is now, in the same screen space as 'vectors' */
+	speedvector_project(NULL, cur, co, ho);
+
+	delta[0] = vectors[0] - cur[0];
+	delta[1] = vectors[1] - cur[1];
+
+	/* enable nice masks for hardly moving stuff or float inaccuracy */
+	if (delta[0] > -0.1f && delta[0] < 0.1f && delta[1] > -0.1f && delta[1] < 0.1f)
+		delta[0] = delta[1] = 0.0f;
+
+	/* maximize speed for image width, otherwise it never looks good */
+	lensq = delta[0]*delta[0] + delta[1]*delta[1];
+	if (lensq > winsq) {
+		const float scale = winroot/sqrtf(lensq);
+		delta[0] *= scale;
+		delta[1] *= scale;
+	}
+
+	/* note; in main vecblur loop speedvec is negated again */
+	if (step == 0) {
+		speed[0] = delta[0];
+		speed[1] = delta[1];
+		return;
+	}
+
+	/* other step: second half of 'speed', sign flipped */
+	speed[2] = -delta[0];
+	speed[3] = -delta[1];
+}
+/* Per surface vertex, step 0 speed (from prevco) in [0..1] and step 1 speed (from nextco) in [2..3]; returns a zero-initialized totvert x 4 float table the caller must free, or NULL unless co, prevco and nextco are all set. */
+static float *calculate_strandsurface_speedvectors(Render *re, ObjectInstanceRen *obi, StrandSurface *mesh)
+{
+	float (*table)[4];
+	float ho_cur[4], ho_prev[4], ho_next[4], projmat[4][4], screen[2];
+	float area, side;
+	int v;
+
+	if (!mesh->co || !mesh->prevco || !mesh->nextco)
+		return NULL;
+
+	area = (float)re->winx*(float)re->winy; /* int's can wrap on large images */
+	side = sqrtf(area);
+
+	if ((obi->flag & R_TRANSFORMED) == 0)
+		copy_m4_m4(projmat, re->winmat);
+	else
+		mul_m4_m4m4(projmat, re->winmat, obi->mat);
+
+	table = MEM_callocN(sizeof(float)*4*mesh->totvert, "StrandSurfWin");
+	for (v = 0; v < mesh->totvert; v++) {
+		projectvert(mesh->co[v], projmat, ho_cur);
+
+		projectvert(mesh->prevco[v], projmat, ho_prev);
+		speedvector_project(NULL, screen, mesh->prevco[v], ho_prev);
+		calculate_speedvector(screen, 0, area, side, mesh->co[v], ho_cur, table[v]);
+
+		projectvert(mesh->nextco[v], projmat, ho_next);
+		speedvector_project(NULL, screen, mesh->nextco[v], ho_next);
+		calculate_speedvector(screen, 1, area, side, mesh->co[v], ho_cur, table[v]);
+	}
+
+	return (float *)table;
+}
+/* Fills the winspeed entries of 'obi': per vertex from 'vectors' (2 floats per vertex), per strand by interpolating the speeds of its strand-surface face. */
+static void calculate_speedvectors(Render *re, ObjectInstanceRen *obi, float *vectors, int step)
+{
+	ObjectRen *obr= obi->obr;
+	VertRen *ver= NULL;
+	StrandRen *strand= NULL;
+	StrandBuffer *strandbuf;
+	StrandSurface *mesh= NULL;
+	float *speed, (*winspeed)[4]=NULL, ho[4], winmat[4][4];
+	float *co1, *co2, *co3, *co4, w[4];
+	float winsq = (float)re->winx * (float)re->winy, winroot = sqrtf(winsq);  /* int's can wrap on large images */
+	int a, *face, *index;
+
+	if (obi->flag & R_TRANSFORMED)
+		mul_m4_m4m4(winmat, re->winmat, obi->mat);
+	else
+		copy_m4_m4(winmat, re->winmat);
+
+	if (obr->vertnodes) {
+		for (a=0; a<obr->totvert; a++, vectors+=2) {
+			if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert;  /* every 256th index starts at the next node's array */
+			else ver++;
+
+			speed= RE_vertren_get_winspeed(obi, ver, 1);
+			projectvert(ver->co, winmat, ho);
+			calculate_speedvector(vectors, step, winsq, winroot, ver->co, ho, speed);
+		}
+	}
+
+	if (obr->strandnodes) {
+		strandbuf= obr->strandbuf;
+		mesh= (strandbuf)? strandbuf->surface: NULL;
+
+		/* compute speed vectors at surface vertices (both steps at once, NULL when the surface lacks coordinates) */
+		if (mesh)
+			winspeed= (float(*)[4])calculate_strandsurface_speedvectors(re, obi, mesh);
+
+		if (winspeed) {
+			for (a=0; a<obr->totstrand; a++, vectors+=2) {  /* 'vectors' is advanced but not read inside this loop */
+				if ((a & 255)==0) strand= obr->strandnodes[a>>8].strand;
+				else strand++;
+
+				index= RE_strandren_get_face(obr, strand, 0);
+				if (index && *index < mesh->totface) {
+					speed= RE_strandren_get_winspeed(obi, strand, 1);
+
+					/* interpolate speed vectors from strand surface, weighted by where the strand's first vertex sits in the face */
+					face= mesh->face[*index];
+
+					co1 = mesh->co[face[0]];
+					co2 = mesh->co[face[1]];
+					co3 = mesh->co[face[2]];
+
+					if (face[3]) {  /* non-zero 4th index: handled as a quad, otherwise as a triangle */
+						co4 = mesh->co[face[3]];
+						interp_weights_quad_v3(w, co1, co2, co3, co4, strand->vert->co);
+					}
+					else {
+						interp_weights_tri_v3(w, co1, co2, co3, strand->vert->co);
+					}
+
+					zero_v4(speed);
+					madd_v4_v4fl(speed, winspeed[face[0]], w[0]);
+					madd_v4_v4fl(speed, winspeed[face[1]], w[1]);
+					madd_v4_v4fl(speed, winspeed[face[2]], w[2]);
+					if (face[3])
+						madd_v4_v4fl(speed, winspeed[face[3]], w[3]);
+				}
+			}
+
+			MEM_freeN(winspeed);
+		}
+	}
+}
+/* Fills vertex winspeed from the fluidsim modifier's mesh velocities ('vectors' is advanced but never read); returns 0 when there is no modifier, no velocity data or a vertex count mismatch, 1 otherwise. */
+static int load_fluidsimspeedvectors(Render *re, ObjectInstanceRen *obi, float *vectors, int step)
+{
+	ObjectRen *obr= obi->obr;
+	Object *fsob= obr->ob;
+	VertRen *ver= NULL;
+	float *speed, div, zco[2], avgvel[4] = {0.0, 0.0, 0.0, 0.0};
+	float zmulx= re->winx/2, zmuly= re->winy/2, len;
+	float winsq = (float)re->winx * (float)re->winy, winroot= sqrtf(winsq); /* int's can wrap on large images */
+	int a, j;
+	float hoco[4], ho[4], fsvec[4], camco[4];
+	float mat[4][4], winmat[4][4];
+	float imat[4][4];
+	FluidsimModifierData *fluidmd = (FluidsimModifierData *)modifiers_findByType(fsob, eModifierType_Fluidsim);
+	FluidsimSettings *fss;
+	FluidVertexVelocity *velarray = NULL;
+
+	/* only one step needed: step 0 writes both halves of the speed vector, later steps are a no-op */
+	if (step) return 1;
+
+	if (fluidmd)
+		fss = fluidmd->fss;
+	else
+		return 0;
+
+	copy_m4_m4(mat, re->viewmat);
+	invert_m4_m4(imat, mat);
+
+	/* no velocity data available for this mesh */
+	if (!fss->meshVelocities) return 0;
+
+	if ( obr->totvert != fss->totvert) {
+		//fprintf(stderr, "load_fluidsimspeedvectors - modified fluidsim mesh, not using speed vectors (%d,%d)...\n", obr->totvert, fsob->fluidsimSettings->meshSurface->totvert); // DEBUG
+		return 0;
+	}
+
+	velarray = fss->meshVelocities;
+
+	if (obi->flag & R_TRANSFORMED)
+		mul_m4_m4m4(winmat, re->winmat, obi->mat);
+	else
+		copy_m4_m4(winmat, re->winmat);
+
+	/* (bad) HACK calculate average velocity */
+	/* better solution would be fixing getVelocityAt() in intern/elbeem/intern/solver_util.cpp
+	 * so that also small drops/little water volumes return a velocity != 0.
+	 * But I had no luck in fixing that function - DG */
+	for (a=0; a<obr->totvert; a++) {
+		for (j=0;j<3;j++) avgvel[j] += velarray[a].vel[j];
+
+	}
+	for (j=0;j<3;j++) avgvel[j] /= (float)(obr->totvert);  /* NOTE(review): divides by zero when totvert is 0, though the loop below then never reads avgvel */
+
+
+	for (a=0; a<obr->totvert; a++, vectors+=2) {
+		if ((a & 255)==0)
+			ver= obr->vertnodes[a>>8].vert;
+		else
+			ver++;
+
+		/* get fluid velocity */
+		fsvec[3] = 0.0f;
+		//fsvec[0] = fsvec[1] = fsvec[2] = fsvec[3] = 0.0; fsvec[2] = 2.0f; // NT fixed test
+		for (j=0;j<3;j++) fsvec[j] = velarray[a].vel[j];
+
+		/* (bad) HACK insert average velocity if none is there (see previous comment) */
+		if ((fsvec[0] == 0.0f) && (fsvec[1] == 0.0f) && (fsvec[2] == 0.0f)) {
+			fsvec[0] = avgvel[0];
+			fsvec[1] = avgvel[1];
+			fsvec[2] = avgvel[2];
+		}
+
+		/* transform (=rotate) to cam space: dot products with the first three rows of the inverted view matrix, xyz only */
+		camco[0] = dot_v3v3(imat[0], fsvec);
+		camco[1] = dot_v3v3(imat[1], fsvec);
+		camco[2] = dot_v3v3(imat[2], fsvec);
+
+		/* get homogeneous coordinates, of the velocity and of the vertex itself */
+		projectvert(camco, winmat, hoco);
+		projectvert(ver->co, winmat, ho);
+
+		/* now map hocos to screenspace, uses very primitive clip still */
+		/* use ho[3] of original vertex, xy component of vel. direction */
+		if (ho[3]<0.1f) div= 10.0f;
+		else div= 1.0f/ho[3];
+		zco[0]= zmulx*hoco[0]*div;
+		zco[1]= zmuly*hoco[1]*div;
+
+		/* maximize speed as usual: clamp the length to winroot */
+		len= zco[0]*zco[0] + zco[1]*zco[1];
+		if (len > winsq) {
+			len= winroot/sqrtf(len);
+			zco[0]*= len; zco[1]*= len;
+		}
+
+		speed= RE_vertren_get_winspeed(obi, ver, 1);
+		/* set both to the same value */
+		speed[0]= speed[2]= zco[0];
+		speed[1]= speed[3]= zco[1];
+		//if (a < 20) fprintf(stderr,"speed %d %f,%f | camco %f,%f,%f | hoco %f,%f,%f,%f\n", a, speed[0], speed[1], camco[0],camco[1], camco[2], hoco[0],hoco[1], hoco[2],hoco[3]); // NT DEBUG
+	}
+
+	return 1;
+}
+
+/* makes a copy per object instance (appended to 'lb') holding the projected screen position of every vertex, 2 floats each */
+/* result should be that we can free entire database; the copies are released with free_dbase_object_vectors() */
+static void copy_dbase_object_vectors(Render *re, ListBase *lb)
+{
+	ObjectInstanceRen *obi, *obilb;
+	ObjectRen *obr;
+	VertRen *ver= NULL;
+	float *vec, ho[4], winmat[4][4];
+	int a, totvector;
+
+	for (obi= re->instancetable.first; obi; obi= obi->next) {
+		obr= obi->obr;
+
+		obilb= MEM_mallocN(sizeof(ObjectInstanceRen), "ObInstanceVector");
+		memcpy(obilb, obi, sizeof(ObjectInstanceRen));
+		BLI_addtail(lb, obilb);
+
+		obilb->totvector= totvector= obr->totvert;
+
+		if (totvector > 0) {  /* NOTE(review): when 0, obilb->vectors keeps the pointer memcpy'd from obi and free_dbase_object_vectors() frees it if non-NULL - confirm it is NULL here */
+			vec= obilb->vectors= MEM_mallocN(2*sizeof(float)*totvector, "vector array");
+
+			if (obi->flag & R_TRANSFORMED)
+				mul_m4_m4m4(winmat, re->winmat, obi->mat);
+			else
+				copy_m4_m4(winmat, re->winmat);
+
+			for (a=0; a<obr->totvert; a++, vec+=2) {
+				if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert;  /* every 256th index starts at the next node's array */
+				else ver++;
+
+				projectvert(ver->co, winmat, ho);
+				speedvector_project(NULL, vec, ver->co, ho);
+			}
+		}
+	}
+}
+/* frees the vector array of every copied instance in 'lb' (when it has one), then the list items themselves */
+static void free_dbase_object_vectors(ListBase *lb)
+{
+	ObjectInstanceRen *inst = lb->first;
+	while (inst != NULL) {
+		if (inst->vectors != NULL) MEM_freeN(inst->vectors);
+		inst = inst->next;
+	}
+	BLI_freelistN(lb);
+}
+/* Builds the database three times (frame -1, frame +1, then the real one) and stores speed vectors on the final instances from the positions copied off the first two. */
+void RE_Database_FromScene_Vectors(Render *re, Main *bmain, Scene *sce, unsigned int lay)
+{
+	ObjectInstanceRen *obi, *oldobi;
+	StrandSurface *mesh;
+	ListBase *table;
+	ListBase oldtable= {NULL, NULL}, newtable= {NULL, NULL};
+	ListBase strandsurface;
+	int step;
+
+	re->i.infostr = IFACE_("Calculating previous frame vectors");
+	re->r.mode |= R_SPEED;
+
+	speedvector_project(re, NULL, NULL, NULL);	/* initializes projection code */
+
+	/* creates entire dbase, one frame back */
+	database_fromscene_vectors(re, sce, lay, -1);
+
+	/* copy away vertex info (projected positions) of the previous frame */
+	copy_dbase_object_vectors(re, &oldtable);
+
+	/* free dbase and make the future one; the strandsurface list is detached around the free and put back afterwards */
+	strandsurface= re->strandsurface;
+	memset(&re->strandsurface, 0, sizeof(ListBase));
+	re->i.convertdone = true;
+	RE_Database_Free(re);
+	re->strandsurface= strandsurface;
+
+	if (!re->test_break(re->tbh)) {
+		/* creates entire dbase, one frame ahead */
+		re->i.infostr = IFACE_("Calculating next frame vectors");
+
+		database_fromscene_vectors(re, sce, lay, +1);
+	}
+	/* copy away vertex info; note this also runs when the step above was skipped by a cancel */
+	copy_dbase_object_vectors(re, &newtable);
+
+	/* free dbase and make the real one, strandsurface list kept aside as before */
+	strandsurface= re->strandsurface;
+	memset(&re->strandsurface, 0, sizeof(ListBase));
+	re->i.convertdone = true;
+	RE_Database_Free(re);
+	re->strandsurface= strandsurface;
+
+	if (!re->test_break(re->tbh)) {
+		RE_Database_FromScene(re, bmain, sce, lay, 1);
+		RE_Database_Preprocess(re);
+	}
+
+	if (!re->test_break(re->tbh)) {
+		int vectorlay= get_vector_renderlayers(re->scene);
+
+		for (step= 0; step<2; step++) {  /* step 0 uses the previous-frame table, step 1 the next-frame table */
+
+			if (step)
+				table= &newtable;
+			else
+				table= &oldtable;
+
+			oldobi= table->first;
+			for (obi= re->instancetable.first; obi && oldobi; obi= obi->next) {
+				int ok= 1;
+				FluidsimModifierData *fluidmd;
+
+				if (!(obi->lay & vectorlay))
+					continue;
+
+				obi->totvector= obi->obr->totvert;
+
+				/* find matching object in old table: both lists are walked in parallel, on a mismatch the whole table is searched from its start */
+				if (oldobi->ob!=obi->ob || oldobi->par!=obi->par || oldobi->index!=obi->index || oldobi->psysindex!=obi->psysindex) {
+					ok= 0;
+					for (oldobi= table->first; oldobi; oldobi= oldobi->next)
+						if (oldobi->ob==obi->ob && oldobi->par==obi->par && oldobi->index==obi->index && oldobi->psysindex==obi->psysindex)
+							break;
+					if (oldobi==NULL)
+						oldobi= table->first;
+					else
+						ok= 1;
+				}
+				if (ok==0) {
+					printf("speed table: missing object %s\n", obi->ob->id.name + 2);
+					continue;
+				}
+
+				/* NT check for fluidsim special treatment */
+				fluidmd = (FluidsimModifierData *)modifiers_findByType(obi->ob, eModifierType_Fluidsim);
+				if (fluidmd && fluidmd->fss && (fluidmd->fss->type & OB_FLUIDSIM_DOMAIN)) {
+					/* use preloaded per vertex simulation data; the callee only calculates on the first pass (step == 0) and returns immediately otherwise */
+					/* NOTE/FIXME - velocities and meshes loaded unnecessarily often during the database_fromscene_vectors calls... */
+					load_fluidsimspeedvectors(re, obi, oldobi->vectors, step);
+				}
+				else {
+					/* check if both have same amounts of vertices */
+					if (obi->totvector==oldobi->totvector)
+						calculate_speedvectors(re, obi, oldobi->vectors, step);
+					else
+						printf("Warning: object %s has different amount of vertices or strands on other frame\n", obi->ob->id.name + 2);
+				}  /* not fluidsim */
+
+				oldobi= oldobi->next;
+			}
+		}
+	}
+
+	free_dbase_object_vectors(&oldtable);
+	free_dbase_object_vectors(&newtable);
+
+	for (mesh=re->strandsurface.first; mesh; mesh=mesh->next) {  /* the previous/next coordinates are no longer needed */
+		if (mesh->prevco) {
+			MEM_freeN(mesh->prevco);
+			mesh->prevco= NULL;
+		}
+		if (mesh->nextco) {
+			MEM_freeN(mesh->nextco);
+			mesh->nextco= NULL;
+		}
+	}
+
+	re->i.infostr = NULL;
+	re->stats_draw(re->sdh, &re->i);
+}
+
+
+/* ------------------------------------------------------------------------- */
+/* Baking																	 */
+/* ------------------------------------------------------------------------- */
+
+/* setup for shaded view or bake, so only lamps and materials are initialized */
+/* type (the lamp and selection filtering is derived in 'nolamps' and 'onlyselected' below):
+ * RE_BAKE_LIGHT:  for shaded view, only add lamps
+ * RE_BAKE_ALL:    for baking, all lamps and objects
+ * RE_BAKE_NORMALS:for baking, no lamps and only selected objects
+ * RE_BAKE_AO:     for baking, no lamps, but all objects (NOTE(review): 'nolamps' below is not set for AO - confirm)
+ * RE_BAKE_TEXTURE:for baking, no lamps, only selected objects
+ * RE_BAKE_VERTEX_COLORS:for baking, no lamps, only selected objects (NOTE(review): 'onlyselected' below is 0 for this type - confirm)
+ * RE_BAKE_DISPLACEMENT:for baking, no lamps, only selected objects
+ * RE_BAKE_DERIVATIVE:for baking, no lamps, only selected objects
+ * RE_BAKE_SHADOW: for baking, only shadows, but all objects
+ */
+void RE_Database_Baking(Render *re, Main *bmain, Scene *scene, unsigned int lay, const int type, Object *actob)
+{
+	Object *camera;
+	float mat[4][4];
+	float amb[3];
+	const short onlyselected= !ELEM(type, RE_BAKE_LIGHT, RE_BAKE_ALL, RE_BAKE_SHADOW, RE_BAKE_AO, RE_BAKE_VERTEX_COLORS);
+	const short nolamps= ELEM(type, RE_BAKE_NORMALS, RE_BAKE_TEXTURE, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE, RE_BAKE_VERTEX_COLORS);
+
+	re->main= bmain;
+	re->scene= scene;
+	re->lay= lay;
+
+	/* renderdata setup and exceptions: the scene's render settings are copied, then adjusted per bake type below */
+	render_copy_renderdata(&re->r, &scene->r);
+
+	RE_init_threadcount(re);
+
+	re->flag |= R_BAKING;
+	re->excludeob= actob;
+	if (actob)
+		re->flag |= R_BAKE_TRACE;
+
+	if (type==RE_BAKE_NORMALS && re->r.bake_normal_space==R_BAKE_SPACE_TANGENT)
+		re->flag |= R_NEED_TANGENT;
+
+	if (type==RE_BAKE_VERTEX_COLORS)
+		re->flag |=  R_NEED_VCOL;
+
+	if (!actob && ELEM(type, RE_BAKE_LIGHT, RE_BAKE_NORMALS, RE_BAKE_TEXTURE, RE_BAKE_DISPLACEMENT, RE_BAKE_DERIVATIVE, RE_BAKE_VERTEX_COLORS)) {
+		re->r.mode &= ~R_SHADOW;
+		re->r.mode &= ~R_RAYTRACE;
+	}
+
+	if (!actob && (type==RE_BAKE_SHADOW)) {
+		re->r.mode |= R_SHADOW;
+	}
+
+	/* setup render stuff */
+	re->memArena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "bake db arena");
+
+	re->totvlak=re->totvert=re->totstrand=re->totlamp=re->tothalo= 0;
+	re->lights.first= re->lights.last= NULL;
+	re->lampren.first= re->lampren.last= NULL;
+
+	/* in localview, lamps are using normal layers, objects only local bits; only the local 'lay' is masked */
+	if (re->lay & 0xFF000000)
+		lay &= 0xFF000000;
+
+	camera= RE_GetCamera(re);
+
+	/* view is the inverse of the normalized camera matrix; if no camera, set unit */
+	if (camera) {
+		normalize_m4_m4(mat, camera->obmat);
+		invert_m4(mat);
+		RE_SetView(re, mat);
+	}
+	else {
+		unit_m4(mat);
+		RE_SetView(re, mat);
+	}
+	copy_m3_m4(re->imat, re->viewinv);
+
+	/* TODO: deep shadow maps + baking + strands */
+	/* strands use the window matrix and view size, there is no correct
+	 * window matrix here, but dummy values at least avoid malloc and crash loop [#27807] */
+	unit_m4(re->winmat);
+	re->winx= re->winy= 256;
+	/* done setting dummy values */
+
+	init_render_world(re);	/* do first, because of ambient. also requires re->osa set correct */
+	if (re->r.mode & R_RAYTRACE) {
+		init_render_qmcsampler(re);
+
+		if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT))
+			if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
+				init_ao_sphere(re, &re->wrld);
+	}
+
+	/* still bad... doing all */
+	init_render_textures(re);
+
+	copy_v3_v3(amb, &re->wrld.ambr);
+	init_render_materials(re->main, re->r.mode, amb, true);
+
+	set_node_shader_lamp_loop(shade_material_loop);
+
+	/* MAKE RENDER DATA, filtered by the bake type flags and the active object, no time offset */
+	database_init_objects(re, lay, nolamps, onlyselected, actob, 0);
+
+	set_material_lightgroups(re);
+
+	/* SHADOW BUFFER, not for RE_BAKE_LIGHT and only with R_SHADOW enabled */
+	if (type!=RE_BAKE_LIGHT)
+		if (re->r.mode & R_SHADOW)
+			threaded_makeshadowbufs(re);
+
+	/* raytree */
+	if (!re->test_break(re->tbh))
+		if (re->r.mode & R_RAYTRACE)
+			makeraytree(re);
+
+	/* point density texture */
+	if (!re->test_break(re->tbh))
+		make_pointdensities(re);
+
+	/* voxel data texture */
+	if (!re->test_break(re->tbh))
+		make_voxeldata(re);
+
+	/* occlusion tree: only for the approximate gather method and with R_SHADOW enabled */
+	if ((re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && !re->test_break(re->tbh))
+		if (re->wrld.ao_gather_method == WO_AOGATHER_APPROX)
+			if (re->r.mode & R_SHADOW)
+				make_occ_tree(re);
+
+	re->i.convertdone = true;
+}
diff --git a/source/blender/render/intern/source/envmap.c b/source/blender/render/intern/source/envmap.c
new file mode 100644
index 00000000000..85a6af92a28
--- /dev/null
+++ b/source/blender/render/intern/source/envmap.c
@@ -0,0 +1,822 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributors: 2004/2005/2006 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/envmap.c
+ *  \ingroup render
+ */
+
+#include <math.h>
+#include <string.h>
+
+/* external modules: */
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_threads.h"
+#include "BLI_utildefines.h"
+
+#include "BLT_translation.h"
+
+#include "IMB_imbuf_types.h"
+#include "IMB_imbuf.h"        /* for rectcpy */
+
+#include "DNA_group_types.h"
+#include "DNA_image_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+#include "DNA_texture_types.h"
+
+#include "BKE_main.h"
+#include "BKE_image.h"   /* BKE_imbuf_write */
+#include "BKE_texture.h"
+#include "BKE_scene.h"
+
+/* this module */
+#include "render_types.h"
+#include "envmap.h"
+#include "renderdatabase.h"
+#include "renderpipeline.h"
+#include "texture.h"
+#include "zbuf.h"
+#include "render_result.h"
+
+/* ------------------------------------------------------------------------- */
+
+static void envmap_split_ima(EnvMap *env, ImBuf *ibuf)
+{
+	int dx, part;
+	/* Fill env->cube[] from a loaded image: six faces from a 3x2 grid of tiles, or one plane from a square image.
+	 * After taking the lock cube[1] is tested again; if it is set, another thread has already done the split. */
+	BLI_thread_lock(LOCK_IMAGE);
+	if (env->cube[1] == NULL) {
+
+		BKE_texture_envmap_free_data(env);
+		/* dx, half the image height, is the edge length of one cube face */
+		dx = ibuf->y;
+		dx /= 2;
+		if (3 * dx == ibuf->x) {
+			env->type = ENV_CUBE;
+			env->ok = ENV_OSA;
+		}
+		else if (ibuf->x == ibuf->y) {
+			env->type = ENV_PLANE;
+			env->ok = ENV_OSA;
+		}
+		else {
+			printf("Incorrect envmap size\n");
+			env->ok = 0;
+			env->ima->ok = 0;
+		}
+
+		if (env->ok) {
+			if (env->type == ENV_CUBE) {
+				for (part = 0; part < 6; part++) {
+					env->cube[part] = IMB_allocImBuf(dx, dx, 24, IB_rect | IB_rectfloat);
+				}
+				IMB_float_from_rect(ibuf);
+				/* NOTE(review): the arguments after ibuf look like dest x/y, source x/y, width, height -- confirm against IMB_rectcpy */
+				IMB_rectcpy(env->cube[0], ibuf,
+				            0, 0, 0, 0, dx, dx);
+				IMB_rectcpy(env->cube[1], ibuf,
+				            0, 0, dx, 0, dx, dx);
+				IMB_rectcpy(env->cube[2], ibuf,
+				            0, 0, 2 * dx, 0, dx, dx);
+				IMB_rectcpy(env->cube[3], ibuf,
+				            0, 0, 0, dx, dx, dx);
+				IMB_rectcpy(env->cube[4], ibuf,
+				            0, 0, dx, dx, dx, dx);
+				IMB_rectcpy(env->cube[5], ibuf,
+				            0, 0, 2 * dx, dx, dx, dx);
+
+			}
+			else { /* ENV_PLANE */
+				env->cube[1] = IMB_dupImBuf(ibuf);
+				IMB_float_from_rect(env->cube[1]);
+			}
+		}
+	}
+	BLI_thread_unlock(LOCK_IMAGE);
+}
+
+/* ------------------------------------------------------------------------- */
+/* ****************** RENDER ********************** */
+
+/* Build a temporary "Envmap" Render for 'env' that shares (does not duplicate) the render database of 're'. */
+static Render *envmap_render_copy(Render *re, EnvMap *env)
+{
+	Render *envre;
+	float viewscale;
+	int cuberes;
+
+	envre = RE_NewRender("Envmap");
+
+	env->lastsize = re->r.size;
+	cuberes = (env->cuberes * re->r.size) / 100;  /* cube size scaled by r.size / 100 */
+	cuberes &= 0xFFFC;  /* keeps only bits 2..15, so the size is a multiple of 4 */
+
+	/* this flag has R_ZTRA in it for example */
+	envre->flag = re->flag;
+
+	/* set up renderdata */
+	render_copy_renderdata(&envre->r, &re->r);
+	envre->r.mode &= ~(R_BORDER | R_PANORAMA | R_ORTHO | R_MBLUR);
+	BLI_freelistN(&envre->r.layers);
+	BLI_freelistN(&envre->r.views);
+	envre->r.filtertype = 0;
+	envre->r.tilex = envre->r.xsch / 2;
+	envre->r.tiley = envre->r.ysch / 2;
+	envre->r.size = 100;
+	envre->r.yasp = envre->r.xasp = 1;
+
+	RE_InitState(envre, NULL, &envre->r, NULL, cuberes, cuberes, NULL);
+	envre->main = re->main;
+	envre->scene = re->scene;    /* unsure about this... */
+	envre->scene_color_manage = re->scene_color_manage;
+	envre->lay = re->lay;
+
+	/* view stuff in env render */
+	viewscale = (env->type == ENV_PLANE) ? env->viewscale : 1.0f;
+	RE_SetEnvmapCamera(envre, env->object, viewscale, env->clipsta, env->clipend);
+	copy_m4_m4(envre->viewmat_orig, re->viewmat_orig);
+
+	/* callbacks */
+	envre->display_update = re->display_update;
+	envre->duh = re->duh;
+	envre->test_break = re->test_break;
+	envre->tbh = re->tbh;
+	envre->current_scene_update = re->current_scene_update;
+	envre->suh = re->suh;
+
+	/* and for the evil stuff; shallow-copy the database: pointers and list heads stay shared with 're' */
+	envre->totvlak = re->totvlak;
+	envre->totvert = re->totvert;
+	envre->tothalo = re->tothalo;
+	envre->totstrand = re->totstrand;
+	envre->totlamp = re->totlamp;
+	envre->sortedhalos = re->sortedhalos;
+	envre->lights = re->lights;
+	envre->objecttable = re->objecttable;
+	envre->customdata_names = re->customdata_names;
+	envre->raytree = re->raytree;
+	envre->totinstance = re->totinstance;
+	envre->instancetable = re->instancetable;
+	envre->objectinstance = re->objectinstance;
+	envre->qmcsamplers = re->qmcsamplers;
+
+	return envre;
+}
+
+static void envmap_free_render_copy(Render *envre)
+{
+	/* Forget the database borrowed in envmap_render_copy(); presumably so RE_FreeRender() cannot free data the source render still uses -- confirm. */
+	envre->totvlak = 0;
+	envre->totvert = 0;
+	envre->tothalo = 0;
+	envre->totstrand = 0;
+	envre->totlamp = 0;
+	envre->totinstance = 0;
+	envre->sortedhalos = NULL;
+	BLI_listbase_clear(&envre->lights);
+	BLI_listbase_clear(&envre->objecttable);
+	BLI_listbase_clear(&envre->customdata_names);
+	envre->raytree = NULL;
+	BLI_listbase_clear(&envre->instancetable);
+	envre->objectinstance = NULL;
+	envre->qmcsamplers = NULL;
+
+	RE_FreeRender(envre);
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void envmap_transmatrix(float mat[4][4], int part)
+{
+	/* Combine 'mat' with the euler rotation that belongs to cube face 'part'. */
+	float euler[3] = {0.0f, 0.0f, 0.0f}, rot[4][4], src[4][4];
+
+	switch (part) {
+		case 0:   /* neg z: no rotation */
+			break;
+		case 1:   /* pos z */
+			euler[0] = M_PI;
+			break;
+		case 2:   /* pos y */
+			euler[0] = M_PI / 2.0;
+			break;
+		case 3:   /* neg x */
+			euler[0] = M_PI / 2.0;
+			euler[2] = M_PI / 2.0;
+			break;
+		case 4:   /* neg y */
+			euler[0] = M_PI / 2.0;
+			euler[2] = M_PI;
+			break;
+		default:  /* pos x */
+			euler[0] = M_PI / 2.0;
+			euler[2] = -M_PI / 2.0;
+			break;
+	}
+
+	eul_to_mat4(rot, euler);
+	copy_m4_m4(src, mat);
+	mul_m4_m4m4(mat, src, rot);
+}
+/* ------------------------------------------------------------------------- */
+
+static void env_set_imats(Render *re)
+{
+	/* Recompute object->imat for every base of the scene from re->viewmat and the object matrix. */
+	float view_ob[4][4];
+	Base *iter;
+
+	for (iter = re->scene->base.first; iter; iter = iter->next) {
+		Object *ob = iter->object;
+
+		/* imat is the inverse of the matrix built by mul_m4_m4m4(viewmat, obmat) */
+		mul_m4_m4m4(view_ob, re->viewmat, ob->obmat);
+		invert_m4_m4(ob->imat, view_ob);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+
+void env_rotate_scene(Render *re, float mat[4][4], int do_rotate)
+{
+	ObjectRen *obr;
+	ObjectInstanceRen *obi;
+	LampRen *lar = NULL;
+	HaloRen *har = NULL;
+	float imat[3][3], mat_inverse[4][4], smat[4][4], tmat[4][4], cmat[3][3], tmpmat[4][4];
+	int a;
+	/* Apply (do_rotate != 0) or undo (do_rotate == 0) the envmap transform 'mat' on instances, halos and lamps; the body mixes '== 1' and truthiness tests, callers in this file pass 0 or 1. */
+	if (do_rotate == 0) {
+		invert_m4_m4(tmat, mat);
+		copy_m3_m4(imat, tmat);
+
+		copy_m4_m4(mat_inverse, mat);
+	}
+	else {
+		copy_m4_m4(tmat, mat);
+		copy_m3_m4(imat, mat);
+
+		invert_m4_m4(mat_inverse, tmat);
+	}
+
+	for (obi = re->instancetable.first; obi; obi = obi->next) {
+		/* append or set matrix depending on dupli */
+		if (obi->flag & R_DUPLI_TRANSFORMED) {
+			copy_m4_m4(tmpmat, obi->mat);
+			mul_m4_m4m4(obi->mat, tmat, tmpmat);
+		}
+		else if (do_rotate == 1)
+			copy_m4_m4(obi->mat, tmat);
+		else
+			unit_m4(obi->mat);
+
+		copy_m3_m4(cmat, obi->mat);
+		invert_m3_m3(obi->nmat, cmat);
+		transpose_m3(obi->nmat);
+
+		/* indicate the renderer has to use transform matrices */
+		if (do_rotate == 0)
+			obi->flag &= ~R_ENV_TRANSFORMED;
+		else {
+			obi->flag |= R_ENV_TRANSFORMED;
+			copy_m4_m4(obi->imat, mat_inverse);
+		}
+	}
+
+	/* halo coordinates are transformed in place by tmat; halos are reached in blocks of 256 through obr->bloha[] */
+	for (obr = re->objecttable.first; obr; obr = obr->next) {
+		for (a = 0; a < obr->tothalo; a++) {
+			if ((a & 255) == 0) har = obr->bloha[a >> 8];
+			else har++;
+
+			mul_m4_v3(tmat, har->co);
+		}
+
+		/* imat_ren is needed for correct texture coordinates */
+		mul_m4_m4m4(obr->ob->imat_ren, re->viewmat, obr->ob->obmat);
+		invert_m4(obr->ob->imat_ren);
+	}
+
+	for (lar = re->lampren.first; lar; lar = lar->next) {
+		float lamp_imat[4][4];
+
+		/* copy from add_render_lamp */
+		if (do_rotate == 1)
+			mul_m4_m4m4(tmpmat, re->viewmat, lar->lampmat);
+		else
+			mul_m4_m4m4(tmpmat, re->viewmat_orig, lar->lampmat);
+
+		invert_m4_m4(lamp_imat, tmpmat);
+		copy_m3_m4(lar->mat, tmpmat);
+		copy_m3_m4(lar->imat, lamp_imat);
+
+		lar->vec[0]= -tmpmat[2][0];
+		lar->vec[1]= -tmpmat[2][1];
+		lar->vec[2]= -tmpmat[2][2];
+		normalize_v3(lar->vec);
+		lar->co[0]= tmpmat[3][0];
+		lar->co[1]= tmpmat[3][1];
+		lar->co[2]= tmpmat[3][2];
+
+		if (lar->type == LA_AREA) {
+			area_lamp_vectors(lar);
+		}
+		else if (lar->type == LA_SPOT) {
+			normalize_v3(lar->imat[0]);
+			normalize_v3(lar->imat[1]);
+			normalize_v3(lar->imat[2]);
+
+			lar->sh_invcampos[0] = -lar->co[0];
+			lar->sh_invcampos[1] = -lar->co[1];
+			lar->sh_invcampos[2] = -lar->co[2];
+			mul_m3_v3(lar->imat, lar->sh_invcampos);
+			lar->sh_invcampos[2] *= lar->sh_zfac;
+
+			if (lar->shb) {
+				if (do_rotate == 1) {
+					mul_m4_m4m4(smat, lar->shb->viewmat, mat_inverse);
+					mul_m4_m4m4(lar->shb->persmat, lar->shb->winmat, smat);
+				}
+				else mul_m4_m4m4(lar->shb->persmat, lar->shb->winmat, lar->shb->viewmat);
+			}
+		}
+	}
+
+	if (do_rotate) {
+		init_render_world(re);
+		env_set_imats(re);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void env_layerflags(Render *re, unsigned int notlay)
+{
+	/* Hide all faces of objects that are only on excluded ('notlay') layers.
+	 * The complement of notlay is tested, so an object that is also on another
+	 * layer stays visible:
+	 * lay:     0110
+	 * notlay:  0100
+	 * ~notlay: 1011
+	 * (lay & ~notlay) is non-zero here, so these faces are left alone.
+	 */
+	const unsigned int keep_layers = ~notlay;
+	ObjectRen *obr;
+
+	for (obr = re->objecttable.first; obr; obr = obr->next) {
+		VlakRen *face = NULL;
+		int i;
+
+		if (obr->lay & keep_layers)
+			continue;
+
+		for (i = 0; i < obr->totvlak; i++) {
+			/* faces are stored in nodes of 256 entries */
+			face = (i & 255) ? face + 1 : obr->vlaknodes[i >> 8].vlak;
+			face->flag |= R_HIDDEN;
+		}
+	}
+}
+
+static void env_hideobject(Render *re, Object *ob)
+{
+	/* Set R_HIDDEN on every face of the render objects whose 'ob' pointer equals 'ob'. */
+	ObjectRen *obr;
+	VlakRen *vlr = NULL;
+	int a;
+
+	for (obr = re->objecttable.first; obr; obr = obr->next) {
+		if (obr->ob != ob)
+			continue;  /* tested once per object, not once per face; other objects are not walked */
+		for (a = 0; a < obr->totvlak; a++) {
+			vlr = (a & 255) ? vlr + 1 : obr->vlaknodes[a >> 8].vlak;  /* faces come in nodes of 256 */
+			vlr->flag |= R_HIDDEN;
+		}
+	}
+}
+
+static void env_showobjects(Render *re)
+{
+	/* Clear R_HIDDEN on every face of every render object. */
+	ObjectRen *obr;
+
+	for (obr = re->objecttable.first; obr; obr = obr->next) {
+		VlakRen *face = NULL;
+		int i;
+
+		for (i = 0; i < obr->totvlak; i++) {
+			face = (i & 255) ? face + 1 : obr->vlaknodes[i >> 8].vlak;
+			face->flag &= ~R_HIDDEN;
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void render_envmap(Render *re, EnvMap *env)
+{
+	/* Render 'env' into env->cube[]: six faces for a cube, only face 1 for ENV_PLANE (only cube and planar maps are implemented). */
+	Render *envre;
+	ImBuf *ibuf;
+	float orthmat[4][4];
+	float oldviewinv[4][4], mat[4][4], tmat[4][4];
+	short part;
+
+	/* need a recalc: ortho-render has no correct viewinv */
+	invert_m4_m4(oldviewinv, re->viewmat);
+
+	envre = envmap_render_copy(re, env);
+
+	/* precalc orthmat for object */
+	copy_m4_m4(orthmat, env->object->obmat);
+	normalize_m4(orthmat);
+
+	/* need imat later for texture imat */
+	mul_m4_m4m4(mat, re->viewmat, orthmat);
+	invert_m4_m4(tmat, mat);
+	copy_m3_m4(env->obimat, tmat);
+
+	for (part = 0; part < 6; part++) {
+		if (env->type == ENV_PLANE && part != 1)
+			continue;
+
+		re->display_clear(re->dch, envre->result);
+
+		copy_m4_m4(tmat, orthmat);
+		envmap_transmatrix(tmat, part);
+		invert_m4_m4(mat, tmat);
+		/* mat now is the camera 'viewmat' */
+
+		copy_m4_m4(envre->viewmat, mat);
+		copy_m4_m4(envre->viewinv, tmat);
+
+		/* we have to correct for the already rotated vertexcoords */
+		mul_m4_m4m4(tmat, envre->viewmat, oldviewinv);
+		invert_m4_m4(env->imat, tmat);
+
+		env_rotate_scene(envre, tmat, 1);
+		project_renderdata(envre, projectverto, 0, 0, 1);
+		env_layerflags(envre, env->notlay);
+		env_hideobject(envre, env->object);
+
+		if (re->test_break(re->tbh) == 0) {
+			RE_TileProcessor(envre);
+		}
+
+		/* rotate back */
+		env_showobjects(envre);
+		env_rotate_scene(envre, tmat, 0);
+
+		if (re->test_break(re->tbh) == 0) {
+			int y;
+			float *alpha;
+			float *rect;
+
+			if (envre->result->do_exr_tile) {
+				BLI_rw_mutex_lock(&envre->resultmutex, THREAD_LOCK_WRITE);
+				render_result_exr_file_end(envre);
+				BLI_rw_mutex_unlock(&envre->resultmutex);
+			}
+
+			RenderLayer *rl = envre->result->layers.first;
+
+			/* envmap is rendered independently of multiview  */
+			rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, "");
+			ibuf = IMB_allocImBuf(envre->rectx, envre->recty, 24, IB_rect | IB_rectfloat);
+			memcpy(ibuf->rect_float, rect, ibuf->channels * ibuf->x * ibuf->y * sizeof(float));
+			/* NOTE(review): the loop below steps 4 floats per pixel, i.e. it assumes ibuf->channels == 4 -- confirm */
+			/* envmap renders without alpha */
+			alpha = ibuf->rect_float + 3;
+			for (y = ibuf->x * ibuf->y - 1; y >= 0; y--, alpha += 4)
+				*alpha = 1.0;
+
+			env->cube[part] = ibuf;
+		}
+
+		if (re->test_break(re->tbh)) break;
+
+	}
+	/* if interrupted the envmap data is freed again; otherwise mark it valid (OSA or normal) and remember the frame */
+	if (re->test_break(re->tbh)) BKE_texture_envmap_free_data(env);
+	else {
+		if (envre->r.mode & R_OSA) env->ok = ENV_OSA;
+		else env->ok = ENV_NORMAL;
+		env->lastframe = re->scene->r.cfra;
+	}
+
+	/* restore */
+	envmap_free_render_copy(envre);
+	env_set_imats(re);
+
+}
+
+/* ------------------------------------------------------------------------- */
+
+void make_envmaps(Render *re)
+{
+	Tex *tex;
+	bool do_init = false;
+	int depth = 0, trace;
+	/* Render the TEX_ENVMAP textures of re->main that need it, in up to five passes (depth 0..4); does nothing without R_ENVMAP. */
+	if (!(re->r.mode & R_ENVMAP)) return;
+
+	/* we don't raytrace, disabling the flag will cause ray_transp render solid */
+	trace = (re->r.mode & R_RAYTRACE);
+	re->r.mode &= ~R_RAYTRACE;
+
+	re->i.infostr = IFACE_("Creating Environment maps");
+	re->stats_draw(re->sdh, &re->i);
+
+	/* 5 = hardcoded max recursion level */
+	while (depth < 5) {
+		tex = re->main->tex.first;
+		while (tex) {
+			if (tex->id.us && tex->type == TEX_ENVMAP) {
+				if (tex->env && tex->env->object) {
+					EnvMap *env = tex->env;
+
+					if (env->object->lay & re->lay) {
+						if (env->stype == ENV_LOAD) {
+							float orthmat[4][4], mat[4][4], tmat[4][4];
+
+							/* precalc orthmat for object */
+							copy_m4_m4(orthmat, env->object->obmat);
+							normalize_m4(orthmat);
+
+							/* need imat later for texture imat */
+							mul_m4_m4m4(mat, re->viewmat, orthmat);
+							invert_m4_m4(tmat, mat);
+							copy_m3_m4(env->obimat, tmat);
+						}
+						else {
+
+							/* decide if to render an envmap (again) */
+							if (env->depth >= depth) {
+
+								/* set 'recalc' to make sure it does an entire loop of recalcs */
+
+								if (env->ok) {
+									/* free when OSA, and old one isn't OSA */
+									if ((re->r.mode & R_OSA) && env->ok == ENV_NORMAL)
+										BKE_texture_envmap_free_data(env);
+									/* free when size larger */
+									else if (env->lastsize < re->r.size)
+										BKE_texture_envmap_free_data(env);
+									/* free when env is in recalcmode */
+									else if (env->recalc)
+										BKE_texture_envmap_free_data(env);
+								}
+
+								if (env->ok == 0 && depth == 0) env->recalc = 1;
+
+								if (env->ok == 0) {
+									do_init = true;
+									render_envmap(re, env);
+
+									if (depth == env->depth) env->recalc = 0;
+								}
+							}
+						}
+					}
+				}
+			}
+			tex = tex->id.next;
+		}
+		depth++;
+	}
+
+	if (do_init) {
+		re->display_init(re->dih, re->result);
+		re->display_clear(re->dch, re->result);
+		// re->flag |= R_REDRAW_PRV;
+	}
+	/* restore the raytrace flag that was cleared above */
+	re->r.mode |= trace;
+
+}
+
+/* ------------------------------------------------------------------------- */
+
+static int envcube_isect(EnvMap *env, const float vec[3], float answ[2])
+{
+	/* Select the envmap face for direction 'vec' and write the 2D position on
+	 * that face to 'answ', remapped with 0.5 + 0.5 * value. Returns the face
+	 * index, which is always 1 for ENV_PLANE. */
+	const float x = vec[0], y = vec[1], z = vec[2];
+	float scale;
+	int face;
+
+	if (env->type == ENV_PLANE) {
+		scale = 1.0f / z;
+		answ[0] = env->viewscale * scale * x;
+		answ[1] = -env->viewscale * scale * y;
+		face = 1;
+	}
+	else if (z <= -fabsf(x) && z <= -fabsf(y)) {
+		scale = -1.0f / z;
+		answ[0] = scale * x;
+		answ[1] = scale * y;
+		face = 0;
+	}
+	else if (z >= fabsf(x) && z >= fabsf(y)) {
+		scale = 1.0f / z;
+		answ[0] = scale * x;
+		answ[1] = -scale * y;
+		face = 1;
+	}
+	else if (y >= fabsf(x)) {
+		scale = 1.0f / y;
+		answ[0] = scale * x;
+		answ[1] = scale * z;
+		face = 2;
+	}
+	else if (x <= -fabsf(y)) {
+		scale = -1.0f / x;
+		answ[0] = scale * y;
+		answ[1] = scale * z;
+		face = 3;
+	}
+	else if (y <= -fabsf(x)) {
+		scale = -1.0f / y;
+		answ[0] = -scale * x;
+		answ[1] = scale * z;
+		face = 4;
+	}
+	else {
+		scale = 1.0f / x;
+		answ[0] = -scale * y;
+		answ[1] = scale * z;
+		face = 5;
+	}
+
+	answ[0] = 0.5f + 0.5f * answ[0];
+	answ[1] = 0.5f + 0.5f * answ[1];
+	return face;
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void set_dxtdyt(float r_dxt[3], float r_dyt[3], const float dxt[3], const float dyt[3], int face)
+{
+	/* Reduce the 3D texture derivatives 'dxt' and 'dyt' to the two components
+	 * used on cube face 'face' and store them in r_dxt[0..1] and r_dyt[0..1]:
+	 *
+	 *   faces 2 and 4 -> components 0 and 2
+	 *   faces 3 and 5 -> components 1 and 2
+	 *   other faces   -> components 0 and 1
+	 *
+	 * Element 2 of the outputs is not written.
+	 */
+	const bool uses_02 = (face == 2 || face == 4);
+	const bool uses_12 = (face == 3 || face == 5);
+	const int first = uses_12 ? 1 : 0;
+	const int second = (uses_02 || uses_12) ? 2 : 1;
+
+	r_dxt[0] = dxt[first];
+	r_dxt[1] = dxt[second];
+	r_dyt[0] = dyt[first];
+	r_dyt[1] = dyt[second];
+}
+
+/* ------------------------------------------------------------------------- */
+
+int envmaptex(Tex *tex, const float texvec[3], float dxt[3], float dyt[3], int osatex, TexResult *texres, struct ImagePool *pool, const bool skip_load_image)
+{
+	extern Render R;                /* only in this call */
+	/* texvec should be the already reflected normal */
+	EnvMap *env;
+	ImBuf *ibuf;
+	float fac, vec[3], sco[3], dxts[3], dyts[3];
+	int face, face1;
+	/* Sample the envmap of 'tex' in direction 'texvec' into 'texres'; returns 0 (with texres->tin = 0) when no usable envmap exists, otherwise 1. */
+	env = tex->env;
+	if (env == NULL || (env->stype != ENV_LOAD && env->object == NULL)) {
+		texres->tin = 0.0;
+		return 0;
+	}
+
+	if (env->stype == ENV_LOAD) {
+		env->ima = tex->ima;
+		if (env->ima && env->ima->ok) {
+			if (env->cube[1] == NULL) {
+				ImBuf *ibuf_ima = BKE_image_pool_acquire_ibuf(env->ima, NULL, pool);
+				if (ibuf_ima)
+					envmap_split_ima(env, ibuf_ima);
+				else
+					env->ok = 0;
+
+				if (env->type == ENV_PLANE)
+					tex->extend = TEX_EXTEND;
+
+				BKE_image_pool_release_ibuf(env->ima, ibuf_ima, pool);
+			}
+		}
+	}
+
+	if (env->ok == 0) {
+		texres->tin = 0.0;
+		return 0;
+	}
+
+	/* rotate to envmap space, if object is set */
+	copy_v3_v3(vec, texvec);
+	if (env->object) {
+		mul_m3_v3(env->obimat, vec);
+		if (osatex) {  /* note: the caller's dxt/dyt are rotated in place */
+			mul_m3_v3(env->obimat, dxt);
+			mul_m3_v3(env->obimat, dyt);
+		}
+	}
+	else {
+		if (!BKE_scene_use_world_space_shading(R.scene)) {
+			/* texvec is in view space */
+			mul_mat3_m4_v3(R.viewinv, vec);
+			if (osatex) {
+				mul_mat3_m4_v3(R.viewinv, dxt);
+				mul_mat3_m4_v3(R.viewinv, dyt);
+			}
+		}
+	}
+
+	face = envcube_isect(env, vec, sco);
+	ibuf = env->cube[face];
+
+	if (osatex) {
+		set_dxtdyt(dxts, dyts, dxt, dyt, face);
+		imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, texres, pool, skip_load_image);
+
+		/* edges: when the sample is not fully opaque, also sample the faces hit by vec + dxt and vec + dyt and blend by alpha */
+
+		if (texres->ta < 1.0f) {
+			TexResult texr1, texr2;
+
+			texr1.nor = texr2.nor = NULL;
+			texr1.talpha = texr2.talpha = texres->talpha; /* boxclip expects this initialized */
+
+			add_v3_v3(vec, dxt);
+			face1 = envcube_isect(env, vec, sco);
+			sub_v3_v3(vec, dxt);
+
+			if (face != face1) {
+				ibuf = env->cube[face1];
+				set_dxtdyt(dxts, dyts, dxt, dyt, face1);
+				imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, &texr1, pool, skip_load_image);
+			}
+			else texr1.tr = texr1.tg = texr1.tb = texr1.ta = 0.0;
+
+			/* here was the nasty bug! results were not zero-ed. FPE! */
+
+			add_v3_v3(vec, dyt);
+			face1 = envcube_isect(env, vec, sco);
+			sub_v3_v3(vec, dyt);
+
+			if (face != face1) {
+				ibuf = env->cube[face1];
+				set_dxtdyt(dxts, dyts, dxt, dyt, face1);
+				imagewraposa(tex, NULL, ibuf, sco, dxts, dyts, &texr2, pool, skip_load_image);
+			}
+			else texr2.tr = texr2.tg = texr2.tb = texr2.ta = 0.0;
+
+			fac = (texres->ta + texr1.ta + texr2.ta);
+			if (fac != 0.0f) {
+				fac = 1.0f / fac;
+
+				texres->tr = fac * (texres->ta * texres->tr + texr1.ta * texr1.tr + texr2.ta * texr2.tr);
+				texres->tg = fac * (texres->ta * texres->tg + texr1.ta * texr1.tg + texr2.ta * texr2.tg);
+				texres->tb = fac * (texres->ta * texres->tb + texr1.ta * texr1.tb + texr2.ta * texr2.tb);
+			}
+			texres->ta = 1.0;
+		}
+	}
+	else {
+		imagewrap(tex, NULL, ibuf, sco, texres, pool, skip_load_image);
+	}
+
+	return 1;
+}
diff --git a/source/blender/render/intern/source/external_engine.c b/source/blender/render/intern/source/external_engine.c
index b541c993bc7..10ed91b53c4 100644
--- a/source/blender/render/intern/source/external_engine.c
+++ b/source/blender/render/intern/source/external_engine.c
@@ -112,11 +112,11 @@ void RE_engines_register(RenderEngineType *render_type)
 RenderEngineType *RE_engines_find(const char *idname)
 {
 	RenderEngineType *type;
-	
+
 	type = BLI_findstring(&R_engines, idname, offsetof(RenderEngineType, idname));
 	if (!type)
 		type = BLI_findstring(&R_engines, "BLENDER_EEVEE", offsetof(RenderEngineType, idname));
-	
+
 	return type;
 }
 
@@ -320,7 +320,7 @@ int RE_engine_test_break(RenderEngine *engine)
 
 	if (re)
 		return re->test_break(re->tbh);
-	
+
 	return 0;
 }
 
@@ -776,7 +776,7 @@ int RE_engine_render(Render *re, int do_all)
 
 	if (BKE_reports_contain(re->reports, RPT_ERROR))
 		G.is_break = true;
-	
+
 #ifdef WITH_FREESTYLE
 	if (re->r.mode & R_EDGE_FRS)
 		RE_RenderFreestyleExternal(re);
diff --git a/source/blender/render/intern/source/imagetexture.c b/source/blender/render/intern/source/imagetexture.c
index b9d55916f51..1e9ad79e599 100644
--- a/source/blender/render/intern/source/imagetexture.c
+++ b/source/blender/render/intern/source/imagetexture.c
@@ -32,7 +32,7 @@
 #include <fcntl.h>
 #include <math.h>
 #include <float.h>
-#ifndef WIN32 
+#ifndef WIN32
 #include <unistd.h>
 #else
 #include <io.h>
@@ -67,7 +67,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max
 static void ibuf_get_color(float col[4], struct ImBuf *ibuf, int x, int y)
 {
 	int ofs = y * ibuf->x + x;
-	
+
 	if (ibuf->rect_float) {
 		if (ibuf->channels==4) {
 			const float *fp= ibuf->rect_float + 4*ofs;
@@ -105,15 +105,15 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 	int xi, yi; /* original values */
 
 	texres->tin= texres->ta= texres->tr= texres->tg= texres->tb= 0.0f;
-	
+
 	/* we need to set retval OK, otherwise texture code generates normals itself... */
 	retval= texres->nor ? 3 : 1;
-	
+
 	/* quick tests */
 	if (ibuf==NULL && ima==NULL)
 		return retval;
 	if (ima) {
-		
+
 		/* hack for icon render */
 		if (skip_load_image && !BKE_image_has_loaded_ibuf(ima))
 			return retval;
@@ -127,7 +127,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 			BKE_image_pool_release_ibuf(ima, ibuf, pool);
 		return retval;
 	}
-	
+
 	/* setup mapping */
 	if (tex->imaflag & TEX_IMAROT) {
 		fy= texvec[0];
@@ -137,10 +137,10 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 		fx= texvec[0];
 		fy= texvec[1];
 	}
-	
+
 	if (tex->extend == TEX_CHECKER) {
 		int xs, ys;
-		
+
 		xs= (int)floor(fx);
 		ys= (int)floor(fy);
 		fx-= xs;
@@ -205,7 +205,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 			if (y<0) y+= ibuf->y;
 		}
 	}
-	
+
 	/* keep this before interpolation [#29761] */
 	if (ima) {
 		if ((tex->imaflag & TEX_USEALPHA) && (ima->flag & IMA_IGNORE_ALPHA) == 0) {
@@ -232,7 +232,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 	else { /* no filtering */
 		ibuf_get_color(&texres->tr, ibuf, x, y);
 	}
-	
+
 	if (texres->nor) {
 		if (tex->imaflag & TEX_NORMALMAP) {
 			/* qdn: normal from color
@@ -283,7 +283,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 	else {
 		texres->ta = texres->tin = 1.0;
 	}
-	
+
 	if (tex->flag & TEX_NEGALPHA) {
 		texres->ta = 1.0f - texres->ta;
 	}
@@ -301,7 +301,7 @@ int imagewrap(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], TexResul
 		BKE_image_pool_release_ibuf(ima, ibuf, pool);
 
 	BRICONTRGB;
-	
+
 	return retval;
 }
 
@@ -327,9 +327,9 @@ static void clipx_rctf_swap(rctf *stack, short *count, float x1, float x2)
 				newrct->xmin = rf->xmin+(x2-x1);
 				newrct->ymin = rf->ymin;
 				newrct->ymax = rf->ymax;
-				
+
 				if (newrct->xmin ==newrct->xmax) (*count)--;
-				
+
 				rf->xmin = x1;
 			}
 		}
@@ -489,7 +489,7 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres)
 	else {
 		div= texres->tr= texres->tg= texres->tb= texres->ta= 0.0;
 		for (y=starty; y<=endy; y++) {
-			
+
 			muly= 1.0;
 
 			if (starty==endy) {
@@ -499,10 +499,10 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres)
 				if (y==starty) muly= 1.0f-(rf->ymin - y);
 				if (y==endy) muly= (rf->ymax - y);
 			}
-			
+
 			if (startx==endx) {
 				mulx= muly;
-				
+
 				ibuf_get_color(col, ibuf, startx, y);
 
 				texres->ta+= mulx*col[3];
@@ -518,7 +518,7 @@ static void boxsampleclip(struct ImBuf *ibuf, rctf *rf, TexResult *texres)
 					if (x==endx) mulx*= (rf->xmax - x);
 
 					ibuf_get_color(col, ibuf, x, y);
-					
+
 					if (mulx==1.0f) {
 						texres->ta+= col[3];
 						texres->tr+= col[0];
@@ -573,7 +573,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max
 	rf->ymax = maxy*(ibuf->y);
 
 	texr.talpha= texres->talpha;	/* is read by boxsample_clip */
-	
+
 	if (imapextend) {
 		CLAMP(rf->xmin, 0.0f, ibuf->x-1);
 		CLAMP(rf->xmax, 0.0f, ibuf->x-1);
@@ -608,7 +608,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max
 		tot= texres->tr= texres->tb= texres->tg= texres->ta= 0.0;
 		while (count--) {
 			boxsampleclip(ibuf, rf, &texr);
-			
+
 			opp= square_rctf(rf);
 			tot+= opp;
 
@@ -629,7 +629,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max
 		boxsampleclip(ibuf, rf, texres);
 
 	if (texres->talpha==0) texres->ta= 1.0;
-	
+
 	if (alphaclip!=1.0f) {
 		/* premul it all */
 		texres->tr*= alphaclip;
@@ -637,7 +637,7 @@ static void boxsample(ImBuf *ibuf, float minx, float miny, float maxx, float max
 		texres->tb*= alphaclip;
 		texres->ta*= alphaclip;
 	}
-}	
+}
 
 /*-----------------------------------------------------------------------------------------------------------------
  * from here, some functions only used for the new filtering */
@@ -874,7 +874,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf)
 {
 	if (tex->imaflag & TEX_MIPMAP) {
 		if ((ibuf->flags & IB_fields) == 0) {
-			
+
 			if (ibuf->mipmap[0] && (ibuf->userflags & IB_MIPMAP_INVALID)) {
 				BLI_thread_lock(LOCK_IMAGE);
 				if (ibuf->userflags & IB_MIPMAP_INVALID) {
@@ -885,7 +885,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf)
 			}
 			if (ibuf->mipmap[0] == NULL) {
 				BLI_thread_lock(LOCK_IMAGE);
-				if (ibuf->mipmap[0] == NULL) 
+				if (ibuf->mipmap[0] == NULL)
 					IMB_makemipmap(ibuf, tex->imaflag & TEX_GAUSS_MIP);
 				BLI_thread_unlock(LOCK_IMAGE);
 			}
@@ -895,7 +895,7 @@ static void image_mipmap_test(Tex *tex, ImBuf *ibuf)
 			}
 		}
 	}
-	
+
 }
 
 static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], float dxt[2], float dyt[2], TexResult *texres, struct ImagePool *pool, const bool skip_load_image)
@@ -946,7 +946,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 
 	/* mipmap test */
 	image_mipmap_test(tex, ibuf);
-	
+
 	if (ima) {
 		if ((tex->imaflag & TEX_USEALPHA) && (ima->flag & IMA_IGNORE_ALPHA) == 0) {
 			if ((tex->imaflag & TEX_CALCALPHA) == 0) {
@@ -1281,7 +1281,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 	else
 		texres->tin = texres->ta;
 	if (tex->flag & TEX_NEGALPHA) texres->ta = 1.f - texres->ta;
-	
+
 	if (texres->nor && (tex->imaflag & TEX_NORMALMAP)) {	/* normal from color */
 		/* The invert of the red channel is to make
 		 * the normal map compliant with the outside world.
@@ -1312,7 +1312,7 @@ static int imagewraposa_aniso(Tex *tex, Image *ima, ImBuf *ibuf, const float tex
 		BKE_image_pool_release_ibuf(ima, ibuf, pool);
 
 	BRICONTRGB;
-	
+
 	return retval;
 }
 
@@ -1334,10 +1334,10 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 		return imagewraposa_aniso(tex, ima, ibuf, texvec, dxt, dyt, texres, pool, skip_load_image);
 
 	texres->tin= texres->ta= texres->tr= texres->tg= texres->tb= 0.0f;
-	
+
 	/* we need to set retval OK, otherwise texture code generates normals itself... */
 	retval = texres->nor ? 3 : 1;
-	
+
 	/* quick tests */
 	if (ibuf==NULL && ima==NULL)
 		return retval;
@@ -1346,7 +1346,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 		/* hack for icon render */
 		if (skip_load_image && !BKE_image_has_loaded_ibuf(ima))
 			return retval;
-		
+
 		ibuf = BKE_image_pool_acquire_ibuf(ima, &tex->iuser, pool);
 
 		ima->flag|= IMA_USED_FOR_RENDER;
@@ -1356,7 +1356,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			BKE_image_pool_release_ibuf(ima, ibuf, pool);
 		return retval;
 	}
-	
+
 	/* mipmap test */
 	image_mipmap_test(tex, ibuf);
 
@@ -1367,9 +1367,9 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			}
 		}
 	}
-	
+
 	texr.talpha= texres->talpha;
-	
+
 	if (tex->imaflag & TEX_IMAROT) {
 		fy= texvec[0];
 		fx= texvec[1];
@@ -1378,7 +1378,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 		fx= texvec[0];
 		fy= texvec[1];
 	}
-	
+
 	/* pixel coordinates */
 
 	minx = min_fff(dxt[0], dyt[0], dxt[0] + dyt[0]);
@@ -1389,7 +1389,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 	/* tex_sharper has been removed */
 	minx= (maxx-minx)/2.0f;
 	miny= (maxy-miny)/2.0f;
-	
+
 	if (tex->imaflag & TEX_FILTER_MIN) {
 		/* make sure the filtersize is minimal in pixels (normal, ref map can have miniature pixel dx/dy) */
 		float addval= (0.5f * tex->filtersize) / (float) MIN2(ibuf->x, ibuf->y);
@@ -1402,7 +1402,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 	else if (tex->filtersize!=1.0f) {
 		minx*= tex->filtersize;
 		miny*= tex->filtersize;
-		
+
 		dxt[0]*= tex->filtersize;
 		dxt[1]*= tex->filtersize;
 		dyt[0]*= tex->filtersize;
@@ -1410,13 +1410,13 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 	}
 
 	if (tex->imaflag & TEX_IMAROT) SWAP(float, minx, miny);
-	
+
 	if (minx>0.25f) minx= 0.25f;
 	else if (minx<0.00001f) minx= 0.00001f;	/* side faces of unit-cube */
 	if (miny>0.25f) miny= 0.25f;
 	else if (miny<0.00001f) miny= 0.00001f;
 
-	
+
 	/* repeat and clip */
 	imaprepeat= (tex->extend==TEX_REPEAT);
 	imapextend= (tex->extend==TEX_EXTEND);
@@ -1430,10 +1430,10 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 
 	if (tex->extend == TEX_CHECKER) {
 		int xs, ys, xs1, ys1, xs2, ys2, boundary;
-		
+
 		xs= (int)floor(fx);
 		ys= (int)floor(fy);
-		
+
 		/* both checkers available, no boundary exceptions, checkerdist will eat aliasing */
 		if ( (tex->flag & TEX_CHECKER_ODD) && (tex->flag & TEX_CHECKER_EVEN) ) {
 			fx-= xs;
@@ -1447,7 +1447,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			return retval;
 		}
 		else {
-			
+
 			xs1= (int)floor(fx-minx);
 			ys1= (int)floor(fy-miny);
 			xs2= (int)floor(fx+minx);
@@ -1479,14 +1479,14 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 				if (tex->flag & TEX_CHECKER_ODD) {
 					if ((xs1+ys) & 1) fx-= xs2;
 					else fx-= xs1;
-					
+
 					if ((ys1+xs) & 1) fy-= ys2;
 					else fy-= ys1;
 				}
 				if (tex->flag & TEX_CHECKER_EVEN) {
 					if ((xs1+ys) & 1) fx-= xs1;
 					else fx-= xs2;
-					
+
 					if ((ys1+xs) & 1) fy-= ys1;
 					else fy-= ys2;
 				}
@@ -1525,7 +1525,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			if (fx>1.0f) fx -= (int)(fx);
 			else if (fx<0.0f) fx+= 1-(int)(fx);
 		}
-		
+
 		if (imapextend) {
 			if (fy>1.0f) fy = 1.0f;
 			else if (fy<0.0f) fy= 0.0f;
@@ -1540,18 +1540,18 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 	if (tex->imaflag & TEX_MIPMAP) {
 		ImBuf *previbuf, *curibuf;
 		float bumpscale;
-		
+
 		dx = minx;
 		dy = miny;
 		maxd = max_ff(dx, dy);
 		if (maxd > 0.5f) maxd = 0.5f;
 
 		pixsize = 1.0f / (float) MIN2(ibuf->x, ibuf->y);
-		
+
 		bumpscale= pixsize/maxd;
 		if (bumpscale>1.0f) bumpscale= 1.0f;
 		else bumpscale*=bumpscale;
-		
+
 		curmap= 0;
 		previbuf= curibuf= ibuf;
 		while (curmap < IMB_MIPMAP_LEVELS && ibuf->mipmap[curmap]) {
@@ -1567,12 +1567,12 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			if (minx < 0.5f / ibuf->x) minx = 0.5f / ibuf->x;
 			if (miny < 0.5f / ibuf->y) miny = 0.5f / ibuf->y;
 		}
-		
+
 		if (texres->nor && (tex->imaflag & TEX_NORMALMAP)==0) {
 			/* a bit extra filter */
 			//minx*= 1.35f;
 			//miny*= 1.35f;
-			
+
 			boxsample(curibuf, fx-minx, fy-miny, fx+minx, fy+miny, texres, imaprepeat, imapextend);
 			val1= texres->tr+texres->tg+texres->tb;
 			boxsample(curibuf, fx-minx+dxt[0], fy-miny+dxt[1], fx+minx+dxt[0], fy+miny+dxt[1], &texr, imaprepeat, imapextend);
@@ -1583,11 +1583,11 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 			/* don't switch x or y! */
 			texres->nor[0]= (val1-val2);
 			texres->nor[1]= (val1-val3);
-			
+
 			if (previbuf!=curibuf) {  /* interpolate */
-				
+
 				boxsample(previbuf, fx-minx, fy-miny, fx+minx, fy+miny, &texr, imaprepeat, imapextend);
-				
+
 				/* calc rgb */
 				dx= 2.0f*(pixsize-maxd)/pixsize;
 				if (dx>=1.0f) {
@@ -1601,16 +1601,16 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 					texres->tr= dy*texres->tr+ dx*texr.tr;
 					texres->ta= dy*texres->ta+ dx*texr.ta;
 				}
-				
+
 				val1= dy*val1+ dx*(texr.tr + texr.tg + texr.tb);
 				boxsample(previbuf, fx-minx+dxt[0], fy-miny+dxt[1], fx+minx+dxt[0], fy+miny+dxt[1], &texr, imaprepeat, imapextend);
 				val2= dy*val2+ dx*(texr.tr + texr.tg + texr.tb);
 				boxsample(previbuf, fx-minx+dyt[0], fy-miny+dyt[1], fx+minx+dyt[0], fy+miny+dyt[1], &texr, imaprepeat, imapextend);
 				val3= dy*val3+ dx*(texr.tr + texr.tg + texr.tb);
-				
+
 				texres->nor[0]= (val1-val2);	/* vals have been interpolated above! */
 				texres->nor[1]= (val1-val3);
-				
+
 				if (dx<1.0f) {
 					dy= 1.0f-dx;
 					texres->tb= dy*texres->tb+ dx*texr.tb;
@@ -1632,9 +1632,9 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 
 			if (previbuf!=curibuf) {  /* interpolate */
 				boxsample(previbuf, minx, miny, maxx, maxy, &texr, imaprepeat, imapextend);
-				
+
 				fx= 2.0f*(pixsize-maxd)/pixsize;
-				
+
 				if (fx>=1.0f) {
 					texres->ta= texr.ta; texres->tb= texr.tb;
 					texres->tg= texr.tg; texres->tr= texr.tr;
@@ -1672,7 +1672,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 		else
 			boxsample(ibuf, fx-minx, fy-miny, fx+minx, fy+miny, texres, imaprepeat, imapextend);
 	}
-	
+
 	if (tex->imaflag & TEX_CALCALPHA) {
 		texres->ta = texres->tin = texres->ta * max_fff(texres->tr, texres->tg, texres->tb);
 	}
@@ -1681,7 +1681,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 	}
 
 	if (tex->flag & TEX_NEGALPHA) texres->ta= 1.0f-texres->ta;
-	
+
 	if (texres->nor && (tex->imaflag & TEX_NORMALMAP)) {
 		/* qdn: normal from color
 		 * The invert of the red channel is to make
@@ -1705,7 +1705,7 @@ int imagewraposa(Tex *tex, Image *ima, ImBuf *ibuf, const float texvec[3], const
 		BKE_image_pool_release_ibuf(ima, ibuf, pool);
 
 	BRICONTRGB;
-	
+
 	return retval;
 }
 
@@ -1713,16 +1713,16 @@ void image_sample(Image *ima, float fx, float fy, float dx, float dy, float resu
 {
 	TexResult texres;
 	ImBuf *ibuf = BKE_image_pool_acquire_ibuf(ima, NULL, pool);
-	
+
 	if (UNLIKELY(ibuf == NULL)) {
 		zero_v4(result);
 		return;
 	}
-	
+
 	texres.talpha = true; /* boxsample expects to be initialized */
 	boxsample(ibuf, fx, fy, fx + dx, fy + dy, &texres, 0, 1);
 	copy_v4_v4(result, &texres.tr);
-	
+
 	ima->flag|= IMA_USED_FOR_RENDER;
 
 	BKE_image_pool_release_ibuf(ima, ibuf, pool);
@@ -1737,11 +1737,11 @@ void ibuf_sample(ImBuf *ibuf, float fx, float fy, float dx, float dy, float resu
 	AFD.dyt[0] = dy; AFD.dyt[1] = dy;
 	//copy_v2_v2(AFD.dxt, dx);
 	//copy_v2_v2(AFD.dyt, dy);
-	
+
 	AFD.intpol = 1;
 	AFD.extflag = TXC_EXTD;
 
 	ewa_eval(&texres, ibuf, fx, fy, &AFD);
-	
+
 	copy_v4_v4(result, &texres.tr);
 }
diff --git a/source/blender/render/intern/source/initrender.c b/source/blender/render/intern/source/initrender.c
index 4274d641674..9611a8a7452 100644
--- a/source/blender/render/intern/source/initrender.c
+++ b/source/blender/render/intern/source/initrender.c
@@ -66,9 +66,9 @@ static float filt_quadratic(float x)
 static float filt_cubic(float x)
 {
 	float x2 = x * x;
-	
+
 	if (x <  0.0f) x = -x;
-	
+
 	if (x < 1.0f) return 0.5f * x * x2 - x2 + 2.0f / 3.0f;
 	if (x < 2.0f) return (2.0f - x) * (2.0f - x) * (2.0f - x) / 6.0f;
 	return 0.0f;
@@ -78,7 +78,7 @@ static float filt_cubic(float x)
 static float filt_catrom(float x)
 {
 	float x2 = x * x;
-	
+
 	if (x <  0.0f) x = -x;
 	if (x < 1.0f) return  1.5f * x2 * x - 2.5f * x2 + 1.0f;
 	if (x < 2.0f) return -0.5f * x2 * x + 2.5f * x2 - 4.0f * x + 2.0f;
@@ -108,34 +108,34 @@ static float filt_mitchell(float x) /* Mitchell & Netravali's two-param cubic */
 float RE_filter_value(int type, float x)
 {
 	float gaussfac = 1.6f;
-	
+
 	x = ABS(x);
-	
+
 	switch (type) {
 		case R_FILTER_BOX:
 			if (x > 1.0f) return 0.0f;
 			return 1.0f;
-			
+
 		case R_FILTER_TENT:
 			if (x > 1.0f) return 0.0f;
 			return 1.0f - x;
-			
+
 		case R_FILTER_GAUSS:
 		{
 			const float two_gaussfac2 = 2.0f * gaussfac * gaussfac;
 			x *= 3.0f * gaussfac;
 			return 1.0f / sqrtf((float)M_PI * two_gaussfac2) * expf(-x*x / two_gaussfac2);
 		}
-			
+
 		case R_FILTER_MITCH:
 			return filt_mitchell(x * gaussfac);
-			
+
 		case R_FILTER_QUAD:
 			return filt_quadratic(x * gaussfac);
-			
+
 		case R_FILTER_CUBIC:
 			return filt_cubic(x * gaussfac);
-			
+
 		case R_FILTER_CATROM:
 			return filt_catrom(x * gaussfac);
 	}
@@ -221,20 +221,20 @@ void RE_parts_init(Render *re)
 {
 	int nr, xd, yd, partx, party, xparts, yparts;
 	int xminb, xmaxb, yminb, ymaxb;
-	
+
 	RE_parts_free(re);
-	
+
 	/* this is render info for caller, is not reset when parts are freed! */
 	re->i.totpart = 0;
 	re->i.curpart = 0;
 	re->i.partsdone = 0;
-	
+
 	/* just for readable code.. */
 	xminb = re->disprect.xmin;
 	yminb = re->disprect.ymin;
 	xmaxb = re->disprect.xmax;
 	ymaxb = re->disprect.ymax;
-	
+
 	RE_parts_clamp(re);
 
 	partx = re->partx;
@@ -242,17 +242,17 @@ void RE_parts_init(Render *re)
 	/* part count */
 	xparts = (re->rectx + partx - 1) / partx;
 	yparts = (re->recty + party - 1) / party;
-	
+
 	for (nr = 0; nr < xparts * yparts; nr++) {
 		rcti disprect;
 		int rectx, recty;
-		
+
 		xd = (nr % xparts);
 		yd = (nr - xd) / xparts;
-		
+
 		disprect.xmin = xminb + xd * partx;
 		disprect.ymin = yminb + yd * party;
-		
+
 		/* ensure we cover the entire picture, so last parts go to end */
 		if (xd < xparts - 1) {
 			disprect.xmax = disprect.xmin + partx;
@@ -260,21 +260,21 @@ void RE_parts_init(Render *re)
 				disprect.xmax = xmaxb;
 		}
 		else disprect.xmax = xmaxb;
-		
+
 		if (yd < yparts - 1) {
 			disprect.ymax = disprect.ymin + party;
 			if (disprect.ymax > ymaxb)
 				disprect.ymax = ymaxb;
 		}
 		else disprect.ymax = ymaxb;
-		
+
 		rectx = BLI_rcti_size_x(&disprect);
 		recty = BLI_rcti_size_y(&disprect);
-		
+
 		/* so, now can we add this part? */
 		if (rectx > 0 && recty > 0) {
 			RenderPart *pa = MEM_callocN(sizeof(RenderPart), "new part");
-			
+
 			pa->disprect = disprect;
 			pa->rectx = rectx;
 			pa->recty = recty;
diff --git a/source/blender/render/intern/source/occlusion.c b/source/blender/render/intern/source/occlusion.c
new file mode 100644
index 00000000000..8aa90a390b3
--- /dev/null
+++ b/source/blender/render/intern/source/occlusion.c
@@ -0,0 +1,1533 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2008 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/occlusion.c
+ *  \ingroup render
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_material_types.h"
+
+#include "BLI_math.h"
+#include "BLI_memarena.h"
+#include "BLI_threads.h"
+#include "BLI_utildefines.h"
+
+#include "BLT_translation.h"
+
+#include "BKE_node.h"
+#include "BKE_scene.h"
+
+
+#include "RE_shader_ext.h"
+
+/* local includes */
+#include "occlusion.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "shading.h"
+
+/* ------------------------- Declarations --------------------------- */
+
+#define INVPI ((float)M_1_PI) /* 1/pi as a float */
+#define TOTCHILD 8 /* number of child slots in every OccNode */
+#define CACHE_STEP 3
+
+typedef struct OcclusionCacheSample { /* element type of OcclusionCache.sample */
+	float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2; /* NOTE(review): presumably position, normal and cached results - confirm against the cache code */
+	int x, y, filled;
+} OcclusionCacheSample;
+
+typedef struct OcclusionCache { /* occ_tree_build() allocates one of these per render thread */
+	OcclusionCacheSample *sample;
+	int x, y, w, h, step;
+} OcclusionCache;
+
+typedef struct OccFace { /* identifies one render face used by the occlusion tree */
+	int obi;    /* index into R.objectinstance */
+	int facenr; /* face number passed to RE_findOrAddVlak() on that instance's ObjectRen */
+} OccFace;
+
+typedef struct OccNode { /* tree node with up to TOTCHILD children, each a face index or a sub-node */
+	float co[3], area;         /* area-weighted center and summed area of the children */
+	float sh[9], dco;          /* spherical harmonics coefficients; max squared distance from co to a descendant center */
+	float occlusion, rad[3];   /* area-weighted averages, filled by occ_sum_occlusion() */
+	int childflag;             /* bit b set: child[b].face is valid, otherwise child[b].node (may be NULL) */
+	union {
+		//OccFace face;
+		int face;              /* index into OcclusionTree.face */
+		struct OccNode *node;
+	} child[TOTCHILD];
+} OccNode;
+
+typedef struct OcclusionTree {
+	MemArena *arena;    /* allocator for all OccNodes, set up to return zeroed memory */
+
+	float (*co)[3];     /* temporary during build */
+
+	OccFace *face;      /* instance and face indices */
+	float *occlusion;   /* occlusion for faces */
+	float (*rad)[3];    /* radiance for faces */
+
+	OccNode *root;
+
+	OccNode **stack[BLENDER_MAX_THREADS]; /* per-thread node pointer stack used by occ_lookup() */
+	int maxdepth;       /* deepest node level created during build, used to size the stacks */
+
+	int totface;        /* number of entries in face, occlusion and rad */
+
+	float error;
+	float distfac;      /* distance falloff factor, 0 disables the falloff */
+
+	int dothreadedbuild; /* nonzero when totbuildthread > 1 */
+	int totbuildthread;
+	int doindirect;     /* nonzero when indirect radiance (rad) is computed */
+
+	OcclusionCache *cache;
+
+	int num_threads;    /* number of stacks allocated in occ_tree_build() */
+} OcclusionTree;
+
+typedef struct OcclusionThread { /* NOTE(review): not referenced in this part of the file - presumably per-thread job data, confirm */
+	Render *re;
+	StrandSurface *mesh;
+	float (*faceao)[3];
+	float (*faceenv)[3];
+	float (*faceindirect)[3];
+	int begin, end;
+	int thread;
+} OcclusionThread;
+
+typedef struct OcclusionBuildThread { /* arguments exec_occ_build() forwards to occ_build_recursive() */
+	OcclusionTree *tree;
+	int begin, end, depth; /* face range [begin, end) and tree depth of 'node' */
+	OccNode *node;         /* subtree root to build */
+} OcclusionBuildThread;
+
+/* ------------------------- Shading --------------------------- */
+
+extern Render R; /* meh */
+
+static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
+{
+	ShadeInput *shi = ssamp->shi;
+	ShadeResult *shr = ssamp->shr;
+	float l, u, v, *v1, *v2, *v3;
+
+	/* Shade face 'vlr' of instance 'obi' at a single sample point and copy the combined result into 'rad'. */
+	if (vlr->v4) {
+		shi->u = u = 0.5f; /* NOTE(review): makes l = 0 below, so co is the v1-v2 midpoint, not the quad center */
+		shi->v = v = 0.5f;
+	}
+	else {
+		shi->u = u = 1.0f / 3.0f;
+		shi->v = v = 1.0f / 3.0f;
+	}
+
+	/* setup render coordinates */
+	v1 = vlr->v1->co;
+	v2 = vlr->v2->co;
+	v3 = vlr->v3->co;
+
+	/* renderco: blend of v1, v2, v3 with weights u, v, l */
+	l = 1.0f - u - v;
+
+	shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0];
+	shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1];
+	shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2];
+
+	shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
+
+	/* set up view vector: the normalized sample position */
+	copy_v3_v3(shi->view, shi->co);
+	normalize_v3(shi->view);
+
+	/* cache for shadow */
+	shi->samplenr++;
+
+	shi->xs = 0; /* TODO */
+	shi->ys = 0;
+
+	shade_input_set_normals(shi);
+
+	/* no normal flip: if the normals are flagged as flipped, flip them once more */
+	if (shi->flippednor)
+		shade_input_flip_normals(shi);
+
+	madd_v3_v3fl(shi->co, shi->facenor, -0.0001f); /* ugly.. offsets co by -0.0001 along facenor */
+
+	/* not a pretty solution, but fixes common cases */
+	if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
+		negate_v3(shi->vn);
+		negate_v3(shi->vno);
+		negate_v3(shi->nmapnorm);
+	}
+
+	/* init material vars */
+	shade_input_init_material(shi);
+
+	/* render */
+	shade_input_set_shade_texco(shi);
+
+	if (shi->mat->nodetree && shi->mat->use_nodes) {
+		ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
+		shi->mat = vlr->mat;  /* shi->mat is being set in nodetree */
+	}
+	else {
+		shade_material_loop(shi, shr);
+	}
+
+	copy_v3_v3(rad, shr->combined); /* output: RGB of the combined shading result */
+}
+
+static void occ_build_shade(Render *re, OcclusionTree *tree)
+{
+	ShadeSample ssamp;
+	ObjectInstanceRen *obi;
+	VlakRen *vlr;
+	int a;
+	/* Shade every face of the tree once and store the result in tree->rad[]. */
+	R = *re; /* copy into the global R, which is read below and by occ_face() */
+
+	/* setup shade sample with correct passes: diffuse + RGBA, combined without specular */
+	memset(&ssamp, 0, sizeof(ShadeSample));
+	ssamp.shi[0].lay = re->lay;
+	ssamp.shi[0].passflag = SCE_PASS_DIFFUSE | SCE_PASS_RGBA;
+	ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC);
+	ssamp.tot = 1;
+
+	for (a = 0; a < tree->totface; a++) {
+		obi = &R.objectinstance[tree->face[a].obi];
+		vlr = RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
+
+		occ_shade(&ssamp, obi, vlr, tree->rad[a]);
+
+		if (re->test_break(re->tbh)) /* stop early when the break callback reports true; later faces keep their previous rad */
+			break;
+	}
+}
+
+/* ------------------------- Spherical Harmonics --------------------------- */
+
+/* Use 2nd order SH => 9 coefficients, stored in this order:
+ * 0 = (0,0),
+ * 1 = (1,-1), 2 = (1,0), 3 = (1,1),
+ * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
+
+static void sh_copy(float *shresult, float *sh)
+{
+	memcpy(shresult, sh, 9 * sizeof(*sh)); /* all nine 2nd order SH coefficients */
+}
+
+static void sh_mul(float *sh, float f)
+{
+	float *coef = sh, *coef_end = sh + 9;
+
+	while (coef != coef_end) /* scale each of the nine coefficients in place */
+		*coef++ *= f;
+}
+
+static void sh_add(float *shresult, float *sh1, float *sh2)
+{
+	int k;
+
+	for (k = 0; k != 9; ++k) /* element-wise sum; shresult may be the same array as sh1 or sh2 */
+		shresult[k] = sh1[k] + sh2[k];
+}
+
+static void sh_from_disc(float *n, float area, float *shresult)
+{
+	/* Write nine SH coefficients for direction n, each multiplied by area, to shresult. See formula (3) in:
+	 * "An Efficient Representation for Irradiance Environment Maps" */
+	float sh[9], x, y, z;
+
+	x = n[0];
+	y = n[1];
+	z = n[2];
+
+	sh[0] = 0.282095f; /* (0,0) */
+
+	sh[1] = 0.488603f * y; /* (1,-1) */
+	sh[2] = 0.488603f * z; /* (1,0) */
+	sh[3] = 0.488603f * x; /* (1,1) */
+
+	sh[4] = 1.092548f * x * y; /* (2,-2) */
+	sh[5] = 1.092548f * y * z; /* (2,-1) */
+	sh[6] = 0.315392f * (3.0f * z * z - 1.0f); /* (2,0) */
+	sh[7] = 1.092548f * x * z; /* (2,1) */
+	sh[8] = 0.546274f * (x * x - y * y); /* (2,2) */
+
+	sh_mul(sh, area);
+	sh_copy(shresult, sh);
+}
+
+static float sh_eval(float *sh, float *v)
+{
+	/* Evaluate the nine coefficients in sh for direction v and return the sum. See formula (13) in:
+	 * "An Efficient Representation for Irradiance Environment Maps" */
+	static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
+	static const float c4 = 0.886227f, c5 = 0.247708f;
+	float x, y, z, sum;
+
+	x = v[0];
+	y = v[1];
+	z = v[2];
+
+	sum = c1 * sh[8] * (x * x - y * y); /* (2,2) term */
+	sum += c3 * sh[6] * z * z; /* (2,0) term, z dependent part */
+	sum += c4 * sh[0]; /* constant (0,0) term */
+	sum += -c5 * sh[6]; /* (2,0) term, constant part */
+	sum += 2.0f * c1 * (sh[4] * x * y + sh[7] * x * z + sh[5] * y * z); /* remaining order 2 terms */
+	sum += 2.0f * c2 * (sh[3] * x + sh[1] * y + sh[2] * z); /* order 1 terms */
+
+	return sum;
+}
+
+/* ------------------------------ Building --------------------------------- */
+
+static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
+{
+	ObjectInstanceRen *obi;
+	VlakRen *vlr;
+	float v1[3], v2[3], v3[3], v4[3];
+	/* Fetch center, normal and/or area of a tree face; pass NULL for any output that is not needed. */
+	obi = &R.objectinstance[face->obi];
+	vlr = RE_findOrAddVlak(obi->obr, face->facenr);
+
+	if (co) { /* center: midpoint of v1 and v3 for quads, average of the three vertices for triangles */
+		if (vlr->v4)
+			mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co);
+		else
+			mid_v3_v3v3v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
+
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_v3(obi->mat, co);
+	}
+
+	if (normal) { /* normal: the negated face normal, transformed by nmat for transformed instances */
+		normal[0] = -vlr->n[0];
+		normal[1] = -vlr->n[1];
+		normal[2] = -vlr->n[2];
+
+		if (obi->flag & R_TRANSFORMED)
+			mul_m3_v3(obi->nmat, normal);
+	}
+
+	if (area) { /* area: computed from the (transformed) vertex positions */
+		copy_v3_v3(v1, vlr->v1->co);
+		copy_v3_v3(v2, vlr->v2->co);
+		copy_v3_v3(v3, vlr->v3->co);
+		if (vlr->v4) copy_v3_v3(v4, vlr->v4->co);
+
+		if (obi->flag & R_TRANSFORMED) {
+			mul_m4_v3(obi->mat, v1);
+			mul_m4_v3(obi->mat, v2);
+			mul_m4_v3(obi->mat, v3);
+			if (vlr->v4) mul_m4_v3(obi->mat, v4);
+		}
+
+		/* todo: correct area for instances */
+		if (vlr->v4)
+			*area = area_quad_v3(v1, v2, v3, v4);
+		else
+			*area = area_tri_v3(v1, v2, v3);
+	}
+}
+
+static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
+{
+	OccNode *child;
+	float occ, area, totarea, rad[3];
+	int a, b, indirect = tree->doindirect;
+	/* Recursively set node->occlusion (and node->rad when indirect) to the area-weighted average of its children. */
+	occ = 0.0f;
+	totarea = 0.0f;
+	if (indirect) zero_v3(rad);
+
+	for (b = 0; b < TOTCHILD; b++) {
+		if (node->childflag & (1 << b)) { /* face child: use the per-face values */
+			a = node->child[b].face;
+			occ_face(&tree->face[a], NULL, NULL, &area);
+			occ += area * tree->occlusion[a];
+			if (indirect) madd_v3_v3fl(rad, tree->rad[a], area);
+			totarea += area;
+		}
+		else if (node->child[b].node) { /* node child: update it first, then use its averages */
+			child = node->child[b].node;
+			occ_sum_occlusion(tree, child);
+
+			occ += child->area * child->occlusion;
+			if (indirect) madd_v3_v3fl(rad, child->rad, child->area);
+			totarea += child->area;
+		}
+	}
+
+	if (totarea != 0.0f) { /* normalize by total area, skipped for zero area to avoid dividing by zero */
+		occ /= totarea;
+		if (indirect) mul_v3_fl(rad, 1.0f / totarea);
+	}
+
+	node->occlusion = occ;
+	if (indirect) copy_v3_v3(node->rad, rad);
+}
+
+static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
+{
+	float extent, longest = -1.0f;
+	int i, k, best = 0;
+
+	/* bounding box of the face centers in [begin, end), returned in min/max */
+	INIT_MINMAX(min, max);
+	for (i = begin; i < end; i++)
+		minmax_v3v3_v3(min, max, tree->co[i]);
+
+	/* pick the axis with the largest extent, the lowest index wins ties */
+	for (k = 0; k < 3; k++) {
+		extent = max[k] - min[k];
+
+		if (extent > longest) {
+			longest = extent;
+			best = k;
+		}
+	}
+
+	return best;
+}
+
+static void occ_node_from_face(OccFace *face, OccNode *node)
+{
+	float facenor[3];
+
+	node->dco = 0.0f; /* fill co, area, dco and sh of 'node' from a single face */
+	occ_face(face, node->co, facenor, &node->area);
+	sh_from_disc(facenor, node->area, node->sh);
+}
+
+static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
+{
+	int b; /* Raise *dco to the largest squared distance from 'co' to any face or node center below 'node'. */
+	for (b = 0; b < TOTCHILD; b++) {
+		float dist, d[3], nco[3];
+
+		if (node->childflag & (1 << b)) { /* face child: measure to the face center */
+			occ_face(tree->face + node->child[b].face, nco, NULL, NULL);
+		}
+		else if (node->child[b].node) { /* node child: recurse with the same 'co', then measure to the child center */
+			OccNode *child = node->child[b].node;
+			occ_build_dco(tree, child, co, dco);
+			copy_v3_v3(nco, child->co);
+		}
+		else {
+			continue;
+		}
+
+		sub_v3_v3v3(d, nco, co);
+		dist = dot_v3v3(d, d); /* squared length, no square root is taken */
+		if (dist > *dco)
+			*dco = dist;
+	}
+}
+
+static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
+{
+	float min[3], max[3], mid;
+	int axis, a, enda;
+
+	/* split in middle of boundbox. this seems faster than median split
+	 * on complex scenes, possibly since it avoids two distant faces to
+	 * be in the same node better? */
+	axis = occ_find_bbox_axis(tree, begin, end, min, max);
+	mid = 0.5f * (min[axis] + max[axis]);
+
+	a = begin; /* partition in place: faces with co[axis] <= mid end up in [begin, enda) */
+	enda = end;
+	while (a < enda) {
+		if (tree->co[a][axis] > mid) {
+			enda--;
+			SWAP(OccFace, tree->face[a], tree->face[enda]); /* face and co arrays are kept in the same order */
+			swap_v3_v3(tree->co[a], tree->co[enda]);
+		}
+		else
+			a++;
+	}
+
+	*split = enda; /* index of the first face on the upper side; equals begin or end when all faces fall on one side */
+}
+
+static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
+{
+	/* split faces into eight groups; offset[b] and count[b] give the start and size of group b */
+	int b, splitx, splity[2], splitz[4];
+
+	occ_build_split(tree, begin, end, &splitx);
+
+	/* force split if none found, to deal with degenerate geometry */
+	if (splitx == begin || splitx == end)
+		splitx = (begin + end) / 2;
+
+	occ_build_split(tree, begin, splitx, &splity[0]); /* split each half again */
+	occ_build_split(tree, splitx, end, &splity[1]);
+
+	occ_build_split(tree, begin, splity[0], &splitz[0]); /* and each quarter once more */
+	occ_build_split(tree, splity[0], splitx, &splitz[1]);
+	occ_build_split(tree, splitx, splity[1], &splitz[2]);
+	occ_build_split(tree, splity[1], end, &splitz[3]);
+
+	offset[0] = begin; /* group starts, in increasing index order */
+	offset[1] = splitz[0];
+	offset[2] = splity[0];
+	offset[3] = splitz[1];
+	offset[4] = splitx;
+	offset[5] = splitz[2];
+	offset[6] = splity[1];
+	offset[7] = splitz[3];
+
+	for (b = 0; b < 7; b++)
+		count[b] = offset[b + 1] - offset[b];
+	count[7] = end - offset[7]; /* last group runs to 'end'; groups may be empty */
+}
+
+static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
+
+static void *exec_occ_build(void *data)
+{
+	OcclusionBuildThread *job = data;
+
+	/* thread pool callback: build the subtree described by the job */
+	occ_build_recursive(job->tree, job->node, job->begin, job->end, job->depth);
+	return NULL;
+}
+
+static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
+{
+	ListBase threads;
+	OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
+	OccNode *child, tmpnode;
+	/* OccFace *face; */
+	int a, b, totthread = 0, offset[TOTCHILD], count[TOTCHILD];
+
+	/* Build the subtree for faces [begin, end) into 'node', which was allocated zeroed by the caller. */
+	node->occlusion = 1.0f;
+
+	/* leaf node with only face children */
+	if (end - begin <= TOTCHILD) {
+		for (a = begin, b = 0; a < end; a++, b++) {
+			/* face= &tree->face[a]; */
+			node->child[b].face = a;
+			node->childflag |= (1 << b);
+		}
+	}
+	else {
+		/* order faces */
+		occ_build_8_split(tree, begin, end, offset, count);
+
+		if (depth == 1 && tree->dothreadedbuild) /* only the root level hands its subtrees to a thread pool */
+			BLI_threadpool_init(&threads, exec_occ_build, tree->totbuildthread);
+
+		for (b = 0; b < TOTCHILD; b++) {
+			if (count[b] == 0) {
+				node->child[b].node = NULL;
+			}
+			else if (count[b] == 1) {
+				/* face= &tree->face[offset[b]]; */
+				node->child[b].face = offset[b];
+				node->childflag |= (1 << b);
+			}
+			else {
+				if (tree->dothreadedbuild) /* lock covers the shared arena allocation and the maxdepth update */
+					BLI_thread_lock(LOCK_CUSTOM1);
+
+				child = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
+				node->child[b].node = child;
+
+				/* keep track of maximum depth for stack */
+				if (depth >= tree->maxdepth)
+					tree->maxdepth = depth + 1;
+
+				if (tree->dothreadedbuild)
+					BLI_thread_unlock(LOCK_CUSTOM1);
+
+				if (depth == 1 && tree->dothreadedbuild) {
+					othreads[totthread].tree = tree;
+					othreads[totthread].node = child;
+					othreads[totthread].begin = offset[b];
+					othreads[totthread].end = offset[b] + count[b];
+					othreads[totthread].depth = depth + 1;
+					BLI_threadpool_insert(&threads, &othreads[totthread]);
+					totthread++; /* at most TOTCHILD jobs are queued here */
+				}
+				else
+					occ_build_recursive(tree, child, offset[b], offset[b] + count[b], depth + 1);
+			}
+		}
+
+		if (depth == 1 && tree->dothreadedbuild) /* the child nodes are read below, after the pool has ended */
+			BLI_threadpool_end(&threads);
+	}
+
+	/* combine area, position and sh; faces go through a temporary node so both child kinds are summed alike */
+	for (b = 0; b < TOTCHILD; b++) {
+		if (node->childflag & (1 << b)) {
+			child = &tmpnode;
+			occ_node_from_face(tree->face + node->child[b].face, &tmpnode);
+		}
+		else {
+			child = node->child[b].node;
+		}
+
+		if (child) {
+			node->area += child->area;
+			sh_add(node->sh, node->sh, child->sh);
+			madd_v3_v3fl(node->co, child->co, child->area); /* accumulate the area-weighted center */
+		}
+	}
+
+	if (node->area != 0.0f) /* divide the weighted sum by the total area to get the center */
+		mul_v3_fl(node->co, 1.0f / node->area);
+
+	/* compute maximum (squared) distance from center */
+	node->dco = 0.0f;
+	if (node->area > 0.0f)
+		occ_build_dco(tree, node, node->co, &node->dco);
+}
+
+static void occ_build_sh_normalize(OccNode *node)
+{
+	/* divide the area out of the spherical harmonics of this node and
+	 * of every node below it, so the dot product can be clamped first
+	 * and multiplied by area afterwards */
+	OccNode *child;
+	int b;
+
+	if (node->area != 0.0f)
+		sh_mul(node->sh, 1.0f / node->area);
+
+	for (b = 0; b < TOTCHILD; b++) {
+		/* slots flagged in childflag hold face indices, not nodes */
+		child = (node->childflag & (1 << b)) ? NULL : node->child[b].node;
+		if (child)
+			occ_build_sh_normalize(child);
+	}
+}
+
+static OcclusionTree *occ_tree_build(Render *re)
+{
+	const int num_threads = re->r.threads;
+	OcclusionTree *tree;
+	ObjectInstanceRen *obi;
+	ObjectRen *obr;
+	Material *ma;
+	VlakRen *vlr = NULL;
+	int a, b, c, totface;
+	/* Build the occlusion tree for all eligible faces of 're'; returns NULL when no face qualifies. */
+	/* count faces whose material is a surface with approximate occlusion enabled */
+	totface = 0;
+	for (obi = re->instancetable.first; obi; obi = obi->next) {
+		obr = obi->obr;
+		for (a = 0; a < obr->totvlak; a++) {
+			if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak; /* faces are stored in blocks of 256 */
+			else vlr++;
+
+			ma = vlr->mat;
+
+			if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
+				totface++;
+		}
+	}
+
+	if (totface == 0)
+		return NULL;
+
+	tree = MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
+	tree->totface = totface;
+
+	/* parameters */
+	tree->error = get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
+	tree->distfac = (re->wrld.aomode & WO_AODIST) ? re->wrld.aodistfac : 0.0f;
+	tree->doindirect = (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
+
+	/* allocation; the arena returns zeroed nodes, which occ_build_recursive() relies on */
+	tree->arena = BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
+	BLI_memarena_use_calloc(tree->arena);
+
+	if (re->wrld.aomode & WO_AOCACHE)
+		tree->cache = MEM_callocN(sizeof(OcclusionCache) * num_threads, "OcclusionCache");
+
+	tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace");
+	tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo");
+	tree->occlusion = MEM_callocN(sizeof(float) * totface, "OcclusionOcclusion");
+
+	if (tree->doindirect)
+		tree->rad = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionRad");
+
+	/* make array of instance/face indices, with c counting instances and b counting accepted faces */
+	for (b = 0, c = 0, obi = re->instancetable.first; obi; obi = obi->next, c++) {
+		obr = obi->obr;
+		for (a = 0; a < obr->totvlak; a++) {
+			if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
+			else vlr++;
+
+			ma = vlr->mat;
+
+			if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
+				tree->face[b].obi = c;
+				tree->face[b].facenr = a;
+				tree->occlusion[b] = 1.0f;
+				occ_face(&tree->face[b], tree->co[b], NULL, NULL);
+				b++;
+			}
+		}
+	}
+
+	/* threads: the build is only threaded for multi-threaded renders with more than 10000 faces */
+	tree->totbuildthread = (re->r.threads > 1 && totface > 10000) ? 8 : 1;
+	tree->dothreadedbuild = (tree->totbuildthread > 1);
+
+	/* recurse */
+	tree->root = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
+	tree->maxdepth = 1;
+	occ_build_recursive(tree, tree->root, 0, totface, 1);
+
+	if (tree->doindirect) {
+		if (!(re->test_break(re->tbh)))
+			occ_build_shade(re, tree);
+
+		if (!(re->test_break(re->tbh)))
+			occ_sum_occlusion(tree, tree->root);
+	}
+
+	MEM_freeN(tree->co); /* face centers were only needed while splitting */
+	tree->co = NULL;
+
+	if (!(re->test_break(re->tbh)))
+		occ_build_sh_normalize(tree->root);
+
+	for (a = 0; a < num_threads; a++) /* stacks hold node pointers, so size the slots by sizeof(OccNode *) */
+		tree->stack[a] = MEM_callocN(sizeof(OccNode *) * TOTCHILD * (tree->maxdepth + 1), "OccStack");
+
+	tree->num_threads = num_threads;
+
+	return tree;
+}
+
+static void occ_free_tree(OcclusionTree *tree)
+{
+	int thread;
+
+	if (tree == NULL) /* no tree to release */
+		return;
+
+	if (tree->arena) BLI_memarena_free(tree->arena);
+	for (thread = 0; thread < tree->num_threads; thread++)
+		if (tree->stack[thread]) MEM_freeN(tree->stack[thread]);
+	if (tree->occlusion) MEM_freeN(tree->occlusion);
+	if (tree->cache) MEM_freeN(tree->cache);
+	if (tree->face) MEM_freeN(tree->face);
+	if (tree->rad) MEM_freeN(tree->rad);
+	MEM_freeN(tree);
+}
+
+/* ------------------------- Traversal --------------------------- */
+
+static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3])
+{
+	float dotreceive, dotemit; /* v: receiver-to-node vector, d2: its squared length */
+	float ev[3];               /* NOTE: despite its name, occ_lookup() passes invd2 = 1 / sqrtf(d2) */
+
+	ev[0] = -v[0] * invd2; /* ev: v reversed and scaled by invd2 */
+	ev[1] = -v[1] * invd2;
+	ev[2] = -v[2] * invd2;
+	dotemit = sh_eval(node->sh, ev);
+	dotreceive = dot_v3v3(receivenormal, v) * invd2;
+
+	CLAMP(dotemit, 0.0f, 1.0f);
+	CLAMP(dotreceive, 0.0f, 1.0f);
+
+	return ((node->area * dotemit * dotreceive) / (d2 + node->area * INVPI)) * INVPI; /* the area * INVPI term keeps the denominator nonzero as d2 goes to 0 */
+}
+
+static float occ_form_factor(OccFace *face, float *p, float *n)
+{
+	ObjectInstanceRen *obi;
+	VlakRen *vlr;
+	float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib = 0.0f;
+	/* Return the summed form factor of 'face' as seen from point p with normal n. */
+	obi = &R.objectinstance[face->obi];
+	vlr = RE_findOrAddVlak(obi->obr, face->facenr);
+
+	copy_v3_v3(v1, vlr->v1->co);
+	copy_v3_v3(v2, vlr->v2->co);
+	copy_v3_v3(v3, vlr->v3->co);
+
+	if (obi->flag & R_TRANSFORMED) {
+		mul_m4_v3(obi->mat, v1);
+		mul_m4_v3(obi->mat, v2);
+		mul_m4_v3(obi->mat, v3);
+	}
+
+	if (form_factor_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) /* first triangle (v1, v2, v3); presumably q0..q3 receive its visible part - confirm */
+		contrib += form_factor_quad(p, n, q0, q1, q2, q3);
+
+	if (vlr->v4) { /* quads add a second triangle (v1, v3, v4) */
+		copy_v3_v3(v4, vlr->v4->co);
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_v3(obi->mat, v4);
+
+		if (form_factor_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
+			contrib += form_factor_quad(p, n, q0, q1, q2, q3);
+	}
+
+	return contrib;
+}
+
+static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude,
+                       const float pp[3], const float pn[3], float *occ, float rad[3], float bentn[3])
+{
+	OccNode *node, **stack;
+	OccFace *face;
+	float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
+	float distfac, fac, error, d2, weight, emitarea;
+	int b, f, totstack;
+	/* Accumulate occlusion, radiance and bent normal at point pp with normal pn; occ, rad and bentn may each be NULL. */
+	/* init variables: work on copies, with p moved 1e-4 along the normal */
+	copy_v3_v3(p, pp);
+	copy_v3_v3(n, pn);
+	madd_v3_v3fl(p, n, 1e-4f);
+
+	if (bentn)
+		copy_v3_v3(bentn, n);
+
+	error = tree->error;
+	distfac = tree->distfac;
+
+	resultocc = 0.0f;
+	zero_v3(resultrad);
+
+	/* init stack: each thread index has its own stack */
+	stack = tree->stack[thread];
+	stack[0] = tree->root;
+	totstack = 1;
+
+	while (totstack) {
+		/* pop node off the stack */
+		node = stack[--totstack];
+
+		sub_v3_v3v3(v, node->co, p);
+		d2 = dot_v3v3(v, v) + 1e-16f; /* small epsilon keeps d2 nonzero */
+		emitarea = MAX2(node->area, node->dco);
+
+		if (d2 * error > emitarea) { /* far enough: use the node as a whole instead of descending */
+			if (distfac != 0.0f) {
+				fac = 1.0f / (1.0f + distfac * d2);
+				if (fac < 0.01f) /* contribution is negligible after distance falloff */
+					continue;
+			}
+			else
+				fac = 1.0f;
+
+			/* accumulate occlusion from spherical harmonics */
+			invd2 = 1.0f / sqrtf(d2); /* note: this is 1/distance, not 1/d2 */
+			weight = occ_solid_angle(node, v, d2, invd2, n);
+
+			if (rad)
+				madd_v3_v3fl(resultrad, node->rad, weight * fac);
+
+			weight *= node->occlusion;
+
+			if (bentn) { /* bend the normal away from the occluder */
+				bentn[0] -= weight * invd2 * v[0];
+				bentn[1] -= weight * invd2 * v[1];
+				bentn[2] -= weight * invd2 * v[2];
+			}
+
+			resultocc += weight * fac;
+		}
+		else {
+			/* traverse into children */
+			for (b = 0; b < TOTCHILD; b++) {
+				if (node->childflag & (1 << b)) {
+					f = node->child[b].face;
+					face = &tree->face[f];
+
+					/* accumulate occlusion with face form factor, skipping the excluded face */
+					if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
+						if (bentn || distfac != 0.0f) { /* v and d2 are only recomputed when they are used below */
+							occ_face(face, co, NULL, NULL);
+							sub_v3_v3v3(v, co, p);
+							d2 = dot_v3v3(v, v) + 1e-16f;
+
+							fac = (distfac == 0.0f) ? 1.0f : 1.0f / (1.0f + distfac * d2);
+							if (fac < 0.01f)
+								continue; /* continues the child loop, i.e. skips only this face */
+						}
+						else
+							fac = 1.0f;
+
+						weight = occ_form_factor(face, p, n);
+
+						if (rad)
+							madd_v3_v3fl(resultrad, tree->rad[f], weight * fac);
+
+						weight *= tree->occlusion[f];
+
+						if (bentn) {
+							invd2 = 1.0f / sqrtf(d2);
+							bentn[0] -= weight * invd2 * v[0];
+							bentn[1] -= weight * invd2 * v[1];
+							bentn[2] -= weight * invd2 * v[2];
+						}
+
+						resultocc += weight * fac;
+					}
+				}
+				else if (node->child[b].node) {
+					/* push child on the stack */
+					stack[totstack++] = node->child[b].node;
+				}
+			}
+		}
+	}
+
+	if (occ) *occ = resultocc;
+	if (rad) copy_v3_v3(rad, resultrad);
+#if 0
+	if (rad && exclude) {
+		int a;
+		for (a = 0; a < tree->totface; a++)
+			if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
+				copy_v3_v3(rad, tree->rad[a]);
+	}
+#endif
+	if (bentn) normalize_v3(bentn);
+}
+
+static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
+{
+	float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
+	int bounce, i;
+	/* Accumulate per-face radiance over bounces 1..totbounce-1 and leave the total in tree->rad. */
+	rad = MEM_callocN(sizeof(float) * 3 * tree->totface, "OcclusionBounceRad");
+	sum = MEM_dupallocN(tree->rad);
+	/* rad: scratch buffer for the current bounce; sum: running total seeded with a copy of tree->rad */
+	for (bounce = 1; bounce < totbounce; bounce++) {
+		for (i = 0; i < tree->totface; i++) {
+			occ_face(&tree->face[i], co, n, NULL);
+			madd_v3_v3fl(co, n, 1e-8f);
+			/* look up into rad[i] with the face itself passed as exclude; clamp negatives before summing */
+			occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
+			rad[i][0] = MAX2(rad[i][0], 0.0f);
+			rad[i][1] = MAX2(rad[i][1], 0.0f);
+			rad[i][2] = MAX2(rad[i][2], 0.0f);
+			add_v3_v3(sum[i], rad[i]);
+
+			if (re->test_break(re->tbh))
+				break;
+		}
+
+		if (re->test_break(re->tbh))
+			break;
+		/* swap buffers: this bounce's result becomes tree->rad, the previous tree->rad becomes scratch */
+		tmp = tree->rad;
+		tree->rad = rad;
+		rad = tmp;
+
+		occ_sum_occlusion(tree, tree->root);
+	}
+	/* free both working buffers and install the accumulated sum as tree->rad */
+	MEM_freeN(rad);
+	MEM_freeN(tree->rad);
+	tree->rad = sum;
+	/* run occ_sum_occlusion on the new tree->rad unless test_break reports a break */
+	if (!re->test_break(re->tbh))
+		occ_sum_occlusion(tree, tree->root);
+}
+
+static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
+{
+	float *occ, co[3], n[3];
+	int pass, i;
+	/* Run totpass passes; each one subtracts the looked-up per-face occlusion from tree->occlusion. */
+	occ = MEM_callocN(sizeof(float) * tree->totface, "OcclusionPassOcc");
+
+	for (pass = 0; pass < totpass; pass++) {
+		for (i = 0; i < tree->totface; i++) {
+			occ_face(&tree->face[i], co, n, NULL);
+			negate_v3(n);
+			madd_v3_v3fl(co, n, 1e-8f);
+			/* lookup uses the negated normal, from co nudged along it; the face itself is passed as exclude */
+			occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
+			if (re->test_break(re->tbh))
+				break;
+		}
+
+		if (re->test_break(re->tbh))
+			break;
+		/* apply this pass to the per-face occlusion, clamping at zero */
+		for (i = 0; i < tree->totface; i++) {
+			tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
+			if (tree->occlusion[i] < 0.0f)
+				tree->occlusion[i] = 0.0f;
+		}
+
+		occ_sum_occlusion(tree, tree->root);
+	}
+
+	MEM_freeN(occ);
+}
+
+static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude,
+                            const float co[3], const float n[3], int thread, int onlyshadow,
+                            float *ao, float *env, float *indirect)
+{
+	float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
+	int envcolor;
+	/* Look up occlusion at co and convert it into the ao / env / indirect outputs (ao and env may be NULL). */
+	envcolor = re->wrld.aocolor;
+	if (onlyshadow)
+		envcolor = WO_AOPLAIN;
+	/* the lookup takes the negated normal; rad and bn are only requested when they will be used below */
+	negate_v3_v3(nn, n);
+
+	occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect) ? rad : NULL, (env && envcolor) ? bn : NULL);
+
+	correction = re->wrld.ao_approx_correction;
+	/* (1 - correction) * (1 - occ) clamped to [0, 1], plus correction * exp(-occ) */
+	occlusion = (1.0f - correction) * (1.0f - occ);
+	CLAMP(occlusion, 0.0f, 1.0f);
+	if (correction != 0.0f)
+		occlusion += correction * expf(-occ);
+
+	if (env) {
+		/* sky shading using bent normal */
+		if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
+			fac = 0.5f * (1.0f + dot_v3v3(bn, re->grvec));
+			env[0] = (1.0f - fac) * re->wrld.horr + fac * re->wrld.zenr;
+			env[1] = (1.0f - fac) * re->wrld.horg + fac * re->wrld.zeng;
+			env[2] = (1.0f - fac) * re->wrld.horb + fac * re->wrld.zenb;
+
+			mul_v3_fl(env, occlusion);
+		}
+		else {
+			env[0] = occlusion;
+			env[1] = occlusion;
+			env[2] = occlusion;
+		}
+#if 0
+		else {  /* WO_AOSKYTEX */
+			float dxyview[3];
+			bn[0] = -bn[0];
+			bn[1] = -bn[1];
+			bn[2] = -bn[2];
+			dxyview[0] = 1.0f;
+			dxyview[1] = 1.0f;
+			dxyview[2] = 0.0f;
+			shadeSkyView(ao, co, bn, dxyview);
+		}
+#endif
+	}
+
+	if (ao) {
+		ao[0] = occlusion;
+		ao[1] = occlusion;
+		ao[2] = occlusion;
+	}
+	/* indirect is written unconditionally: the looked-up rad, or zero when tree->doindirect is unset */
+	if (tree->doindirect) copy_v3_v3(indirect, rad);
+	else zero_v3(indirect);
+}
+
+/* ---------------------------- Caching ------------------------------- */
+
+static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
+{
+	/* Map pixel (x, y) to its cache sample on the step grid; NULL when that
+	 * grid position falls outside the cached w * h rectangle. */
+	const int step = cache->step;
+	/* cache-local coordinates, truncated to a multiple of step */
+	const int gx = ((x - cache->x) / step) * step;
+	const int gy = ((y - cache->y) / step) * step;
+	const int inside_x = (gx >= 0 && gx < cache->w);
+	const int inside_y = (gy >= 0 && gy < cache->h);
+
+	if (inside_x && inside_y)
+		return &cache->sample[gy * cache->w + gx];
+	return NULL;
+}
+
+static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
+{
+	OcclusionCache *cache;
+	OcclusionCacheSample *samples[4], *sample;
+	float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
+	float d[3], dist2;
+	int i, x1, y1, x2, y2;
+	/* Try to answer the query for pixel (x, y) from the cache of this thread; returns 1 on a hit, 0 otherwise. */
+	if (!tree->cache)
+		return 0;
+
+	/* first try to find a sample in the same pixel */
+	cache = &tree->cache[thread];
+
+	if (cache->sample && cache->step) {
+		sample = &cache->sample[(y - cache->y) * cache->w + (x - cache->x)];
+		if (sample->filled) {
+			sub_v3_v3v3(d, sample->co, co);
+			dist2 = dot_v3v3(d, d);
+			if (dist2 < 0.5f * sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) {
+				copy_v3_v3(ao, sample->ao);
+				copy_v3_v3(env, sample->env);
+				copy_v3_v3(indirect, sample->indirect);
+				return 1;
+			}
+		}
+	}
+	else
+		return 0;
+
+	/* try to interpolate between 4 neighboring pixels */
+	samples[0] = find_occ_sample(cache, x, y);
+	samples[1] = find_occ_sample(cache, x + cache->step, y);
+	samples[2] = find_occ_sample(cache, x, y + cache->step);
+	samples[3] = find_occ_sample(cache, x + cache->step, y + cache->step);
+
+	for (i = 0; i < 4; i++)
+		if (!samples[i] || !samples[i]->filled)
+			return 0;
+
+	/* require intensities not being too different */
+	mino = min_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
+	maxo = max_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
+
+	if (maxo - mino > 0.05f)
+		return 0;
+
+	/* compute weighted interpolation between samples */
+	zero_v3(ao);
+	zero_v3(env);
+	zero_v3(indirect);
+	totw = 0.0f;
+
+	x1 = samples[0]->x;
+	y1 = samples[0]->y;
+	x2 = samples[3]->x;
+	y2 = samples[3]->y;
+	/* NOTE(review): x2 == x1 or y2 == y1 divides by zero below - confirm stored sample x/y always differ */
+	tx = (float)(x2 - x) / (float)(x2 - x1);
+	ty = (float)(y2 - y) / (float)(y2 - y1);
+
+	wb[3] = (1.0f - tx) * (1.0f - ty);
+	wb[2] = (tx) * (1.0f - ty);
+	wb[1] = (1.0f - tx) * (ty);
+	wb[0] = tx * ty;
+
+	for (i = 0; i < 4; i++) {
+		sub_v3_v3v3(d, samples[i]->co, co);
+		//dist2 = dot_v3v3(d, d);
+
+		wz[i] = 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
+		wn[i] = pow(dot_v3v3(samples[i]->n, n), 32.0f);
+
+		w = wb[i] * wn[i] * wz[i];
+
+		totw += w;
+		madd_v3_v3fl(ao, samples[i]->ao, w);
+		madd_v3_v3fl(env, samples[i]->env, w);
+		madd_v3_v3fl(indirect, samples[i]->indirect, w);
+	}
+	/* accept the interpolation only when the total weight reaches 0.9, then normalize by it */
+	if (totw >= 0.9f) {
+		totw = 1.0f / totw;
+		mul_v3_fl(ao, totw);
+		mul_v3_fl(env, totw);
+		mul_v3_fl(indirect, totw);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void sample_occ_surface(ShadeInput *shi)
+{
+	StrandRen *strand = shi->strand;
+	StrandSurface *mesh = strand->buffer->surface;
+	const int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
+	float w[4], *co1, *co2, *co3, *co4;
+	/* Interpolate the surface mesh's per-vertex ao/env/indirect at strand->vert->co into shi. */
+	if (mesh && mesh->face && mesh->co && mesh->ao && index) {
+		face = mesh->face[*index];
+
+		co1 = mesh->co[face[0]];
+		co2 = mesh->co[face[1]];
+		co3 = mesh->co[face[2]];
+		/* a non-zero face[3] marks a quad */
+		if (face[3]) {
+			co4 = mesh->co[face[3]];
+			interp_weights_quad_v3(w, co1, co2, co3, co4, strand->vert->co);
+		}
+		else {
+			interp_weights_tri_v3(w, co1, co2, co3, strand->vert->co);
+		}
+
+		zero_v3(shi->ao);
+		zero_v3(shi->env);
+		zero_v3(shi->indirect);
+		/* weighted sum of the face's vertex values; w[3] is only used for quads */
+		madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
+		madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
+		madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
+		madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
+		madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
+		madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
+		madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
+		madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
+		madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
+		if (face[3]) {
+			madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
+			madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
+			madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
+		}
+	}
+	else {
+		shi->ao[0] = 1.0f;
+		shi->ao[1] = 1.0f;
+		shi->ao[2] = 1.0f;
+		zero_v3(shi->env);
+		zero_v3(shi->indirect);
+	}
+}
+
+/* ------------------------- External Functions --------------------------- */
+
+static void *exec_strandsurface_sample(void *data)
+{
+	OcclusionThread *othread = (OcclusionThread *)data;
+	Render *re = othread->re;
+	StrandSurface *mesh = othread->mesh;
+	float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
+	int a, *face;
+	/* Worker: sample occlusion once per surface face in [begin, end) and store it in the per-face arrays. */
+	for (a = othread->begin; a < othread->end; a++) {
+		face = mesh->face[a];
+		co1 = mesh->co[face[0]];
+		co2 = mesh->co[face[1]];
+		co3 = mesh->co[face[2]];
+
+		if (face[3]) {
+			co4 = mesh->co[face[3]];
+			/* quad: sample position is the midpoint of the co1-co3 diagonal */
+			mid_v3_v3v3(co, co1, co3);
+			normal_quad_v3(n, co1, co2, co3, co4);
+		}
+		else {
+			mid_v3_v3v3v3(co, co1, co2, co3);
+			normal_tri_v3(n, co1, co2, co3);
+		}
+		negate_v3(n);
+		/* no face is excluded (NULL) and onlyshadow is 0 for these lookups */
+		sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
+		copy_v3_v3(othread->faceao[a], ao);
+		copy_v3_v3(othread->faceenv[a], env);
+		copy_v3_v3(othread->faceindirect[a], indirect);
+	}
+
+	return NULL;
+}
+
+void make_occ_tree(Render *re)
+{
+	OcclusionThread othreads[BLENDER_MAX_THREADS];
+	OcclusionTree *tree;
+	StrandSurface *mesh;
+	ListBase threads;
+	float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
+	int a, totface, totthread, *face, *count;
+	/* Build re->occlusiontree, run the optional passes/bounces, then bake occlusion onto strand surface vertices. */
+	/* ugly, needed for occ_face */
+	R = *re;
+
+	re->i.infostr = IFACE_("Occlusion preprocessing");
+	re->stats_draw(re->sdh, &re->i);
+
+	re->occlusiontree = tree = occ_tree_build(re);
+
+	if (tree && !re->test_break(re->tbh)) {
+		if (re->wrld.ao_approx_passes > 0)
+			occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
+		if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
+			occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
+
+		for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) {
+			if (!mesh->face || !mesh->co || !mesh->ao)
+				continue;
+
+			count = MEM_callocN(sizeof(int) * mesh->totvert, "OcclusionCount");
+			faceao = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceAO");
+			faceenv = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceEnv");
+			faceindirect = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceIndirect");
+			/* only meshes with more than 10000 faces use re->r.threads threads; the last range takes the remainder */
+			totthread = (mesh->totface > 10000) ? re->r.threads : 1;
+			totface = mesh->totface / totthread;
+			for (a = 0; a < totthread; a++) {
+				othreads[a].re = re;
+				othreads[a].faceao = faceao;
+				othreads[a].faceenv = faceenv;
+				othreads[a].faceindirect = faceindirect;
+				othreads[a].thread = a;
+				othreads[a].mesh = mesh;
+				othreads[a].begin = a * totface;
+				othreads[a].end = (a == totthread - 1) ? mesh->totface : (a + 1) * totface;
+			}
+
+			if (totthread == 1) {
+				exec_strandsurface_sample(&othreads[0]);
+			}
+			else {
+				BLI_threadpool_init(&threads, exec_strandsurface_sample, totthread);
+
+				for (a = 0; a < totthread; a++)
+					BLI_threadpool_insert(&threads, &othreads[a]);
+
+				BLI_threadpool_end(&threads);
+			}
+			/* add each face's result onto its vertices, counting the contributions per vertex */
+			for (a = 0; a < mesh->totface; a++) {
+				face = mesh->face[a];
+
+				copy_v3_v3(ao, faceao[a]);
+				copy_v3_v3(env, faceenv[a]);
+				copy_v3_v3(indirect, faceindirect[a]);
+
+				add_v3_v3(mesh->ao[face[0]], ao);
+				add_v3_v3(mesh->env[face[0]], env);
+				add_v3_v3(mesh->indirect[face[0]], indirect);
+				count[face[0]]++;
+				add_v3_v3(mesh->ao[face[1]], ao);
+				add_v3_v3(mesh->env[face[1]], env);
+				add_v3_v3(mesh->indirect[face[1]], indirect);
+				count[face[1]]++;
+				add_v3_v3(mesh->ao[face[2]], ao);
+				add_v3_v3(mesh->env[face[2]], env);
+				add_v3_v3(mesh->indirect[face[2]], indirect);
+				count[face[2]]++;
+
+				if (face[3]) {
+					add_v3_v3(mesh->ao[face[3]], ao);
+					add_v3_v3(mesh->env[face[3]], env);
+					add_v3_v3(mesh->indirect[face[3]], indirect);
+					count[face[3]]++;
+				}
+			}
+			/* divide the accumulated vertex values by their contribution count */
+			for (a = 0; a < mesh->totvert; a++) {
+				if (count[a]) {
+					mul_v3_fl(mesh->ao[a], 1.0f / count[a]);
+					mul_v3_fl(mesh->env[a], 1.0f / count[a]);
+					mul_v3_fl(mesh->indirect[a], 1.0f / count[a]);
+				}
+			}
+
+			MEM_freeN(count);
+			MEM_freeN(faceao);
+			MEM_freeN(faceenv);
+			MEM_freeN(faceindirect);
+		}
+	}
+}
+
+void free_occ(Render *re)
+{
+	if (re->occlusiontree == NULL)
+		return;  /* no tree to release */
+	occ_free_tree(re->occlusiontree);
+	re->occlusiontree = NULL;
+}
+
+void sample_occ(Render *re, ShadeInput *shi)
+{
+	/* Fill shi->ao, shi->env and shi->indirect for the point being shaded.
+	 * Without an occlusion tree the result is ao = 1 and env = indirect = 0. */
+	OcclusionTree *tree = re->occlusiontree;
+	OcclusionCache *cache;
+	OcclusionCacheSample *sample;
+	OccFace exclude;
+	int onlyshadow;
+
+	if (tree) {
+		if (shi->strand) {
+			sample_occ_surface(shi);
+		}
+		/* try to get result from the cache if possible */
+		else if (shi->depth != 0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
+			/* no luck, let's sample the occlusion */
+			exclude.obi = shi->obi - re->objectinstance;
+			exclude.facenr = shi->vlr->index;
+			onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
+			sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
+
+			/* fill result into sample, each time */
+			if (tree->cache) {
+				cache = &tree->cache[shi->thread];
+
+				if (cache->sample && cache->step) {
+					sample = &cache->sample[(shi->ys - cache->y) * cache->w + (shi->xs - cache->x)];
+					copy_v3_v3(sample->co, shi->co);
+					copy_v3_v3(sample->n, shi->vno);
+					copy_v3_v3(sample->ao, shi->ao);
+					copy_v3_v3(sample->env, shi->env);
+					copy_v3_v3(sample->indirect, shi->indirect);
+					sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
+					sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
+					sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
+					sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
+					/* record the pixel position: sample_occ_cache() interpolates with sample->x/y,
+					 * which would otherwise never be set for samples first filled here */
+					sample->x = shi->xs;
+					sample->y = shi->ys;
+					sample->filled = 1;
+				}
+			}
+		}
+	}
+	else {
+		shi->ao[0] = 1.0f;
+		shi->ao[1] = 1.0f;
+		shi->ao[2] = 1.0f;
+		zero_v3(shi->env);
+		zero_v3(shi->indirect);
+	}
+}
+
+void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
+{
+	OcclusionTree *tree = re->occlusiontree;
+	PixStr ps;
+	OcclusionCache *cache;
+	OcclusionCacheSample *sample;
+	OccFace exclude;
+	ShadeInput *shi;
+	intptr_t *rd = NULL;
+	int *ro = NULL, *rp = NULL, *rz = NULL, onlyshadow;
+	int x, y, step = CACHE_STEP;
+	/* Precompute occlusion samples on a coarse pixel grid of render part pa, into the cache of pa's thread. */
+	if (!tree || !tree->cache)
+		return;
+
+	cache = &tree->cache[pa->thread];
+	cache->w = pa->rectx;
+	cache->h = pa->recty;
+	cache->x = pa->disprect.xmin;
+	cache->y = pa->disprect.ymin;
+	cache->step = step;
+	cache->sample = MEM_callocN(sizeof(OcclusionCacheSample) * cache->w * cache->h, "OcclusionCacheSample");
+	sample = cache->sample;
+
+	if (re->osa) {
+		rd = pa->rectdaps;
+	}
+	else {
+		/* fake pixel struct for non-osa */
+		ps.next = NULL;
+		ps.mask = 0xFFFF;
+
+		ro = pa->recto;
+		rp = pa->rectp;
+		rz = pa->rectz;
+	}
+	/* sample and the rd / ro / rp / rz cursors advance together, one entry per pixel */
+	/* compute a sample every step pixels, plus the last column and the last row */
+	for (y = pa->disprect.ymin; y < pa->disprect.ymax; y++) {
+		for (x = pa->disprect.xmin; x < pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
+			if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax - 1))
+				continue;
+			if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax - 1))
+				continue;
+
+			if (re->osa) {
+				if (!*rd) continue;
+
+				shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
+			}
+			else {
+				if (!*rp) continue;
+
+				ps.obi = *ro;
+				ps.facenr = *rp;
+				ps.z = *rz;
+				shade_samples_fill_with_ps(ssamp, &ps, x, y);
+			}
+
+			shi = ssamp->shi;
+			if (shi->vlr) {
+				onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
+				exclude.obi = shi->obi - re->objectinstance;
+				exclude.facenr = shi->vlr->index;
+				sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
+
+				copy_v3_v3(sample->co, shi->co);
+				copy_v3_v3(sample->n, shi->vno);
+				copy_v3_v3(sample->ao, shi->ao);
+				copy_v3_v3(sample->env, shi->env);
+				copy_v3_v3(sample->indirect, shi->indirect);
+				sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
+				sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
+				sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
+				sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
+				sample->x = shi->xs;
+				sample->y = shi->ys;
+				sample->filled = 1;
+			}
+			/* stop entirely on cancel: a plain break would skip this row's cursor increments and keep sampling */
+			if (re->test_break(re->tbh))
+				return;
+		}
+	}
+}
+
+void free_occ_samples(Render *re, RenderPart *pa)
+{
+	OcclusionTree *tree = re->occlusiontree;
+	OcclusionCache *cache;
+	/* Release the cache samples of pa's thread and reset the cache dimensions; no-op without tree or cache. */
+	if (tree && tree->cache) {
+		cache = &tree->cache[pa->thread];
+
+		if (cache->sample)
+			MEM_freeN(cache->sample);
+		cache->sample = NULL;  /* avoid a dangling pointer and a double free on a repeated call */
+		cache->w = 0;
+		cache->h = 0;
+		cache->step = 0;
+	}
+}
+
diff --git a/source/blender/render/intern/source/pipeline.c b/source/blender/render/intern/source/pipeline.c
index e71cc6d063e..c9f13004836 100644
--- a/source/blender/render/intern/source/pipeline.c
+++ b/source/blender/render/intern/source/pipeline.c
@@ -137,7 +137,7 @@
 /* here we store all renders */
 static struct {
 	ListBase renderlist;
-} RenderGlobal = {{NULL, NULL}}; 
+} RenderGlobal = {{NULL, NULL}};
 
 /* ********* alloc and free ******** */
 
@@ -424,10 +424,10 @@ void RE_AcquireResultImage(Render *re, RenderResult *rr, const int view_id)
 		if (re->result) {
 			RenderLayer *rl;
 			RenderView *rv;
-			
+
 			rr->rectx = re->result->rectx;
 			rr->recty = re->result->recty;
-			
+
 			/* actview view */
 			rv = RE_RenderViewGetById(re->result, view_id);
 			rr->have_combined = (rv->rectf != NULL);
@@ -494,7 +494,7 @@ Render *RE_NewRender(const char *name)
 	/* only one render per name exists */
 	re = RE_GetRender(name);
 	if (re == NULL) {
-		
+
 		/* new render data struct */
 		re = MEM_callocN(sizeof(Render), "new render");
 		BLI_addtail(&RenderGlobal.renderlist, re);
@@ -502,7 +502,7 @@ Render *RE_NewRender(const char *name)
 		BLI_rw_mutex_init(&re->resultmutex);
 		BLI_rw_mutex_init(&re->partsmutex);
 	}
-	
+
 	RE_InitRenderCB(re);
 
 	return re;
@@ -574,10 +574,10 @@ void RE_FreeRender(Render *re)
 	/* main dbase can already be invalid now, some database-free code checks it */
 	re->main = NULL;
 	re->scene = NULL;
-	
+
 	render_result_free(re->result);
 	render_result_free(re->pushedresult);
-	
+
 	BLI_remlink(&RenderGlobal.renderlist, re);
 	MEM_freeN(re);
 }
@@ -715,7 +715,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd,
 	bool had_freestyle = (re->r.mode & R_EDGE_FRS) != 0;
 
 	re->ok = true;   /* maybe flag */
-	
+
 	re->i.starttime = PIL_check_seconds_timer();
 
 	/* copy render data and render layers for thread safety */
@@ -753,7 +753,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd,
 	}
 
 	re->r.scemode = check_mode_full_sample(&re->r);
-	
+
 	if (single_layer) {
 		int index = BLI_findindex(render_layers, single_layer);
 		if (index != -1) {
@@ -761,7 +761,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd,
 			re->r.scemode |= R_SINGLE_LAYER;
 		}
 	}
-		
+
 	/* if preview render, we try to keep old result */
 	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
 
@@ -794,7 +794,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd,
 		}
 	}
 	else {
-		
+
 		/* make empty render result, so display callbacks can initialize */
 		render_result_free(re->result);
 		re->result = MEM_callocN(sizeof(RenderResult), "new render result");
@@ -811,7 +811,7 @@ void RE_InitState(Render *re, Render *source, RenderData *rd,
 	RE_parts_clamp(re);
 
 	BLI_rw_mutex_unlock(&re->resultmutex);
-	
+
 	RE_init_threadcount(re);
 
 	RE_point_density_fix_linking();
@@ -925,7 +925,7 @@ void render_update_anim_renderdata(Render *re, RenderData *rd, ListBase *render_
 void RE_SetWindow(Render *re, const rctf *viewplane, float clipsta, float clipend)
 {
 	/* re->ok flag? */
-	
+
 	re->viewplane = *viewplane;
 	re->clipsta = clipsta;
 	re->clipend = clipend;
@@ -934,13 +934,13 @@ void RE_SetWindow(Render *re, const rctf *viewplane, float clipsta, float clipen
 	perspective_m4(re->winmat,
 	               re->viewplane.xmin, re->viewplane.xmax,
 	               re->viewplane.ymin, re->viewplane.ymax, re->clipsta, re->clipend);
-	
+
 }
 
 void RE_SetOrtho(Render *re, const rctf *viewplane, float clipsta, float clipend)
 {
 	/* re->ok flag? */
-	
+
 	re->viewplane = *viewplane;
 	re->clipsta = clipsta;
 	re->clipend = clipend;
@@ -961,7 +961,7 @@ void RE_SetView(Render *re, float mat[4][4])
 void RE_GetViewPlane(Render *re, rctf *r_viewplane, rcti *r_disprect)
 {
 	*r_viewplane = re->viewplane;
-	
+
 	/* make disprect zero when no border render, is needed to detect changes in 3d view render */
 	if (re->r.mode & R_BORDER) {
 		*r_disprect = re->disprect;
@@ -1028,7 +1028,7 @@ void RE_test_break_cb(Render *re, void *handle, int (*f)(void *handle))
 #if 0
 void RE_AddObject(Render *UNUSED(re), Object *UNUSED(ob))
 {
-	
+
 }
 #endif
 
@@ -1121,9 +1121,9 @@ static void do_render(Render *re)
 
 	/* now use renderdata and camera to set viewplane */
 	RE_SetCamera(re, camera);
-	
+
 	do_render_3d(re);
-	
+
 	/* when border render, check if we have to insert it in black */
 	render_result_uncrop(re);
 }
@@ -1136,7 +1136,7 @@ static void render_scene(Render *re, Scene *sce, int cfra)
 {
 	Render *resc = RE_NewSceneRender(sce);
 	int winx = re->winx, winy = re->winy;
-	
+
 	sce->r.cfra = cfra;
 
 	BKE_scene_camera_switch_update(sce);
@@ -1146,7 +1146,7 @@ static void render_scene(Render *re, Scene *sce, int cfra)
 		winx = (sce->r.size * sce->r.xsch) / 100;
 		winy = (sce->r.size * sce->r.ysch) / 100;
 	}
-	
+
 	/* initial setup */
 	RE_InitState(resc, re, &sce->r, &sce->view_layers, NULL, winx, winy, &re->disprect);
 
@@ -1157,7 +1157,7 @@ static void render_scene(Render *re, Scene *sce, int cfra)
 	resc->main = re->main;
 	resc->scene = sce;
 	resc->lay = sce->lay;
-	
+
 	/* ensure scene has depsgraph, base flags etc OK */
 	BKE_scene_set_background(re->main, sce);
 
@@ -1170,7 +1170,7 @@ static void render_scene(Render *re, Scene *sce, int cfra)
 	resc->sdh = re->sdh;
 	resc->current_scene_update = re->current_scene_update;
 	resc->suh = re->suh;
-	
+
 	do_render(resc);
 }
 
@@ -1179,11 +1179,11 @@ static int composite_needs_render(Scene *sce, int this_scene)
 {
 	bNodeTree *ntree = sce->nodetree;
 	bNode *node;
-	
+
 	if (ntree == NULL) return 1;
 	if (sce->use_nodes == false) return 1;
 	if ((sce->r.scemode & R_DOCOMP) == 0) return 1;
-	
+
 	for (node = ntree->nodes.first; node; node = node->next) {
 		if (node->type == CMP_NODE_R_LAYERS && (node->flag & NODE_MUTED) == 0)
 			if (this_scene == 0 || node->id == NULL || node->id == &sce->id)
@@ -1334,14 +1334,14 @@ static void tag_scenes_for_render(Render *re)
 {
 	bNode *node;
 	Scene *sce;
-	
+
 	for (sce = re->main->scene.first; sce; sce = sce->id.next) {
 		sce->id.tag &= ~LIB_TAG_DOIT;
 #ifdef DEPSGRAPH_WORKAROUND_HACK
 		tag_dependend_objects_for_render(re->main, sce);
 #endif
 	}
-	
+
 #ifdef WITH_FREESTYLE
 	if (re->freestyle_bmain) {
 		for (sce = re->freestyle_bmain->scene.first; sce; sce = sce->id.next) {
@@ -1359,9 +1359,9 @@ static void tag_scenes_for_render(Render *re)
 		tag_dependend_objects_for_render(re->main, re->scene);
 #endif
 	}
-	
+
 	if (re->scene->nodetree == NULL) return;
-	
+
 	/* check for render-layers nodes using other scenes, we tag them LIB_TAG_DOIT */
 	for (node = re->scene->nodetree->nodes.first; node; node = node->next) {
 		node->flag &= ~NODE_TEST;
@@ -1397,7 +1397,7 @@ static void tag_scenes_for_render(Render *re)
 			}
 		}
 	}
-	
+
 }
 
 static void ntree_render_scenes(Render *re)
@@ -1406,15 +1406,15 @@ static void ntree_render_scenes(Render *re)
 	int cfra = re->scene->r.cfra;
 	Scene *restore_scene = re->scene;
 	bool scene_changed = false;
-	
+
 	if (re->scene->nodetree == NULL) return;
-	
+
 	tag_scenes_for_render(re);
 
 #ifdef DEPSGRAPH_WORKAROUND_GROUP_HACK
 	tag_collections_for_render(re);
 #endif
-	
+
 	/* now foreach render-result node tagged we do a full render */
 	/* results are stored in a way compisitor will find it */
 	for (node = re->scene->nodetree->nodes.first; node; node = node->next) {
@@ -1426,7 +1426,7 @@ static void ntree_render_scenes(Render *re)
 					scene_changed |= scene != restore_scene;
 					render_scene(re, scene, cfra);
 					node->flag &= ~NODE_TEST;
-					
+
 					nodeUpdate(restore_scene->nodetree, node);
 				}
 			}
@@ -1531,10 +1531,10 @@ static void do_render_composite(Render *re)
 {
 	bNodeTree *ntree = re->scene->nodetree;
 	int update_newframe = 0;
-	
+
 	/* INIT seeding, compositor can use random texture */
 	BLI_srandom(re->r.cfra);
-	
+
 	if (composite_needs_render(re->scene, 1)) {
 		/* save memory... free all cached images */
 		ntreeFreeCache(ntree);
@@ -1550,7 +1550,7 @@ static void do_render_composite(Render *re)
 
 		/* ensure new result gets added, like for regular renders */
 		BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
-		
+
 		render_result_free(re->result);
 		if ((re->r.mode & R_CROP) == 0) {
 			render_result_disprect_to_full_resolution(re);
@@ -1558,30 +1558,30 @@ static void do_render_composite(Render *re)
 		re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS, RR_ALL_VIEWS);
 
 		BLI_rw_mutex_unlock(&re->resultmutex);
-		
+
 		/* scene render process already updates animsys */
 		update_newframe = 1;
 	}
-	
+
 	/* swap render result */
 	if (re->r.scemode & R_SINGLE_LAYER) {
 		BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
 		render_result_single_layer_end(re);
 		BLI_rw_mutex_unlock(&re->resultmutex);
 	}
-	
+
 	if (!re->test_break(re->tbh)) {
-		
+
 		if (ntree) {
 			ntreeCompositTagRender(re->scene);
 			ntreeCompositTagAnimated(ntree);
 		}
-		
+
 		if (ntree && re->scene->use_nodes && re->r.scemode & R_DOCOMP) {
 			/* checks if there are render-result nodes that need scene */
 			if ((re->r.scemode & R_SINGLE_LAYER) == 0)
 				ntree_render_scenes(re);
-			
+
 			if (!re->test_break(re->tbh)) {
 				ntree->stats_draw = render_composit_stats;
 				ntree->test_break = re->test_break;
@@ -1589,16 +1589,16 @@ static void do_render_composite(Render *re)
 				ntree->sdh = re;
 				ntree->tbh = re->tbh;
 				ntree->prh = re->prh;
-				
+
 				if (update_newframe) {
 					/* If we have consistent depsgraph now would be a time to update them. */
 				}
-				
+
 				RenderView *rv;
 				for (rv = re->result->views.first; rv; rv = rv->next) {
 					ntreeCompositExecTree(re->scene, ntree, &re->r, true, G.background == 0, &re->scene->view_settings, &re->scene->display_settings, rv->name);
 				}
-				
+
 				ntree->stats_draw = NULL;
 				ntree->test_break = NULL;
 				ntree->progress = NULL;
@@ -1651,15 +1651,15 @@ int RE_seq_render_active(Scene *scene, RenderData *rd)
 	Sequence *seq;
 
 	ed = scene->ed;
-	
+
 	if (!(rd->scemode & R_DOSEQ) || !ed || !ed->seqbase.first)
 		return 0;
-	
+
 	for (seq = ed->seqbase.first; seq; seq = seq->next) {
 		if (seq->type != SEQ_TYPE_SOUND_RAM)
 			return 1;
 	}
-	
+
 	return 0;
 }
 
@@ -1810,18 +1810,18 @@ static void do_render_all_options(Render *re)
 			do_render_seq(re);
 			render_seq = true;
 		}
-		
+
 		re->stats_draw(re->sdh, &re->i);
 		re->display_update(re->duh, re->result, NULL);
 	}
 	else {
 		do_render_composite(re);
 	}
-	
+
 	re->i.lastframetime = PIL_check_seconds_timer() - re->i.starttime;
-	
+
 	re->stats_draw(re->sdh, &re->i);
-	
+
 	/* save render result stamp if needed */
 	if (re->result != NULL) {
 		camera = RE_GetCamera(re);
@@ -1975,7 +1975,7 @@ static int check_composite_output(Scene *scene)
 bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *camera_override, ReportList *reports)
 {
 	int scemode = check_mode_full_sample(&scene->r);
-	
+
 	if (scene->r.mode & R_BORDER) {
 		if (scene->r.border.xmax <= scene->r.border.xmin ||
 		    scene->r.border.ymax <= scene->r.border.ymin)
@@ -1984,30 +1984,30 @@ bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *came
 			return 0;
 		}
 	}
-	
+
 	if (scemode & (R_EXR_TILE_FILE | R_FULL_SAMPLE)) {
 		char str[FILE_MAX];
-		
+
 		render_result_exr_file_path(scene, "", 0, str);
-		
+
 		if (!BLI_file_is_writable(str)) {
 			BKE_report(reports, RPT_ERROR, "Cannot save render buffers, check the temp default path");
 			return 0;
 		}
 	}
-	
+
 	if (scemode & R_DOCOMP) {
 		if (scene->use_nodes) {
 			if (!scene->nodetree) {
 				BKE_report(reports, RPT_ERROR, "No node tree in scene");
 				return 0;
 			}
-			
+
 			if (!check_composite_output(scene)) {
 				BKE_report(reports, RPT_ERROR, "No render output node in scene");
 				return 0;
 			}
-			
+
 			if (scemode & R_FULL_SAMPLE) {
 				if (composite_needs_render(scene, 0) == 0) {
 					BKE_report(reports, RPT_ERROR, "Full sample AA not supported without 3D rendering");
@@ -2016,12 +2016,12 @@ bool RE_is_rendering_allowed(Scene *scene, ViewLayer *single_layer, Object *came
 			}
 		}
 	}
-	
+
 	/* check valid camera, without camera render is OK (compo, seq) */
 	if (!check_valid_camera(scene, camera_override, reports)) {
 		return 0;
 	}
-	
+
 	/* get panorama & ortho, only after camera is set */
 	BKE_camera_object_mode(&scene->r, camera_override ? camera_override : scene->camera);
 
@@ -2098,19 +2098,19 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain,
 {
 	int winx, winy;
 	rcti disprect;
-	
+
 	/* r.xsch and r.ysch has the actual view window size
 	 * r.border is the clipping rect */
-	
+
 	/* calculate actual render result and display size */
 	winx = (rd->size * rd->xsch) / 100;
 	winy = (rd->size * rd->ysch) / 100;
-	
+
 	/* we always render smaller part, inserting it in larger image is compositor bizz, it uses disprect for it */
 	if (scene->r.mode & R_BORDER) {
 		disprect.xmin = rd->border.xmin * winx;
 		disprect.xmax = rd->border.xmax * winx;
-		
+
 		disprect.ymin = rd->border.ymin * winy;
 		disprect.ymax = rd->border.ymax * winy;
 	}
@@ -2119,7 +2119,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain,
 		disprect.xmax = winx;
 		disprect.ymax = winy;
 	}
-	
+
 	re->main = bmain;
 	re->scene = scene;
 	re->camera_override = camera_override;
@@ -2134,7 +2134,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain,
 		re->disprect = disprect;
 		return 1;
 	}
-	
+
 	/* check all scenes involved */
 	tag_scenes_for_render(re);
 
@@ -2153,17 +2153,17 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain,
 		ViewLayer *view_layer = BKE_view_layer_context_active_PLACEHOLDER(scene);
 		update_physics_cache(re, scene, view_layer, anim_init);
 	}
-	
+
 	if (single_layer || scene->r.scemode & R_SINGLE_LAYER) {
 		BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
 		render_result_single_layer_begin(re);
 		BLI_rw_mutex_unlock(&re->resultmutex);
 	}
-	
+
 	RE_InitState(re, NULL, &scene->r, &scene->view_layers, single_layer, winx, winy, &disprect);
 	if (!re->ok)  /* if an error was printed, abort */
 		return 0;
-	
+
 	/* initstate makes new result, have to send changed tags around */
 	ntreeCompositTagRender(re->scene);
 
@@ -2171,7 +2171,7 @@ static int render_initialize_from_main(Render *re, RenderData *rd, Main *bmain,
 
 	re->display_init(re->dih, re->result);
 	re->display_clear(re->dch, re->result);
-	
+
 	return 1;
 }
 
@@ -2188,9 +2188,9 @@ void RE_BlenderFrame(Render *re, Main *bmain, Scene *scene, ViewLayer *single_la
 
 	/* ugly global still... is to prevent preview events and signal subsurfs etc to make full resol */
 	G.is_rendering = true;
-	
+
 	scene->r.cfra = frame;
-	
+
 	if (render_initialize_from_main(re, &scene->r, bmain, scene, single_layer,
 	                                camera_override, lay_override, 0, 0))
 	{
@@ -2473,12 +2473,12 @@ static int do_write_image_or_movie(Render *re, Main *bmain, Scene *scene, bMovie
 		/* write images as individual images or stereo */
 		ok = RE_WriteRenderViewsImage(re->reports, &rres, scene, true, name);
 	}
-	
+
 	RE_ReleaseResultImageViews(re, &rres);
 
 	render_time = re->i.lastframetime;
 	re->i.lastframetime = PIL_check_seconds_timer() - re->i.starttime;
-	
+
 	BLI_timecode_string_from_time_simple(name, sizeof(name), re->i.lastframetime);
 	printf(" Time: %s", name);
 
@@ -2489,7 +2489,7 @@ static int do_write_image_or_movie(Render *re, Main *bmain, Scene *scene, bMovie
 
 	BLI_timecode_string_from_time_simple(name, sizeof(name), re->i.lastframetime - render_time);
 	printf(" (Saving: %s)\n", name);
-	
+
 	fputc('\n', stdout);
 	fflush(stdout); /* needed for renderd !! (not anymore... (ton)) */
 
@@ -2713,10 +2713,10 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri
 			/* run callbacs before rendering, before the scene is updated */
 			BLI_callback_exec(re->main, (ID *)scene, BLI_CB_EVT_RENDER_PRE);
 
-			
+
 			do_render_all_options(re);
 			totrendered++;
-			
+
 			if (re->test_break(re->tbh) == 0) {
 				if (!G.is_break)
 					if (!do_write_image_or_movie(re, bmain, scene, mh, totvideos, NULL))
@@ -2724,7 +2724,7 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri
 			}
 			else
 				G.is_break = true;
-		
+
 			if (G.is_break == true) {
 				/* remove touched file */
 				if (is_movie == false) {
@@ -2753,7 +2753,7 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri
 						}
 					}
 				}
-				
+
 				break;
 			}
 
@@ -2763,12 +2763,12 @@ void RE_BlenderAnim(Render *re, Main *bmain, Scene *scene, Object *camera_overri
 			}
 		}
 	}
-	
+
 	/* end movie */
 	if (is_movie) {
 		re_movie_free_all(re, mh, totvideos);
 	}
-	
+
 	if (totskipped && totrendered == 0)
 		BKE_report(re->reports, RPT_INFO, "No frames rendered, skipped to not overwrite");
 
@@ -2812,16 +2812,16 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode)
 	int winx, winy;
 	bool success;
 	rcti disprect;
-	
+
 	/* calculate actual render result and display size */
 	winx = (scene->r.size * scene->r.xsch) / 100;
 	winy = (scene->r.size * scene->r.ysch) / 100;
-	
+
 	/* only in movie case we render smaller part */
 	if (scene->r.mode & R_BORDER) {
 		disprect.xmin = scene->r.border.xmin * winx;
 		disprect.xmax = scene->r.border.xmax * winx;
-		
+
 		disprect.ymin = scene->r.border.ymin * winy;
 		disprect.ymax = scene->r.border.ymax * winy;
 	}
@@ -2830,17 +2830,17 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode)
 		disprect.xmax = winx;
 		disprect.ymax = winy;
 	}
-	
+
 	if (scenode)
 		scene = scenode;
-	
+
 	/* get render: it can be called from UI with draw callbacks */
 	re = RE_GetSceneRender(scene);
 	if (re == NULL)
 		re = RE_NewSceneRender(scene);
 	RE_InitState(re, NULL, &scene->r, &scene->view_layers, NULL, winx, winy, &disprect);
 	re->scene = scene;
-	
+
 	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
 	success = render_result_exr_file_cache_read(re);
 	BLI_rw_mutex_unlock(&re->resultmutex);
@@ -2850,7 +2850,7 @@ bool RE_ReadRenderResult(Scene *scene, Scene *scenode)
 	return success;
 }
 
-void RE_init_threadcount(Render *re) 
+void RE_init_threadcount(Render *re)
 {
 	re->r.threads = BKE_render_num_threads(&re->r);
 }
@@ -3014,7 +3014,7 @@ RenderPass *RE_create_gp_pass(RenderResult *rr, const char *layername, const cha
 		rl->rectx = rr->rectx;
 		rl->recty = rr->recty;
 	}
-	
+
 	/* clear previous pass if exist or the new image will be over previous one*/
 	RenderPass *rp = RE_pass_find_by_name(rl, RE_PASSNAME_COMBINED, viewname);
 	if (rp) {
diff --git a/source/blender/render/intern/source/pixelblending.c b/source/blender/render/intern/source/pixelblending.c
new file mode 100644
index 00000000000..c7cfe765f5b
--- /dev/null
+++ b/source/blender/render/intern/source/pixelblending.c
@@ -0,0 +1,400 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): Full recode, 2004-2006 Blender Foundation
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/pixelblending.c
+ *  \ingroup render
+ *
+ * Functions to blend pixels with or without alpha, in various formats
+ * nzc - June 2000
+ */
+
+
+#include <math.h>
+#include <string.h>
+
+/* global includes */
+
+/* own includes */
+#include "render_types.h"
+#include "pixelblending.h"
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+
+/* ------------------------------------------------------------------------- */
+/* Debug/behavior defines                                                   */
+/* if defined: alpha blending with floats clips color, as with shorts       */
+/* #define RE_FLOAT_COLOR_CLIPPING  */
+/* if defined: alpha values are clipped                                      */
+/* For now, we just keep alpha clipping. We run into thresholding and        */
+/* blending difficulties otherwise. Be careful here.                         */
+#define RE_ALPHA_CLIPPING
+
+
+
+/* Threshold for a 'full' pixel: pixels with alpha above this level are      */
+/* considered opaque. This is the decimal value for 0xFFF0 / 0xFFFF          */
+#define RE_FULL_COLOR_FLOAT 0.9998f
+/* Threshold for an 'empty' pixel: pixels with alpha below this level are    */
+/* considered completely transparent. This is the decimal value              */
+/* for 0x000F / 0xFFFF                                                       */
+#define RE_EMPTY_COLOR_FLOAT 0.0002f
+
+
+/* ------------------------------------------------------------------------- */
+
+void addAlphaOverFloat(float dest[4], const float source[4])
+{
+	/* "over" blend, in place: dest = source + (1 - source alpha) * dest, on all four channels */
+	float mul;
+	/* mul: fraction of the existing dest that is kept once source is added */
+	mul = 1.0f - source[3];
+
+	dest[0] = (mul * dest[0]) + source[0];
+	dest[1] = (mul * dest[1]) + source[1];
+	dest[2] = (mul * dest[2]) + source[2];
+	dest[3] = (mul * dest[3]) + source[3];
+
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+void addAlphaUnderFloat(float dest[4], const float source[4])
+{
+	float mul;
+	/* "under" blend, in place: dest += (1 - dest alpha) * source, on all four channels */
+	mul = 1.0f - dest[3];
+
+	dest[0] += (mul * source[0]);
+	dest[1] += (mul * source[1]);
+	dest[2] += (mul * source[2]);
+	dest[3] += (mul * source[3]);
+}
+
+
+/* ------------------------------------------------------------------------- */
+void addalphaAddfacFloat(float dest[4], const float source[4], char addfac)
+{
+	float m; /* weighting factor of destination */
+	float c; /* intermediate color           */
+
+	/* addfac is rescaled through (255 - addfac) / 255 before it scales the source alpha */
+	/* as addfac rises m approaches 1, so dest is attenuated less (blend gets more additive) */
+	m = 1.0f - (source[3] * ((255 - addfac) / 255.0f));
+
+	/* blend colors; the clip branches only compile when RE_FLOAT_COLOR_CLIPPING is defined */
+	c = (m * dest[0]) + source[0];
+#ifdef RE_FLOAT_COLOR_CLIPPING
+	if (c >= RE_FULL_COLOR_FLOAT) dest[0] = RE_FULL_COLOR_FLOAT;
+	else
+#endif
+	dest[0] = c;
+
+	c = (m * dest[1]) + source[1];
+#ifdef RE_FLOAT_COLOR_CLIPPING
+	if (c >= RE_FULL_COLOR_FLOAT) dest[1] = RE_FULL_COLOR_FLOAT;
+	else
+#endif
+	dest[1] = c;
+
+	c = (m * dest[2]) + source[2];
+#ifdef RE_FLOAT_COLOR_CLIPPING
+	if (c >= RE_FULL_COLOR_FLOAT) dest[2] = RE_FULL_COLOR_FLOAT;
+	else
+#endif
+	dest[2] = c;
+	/* alpha: clipped to RE_FULL_COLOR_FLOAT when RE_ALPHA_CLIPPING is defined */
+	c = (m * dest[3]) + source[3];
+#ifdef RE_ALPHA_CLIPPING
+	if (c >= RE_FULL_COLOR_FLOAT) dest[3] = RE_FULL_COLOR_FLOAT;
+	else
+#endif
+	dest[3] = c;
+
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+/* filtered adding to scanlines: adds col, weighted per cell, to the 3x3 RGBA pixels around rowbuf */
+void add_filt_fmask(unsigned int mask, const float col[4], float *rowbuf, int row_w)
+{
+	/* calc the value of mask */
+	float **fmask1 = R.samples->fmask1, **fmask2 = R.samples->fmask2;
+	float *rb1, *rb2, *rb3;
+	float val, r, g, b, al;
+	unsigned int a, maskand, maskshift;
+	int j;
+
+	r = col[0];
+	g = col[1];
+	b = col[2];
+	al = col[3];
+	/* rb2 starts one pixel (4 floats) before rowbuf; rb3 / rb1 are the same column one row_w earlier / later */
+	rb2 = rowbuf - 4;
+	rb3 = rb2 - 4 * row_w;
+	rb1 = rb2 + 4 * row_w;
+	/* low 8 bits of mask index the fmask1 tables, the remaining bits index the fmask2 tables */
+	maskand = (mask & 255);
+	maskshift = (mask >> 8);
+
+	for (j = 2; j >= 0; j--) {
+		/* table entries a, a + 3 and a + 6 give the weights for rb1, rb2 and rb3 in this column */
+		a = j;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			rb1[0] += val * r;
+			rb1[1] += val * g;
+			rb1[2] += val * b;
+			rb1[3] += val * al;
+		}
+		a += 3;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			rb2[0] += val * r;
+			rb2[1] += val * g;
+			rb2[2] += val * b;
+			rb2[3] += val * al;
+		}
+		a += 3;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			rb3[0] += val * r;
+			rb3[1] += val * g;
+			rb3[2] += val * b;
+			rb3[3] += val * al;
+		}
+		/* step all three row pointers to the next pixel column */
+		rb1 += 4;
+		rb2 += 4;
+		rb3 += 4;
+	}
+}
+
+
+void mask_array(unsigned int mask, float filt[3][3])
+{
+	float **fmask1 = R.samples->fmask1, **fmask2 = R.samples->fmask2;
+	unsigned int maskand = (mask & 255);
+	unsigned int maskshift = (mask >> 8);
+	int a, j;
+	/* fill filt with the 9 weights for this mask: each cell is an fmask1 entry plus an fmask2 entry */
+	for (j = 2; j >= 0; j--) {
+		/* table index a = j feeds row 2, a + 3 row 1 and a + 6 row 0; the column written is 2 - j */
+		a = j;
+
+		filt[2][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+
+		a += 3;
+
+		filt[1][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+
+		a += 3;
+
+		filt[0][2 - j] = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+	}
+}
+
+
+/**
+ * Index ordering, scanline based:
+ *
+ * <pre>
+ *      ---    ---   ---
+ *     | 2,0 | 2,1 | 2,2 |
+ *      ---    ---   ---
+ *     | 1,0 | 1,1 | 1,2 |
+ *      ---    ---   ---
+ *     | 0,0 | 0,1 | 0,2 |
+ *      ---    ---   ---
+ * </pre>
+ */
+
+void add_filt_fmask_coord(float filt[3][3], const float col[4], float *rowbuf, int row_stride, int x, int y, rcti *mask)
+{
+	float *fpoin[3][3];
+	float val, r, g, b, al, lfilt[3][3];
+
+	r = col[0];
+	g = col[1];
+	b = col[2];
+	al = col[3];
+	/* work on a local copy so the edge handling below can swap weights without modifying filt */
+	memcpy(lfilt, filt, sizeof(lfilt));
+	/* fpoin[i][j]: RGBA pixel at row offset (i - 1) * row_stride and column offset (j - 1) from rowbuf */
+	fpoin[0][1] = rowbuf - 4 * row_stride;
+	fpoin[1][1] = rowbuf;
+	fpoin[2][1] = rowbuf + 4 * row_stride;
+
+	fpoin[0][0] = fpoin[0][1] - 4;
+	fpoin[1][0] = fpoin[1][1] - 4;
+	fpoin[2][0] = fpoin[2][1] - 4;
+
+	fpoin[0][2] = fpoin[0][1] + 4;
+	fpoin[1][2] = fpoin[1][1] + 4;
+	fpoin[2][2] = fpoin[2][1] + 4;
+
+	/* limit filtering to within a mask for border rendering, so pixels don't
+	 * leak outside of the border */
+	if (y <= mask->ymin) {
+		fpoin[0][0] = fpoin[1][0];
+		fpoin[0][1] = fpoin[1][1];
+		fpoin[0][2] = fpoin[1][2];
+		/* filter needs the opposite value yes! */
+		lfilt[0][0] = filt[2][0];
+		lfilt[0][1] = filt[2][1];
+		lfilt[0][2] = filt[2][2];
+	}
+	else if (y >= mask->ymax - 1) {
+		fpoin[2][0] = fpoin[1][0];
+		fpoin[2][1] = fpoin[1][1];
+		fpoin[2][2] = fpoin[1][2];
+		/* same trick on the other edge: row 2 lands on the center row with row 0's weights */
+		lfilt[2][0] = filt[0][0];
+		lfilt[2][1] = filt[0][1];
+		lfilt[2][2] = filt[0][2];
+	}
+	/* same redirection for the columns at the x edges of the mask */
+	if (x <= mask->xmin) {
+		fpoin[2][0] = fpoin[2][1];
+		fpoin[1][0] = fpoin[1][1];
+		fpoin[0][0] = fpoin[0][1];
+
+		lfilt[2][0] = filt[2][2];
+		lfilt[1][0] = filt[1][2];
+		lfilt[0][0] = filt[0][2];
+	}
+	else if (x >= mask->xmax - 1) {
+		fpoin[2][2] = fpoin[2][1];
+		fpoin[1][2] = fpoin[1][1];
+		fpoin[0][2] = fpoin[0][1];
+
+		lfilt[2][2] = filt[2][0];
+		lfilt[1][2] = filt[1][0];
+		lfilt[0][2] = filt[0][0];
+	}
+
+
+	/* loop unroll: MASKFILT(i, j) adds col * lfilt[i][j] to the pixel at fpoin[i][j], skipping zero weights */
+#define MASKFILT(i, j)                                                        \
+	val = lfilt[i][j];                                                        \
+	if (val != 0.0f) {                                                        \
+		float *fp = fpoin[i][j];                                              \
+		fp[0] += val * r;                                                     \
+		fp[1] += val * g;                                                     \
+		fp[2] += val * b;                                                     \
+		fp[3] += val * al;                                                    \
+	} (void)0
+
+	MASKFILT(0, 0);
+	MASKFILT(0, 1);
+	MASKFILT(0, 2);
+	MASKFILT(1, 0);
+	MASKFILT(1, 1);
+	MASKFILT(1, 2);
+	MASKFILT(2, 0);
+	MASKFILT(2, 1);
+	MASKFILT(2, 2);
+
+#undef MASKFILT
+}
+
+void add_filt_fmask_pixsize(unsigned int mask, float *in, float *rowbuf, int row_w, int pixsize)
+{
+	/* calc the value of mask */
+	float **fmask1 = R.samples->fmask1, **fmask2 = R.samples->fmask2;
+	float *rb1, *rb2, *rb3;
+	float val;
+	unsigned int a, maskand, maskshift;
+	int i, j;
+	/* same 3x3 weighted add as add_filt_fmask, but each pixel is pixsize floats taken from in[] */
+	rb2 = rowbuf - pixsize;
+	rb3 = rb2 - pixsize * row_w;
+	rb1 = rb2 + pixsize * row_w;
+	/* low 8 bits of mask index the fmask1 tables, the remaining bits index the fmask2 tables */
+	maskand = (mask & 255);
+	maskshift = (mask >> 8);
+
+	for (j = 2; j >= 0; j--) {
+		/* table entries a, a + 3 and a + 6 give the weights for rb1, rb2 and rb3 in this column */
+		a = j;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			for (i = 0; i < pixsize; i++)
+				rb1[i] += val * in[i];
+		}
+		a += 3;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			for (i = 0; i < pixsize; i++)
+				rb2[i] += val * in[i];
+		}
+		a += 3;
+
+		val = *(fmask1[a] + maskand) + *(fmask2[a] + maskshift);
+		if (val != 0.0f) {
+			for (i = 0; i < pixsize; i++)
+				rb3[i] += val * in[i];
+		}
+		/* step all three row pointers to the next pixel column */
+		rb1 += pixsize;
+		rb2 += pixsize;
+		rb3 += pixsize;
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+void addalphaAddFloat(float dest[4], const float source[4])
+{
+	/* additive blend, in place; a dest with alpha below RE_EMPTY_COLOR_FLOAT is overwritten by source */
+	/* Makes me wonder whether this is required... */
+	if (dest[3] < RE_EMPTY_COLOR_FLOAT) {
+		dest[0] = source[0];
+		dest[1] = source[1];
+		dest[2] = source[2];
+		dest[3] = source[3];
+		return;
+	}
+
+	/* no clipping! plain per-channel sum, alpha included */
+	dest[0] = dest[0] + source[0];
+	dest[1] = dest[1] + source[1];
+	dest[2] = dest[2] + source[2];
+	dest[3] = dest[3] + source[3];
+
+}
+
+
+/* ---------------------------------------------------------------------------- */
diff --git a/source/blender/render/intern/source/pixelshading.c b/source/blender/render/intern/source/pixelshading.c
new file mode 100644
index 00000000000..7f202629ce4
--- /dev/null
+++ b/source/blender/render/intern/source/pixelshading.c
@@ -0,0 +1,650 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): 2004-2006, Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/pixelshading.c
+ *  \ingroup render
+ */
+
+
+#include <float.h>
+#include <math.h>
+#include <string.h>
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+/* External modules: */
+
+#include "DNA_group_types.h"
+#include "DNA_material_types.h"
+#include "DNA_object_types.h"
+#include "DNA_image_types.h"
+#include "DNA_texture_types.h"
+#include "DNA_lamp_types.h"
+
+#include "BKE_material.h"
+
+
+/* own module */
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "texture.h"
+#include "rendercore.h"
+#include "shadbuf.h"
+#include "pixelshading.h"
+#include "sunsky.h"
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+
+extern const float hashvectf[];
+
+static void render_lighting_halo(HaloRen *har, float col_r[3])
+{
+	GroupObject *go;
+	LampRen *lar;
+	float i, inp, inpr, rco[3], dco[3], lv[3], lampdist, ld, t, *vn;
+	float ir, ig, ib, shadfac, soft, lacol[3];
+	/* sums the light of every lamp in R.lights at the halo, then multiplies col_r by the sum (clamped >= 0) */
+	ir= ig= ib= 0.0;
+	/* rco: copy of har->co; dco: 1/har->rad in every component (both are passed to testshadowbuf) */
+	copy_v3_v3(rco, har->co);
+	dco[0]=dco[1]=dco[2]= 1.0f/har->rad;
+
+	vn= har->no;
+
+	for (go=R.lights.first; go; go= go->next) {
+		lar= go->lampren;
+
+		/* test for lamplayer */
+		if (lar->mode & LA_LAYER) if ((lar->lay & har->lay)==0) continue;
+
+		/* lampdist calculation */
+		if (lar->type==LA_SUN || lar->type==LA_HEMI) {
+			copy_v3_v3(lv, lar->vec);
+			lampdist= 1.0;
+		}
+		else {
+			lv[0]= rco[0]-lar->co[0];
+			lv[1]= rco[1]-lar->co[1];
+			lv[2]= rco[2]-lar->co[2];
+			ld = len_v3(lv);
+			lv[0]/= ld;
+			lv[1]/= ld;
+			lv[2]/= ld;
+
+			/* ld is re-used further on (texco's) */
+
+			if (lar->mode & LA_QUAD) {
+				t= 1.0;
+				if (lar->ld1>0.0f)
+					t= lar->dist/(lar->dist+lar->ld1*ld);
+				if (lar->ld2>0.0f)
+					t*= lar->distkw/(lar->distkw+lar->ld2*ld*ld);
+
+				lampdist= t;
+			}
+			else {
+				lampdist= (lar->dist/(lar->dist+ld));
+			}
+
+			if (lar->mode & LA_SPHERE) {
+				t= lar->dist - ld;
+				if (t<0.0f) continue;
+
+				t/= lar->dist;
+				lampdist*= (t);
+			}
+
+		}
+
+		lacol[0]= lar->r;
+		lacol[1]= lar->g;
+		lacol[2]= lar->b;
+
+		if (lar->mode & LA_TEXTURE) {
+			ShadeInput shi;
+
+			/* Warning, This is not that nice, and possibly a bit slow,
+			 * however some variables were not initialized properly in, unless using shade_input_initialize(...),
+			 * we need to do a memset */
+			memset(&shi, 0, sizeof(ShadeInput));
+			/* end warning! - Campbell */
+
+			copy_v3_v3(shi.co, rco);
+			shi.osatex= 0;
+			do_lamp_tex(lar, lv, &shi, lacol, LA_TEXTURE);
+		}
+
+		if (lar->type==LA_SPOT) {
+
+			if (lar->mode & LA_SQUARE) {
+				if (lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2]>0.0f) {
+					float x, lvrot[3];
+
+					/* rotate view to lampspace */
+					copy_v3_v3(lvrot, lv);
+					mul_m3_v3(lar->imat, lvrot);
+
+					x = max_ff(fabsf(lvrot[0]/lvrot[2]), fabsf(lvrot[1]/lvrot[2]));
+					/* 1.0/(sqrt(1+x*x)) is equivalent to cos(atan(x)) */
+
+					inpr = 1.0f / (sqrtf(1.0f + x * x));
+				}
+				else inpr= 0.0;
+			}
+			else {
+				inpr= lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2];
+			}
+
+			t= lar->spotsi;
+			if (inpr<t) continue;
+			else {
+				t= inpr-t;
+				soft= 1.0;
+				if (t<lar->spotbl && lar->spotbl!=0.0f) {
+					/* soft area */
+					i= t/lar->spotbl;
+					t= i*i;
+					soft= (3.0f*t-2.0f*t*i);
+					inpr*= soft;
+				}
+				if (lar->mode & LA_ONLYSHADOW) {
+					/* if (ma->mode & MA_SHADOW) { */
+					/* dot product positive: front side face! */
+					inp= vn[0]*lv[0] + vn[1]*lv[1] + vn[2]*lv[2];
+					if (inp>0.0f) {
+						/* testshadowbuf==0.0 : 100% shadow */
+						shadfac = testshadowbuf(&R, lar->shb, rco, dco, dco, inp, 0.0f);
+						if ( shadfac>0.0f ) {
+							shadfac*= inp*soft*lar->energy;
+							ir -= shadfac;
+							ig -= shadfac;
+							ib -= shadfac;
+
+							continue;
+						}
+					}
+					/* } */
+				}
+				lampdist*=inpr;
+			}
+			if (lar->mode & LA_ONLYSHADOW) continue;
+
+		}
+
+		/* dot product and  reflectivity*/
+
+		inp = 1.0f - fabsf(dot_v3v3(vn, lv));
+
+		/* inp= cos(0.5*M_PI-acos(inp)); */
+
+		i= inp;
+
+		if (lar->type==LA_HEMI) {
+			i= 0.5f*i+0.5f;
+		}
+		if (i>0.0f) {
+			i*= lampdist;
+		}
+
+		/* shadow  */
+		if (i> -0.41f) { /* heuristic value! */
+			if (lar->shb) {
+				shadfac = testshadowbuf(&R, lar->shb, rco, dco, dco, inp, 0.0f);
+				if (shadfac==0.0f) continue;
+				i*= shadfac;
+			}
+		}
+
+		if (i>0.0f) {
+			ir+= i*lacol[0];
+			ig+= i*lacol[1];
+			ib+= i*lacol[2];
+		}
+	}
+	/* only-shadow lamps subtract above, so clamp the sums before scaling the color */
+	if (ir<0.0f) ir= 0.0f;
+	if (ig<0.0f) ig= 0.0f;
+	if (ib<0.0f) ib= 0.0f;
+
+	col_r[0]*= ir;
+	col_r[1]*= ig;
+	col_r[2]*= ib;
+
+}
+
+
+/**
+ * Converts a halo z-buffer value to distance from the camera's near plane
+ * \param z The z-buffer value to convert
+ * \return a distance from the camera's near plane in blender units (10e10 when z >= 0x7FFFFF)
+ */
+static float haloZtoDist(int z)
+{
+	float zco = 0;
+	/* z at or past 0x7FFFFF is reported as a very large distance */
+	if (z >= 0x7FFFFF)
+		return 10e10;
+	else {
+		zco = (float)z/(float)0x7FFFFF;
+		if (R.r.mode & R_ORTHO)
+			return (R.winmat[3][2] - zco*R.winmat[3][3])/(R.winmat[2][2]);
+		else
+			return (R.winmat[3][2])/(R.winmat[2][2] - R.winmat[2][3]*zco);
+	}
+}
+
+/** Shade one pixel of a halo into col.
+ * \param col (float[4]) Store the rgb color here (with alpha)
+ * The alpha is used to blend the color to the background
+ * color_new = (1-alpha)*color_background + color
+ * \param zz The current zbuffer value at the place of this pixel
+ * \param dist Distance of the pixel from the center of the halo squared. Given in pixels
+ * \param xn The x coordinate of the pixel relative to the center of the halo. given in pixels
+ * \param yn The y coordinate of the pixel relative to the center of the halo. given in pixels
+ */
+int shadeHaloFloat(HaloRen *har, float col[4], int zz,
+                   float dist, float xn,  float yn, short flarec)
+{
+	/* fill in col */
+	float t, zn, radist, ringf=0.0f, linef=0.0f, alpha, si, co;
+	int a;
+	/* returns 0 (col left untouched) when the pixel contributes nothing, 1 once col has been filled */
+	if (R.wrld.mode & WO_MIST) {
+		if (har->type & HA_ONLYSKY) {
+			alpha= har->alfa;
+		}
+		else {
+			/* a bit patchy... */
+			alpha= mistfactor(-har->co[2], har->co)*har->alfa;
+		}
+	}
+	else alpha= har->alfa;
+
+	if (alpha==0.0f)
+		return 0;
+
+	/* soften the halo if it intersects geometry */
+	if (har->mat && har->mat->mode & MA_HALO_SOFT) {
+		float segment_length, halo_depth, distance_from_z /* , visible_depth */ /* UNUSED */, soften;
+
+		/* calculate halo depth */
+		segment_length= har->hasize*sasqrt(1.0f - dist/(har->rad*har->rad));
+		halo_depth= 2.0f*segment_length;
+
+		if (halo_depth < FLT_EPSILON)
+			return 0;
+
+		/* calculate how much of this depth is visible */
+		distance_from_z = haloZtoDist(zz) - haloZtoDist(har->zs);
+		/* visible_depth = halo_depth; */ /* UNUSED */
+		if (distance_from_z < segment_length) {
+			soften= (segment_length + distance_from_z)/halo_depth;
+
+			/* apply softening to alpha */
+			if (soften < 1.0f)
+				alpha *= soften;
+			if (alpha <= 0.0f)
+				return 0;
+		}
+	}
+	else {
+		/* not a soft halo. use the old softening code */
+		/* halo being intersected? */
+		if (har->zs> zz-har->zd) {
+			t= ((float)(zz-har->zs))/(float)har->zd;
+			alpha*= sqrtf(sqrtf(t));
+		}
+	}
+
+	radist = sqrtf(dist);
+
+	/* watch it: not used nicely: flarec is set at zero in pixstruct */
+	if (flarec) har->pixels+= (int)(har->rad-radist);
+
+	if (har->ringc) {
+		const float *rc;
+		float fac;
+		int ofs;
+
+		/* per ring an antialiased circle */
+		ofs= har->seed;
+
+		for (a= har->ringc; a>0; a--, ofs+=2) {
+
+			rc= hashvectf + (ofs % 768);
+
+			fac = fabsf(rc[1] * (har->rad * fabsf(rc[0]) - radist));
+
+			if (fac< 1.0f) {
+				ringf+= (1.0f-fac);
+			}
+		}
+	}
+
+	if (har->type & HA_VECT) {
+		dist= fabsf(har->cos * (yn) - har->sin * (xn)) / har->rad;
+		if (dist>1.0f) dist= 1.0f;
+		if (har->tex) {
+			zn= har->sin*xn - har->cos*yn;
+			yn= har->cos*xn + har->sin*yn;
+			xn= zn;
+		}
+	}
+	else dist= dist/har->radsq;
+
+	if (har->type & HA_FLARECIRC) {
+		dist = 0.5f + fabsf(dist - 0.5f);
+	}
+	/* reshape the falloff according to har->hard thresholds (20 / 30 / 40 / 50) */
+	if (har->hard>=30) {
+		dist = sqrtf(dist);
+		if (har->hard>=40) {
+			dist = sinf(dist*(float)M_PI_2);
+			if (har->hard>=50) {
+				dist = sqrtf(dist);
+			}
+		}
+	}
+	else if (har->hard<20) dist*=dist;
+	/* from here on dist is an intensity: 1 - falloff, or 0 at and beyond 1 */
+	if (dist < 1.0f)
+		dist= (1.0f-dist);
+	else
+		dist= 0.0f;
+
+	if (har->linec) {
+		const float *rc;
+		float fac;
+		int ofs;
+
+		/* per starpoint an antialiased line */
+		ofs= har->seed;
+
+		for (a= har->linec; a>0; a--, ofs+=3) {
+
+			rc= hashvectf + (ofs % 768);
+
+			fac = fabsf((xn) * rc[0] + (yn) * rc[1]);
+
+			if (fac< 1.0f )
+				linef+= (1.0f-fac);
+		}
+
+		linef*= dist;
+	}
+
+	if (har->starpoints) {
+		float ster, angle;
+		/* rotation */
+		angle = atan2f(yn, xn);
+		angle *= (1.0f+0.25f*har->starpoints);
+
+		co= cosf(angle);
+		si= sinf(angle);
+
+		angle= (co*xn+si*yn)*(co*yn-si*xn);
+
+		ster = fabsf(angle);
+		if (ster>1.0f) {
+			ster= (har->rad)/(ster);
+
+			if (ster<1.0f) dist*= sqrtf(ster);
+		}
+	}
+
+	/* disputable optimize... (ton) */
+	if (dist<=0.00001f)
+		return 0;
+
+	dist*= alpha;
+	ringf*= dist;
+	linef*= alpha;
+
+	/* The color is either the rgb spec-ed by the user, or extracted from   */
+	/* the texture                                                           */
+	if (har->tex) {
+		col[0]= har->r;
+		col[1]= har->g;
+		col[2]= har->b;
+		col[3]= dist;
+
+		do_halo_tex(har, xn, yn, col);
+
+		col[0]*= col[3];
+		col[1]*= col[3];
+		col[2]*= col[3];
+
+	}
+	else {
+		col[0]= dist*har->r;
+		col[1]= dist*har->g;
+		col[2]= dist*har->b;
+		if (har->type & HA_XALPHA) col[3]= dist*dist;
+		else col[3]= dist;
+	}
+
+	if (har->mat) {
+		if (har->mat->mode & MA_HALO_SHADE) {
+			/* we test for lights because of preview... */
+			if (R.lights.first) render_lighting_halo(har, col);
+		}
+
+		/* Next, we do the line and ring factor modifications. */
+		if (linef!=0.0f) {
+			Material *ma= har->mat;
+
+			col[0]+= linef * ma->specr;
+			col[1]+= linef * ma->specg;
+			col[2]+= linef * ma->specb;
+
+			if (har->type & HA_XALPHA) col[3]+= linef*linef;
+			else col[3]+= linef;
+		}
+		if (ringf!=0.0f) {
+			Material *ma= har->mat;
+
+			col[0]+= ringf * ma->mirr;
+			col[1]+= ringf * ma->mirg;
+			col[2]+= ringf * ma->mirb;
+
+			if (har->type & HA_XALPHA) col[3]+= ringf*ringf;
+			else col[3]+= ringf;
+		}
+	}
+
+	/* alpha requires clip, gives black dots */
+	if (col[3] > 1.0f)
+		col[3]= 1.0f;
+
+	return 1;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Only view vector is important here (rco, dxyview and thread are only passed to do_sky_tex). Result goes to col_r[3] */
+void shadeSkyView(float col_r[3], const float rco[3], const float view[3], const float dxyview[2], short thread)
+{
+	float zen[3], hor[3], blend, blendm;
+	int skyflag;
+
+	/* flag indicating if we render the top hemisphere */
+	skyflag = WO_ZENUP;
+
+	/* Some view vector stuff. */
+	if (R.wrld.skytype & WO_SKYREAL) {
+		/* blend from the angle between view and R.grvec; a negative dot clears skyflag */
+		blend = dot_v3v3(view, R.grvec);
+
+		if (blend<0.0f) skyflag= 0;
+
+		blend = fabsf(blend);
+	}
+	else if (R.wrld.skytype & WO_SKYPAPER) {
+		blend= 0.5f + 0.5f * view[1];
+	}
+	else {
+		/* the fraction of how far we are above the bottom of the screen */
+		blend = fabsf(0.5f + view[1]);
+	}
+	/* start from the world colors stored at R.wrld.horr and R.wrld.zenr; do_sky_tex may change them */
+	copy_v3_v3(hor, &R.wrld.horr);
+	copy_v3_v3(zen, &R.wrld.zenr);
+
+	/* Careful: SKYTEX and SKYBLEND are NOT mutually exclusive! If           */
+	/* SKYBLEND is active, the texture and color blend are added.           */
+	if (R.wrld.skytype & WO_SKYTEX) {
+		float lo[3];
+		copy_v3_v3(lo, view);
+		if (R.wrld.skytype & WO_SKYREAL) {
+
+			mul_m3_v3(R.imat, lo);
+
+			SWAP(float, lo[1],  lo[2]);
+
+		}
+		do_sky_tex(rco, view, lo, dxyview, hor, zen, &blend, skyflag, thread);
+	}
+
+	if (blend>1.0f) blend= 1.0f;
+	blendm= 1.0f-blend;
+
+	/* No clipping, no conversion! */
+	if (R.wrld.skytype & WO_SKYBLEND) {
+		col_r[0] = (blendm*hor[0] + blend*zen[0]);
+		col_r[1] = (blendm*hor[1] + blend*zen[1]);
+		col_r[2] = (blendm*hor[2] + blend*zen[2]);
+	}
+	else {
+		/* Done when a texture was grabbed. */
+		col_r[0]= hor[0];
+		col_r[1]= hor[1];
+		col_r[2]= hor[2];
+	}
+}
+
+/* shade sky according to sun lamps: blends the sky radiance of every sun lamp with the sky effect into col_r */
+void shadeSunView(float col_r[3], const float view[3])
+{
+	GroupObject *go;
+	LampRen *lar;
+	float sview[3];
+	bool do_init = true;
+
+	for (go=R.lights.first; go; go= go->next) {
+		lar= go->lampren;
+		if (lar->type==LA_SUN &&	lar->sunsky && (lar->sunsky->effect_type & LA_SUN_EFFECT_SKY)) {
+			float sun_collector[3];
+			float colorxyz[3];
+
+			if (do_init) {
+				/* done once, on the first matching lamp: normalize view, rotate by R.imat, clamp z to >= 0 */
+				normalize_v3_v3(sview, view);
+				mul_m3_v3(R.imat, sview);
+				if (sview[2] < 0.0f)
+					sview[2] = 0.0f;
+				normalize_v3(sview);
+				do_init = false;
+			}
+			/* sky radiance comes back as XYZ, is converted to RGB, then ramp-blended into col_r */
+			GetSkyXYZRadiancef(lar->sunsky, sview, colorxyz);
+			xyz_to_rgb(colorxyz[0], colorxyz[1], colorxyz[2], &sun_collector[0], &sun_collector[1], &sun_collector[2],
+			           lar->sunsky->sky_colorspace);
+
+			ramp_blend(lar->sunsky->skyblendtype, col_r, lar->sunsky->skyblendfac, sun_collector);
+		}
+	}
+}
+
+
+/*
+ * Stuff the sky color into the collector (alpha is set to 0), then blend in the sun lamps' sky.
+ */
+void shadeSkyPixel(float collector[4], float fx, float fy, short thread)
+{
+	float view[3], dxyview[2];
+
+	/*
+	 * The rules for sky:
+	 * 1. Neither color blend nor texture is enabled: use the solid R.wrld.horr color. Stop
+	 * 2. get texture and color blend, and combine these.
+	 */
+
+	float fac;
+
+	if ((R.wrld.skytype & (WO_SKYBLEND+WO_SKYTEX))==0) {
+		/* 1. solid color */
+		copy_v3_v3(collector, &R.wrld.horr);
+
+		collector[3] = 0.0f;
+	}
+	else {
+		/* 2. */
+
+		/* This one true because of the context of this routine  */
+		if (R.wrld.skytype & WO_SKYPAPER) {
+			view[0]= -1.0f + 2.0f*(fx/(float)R.winx);
+			view[1]= -1.0f + 2.0f*(fy/(float)R.winy);
+			view[2]= 0.0;
+
+			dxyview[0]= 1.0f/(float)R.winx;
+			dxyview[1]= 1.0f/(float)R.winy;
+		}
+		else {
+			calc_view_vector(view, fx, fy);
+			fac= normalize_v3(view);
+			/* dxyview is only filled in when a sky texture is enabled */
+			if (R.wrld.skytype & WO_SKYTEX) {
+				dxyview[0]= -R.viewdx/fac;
+				dxyview[1]= -R.viewdy/fac;
+			}
+		}
+
+		/* get sky color in the collector */
+		shadeSkyView(collector, NULL, view, dxyview, thread);
+		collector[3] = 0.0f;
+	}
+	/* view is recomputed here, un-normalized, before it is handed to shadeSunView */
+	calc_view_vector(view, fx, fy);
+	shadeSunView(collector, view);
+}
+
+/* aerial perspective: runs the atmosphere shader on collector for the view through (fx, fy) at the given distance */
+void shadeAtmPixel(struct SunSky *sunsky, float collector[3], float fx, float fy, float distance)
+{
+	float view[3];
+	/* the view vector is normalized but, unlike in shadeSunView, not rotated by R.imat (see disabled line) */
+	calc_view_vector(view, fx, fy);
+	normalize_v3(view);
+	/*mul_m3_v3(R.imat, view);*/
+	AtmospherePixleShader(sunsky, view, distance, collector);
+}
+
+/* eof */
diff --git a/source/blender/render/intern/source/pointdensity.c b/source/blender/render/intern/source/pointdensity.c
index 53359c305dc..c025a1fdef7 100644
--- a/source/blender/render/intern/source/pointdensity.c
+++ b/source/blender/render/intern/source/pointdensity.c
@@ -102,7 +102,7 @@ static void point_data_pointers(PointDensity *pd,
 	const int totpoint = pd->totpoints;
 	float *data = pd->point_data;
 	int offset = 0;
-	
+
 	if (data_used & POINT_DATA_VEL) {
 		if (r_data_velocity)
 			*r_data_velocity = data + offset;
@@ -112,7 +112,7 @@ static void point_data_pointers(PointDensity *pd,
 		if (r_data_velocity)
 			*r_data_velocity = NULL;
 	}
-	
+
 	if (data_used & POINT_DATA_LIFE) {
 		if (r_data_life)
 			*r_data_life = data + offset;
@@ -122,7 +122,7 @@ static void point_data_pointers(PointDensity *pd,
 		if (r_data_life)
 			*r_data_life = NULL;
 	}
-	
+
 	if (data_used & POINT_DATA_COLOR) {
 		if (r_data_color)
 			*r_data_color = data + offset;
@@ -283,19 +283,19 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob)
 	const MLoopCol *mcol;
 	char layername[MAX_CUSTOMDATA_LAYER_NAME];
 	int i;
-	
+
 	BLI_assert(data_color);
-	
+
 	if (!CustomData_has_layer(&mesh->ldata, CD_MLOOPCOL))
 		return;
 	CustomData_validate_layer_name(&mesh->ldata, CD_MLOOPCOL, pd->vertex_attribute_name, layername);
 	mcol = CustomData_get_layer_named(&mesh->ldata, CD_MLOOPCOL, layername);
 	if (!mcol)
 		return;
-	
+
 	/* Stores the number of MLoops using the same vertex, so we can normalize colors. */
 	int *mcorners = MEM_callocN(sizeof(int) * pd->totpoints, "point density corner count");
-	
+
 	for (i = 0; i < totloop; i++) {
 		int v = mloop[i].v;
 
@@ -310,7 +310,7 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob)
 
 		++mcorners[v];
 	}
-	
+
 	/* Normalize colors by averaging over mcorners.
 	 * All the corners share the same vertex, ie. occupy the same point in space.
 	 */
@@ -318,7 +318,7 @@ static void pointdensity_cache_vertex_color(PointDensity *pd, Object *UNUSED(ob)
 		if (mcorners[i] > 0)
 			mul_v3_fl(&data_color[i*3], 1.0f / mcorners[i]);
 	}
-	
+
 	MEM_freeN(mcorners);
 }
 
@@ -328,9 +328,9 @@ static void pointdensity_cache_vertex_weight(PointDensity *pd, Object *ob, Mesh
 	const MDeformVert *mdef, *dv;
 	int mdef_index;
 	int i;
-	
+
 	BLI_assert(data_color);
-	
+
 	mdef = CustomData_get_layer(&mesh->vdata, CD_MDEFORMVERT);
 	if (!mdef)
 		return;
@@ -339,11 +339,11 @@ static void pointdensity_cache_vertex_weight(PointDensity *pd, Object *ob, Mesh
 		mdef_index = ob->actdef - 1;
 	if (mdef_index < 0)
 		return;
-	
+
 	for (i = 0, dv = mdef; i < totvert; ++i, ++dv, data_color += 3) {
 		MDeformWeight *dw;
 		int j;
-		
+
 		for (j = 0, dw = dv->dw; j < dv->totweight; ++j, ++dw) {
 			if (dw->def_nr == mdef_index) {
 				copy_v3_fl(data_color, dw->weight);
@@ -357,9 +357,9 @@ static void pointdensity_cache_vertex_normal(PointDensity *pd, Object *UNUSED(ob
 {
 	MVert *mvert = mesh->mvert, *mv;
 	int i;
-	
+
 	BLI_assert(data_color);
-	
+
 	for (i = 0, mv = mvert; i < pd->totpoints; i++, mv++, data_color += 3) {
 		normal_short_to_float_v3(data_color, mv->no);
 	}
@@ -413,7 +413,7 @@ static void pointdensity_cache_object(PointDensity *pd,
 
 		BLI_bvhtree_insert(pd->point_tree, i, co, 1);
 	}
-	
+
 	switch (pd->ob_color_source) {
 		case TEX_PD_COLOR_VERTCOL:
 			pointdensity_cache_vertex_color(pd, ob, mesh, data_color);
@@ -506,7 +506,7 @@ static float density_falloff(PointDensityRangeData *pdr, int index, float square
 {
 	const float dist = (pdr->squared_radius - squared_dist) / pdr->squared_radius * 0.5f;
 	float density = 0.0f;
-	
+
 	switch (pdr->falloff_type) {
 		case TEX_PD_FALLOFF_STD:
 			density = dist;
@@ -536,12 +536,12 @@ static float density_falloff(PointDensityRangeData *pdr, int index, float square
 				density = dist;
 			break;
 	}
-	
+
 	if (pdr->density_curve && dist != 0.0f) {
 		curvemapping_initialize(pdr->density_curve);
 		density = curvemapping_evaluateF(pdr->density_curve, 0, density / dist) * dist;
 	}
-	
+
 	return density;
 }
 
@@ -666,7 +666,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c
 
 	if (pd->source == TEX_PD_PSYS) {
 		float rgba[4];
-		
+
 		switch (pd->color_source) {
 			case TEX_PD_COLOR_PARTAGE:
 				if (pd->coba) {
@@ -681,7 +681,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c
 			case TEX_PD_COLOR_PARTSPEED:
 			{
 				float speed = len_v3(vec) * pd->speed_scale;
-				
+
 				if (pd->coba) {
 					if (BKE_colorband_evaluate(pd->coba, speed, rgba)) {
 						texres->talpha = true;
@@ -704,7 +704,7 @@ static void pointdensity_color(PointDensity *pd, TexResult *texres, float age, c
 	}
 	else {
 		float rgba[4];
-		
+
 		switch (pd->ob_color_source) {
 			case TEX_PD_COLOR_VERTCOL:
 				texres->talpha = true;
diff --git a/source/blender/render/intern/source/rayshade.c b/source/blender/render/intern/source/rayshade.c
new file mode 100644
index 00000000000..df1cb868230
--- /dev/null
+++ b/source/blender/render/intern/source/rayshade.c
@@ -0,0 +1,2503 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
+ * All rights reserved.
+ *
+ * Contributors: 2004/2005 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/rayshade.c
+ *  \ingroup render
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <float.h>
+#include <assert.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_material_types.h"
+#include "DNA_lamp_types.h"
+
+#include "BLI_blenlib.h"
+#include "BLI_system.h"
+#include "BLI_math.h"
+#include "BLI_rand.h"
+#include "BLI_utildefines.h"
+
+#include "BLT_translation.h"
+
+#include "BKE_node.h"
+
+#include "render_result.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "pixelshading.h"
+#include "shading.h"
+#include "volumetric.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "raycounter.h"
+
+#define RAY_TRA		1
+#define RAY_INSIDE	2
+
+#define DEPTH_SHADOW_TRA  10
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+static int test_break(void *data)
+{
+	/* 'data' is a Render; forward to its own break callback with its handle */
+	return ((Render *)data)->test_break(((Render *)data)->tbh);
+}
+
+static void RE_rayobject_config_control(RayObject *r, Render *re)
+{
+	/* store 're' and the break test in the control of 'r'; non-RayAPI objects are left untouched */
+	if (!RE_rayobject_isRayAPI(r)) return;
+	RayObject *aligned = RE_rayobject_align(r);
+	aligned->control.data = re;
+	aligned->control.test_break = test_break;
+}
+
+RayObject *RE_rayobject_create(int type, int size, int octree_resolution)
+{
+	/* Creates the ray acceleration structure selected by 'type', passing 'size'
+	 * on to its constructor; 'octree_resolution' is only used by the octree.
+	 * Unknown types fall back to VBVH. */
+
+	if (type == R_RAYSTRUCTURE_AUTO) {
+		/* TODO */
+		//if (detect_simd())
+#ifdef __SSE__
+		type = BLI_cpu_support_sse2()? R_RAYSTRUCTURE_SIMD_SVBVH: R_RAYSTRUCTURE_VBVH;
+#else
+		type = R_RAYSTRUCTURE_VBVH;
+#endif
+	}
+
+#ifndef __SSE__
+	if (type == R_RAYSTRUCTURE_SIMD_SVBVH || type == R_RAYSTRUCTURE_SIMD_QBVH) {
+		puts("Warning: Using VBVH (SSE was disabled at compile time)");
+		type = R_RAYSTRUCTURE_VBVH;
+	}
+#endif
+
+	switch (type) {
+		case R_RAYSTRUCTURE_OCTREE:	//TODO dynamic ocres
+			return RE_rayobject_octree_create(octree_resolution, size);
+		case R_RAYSTRUCTURE_SIMD_SVBVH:
+			return RE_rayobject_svbvh_create(size);
+		case R_RAYSTRUCTURE_SIMD_QBVH:
+			return RE_rayobject_qbvh_create(size);
+		case R_RAYSTRUCTURE_VBVH:
+		default:	//Fallback
+			return RE_rayobject_vbvh_create(size);
+	}
+}
+
+static RayObject* rayobject_create(Render *re, int type, int size)
+{
+	/* create the structure with re->r.ocres as octree resolution and, when
+	 * creation succeeded, attach the render's break-test control to it */
+	RayObject *tree = RE_rayobject_create(type, size, re->r.ocres);
+	if (tree == NULL)
+		return NULL;
+
+	RE_rayobject_config_control(tree, re);
+	return tree;
+}
+
+#ifdef RE_RAYCOUNTER
+RayCounter re_rc_counter[BLENDER_MAX_THREADS];
+#endif
+
+
+void freeraytree(Render *re)
+{
+	/* Frees all raytrace data: the render's own tree and tables plus those of every object instance; freed pointers are reset to NULL */
+	ObjectInstanceRen *obi;
+	if (re->raytree) {
+		RE_rayobject_free(re->raytree);
+		re->raytree = NULL;
+	}
+	if (re->rayfaces) {
+		MEM_freeN(re->rayfaces);
+		re->rayfaces = NULL;
+	}
+	if (re->rayprimitives) {
+		MEM_freeN(re->rayprimitives);
+		re->rayprimitives = NULL;
+	}
+	for (obi=re->instancetable.first; obi; obi=obi->next) {
+		ObjectRen *obr = obi->obr;
+		if (obr->raytree) {
+			RE_rayobject_free(obr->raytree);
+			obr->raytree = NULL;
+		}
+		if (obr->rayfaces) {
+			MEM_freeN(obr->rayfaces);
+			obr->rayfaces = NULL;
+		}
+		if (obr->rayprimitives) {	/* allocated by makeraytree_object() in local-coords mode; was leaked before */
+			MEM_freeN(obr->rayprimitives);
+			obr->rayprimitives = NULL;
+		}
+		if (obi->raytree) {
+			RE_rayobject_free(obi->raytree);
+			obi->raytree = NULL;
+		}
+	}
+#ifdef RE_RAYCOUNTER
+	{
+		RayCounter sum;
+		memset(&sum, 0, sizeof(sum));
+		for (int i=0; i<re->r.threads; i++)
+			RE_RC_MERGE(&sum, re_rc_counter+i);
+		RE_RC_INFO(&sum);
+	}
+#endif
+}
+
+static bool is_raytraceable_vlr(Render *re, VlakRen *vlr)
+{
+	/* note: volumetric must be traceable, wire must not */
+	const int mat_type = vlr->mat->material_type;
+	if (mat_type == MA_TYPE_WIRE)
+		return 0;
+	return (re->flag & R_BAKE_TRACE) || (vlr->flag & R_TRACEBLE) || (mat_type == MA_TYPE_VOLUME);
+}
+
+static bool is_raytraceable(Render *re, ObjectInstanceRen *obi)
+{
+	/* an instance is raytraceable when its object is not re->excludeob
+	 * and at least one of its faces passes is_raytraceable_vlr() */
+	ObjectRen *obr = obi->obr;
+	bool found = 0;
+	int index;
+
+	if (re->excludeob && obr->ob == re->excludeob)
+		return 0;
+
+	/* faces are stored in nodes of 256 entries each */
+	for (index = 0; index < obr->totvlak && !found; index++) {
+		found = is_raytraceable_vlr(re, obr->vlaknodes[index >> 8].vlak + (index & 255));
+	}
+	return found;
+}
+
+
+RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi)
+{
+	/* Returns the ray object for 'obi', building the tree of obi->obr on first use (NULL when it has no traceable faces or the build was cancelled).
+	 * TODO: out-of-memory safeproof
+	 *       break render
+	 *       update render stats */
+	ObjectRen *obr = obi->obr;
+
+	if (obr->raytree == NULL) {
+		RayObject *raytree;
+		RayFace *face = NULL;
+		VlakPrimitive *vlakprimitive = NULL;
+		int v;
+
+		//Count the raytraceable faces; faces live in nodes of 256 entries each
+		int faces = 0;
+		for (v=0;v<obr->totvlak;v++) {
+			VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
+			if (is_raytraceable_vlr(re, vlr))
+				faces++;
+		}
+
+		if (faces == 0)
+			return NULL;
+
+		//Create Ray cast acceleration structure, plus one table (primitives or faces) that backs its entries
+		raytree = rayobject_create( re,  re->r.raytrace_structure, faces );
+		if (  (re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS) )
+			vlakprimitive = obr->rayprimitives = (VlakPrimitive *)MEM_callocN(faces * sizeof(VlakPrimitive), "ObjectRen primitives");
+		else
+			face = obr->rayfaces = (RayFace *)MEM_callocN(faces * sizeof(RayFace), "ObjectRen faces");
+
+		obr->rayobi = obi;
+
+		for (v=0;v<obr->totvlak;v++) {
+			VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
+			if (is_raytraceable_vlr(re, vlr)) {
+				if ((re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS)) {
+					RE_rayobject_add(raytree, RE_vlakprimitive_from_vlak(vlakprimitive, obi, vlr));
+					vlakprimitive++;
+				}
+				else {
+					RE_rayface_from_vlak(face, obi, vlr);
+					RE_rayobject_add(raytree, RE_rayobject_unalignRayFace(face));
+					face++;
+				}
+			}
+		}
+		RE_rayobject_done(raytree);
+
+		/* in case of cancel during build, raytree is not usable; only the tree is freed here, the table above stays attached to obr */
+		if (test_break(re))
+			RE_rayobject_free(raytree);
+		else
+			obr->raytree= raytree;
+	}
+
+	if (obr->raytree) {
+		if ((obi->flag & R_TRANSFORMED) && obi->raytree == NULL) {	/* transformed instance without its own ray object yet */
+			obi->transform_primitives = 0;
+			obi->raytree = RE_rayobject_instance_create( obr->raytree, obi->mat, obi, obi->obr->rayobi );
+		}
+	}
+
+	if (obi->raytree) return obi->raytree;
+	return obi->obr->raytree;
+}
+
+static bool has_special_rayobject(Render *re, ObjectInstanceRen *obi)
+{
+	/* true for a transformed instance (with R_RAYTRACE_USE_INSTANCES set) that
+	 * has more than 4 raytraceable faces */
+	ObjectRen *obr = obi->obr;
+	int index, traceable = 0;
+
+	if (!(obi->flag & R_TRANSFORMED) || !(re->r.raytrace_options & R_RAYTRACE_USE_INSTANCES))
+		return 0;
+
+	for (index = 0; index < obr->totvlak; index++) {
+		VlakRen *face = obr->vlaknodes[index >> 8].vlak + (index & 255);
+		if (is_raytraceable_vlr(re, face) && ++traceable > 4)
+			return 1;
+	}
+	return 0;
+}
+/* Create a single raytrace structure (re->raytree) with all raytraceable faces;
+ * instances accepted by has_special_rayobject() are added as one ray object
+ * each, obtained from makeraytree_object(). */
+static void makeraytree_single(Render *re)
+{
+	ObjectInstanceRen *obi;
+	RayObject *raytree;
+	RayFace *face = NULL;
+	VlakPrimitive *vlakprimitive = NULL;
+	int faces = 0, special = 0;
+
+	for (obi = re->instancetable.first; obi; obi = obi->next) {	/* first pass: count plain faces and special instances */
+		if (is_raytraceable(re, obi)) {
+			ObjectRen *obr = obi->obr;
+
+			if (has_special_rayobject(re, obi)) {
+				special++;
+			}
+			else {
+				int v;
+				for (v = 0;v < obr->totvlak; v++) {
+					VlakRen *vlr = obr->vlaknodes[v >> 8].vlak + (v&255);
+					if (is_raytraceable_vlr(re, vlr)) {
+						faces++;
+					}
+				}
+			}
+		}
+	}
+
+	if (faces + special == 0) {	/* nothing to trace: use an empty ray object */
+		re->raytree = RE_rayobject_empty_create();
+		return;
+	}
+
+	//Create raytree sized for the plain faces plus one entry per special instance
+	raytree = re->raytree = rayobject_create( re, re->r.raytrace_structure, faces+special );
+
+	if ( (re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS) ) {
+		vlakprimitive = re->rayprimitives = (VlakPrimitive *)MEM_callocN(faces * sizeof(VlakPrimitive), "Raytrace vlak-primitives");
+	}
+	else {
+		face = re->rayfaces	= (RayFace *)MEM_callocN(faces * sizeof(RayFace), "Render ray faces");
+	}
+
+	for (obi=re->instancetable.first; obi; obi=obi->next)	/* second pass: fill the tree, stopping early on cancel */
+	if (is_raytraceable(re, obi)) {
+		if (test_break(re))
+			break;
+
+		if (has_special_rayobject(re, obi)) {
+			RayObject *obj = makeraytree_object(re, obi);
+
+			if (test_break(re))
+				break;
+
+			if (obj)
+				RE_rayobject_add(re->raytree, obj);
+		}
+		else {
+			int v;
+			ObjectRen *obr = obi->obr;
+
+			if (obi->flag & R_TRANSFORMED) {
+				obi->transform_primitives = 1;
+			}
+
+			for (v=0;v<obr->totvlak;v++) {
+				VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
+				if (is_raytraceable_vlr(re, vlr)) {
+					if ((re->r.raytrace_options & R_RAYTRACE_USE_LOCAL_COORDS)) {
+						RayObject *obj = RE_vlakprimitive_from_vlak( vlakprimitive, obi, vlr );
+						RE_rayobject_add(raytree, obj);
+						vlakprimitive++;
+					}
+					else {
+						RE_rayface_from_vlak(face, obi, vlr);
+						if ((obi->flag & R_TRANSFORMED)) {	/* apply the instance matrix to the face vertices */
+							mul_m4_v3(obi->mat, face->v1);
+							mul_m4_v3(obi->mat, face->v2);
+							mul_m4_v3(obi->mat, face->v3);
+							if (RE_rayface_isQuad(face))
+								mul_m4_v3(obi->mat, face->v4);
+						}
+
+						RE_rayobject_add(raytree, RE_rayobject_unalignRayFace(face));
+						face++;
+					}
+				}
+			}
+		}
+	}
+
+	if (!test_break(re)) {	/* RE_rayobject_done() is only called when the build was not cancelled */
+		re->i.infostr = IFACE_("Raytree.. building");
+		re->stats_draw(re->sdh, &re->i);
+
+		RE_rayobject_done(raytree);
+	}
+}
+
+void makeraytree(Render *re)
+{
+	float min[3], max[3], sub[3];
+	int i;
+	/* builds re->raytree via makeraytree_single(), reports progress through re->stats_draw and sets re->maxdist; frees everything again on cancel */
+	re->i.infostr = IFACE_("Raytree.. preparing");
+	re->stats_draw(re->sdh, &re->i);
+
+	/* disable options not yet supported by octree,
+	 * they might actually never be supported (unless people really need it) */
+	if (re->r.raytrace_structure == R_RAYSTRUCTURE_OCTREE)
+		re->r.raytrace_options &= ~( R_RAYTRACE_USE_INSTANCES | R_RAYTRACE_USE_LOCAL_COORDS);
+
+	makeraytree_single(re);
+
+	if (test_break(re)) {
+		freeraytree(re);
+
+		re->i.infostr = IFACE_("Raytree building canceled");
+		re->stats_draw(re->sdh, &re->i);
+	}
+	else {
+		/* Calculate raytree max_size
+		 * This is ONLY needed to keep a bogus behavior of SUN and HEMI lights */
+		INIT_MINMAX(min, max);
+		RE_rayobject_merge_bb(re->raytree, min, max);
+		if (min[0] > max[0]) {  /* empty raytree */
+			zero_v3(min);
+			zero_v3(max);
+		}
+		for (i=0; i<3; i++) {
+			/* TODO: explain why add to both min and max??? (the same offset on both cancels out in 'sub') */
+			min[i] += 0.01f;
+			max[i] += 0.01f;
+			sub[i] = max[i]-min[i];
+		}
+
+		re->maxdist = len_v3(sub);	/* length of the bounding box diagonal */
+
+		re->i.infostr = IFACE_("Raytree finished");
+		re->stats_draw(re->sdh, &re->i);
+	}
+
+#ifdef RE_RAYCOUNTER
+	memset(re_rc_counter, 0, sizeof(re_rc_counter));
+#endif
+}
+
+/* called by shade_ray() if (shi->osatex): derives shi->dx_u, dx_v, dy_u, dy_v from shi->dxco / shi->dyco and the triangle shi->v1..v3 */
+static void shade_ray_set_derivative(ShadeInput *shi)
+{
+	float detsh, t00, t10, t01, t11;
+	int axis1, axis2;
+
+	/* find most stable axis to project */
+	axis_dominant_v3(&axis1, &axis2, shi->facenor);
+
+	/* compute u,v and derivatives */
+	if (shi->obi->flag & R_TRANSFORMED) {
+		float v1[3], v2[3], v3[3];
+
+		mul_v3_m3v3(v1, shi->obi->nmat, shi->v1->co);
+		mul_v3_m3v3(v2, shi->obi->nmat, shi->v2->co);
+		mul_v3_m3v3(v3, shi->obi->nmat, shi->v3->co);
+
+		/* same as below, on the vertices multiplied by obi->nmat */
+		t00= v3[axis1]-v1[axis1]; t01= v3[axis2]-v1[axis2];
+		t10= v3[axis1]-v2[axis1]; t11= v3[axis2]-v2[axis2];
+	}
+	else {
+		const float *v1= shi->v1->co;
+		const float *v2= shi->v2->co;
+		const float *v3= shi->v3->co;
+
+		/* same as above, on the untransformed vertices */
+		t00= v3[axis1]-v1[axis1]; t01= v3[axis2]-v1[axis2];
+		t10= v3[axis1]-v2[axis1]; t11= v3[axis2]-v2[axis2];
+	}
+
+	detsh= 1.0f/(t00*t11-t10*t01);	/* NOTE(review): no guard against a zero determinant (degenerate projected triangle) */
+	t00*= detsh; t01*=detsh;
+	t10*=detsh; t11*=detsh;
+
+	shi->dx_u=  shi->dxco[axis1]*t11- shi->dxco[axis2]*t10;
+	shi->dx_v=  shi->dxco[axis2]*t00- shi->dxco[axis1]*t01;
+	shi->dy_u=  shi->dyco[axis1]*t11- shi->dyco[axis2]*t10;
+	shi->dy_v=  shi->dyco[axis2]*t00- shi->dyco[axis1]*t01;
+
+}
+
+/* main ray shader: fills 'shi' from the hit stored in 'is' (hit.ob, hit.face, dist, u, v) and shades it into 'shr' */
+void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
+{
+	ObjectInstanceRen *obi = (ObjectInstanceRen *)is->hit.ob;
+	VlakRen *vlr = (VlakRen *)is->hit.face;
+
+	/* set up view vector */
+	copy_v3_v3(shi->view, is->dir);
+
+	/* render co: hit point = start + dist * dir, computed before the view vector is normalized */
+	shi->co[0]= is->start[0]+is->dist*(shi->view[0]);
+	shi->co[1]= is->start[1]+is->dist*(shi->view[1]);
+	shi->co[2]= is->start[2]+is->dist*(shi->view[2]);
+
+	normalize_v3(shi->view);
+
+	shi->obi= obi;
+	shi->obr= obi->obr;
+	shi->vlr= vlr;
+	shi->mat= vlr->mat;
+	shade_input_init_material(shi);
+
+	if (is->isect==2)	/* presumably the second triangle of a quad (vertices 0, 2, 3) - confirm */
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3);
+	else
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
+
+	shi->u= is->u;
+	shi->v= is->v;
+	shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v=  0.0f;
+
+	if (shi->osatex)
+		shade_ray_set_derivative(shi);
+	shade_input_set_normals(shi);
+
+	shade_input_set_shade_texco(shi);
+	if (shi->mat->material_type == MA_TYPE_VOLUME) {
+		if (ELEM(is->mode, RE_RAY_SHADOW, RE_RAY_SHADOW_TRA)) {
+			shade_volume_shadow(shi, shr, is);
+		}
+		else {
+			shade_volume_outside(shi, shr);
+		}
+	}
+	else if (is->mode==RE_RAY_SHADOW_TRA) {
+		/* temp hack to prevent recursion */
+		if (shi->nodes==0 && shi->mat->nodetree && shi->mat->use_nodes) {
+			ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
+			shi->mat= vlr->mat;		/* shi->mat is being set in nodetree */
+		}
+		else
+			shade_color(shi, shr);
+	}
+	else {
+		if (shi->mat->nodetree && shi->mat->use_nodes) {
+			ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
+			shi->mat= vlr->mat;		/* shi->mat is being set in nodetree */
+		}
+		else {
+			shade_material_loop(shi, shr);
+		}
+
+		/* raytrace likes to separate the spec color: diff = combined - spec */
+		sub_v3_v3v3(shr->diff, shr->combined, shr->spec);
+		copy_v3_v3(shr->diffshad, shr->diff);
+	}
+
+}
+
+static int refraction(float refract[3], const float n[3], const float view[3], float index)
+{
+	/* Writes the direction of 'view' refracted at normal 'n' into 'refract'.
+	 * 'index' is inverted when view and normal point the same way (dot > 0).
+	 * Returns 0 when the term under the square root is not positive; 'refract'
+	 * then still holds a plain copy of 'view'. Returns 1 otherwise. */
+	float cos_i, k, root;
+
+	copy_v3_v3(refract, view);
+	cos_i = dot_v3v3(view, n);
+
+	if (cos_i > 0.0f)
+		index = 1.0f/index;
+
+	k = 1.0f - (1.0f - cos_i*cos_i)*index*index;
+	if (k <= 0.0f)
+		return 0;
+
+	root = sqrtf(k);
+	k = (cos_i > 0.0f) ? (-cos_i*index + root) : (-cos_i*index - root);
+
+	for (int axis = 0; axis < 3; axis++)
+		refract[axis] = index*view[axis] + k*n[axis];
+
+	return 1;
+}
+
+static void reflection_simple(float ref[3], float n[3], const float view[3])
+{
+	/* ref = view + n * (-2 * dot(n, view)) */
+	madd_v3_v3v3fl(ref, view, n, -2.0f * dot_v3v3(n, view));
+}
+
+/* reflects 'view' at 'n' into 'ref' via reflection_simple(); orn = original face normal */
+static void reflection(float ref[3], float n[3], const float view[3], const float orn[3])
+{
+	float excess;
+	int axis;
+
+	reflection_simple(ref, n, view);
+
+	/* test phong normals, then we should prevent vector going to the back:
+	 * a positive component along 'orn' is subtracted again, plus a 0.01 margin */
+	excess = dot_v3v3(ref, orn);
+	if (!(excess > 0.0f)) return;
+
+	excess += 0.01f;
+	for (axis = 0; axis < 3; axis++) ref[axis] -= excess*orn[axis];
+}
+
+#if 0  /* compiled out; the only call, in traceray(), is commented out as well */
+static void color_combine(float *result, float fac1, float fac2, float col1[3], float col2[3])
+{
+	float col1t[3], col2t[3];
+
+	col1t[0]= sqrt(col1[0]);	/* blend in square-root space ... */
+	col1t[1]= sqrt(col1[1]);
+	col1t[2]= sqrt(col1[2]);
+	col2t[0]= sqrt(col2[0]);
+	col2t[1]= sqrt(col2[1]);
+	col2t[2]= sqrt(col2[2]);
+
+	result[0]= (fac1*col1t[0] + fac2*col2t[0]);
+	result[0]*= result[0];	/* ... and square the weighted sum again */
+	result[1]= (fac1*col1t[1] + fac2*col2t[1]);
+	result[1]*= result[1];
+	result[2]= (fac1*col1t[2] + fac2*col2t[2]);
+	result[2]*= result[2];
+}
+#endif
+
+static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
+{
+	/* Multiplies shr->alpha by pow(d, falloff), clamped to 1, where d is the
+	 * distance from the ray start to shi->co limited to tx_limit, and returns d.
+	 * Returns -1 without MA_TRANSP and 1 (alpha untouched) when tx_limit <= 0. */
+	float dist, falloff, dx, dy, dz;
+
+	if ((shi->mat->mode & MA_TRANSP) == 0)
+		return -1;
+
+	if (shi->mat->tx_limit <= 0.0f)
+		return 1.0f;
+
+	/* shi.co[] calculated by shade_ray() */
+	dx = shi->co[0] - is->start[0];
+	dy = shi->co[1] - is->start[1];
+	dz = shi->co[2] - is->start[2];
+	dist = sqrtf(dx * dx + dy * dy + dz * dz);
+	if (dist > shi->mat->tx_limit)
+		dist = shi->mat->tx_limit;
+
+	falloff = shi->mat->tx_falloff;
+	if (falloff < 0.0f) falloff = 0.0f;
+	else if (falloff > 10.0f) falloff = 10.0f;
+
+	shr->alpha *= powf(dist, falloff);
+	if (shr->alpha > 1.0f)
+		shr->alpha = 1.0f;
+
+	return dist;
+}
+
+static void ray_fadeout_endcolor(float col[3], ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, const float vec[3])
+{
+	/* un-intersected rays get either rendered material color or sky color */
+	const int fade_mode = origshi->mat->fadeto_mir;
+	if (fade_mode == MA_RAYMIR_FADETOMAT) {
+		copy_v3_v3(col, shr->combined);
+		return;
+	}
+	if (fade_mode != MA_RAYMIR_FADETOSKY) return;	/* any other mode leaves 'col' untouched */
+	copy_v3_v3(shi->view, vec);
+	normalize_v3(shi->view);
+	shadeSkyView(col, isec->start, shi->view, NULL, shi->thread);
+	shadeSunView(col, shi->view);
+}
+
+static void ray_fadeout(Isect *is, ShadeInput *shi, float col[3], const float blendcol[3], float dist_mir)
+{
+	/* if fading out, linear blend against fade color: the weight of 'col' is
+	 * 1 - (distance from is->start to shi->co) / dist_mir */
+	const float keep = 1.0f - len_v3v3(shi->co, is->start)/dist_mir;
+	const float fade = 1.0f - keep;
+	int channel;
+
+	for (channel = 0; channel < 3; channel++)
+		col[channel] = col[channel]*keep + fade*blendcol[channel];
+}
+
+/* the main recursive tracer itself: casts a mirror ray from 'start' along 'dir', recursing up to 'depth' more levels, and writes the result to 'col'
+ * note: 'col' must be initialized */
+static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, const float start[3], const float dir[3], float col[4], ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
+{
+	ShadeInput shi = {NULL};
+	Isect isec;
+	float dist_mir = origshi->mat->dist_mir;
+
+	/* with high depth the number of rays can explode due to the path splitting
+	 * in two each time, giving 2^depth rays. we need to be able to cancel such
+	 * a render to avoid hanging, a better solution would be random picking
+	 * between directions and russian roulette termination */
+	if (R.test_break(R.tbh)) {
+		zero_v4(col);
+		return;
+	}
+
+	copy_v3_v3(isec.start, start);
+	copy_v3_v3(isec.dir, dir);
+	isec.dist = dist_mir > 0 ? dist_mir : RE_RAYTRACE_MAXDIST;
+	isec.mode= RE_RAY_MIRROR;
+	isec.check = RE_CHECK_VLR_RENDER;
+	isec.skip = RE_SKIP_VLR_NEIGHBOUR;
+	isec.hint = NULL;
+
+	isec.orig.ob   = obi;
+	isec.orig.face = vlr;
+	RE_RC_INIT(isec, shi);
+
+	/* database is in original view, obi->imat transforms current position back to original */
+	RE_instance_rotate_ray(origshi->obi, &isec);
+
+	if (RE_rayobject_raycast(R.raytree, &isec)) {
+		ShadeResult shr= {{0}};
+		float d= 1.0f;
+
+		RE_instance_rotate_ray_restore(origshi->obi, &isec);
+
+		/* for as long we don't have proper dx/dy transform for rays we copy over original */
+		copy_v3_v3(shi.dxco, origshi->dxco);
+		copy_v3_v3(shi.dyco, origshi->dyco);
+
+		shi.mask= origshi->mask;
+		shi.osatex= origshi->osatex;
+		shi.depth= origshi->depth + 1;					/* only used to indicate tracing */
+		shi.thread= origshi->thread;
+		//shi.sample= 0; // memset above, so don't need this
+		shi.xs= origshi->xs;
+		shi.ys= origshi->ys;
+		shi.do_manage= origshi->do_manage;
+		shi.lay= origshi->lay;
+		shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
+		shi.combinedflag= 0xFFFFFF;		 /* ray trace does all options */
+		//shi.do_preview = false; // memset above, so don't need this
+		shi.light_override= origshi->light_override;
+		shi.mat_override= origshi->mat_override;
+
+		shade_ray(&isec, &shi, &shr);
+		/* ray has traveled inside the material, so shade by transmission */
+		if (traflag & RAY_INSIDE)
+			d= shade_by_transmission(&isec, &shi, &shr);
+
+		if (depth>0) {
+			float fr, fg, fb, f1;
+
+			if ((shi.mat->mode_l & MA_TRANSP) && shr.alpha < 1.0f && (shi.mat->mode_l & (MA_ZTRANSP | MA_RAYTRANSP))) {
+				float nf, f, refract[3], tracol[4];
+
+				tracol[0]= shi.r;
+				tracol[1]= shi.g;
+				tracol[2]= shi.b;
+				tracol[3]= col[3];	/* we pass on and accumulate alpha */
+
+				if ((shi.mat->mode & MA_TRANSP) && (shi.mat->mode & MA_RAYTRANSP)) {
+					/* don't overwrite traflag, its value is used in mirror reflection */
+					int new_traflag = traflag;
+
+					if (new_traflag & RAY_INSIDE) {
+						/* inside the material, so use inverse normal */
+						float norm[3];
+						norm[0]= - shi.vn[0];
+						norm[1]= - shi.vn[1];
+						norm[2]= - shi.vn[2];
+
+						if (refraction(refract, norm, shi.view, shi.ang)) {
+							/* ray comes out from the material into air */
+							new_traflag &= ~RAY_INSIDE;
+						}
+						else {
+							/* total internal reflection (ray stays inside the material) */
+							reflection(refract, norm, shi.view, shi.vn);
+						}
+					}
+					else {
+						if (refraction(refract, shi.vn, shi.view, shi.ang)) {
+							/* ray goes in to the material from air */
+							new_traflag |= RAY_INSIDE;
+						}
+						else {
+							/* total external reflection (ray doesn't enter the material) */
+							reflection(refract, shi.vn, shi.view, shi.vn);
+						}
+					}
+					traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, new_traflag);
+				}
+				else
+					traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
+
+				f= shr.alpha; f1= 1.0f-f;
+				nf= (shi.mat->mode & MA_RAYTRANSP) ? d * shi.mat->filter : 0.0f;
+				fr= 1.0f+ nf*(shi.r-1.0f);
+				fg= 1.0f+ nf*(shi.g-1.0f);
+				fb= 1.0f+ nf*(shi.b-1.0f);
+				shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
+				shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
+				shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
+
+				shr.spec[0] *=f;
+				shr.spec[1] *=f;
+				shr.spec[2] *=f;
+
+				col[3]= f1*tracol[3] + f;
+			}
+			else {
+				col[3]= 1.0f;
+			}
+
+			float f;
+			if (shi.mat->mode_l & MA_RAYMIRROR) {
+				f= shi.ray_mirror;
+				if (f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
+			}
+			else f= 0.0f;
+
+			if (f!=0.0f) {
+				float mircol[4];
+				float ref[3];
+
+				reflection_simple(ref, shi.vn, shi.view);
+				traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, traflag);
+
+				f1= 1.0f-f;
+
+				/* combine */
+				//color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
+				//col[0]+= shr.spec[0];
+				//col[1]+= shr.spec[1];
+				//col[2]+= shr.spec[2];
+
+				fr= shi.mirr;
+				fg= shi.mirg;
+				fb= shi.mirb;
+
+				col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
+				col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
+				col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
+			}
+			else {
+				col[0]= shr.diff[0] + shr.spec[0];
+				col[1]= shr.diff[1] + shr.spec[1];
+				col[2]= shr.diff[2] + shr.spec[2];
+			}
+
+			if (dist_mir > 0.0f) {
+				float blendcol[3];
+
+				/* max ray distance set, but found an intersection, so fade this color
+				 * out towards the sky/material color for a smooth transition */
+				ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, dir);
+				ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
+			}
+		}
+		else {
+			/* recursion depth used up: plain shading result, no further rays */
+			col[0]= shr.diff[0] + shr.spec[0];
+			col[1]= shr.diff[1] + shr.spec[1];
+			col[2]= shr.diff[2] + shr.spec[2];
+		}
+
+	}
+	else {
+		ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, dir);	/* NOTE(review): no hit, 'shi' is still zero-initialized here, so shi.thread is 0 - confirm intended */
+	}
+	RE_RC_MERGE(&origshi->raycounter, &shi.raycounter);
+}
+
+/* **************** jitter blocks ********** */
+
+/* calc distributed planar energy */
+
+static void DP_energy(float *table, float vec[2], int tot, float xsize, float ysize)
+{
+	/* Moves 'vec' by the summed (offset / squared distance) push of all 'tot'
+	 * table points, each also taken shifted by -1, 0, +1 plane sizes in x and y,
+	 * then wraps the result back into -0.5*size .. 0.5*size. */
+	const float px = vec[0], py = vec[1];
+	float push_x = 0.0f, push_y = 0.0f;
+	float radius_sq = MIN2(xsize, ysize);
+	int tile_x, tile_y, point;
+	radius_sq *= radius_sq;
+	for (tile_y = -1; tile_y < 2; tile_y++) {
+		const float ofs_y = ysize*tile_y;
+		for (tile_x = -1; tile_x < 2; tile_x++) {
+			const float ofs_x = xsize*tile_x;
+			const float *entry = table;
+			for (point = 0; point < tot; point++, entry += 2) {
+				const float fx = px - entry[0]-ofs_x;
+				const float fy = py - entry[1]-ofs_y;
+				const float dist_sq = fx*fx + fy*fy;
+				if (dist_sq < radius_sq && dist_sq > 0.0f) {
+					push_x += fx/dist_sq;
+					push_y += fy/dist_sq;
+				}
+			}
+		}
+	}
+	vec[0] = px + 0.1f*radius_sq*push_x/(float)tot;
+	vec[1] = py + 0.1f*radius_sq*push_y/(float)tot;
+	/* cyclic clamping */
+	vec[0] = vec[0] - xsize*floorf(vec[0]/xsize + 0.5f);
+	vec[1] = vec[1] - ysize*floorf(vec[1]/ysize + 0.5f);
+}
+
+/* copies the 'tot' 2D points of jitter1 to jitter2 shifted by (ofsx*sizex, ofsy*sizey); coordinates beyond +0.5*size wrap by one full size */
+static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
+{
+	const float shift_x = sizex*ofsx, shift_y = sizey*ofsy;
+	const float half_x = 0.5f*sizex, half_y = 0.5f*sizey;
+	int i;
+	for (i = 0; i < tot; i++) {
+		float px = jitter1[2*i] + shift_x;
+		float py = jitter1[2*i + 1] + shift_y;
+		if (px > half_x) px -= sizex;
+		if (py > half_y) py -= sizey;
+		jitter2[2*i] = px;
+		jitter2[2*i + 1] = py;
+	}
+}
+
+/* called from convertBlenderScene.c: allocates and fills lar->jitter, tables of lar->ray_totsamp 2D points each (table 0 = base table) */
+/* we do this in advance to get consistent random, not alter the render seed, and be threadsafe */
+void init_jitter_plane(LampRen *lar)
+{
+	float *fp;
+	int x, tot= lar->ray_totsamp;
+
+	/* test if already initialized */
+	if (lar->jitter) return;
+
+	/* at least 4, or max threads+1 tables; give_jitter_plane() writes its per-thread table at index thread+1 */
+	if (BLENDER_MAX_THREADS < 4) x= 4;
+	else x= BLENDER_MAX_THREADS+1;
+	fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
+
+	/* if 1 sample, we leave table to be zero's */
+	if (tot>1) {
+		/* set per-lamp fixed seed (the sample count itself) */
+		RNG *rng = BLI_rng_new_srandom(tot);
+		int iter=12;
+
+		/* fill table with random locations, area_size large */
+		for (x=0; x<tot; x++, fp+=2) {
+			fp[0]= (BLI_rng_get_float(rng)-0.5f)*lar->area_size;
+			fp[1]= (BLI_rng_get_float(rng)-0.5f)*lar->area_sizey;
+		}
+
+		while (iter--) {	/* 12 passes of DP_energy() over every point of the base table */
+			fp= lar->jitter;
+			for (x=tot; x>0; x--, fp+=2) {
+				DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
+			}
+		}
+
+		BLI_rng_free(rng);
+	}
+	/* create the dithered tables (could just check lamp type!): tables 1..3 are the base table offset by half a size in x, x+y and y */
+	jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
+	jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
+	jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
+}
+
+/* returns the jitter table to use for pixel (xs, ys) on 'thread'; table around origin, -0.5*size to 0.5*size */
+static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
+{
+	int tot;
+
+	tot= lar->ray_totsamp;
+
+	if (lar->ray_samp_type & LA_SAMP_JITTER) {
+		/* made it threadsafe: every thread owns table (thread+1), regenerated only when the pixel changes */
+
+		if (lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
+			jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));	/* NOTE(review): C leaves the evaluation order of the two random calls unspecified */
+			lar->xold[thread]= xs;
+			lar->yold[thread]= ys;
+		}
+		return lar->jitter+2*(thread+1)*tot;
+	}
+	if (lar->ray_samp_type & LA_SAMP_DITHER) {
+		return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));	/* one of tables 0..3, picked by the parity of xs and ys */
+	}
+
+	return lar->jitter;
+}
+
+
+/* **************** QMC sampling *************** */
+
+static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
+{
+	/* incremental halton sequence generator, from:
+	 * "Instant Radiosity", Keller A.
+	 * advances both entries of ht_nums in place and copies them, rounded to float, to v */
+	int axis;
+
+	for (axis = 0; axis < 2; axis++) {
+		const double inv_prime = ht_invprimes[axis];
+		const double remaining = fabs((1.0 - ht_nums[axis]) - 1e-10);
+		double step = inv_prime;
+
+		if (inv_prime >= remaining) {
+			double prev, cur = inv_prime;
+			do {
+				prev = cur;
+				cur *= inv_prime;
+			} while (cur >= remaining);
+
+			step = (prev + cur) - 1.0;
+		}
+
+		ht_nums[axis] += step;
+		v[axis] = (float)ht_nums[axis];
+	}
+}
+
+/* Generate n Hammersley points in [0,1)^2, written to out[0 .. 2*n-1] as (x, y) pairs
+ * From Lucille renderer */
+static void hammersley_create(double *out, int n)
+{
+	double p, t;
+	int k, kk;
+
+	for (k = 0; k < n; k++) {
+		t = 0;
+		for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
+			if (kk & 1) {		/* kk mod 2 = 1: lowest remaining bit of k adds the current binary fraction p */
+				t += p;
+			}
+		}
+
+		out[2 * k + 0] = (double)k / (double)n;
+		out[2 * k + 1] = t;	/* bits of k mirrored around the binary point */
+	}
+}
+
+static struct QMCSampler *QMC_initSampler(int type, int tot)	/* allocates a sampler with room for 'tot' 2D samples; released by QMC_freeSampler */
+{
+	QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
+	qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");	/* tot (x, y) pairs */
+
+	qsa->tot = tot;
+	qsa->type = type;
+
+	if (qsa->type==SAMP_TYPE_HAMMERSLEY)	/* hammersley table is fixed, so it is filled once here; other types keep the zeroed table */
+		hammersley_create(qsa->samp2d, qsa->tot);
+
+	return qsa;
+}
+
+static void QMC_initPixel(QMCSampler *qsa, int thread)	/* prepares the sampler for a new pixel, using the random generator of 'thread' */
+{
+	if (qsa->type==SAMP_TYPE_HAMMERSLEY) {
+		/* hammersley sequence is fixed, already created in QMCSampler init.
+		 * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
+		qsa->offs[thread][0] = 0.5f * BLI_thread_frand(thread);
+		qsa->offs[thread][1] = 0.5f * BLI_thread_frand(thread);
+	}
+	else { 	/* SAMP_TYPE_HALTON */
+
+		/* generate a new randomized halton sequence per pixel
+		 * to alleviate qmc artifacts and make it reproducible
+		 * between threads/frames */
+		double ht_invprimes[2], ht_nums[2];
+		double r[2];
+		int i;
+
+		ht_nums[0] = BLI_thread_frand(thread);	/* random starting state for both dimensions */
+		ht_nums[1] = BLI_thread_frand(thread);
+		ht_invprimes[0] = 0.5;	/* base 2 for the first dimension, base 3 for the second */
+		ht_invprimes[1] = 1.0/3.0;
+
+		for (i=0; i< qsa->tot; i++) {	/* overwrites the whole sample table of this sampler */
+			halton_sample(ht_invprimes, ht_nums, r);
+			qsa->samp2d[2*i+0] = r[0];
+			qsa->samp2d[2*i+1] = r[1];
+		}
+	}
+}
+
+static void QMC_freeSampler(QMCSampler *qsa)	/* frees the sample table and the sampler itself; qsa is invalid afterwards */
+{
+	MEM_freeN(qsa->samp2d);
+	MEM_freeN(qsa);
+}
+
+static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)	/* writes 2D sample number 'num' to s[0..1]; no range check on num */
+{
+	if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
+		s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);	/* fixed table plus this thread's per-pixel offset, wrapped by fmod 1 */
+		s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
+	}
+	else { /* SAMP_TYPE_HALTON: table is regenerated per pixel in QMC_initPixel, read as is */
+		s[0] = qsa->samp2d[2*num+0];
+		s[1] = qsa->samp2d[2*num+1];
+	}
+}
+
+/* phong weighted disc using 'blur' for exponent, centred on 0,0; only vec[0] and vec[1] carry the sample, vec[2] is set to 0 */
+static void QMC_samplePhong(float vec[3], QMCSampler *qsa, int thread, int num, float blur)
+{
+	double s[2];
+	float phi, pz, sqr;
+
+	QMC_getSample(s, qsa, thread, num);
+
+	phi = s[0]*2*M_PI;	/* angle around the disc */
+	pz = pow(s[1], blur);
+	sqr = sqrtf(1.0f - pz * pz);	/* distance from the disc centre */
+
+	vec[0] = (float)(cosf(phi)*sqr);
+	vec[1] = (float)(sinf(phi)*sqr);
+	vec[2] = 0.0f;
+}
+
+/* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizex/2 in x and -sizey/2 to +sizey/2 in y */
+static void QMC_sampleRect(float vec[3], QMCSampler *qsa, int thread, int num, float sizex, float sizey)
+{
+	double s[2];
+
+	QMC_getSample(s, qsa, thread, num);
+
+	vec[0] = (float)(s[0] - 0.5) * sizex;	/* shift the sample by -0.5 to centre it, then scale */
+	vec[1] = (float)(s[1] - 0.5) * sizey;
+	vec[2] = 0.0f;
+}
+
+/* disc centred on 0,0; note the samples are scaled by radius/2, so the argument 'radius' effectively acts as the disc diameter */
+static void QMC_sampleDisc(float vec[3], QMCSampler *qsa, int thread, int num, float radius)
+{
+	double s[2];
+	float phi, sqr;
+
+	QMC_getSample(s, qsa, thread, num);
+
+	phi = s[0]*2*M_PI;	/* angle around the disc */
+	sqr = sqrt(s[1]);	/* distance from the centre before scaling */
+
+	vec[0] = cosf(phi)*sqr* radius/2.0f;
+	vec[1] = sinf(phi)*sqr* radius/2.0f;
+	vec[2] = 0.0f;
+}
+
+/* uniform hemisphere sampling (per the original author); vec is not normalized here, ray_ao_qmc normalizes the direction it builds from it */
+static void QMC_sampleHemi(float vec[3], QMCSampler *qsa, int thread, int num)
+{
+	double s[2];
+	float phi, sqr;
+
+	QMC_getSample(s, qsa, thread, num);
+
+	phi = s[0]*2.0*M_PI;	/* angle around the pole axis (vec[2]) */
+	sqr = sqrt(s[1]);
+
+	vec[0] = cosf(phi)*sqr;
+	vec[1] = sinf(phi)*sqr;
+	vec[2] = (float)(1.0 - s[1]*s[1]);
+}
+
+#if 0 /* currently not used */
+/* cosine weighted hemisphere sampling; excluded from compilation by the #if 0 above */
+static void QMC_sampleHemiCosine(float vec[3], QMCSampler *qsa, int thread, int num)
+{
+	double s[2];
+	float phi, sqr;
+
+	QMC_getSample(s, qsa, thread, num);
+
+	phi = s[0]*2.f*M_PI;
+	sqr = s[1]*sqrt(2-s[1]*s[1]);
+
+	vec[0] = cos(phi)*sqr;
+	vec[1] = sin(phi)*sqr;
+	vec[2] = 1.f - s[1]*s[1];
+
+}
+#endif
+
+/* called from convertBlenderScene.c; allocates one empty sampler list per render thread, released by free_render_qmcsampler */
+void init_render_qmcsampler(Render *re)
+{
+	const int num_threads = re->r.threads;
+	re->qmcsamplers= MEM_callocN(sizeof(ListBase)*num_threads, "QMCListBase");
+	re->num_qmc_samplers = num_threads;	/* remembered so the free function walks the same number of lists */
+}
+
+static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
+{
+	QMCSampler *qsa;
+
+	/* create qmc samplers as needed, since recursion makes it hard to
+	 * predict how many are needed; the returned sampler is marked used until release_thread_qmcsampler */
+
+	for (qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
+		if (qsa->type == type && qsa->tot == tot && !qsa->used) {	/* reuse a free sampler of the same type and sample count */
+			qsa->used = true;
+			return qsa;
+		}
+	}
+
+	qsa= QMC_initSampler(type, tot);	/* none available: create one and keep it in this thread's list */
+	qsa->used = true;
+	BLI_addtail(&re->qmcsamplers[thread], qsa);
+
+	return qsa;
+}
+
+static void release_thread_qmcsampler(Render *UNUSED(re), int UNUSED(thread), QMCSampler *qsa)
+{
+	qsa->used= 0;	/* nothing is freed: the sampler stays in its list so get_thread_qmcsampler can hand it out again */
+}
+
+void free_render_qmcsampler(Render *re)	/* frees every sampler of every thread list and the list array itself; no-op when re->qmcsamplers is NULL */
+{
+	if (re->qmcsamplers) {
+		QMCSampler *qsa, *next;
+		int a;
+		for (a = 0; a < re->num_qmc_samplers; a++) {
+			for (qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
+				next= qsa->next;	/* read the link before qsa is freed */
+				QMC_freeSampler(qsa);
+			}
+
+			re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
+		}
+
+		MEM_freeN(re->qmcsamplers);
+		re->qmcsamplers= NULL;	/* makes a second call harmless */
+	}
+}
+
+static int adaptive_sample_variance(int samples, const float col[3], const float colsq[3], float thresh)
+{
+	float var[3], mean[3];
+
+	/* scale threshold just to give a bit more precision in input rather than dealing with
+	 * tiny tiny numbers in the UI; col and colsq are the sums of the samples and of their squares */
+	thresh /= 2;
+
+	mean[0] = col[0] / (float)samples;
+	mean[1] = col[1] / (float)samples;
+	mean[2] = col[2] / (float)samples;
+
+	var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);	/* variance per channel: mean of squares minus square of mean */
+	var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
+	var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
+
+	if ((var[0] * 0.4f < thresh) && (var[1] * 0.3f < thresh) && (var[2] * 0.6f < thresh))	/* all three weighted channels must be below the threshold */
+		return 1;
+	else
+		return 0;
+}
+
+static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
+{
+	/* if the last sample's contribution to the total value was below a small threshold
+	 * (i.e. the samples taken are very similar), then taking more samples that are probably
+	 * going to be the same is wasting effort. prev and val are the sums before and after the last sample */
+	if (fabsf(prev / (float)(samples - 1) - val / (float)samples ) < thresh) {	/* divides by samples-1: callers must pass samples >= 2 */
+		return 1;
+	}
+	else
+		return 0;
+}
+
+static float get_avg_speed(ShadeInput *shi)	/* mean length of the two 2D vectors stored in shi->winspeed[0..1] and shi->winspeed[2..3] */
+{
+	float pre_x, pre_y, post_x, post_y, speedavg;
+
+	pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[0];	/* components equal to PASS_VECTOR_MAX count as zero */
+	pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[1];
+	post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[2];
+	post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0f:shi->winspeed[3];
+
+	speedavg = (sqrtf(pre_x * pre_x + pre_y * pre_y) + sqrtf(post_x * post_x + post_y * post_y)) / 2.0f;
+
+	return speedavg;
+}
+
+/* ***************** main calls ************** */
+
+
+static void trace_refract(float col[4], ShadeInput *shi, ShadeResult *shr)	/* averages up to samp_gloss_tra refracted rays into col[0..3] */
+{
+	QMCSampler *qsa=NULL;
+	int samp_type;
+	int traflag=0;
+
+	float samp3d[3], orthx[3], orthy[3];
+	float v_refract[3], v_refract_new[3];
+	float sampcol[4], colsq[4];
+
+	float blur = pow3f(1.0f - shi->mat->gloss_tra);
+	short max_samples = shi->mat->samp_gloss_tra;
+	float adapt_thresh = shi->mat->adapt_thresh_tra;
+
+	int samples=0;
+
+	colsq[0] = colsq[1] = colsq[2] = 0.0;
+	col[0] = col[1] = col[2] = 0.0;
+	col[3]= shr->alpha;	/* alpha accumulation starts from the surface alpha, the sample alphas are added to it below */
+
+	if (blur > 0.0f) {
+		if (adapt_thresh != 0.0f) samp_type = SAMP_TYPE_HALTON;
+		else samp_type = SAMP_TYPE_HAMMERSLEY;
+
+		/* all samples are generated per pixel */
+		qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
+		QMC_initPixel(qsa, shi->thread);
+	}
+	else
+		max_samples = 1;
+
+
+	while (samples < max_samples) {
+		if (refraction(v_refract, shi->vn, shi->view, shi->ang)) {
+			traflag |= RAY_INSIDE;
+		}
+		else {
+			/* total external reflection can happen for materials with IOR < 1.0 */
+			if ((shi->vlr->flag & R_SMOOTH))
+				reflection(v_refract, shi->vn, shi->view, shi->facenor);
+			else
+				reflection_simple(v_refract, shi->vn, shi->view);
+
+			/* can't blur total external reflection */
+			max_samples = 1;
+		}
+
+		if (max_samples > 1) {
+			/* get a quasi-random vector from a phong-weighted disc */
+			QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
+
+			ortho_basis_v3v3_v3(orthx, orthy, v_refract);
+			mul_v3_fl(orthx, samp3d[0]);
+			mul_v3_fl(orthy, samp3d[1]);
+
+			/* and perturb the refraction vector in it */
+			add_v3_v3v3(v_refract_new, v_refract, orthx);
+			add_v3_v3(v_refract_new, orthy);
+
+			normalize_v3(v_refract_new);
+		}
+		else {
+			/* no blurriness, use the unperturbed refraction vector */
+			copy_v3_v3(v_refract_new, v_refract);
+		}
+
+		sampcol[0]= sampcol[1]= sampcol[2]= sampcol[3]= 0.0f;
+
+		traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, traflag);
+
+		col[0] += sampcol[0];
+		col[1] += sampcol[1];
+		col[2] += sampcol[2];
+		col[3] += sampcol[3];
+
+		/* for variance calc */
+		colsq[0] += sampcol[0]*sampcol[0];
+		colsq[1] += sampcol[1]*sampcol[1];
+		colsq[2] += sampcol[2]*sampcol[2];
+
+		samples++;
+
+		/* adaptive sampling; NOTE(review): tested as adapt_thresh < 1.0f here, but as adapt_thresh > 0.0f in trace_reflect - confirm which is intended */
+		if (adapt_thresh < 1.0f && samples > max_samples/2) {
+			if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
+				break;
+
+			/* if the pixel so far is very dark, we can get away with less samples */
+			if ( (col[0] + col[1] + col[2])/3.0f/(float)samples < 0.01f )
+				max_samples--;
+		}
+	}
+
+	col[0] /= (float)samples;	/* NOTE(review): samples is 0 here if blur > 0 and samp_gloss_tra < 1 - presumably excluded by the material settings, confirm */
+	col[1] /= (float)samples;
+	col[2] /= (float)samples;
+	col[3] /= (float)samples;
+
+	if (qsa)
+		release_thread_qmcsampler(&R, shi->thread, qsa);
+}
+
+static void trace_reflect(float col[3], ShadeInput *shi, ShadeResult *shr, float fresnelfac)	/* averages up to samp_gloss_mir mirror rays into col[0..2] */
+{
+	QMCSampler *qsa=NULL;
+	int samp_type;
+
+	float samp3d[3], orthx[3], orthy[3];
+	float v_nor_new[3], v_reflect[3];
+	float sampcol[4], colsq[4];
+
+	float blur = pow3f(1.0f - shi->mat->gloss_mir);
+	short max_samples = shi->mat->samp_gloss_mir;
+	float adapt_thresh = shi->mat->adapt_thresh_mir;
+	float aniso = 1.0f - shi->mat->aniso_gloss_mir;
+
+	int samples=0;
+
+	col[0] = col[1] = col[2] = 0.0;
+	colsq[0] = colsq[1] = colsq[2] = 0.0;
+
+	if (blur > 0.0f) {
+		if (adapt_thresh != 0.0f) samp_type = SAMP_TYPE_HALTON;
+		else samp_type = SAMP_TYPE_HAMMERSLEY;
+
+		/* all samples are generated per pixel */
+		qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
+		QMC_initPixel(qsa, shi->thread);
+	}
+	else
+		max_samples = 1;
+
+	while (samples < max_samples) {
+
+		if (max_samples > 1) {
+			/* get a quasi-random vector from a phong-weighted disc */
+			QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
+
+			/* find the normal's perpendicular plane, blurring along tangents
+			 * if tangent shading enabled */
+			if (shi->mat->mode & (MA_TANGENT_V)) {
+				cross_v3_v3v3(orthx, shi->vn, shi->tang);      // bitangent
+				copy_v3_v3(orthy, shi->tang);
+				mul_v3_fl(orthx, samp3d[0]);
+				mul_v3_fl(orthy, samp3d[1]*aniso);	/* only the tangent direction is scaled by the anisotropy factor */
+			}
+			else {
+				ortho_basis_v3v3_v3(orthx, orthy, shi->vn);
+				mul_v3_fl(orthx, samp3d[0]);
+				mul_v3_fl(orthy, samp3d[1]);
+			}
+
+			/* and perturb the normal in it */
+			add_v3_v3v3(v_nor_new, shi->vn, orthx);
+			add_v3_v3(v_nor_new, orthy);
+			normalize_v3(v_nor_new);
+		}
+		else {
+			/* no blurriness, use the original normal */
+			copy_v3_v3(v_nor_new, shi->vn);
+		}
+
+		if ((shi->vlr->flag & R_SMOOTH))
+			reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
+		else
+			reflection_simple(v_reflect, v_nor_new, shi->view);
+
+		sampcol[0]= sampcol[1]= sampcol[2]= sampcol[3]= 0.0f;
+
+		traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
+
+
+		col[0] += sampcol[0];
+		col[1] += sampcol[1];
+		col[2] += sampcol[2];
+
+		/* for variance calc */
+		colsq[0] += sampcol[0]*sampcol[0];
+		colsq[1] += sampcol[1]*sampcol[1];
+		colsq[2] += sampcol[2]*sampcol[2];
+
+		samples++;
+
+		/* adaptive sampling, only once more than a third of the samples has been taken */
+		if (adapt_thresh > 0.0f && samples > max_samples/3) {
+			if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
+				break;
+
+			/* if the pixel so far is very dark, we can get away with less samples */
+			if ( (col[0] + col[1] + col[2])/3.0f/(float)samples < 0.01f )
+				max_samples--;
+
+			/* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
+			 * and when reflection is blurry; max_samples can drop by up to 3 in one iteration */
+			if (fresnelfac < 0.1f * (blur+1)) {
+				max_samples--;
+
+				/* even more for very dim */
+				if (fresnelfac < 0.05f * (blur+1))
+					max_samples--;
+			}
+		}
+	}
+
+	col[0] /= (float)samples;	/* NOTE(review): samples is 0 here if blur > 0 and samp_gloss_mir < 1 - presumably excluded by the material settings, confirm */
+	col[1] /= (float)samples;
+	col[2] /= (float)samples;
+
+	if (qsa)
+		release_thread_qmcsampler(&R, shi->thread, qsa);
+}
+
+/* extern call from render loop; mixes raytraced refraction and mirror into shr->combined and updates shr->alpha, shr->refr, shr->refl */
+void ray_trace(ShadeInput *shi, ShadeResult *shr)
+{
+	float f1, fr, fg, fb;
+	float mircol[4], tracol[4];
+	float diff[3];
+	int do_tra, do_mir;
+
+	do_tra = ((shi->mode & MA_TRANSP) && (shi->mode & MA_RAYTRANSP) && shr->alpha != 1.0f && (shi->depth <= shi->mat->ray_depth_tra));
+	do_mir = ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror != 0.0f && (shi->depth <= shi->mat->ray_depth));
+
+	/* raytrace mirror and refract like to separate the spec color; it is added back at the end */
+	if (shi->combinedflag & SCE_PASS_SPEC)
+		sub_v3_v3v3(diff, shr->combined, shr->spec);
+	else
+		copy_v3_v3(diff, shr->combined);
+
+	if (do_tra) {
+		float olddiff[3], f;
+
+		trace_refract(tracol, shi, shr);
+
+		f= shr->alpha; f1= 1.0f-f;
+		fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);	/* per channel filter factor: 1.0 when filter is 0, the material color when filter is 1 */
+		fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
+		fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
+
+		/* for refract pass */
+		copy_v3_v3(olddiff, diff);
+
+		diff[0]= f*diff[0] + f1*fr*tracol[0];
+		diff[1]= f*diff[1] + f1*fg*tracol[1];
+		diff[2]= f*diff[2] + f1*fb*tracol[2];
+
+		if (shi->passflag & SCE_PASS_REFRACT)
+			sub_v3_v3v3(shr->refr, diff, olddiff);
+
+		if (!(shi->combinedflag & SCE_PASS_REFRACT))	/* NOTE(review): shr->refr is only written above when the refract pass is on; presumably zeroed by the caller otherwise - confirm */
+			sub_v3_v3v3(diff, diff, shr->refr);
+
+		shr->alpha = min_ff(1.0f, tracol[3]);
+	}
+
+	if (do_mir) {
+		const float i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
+		if (i!=0.0f) {
+
+			trace_reflect(mircol, shi, shr, i);
+
+			fr= i*shi->mirr;
+			fg= i*shi->mirg;
+			fb= i*shi->mirb;
+
+			if (shi->passflag & SCE_PASS_REFLECT) {
+				/* mirror pass is not blocked out with spec */
+				shr->refl[0]= fr*mircol[0] - fr*diff[0];
+				shr->refl[1]= fg*mircol[1] - fg*diff[1];
+				shr->refl[2]= fb*mircol[2] - fb*diff[2];
+			}
+
+			if (shi->combinedflag & SCE_PASS_REFLECT) {
+				/* values in shr->spec can be greater than 1.0.
+				 * In this case the mircol uses a zero blending factor, so ignoring it is ok.
+				 * Fixes bug #18837 - when the spec is higher than 1.0,
+				 * diff can become a negative color - Campbell  */
+
+				f1= 1.0f-i;
+
+				diff[0] *= f1;
+				diff[1] *= f1;
+				diff[2] *= f1;
+
+				if (shr->spec[0]<1.0f)	diff[0] += mircol[0] * (fr*(1.0f-shr->spec[0]));
+				if (shr->spec[1]<1.0f)	diff[1] += mircol[1] * (fg*(1.0f-shr->spec[1]));
+				if (shr->spec[2]<1.0f)	diff[2] += mircol[2] * (fb*(1.0f-shr->spec[2]));
+			}
+		}
+	}
+	/* put back together */
+	if (shi->combinedflag & SCE_PASS_SPEC)
+		add_v3_v3v3(shr->combined, diff, shr->spec);
+	else
+		copy_v3_v3(shr->combined, diff);
+}
+
+/* color 'shadfac' passes through 'col' with alpha and filter; shadfac is updated in place */
+/* filter is only applied on alpha defined transparent part, i.e. the (1 - alpha) share of shadfac */
+static void addAlphaLight(float shadfac[4], const float col[3], float alpha, float filter)
+{
+	float fr, fg, fb;
+
+	fr= 1.0f+ filter*(col[0]-1.0f);	/* per channel filter factor: 1.0 when filter is 0, col when filter is 1 */
+	fg= 1.0f+ filter*(col[1]-1.0f);
+	fb= 1.0f+ filter*(col[2]-1.0f);
+
+	shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
+	shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
+	shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
+
+	shadfac[3]= (1.0f-alpha)*shadfac[3];	/* fourth component is scaled down by the alpha of this layer */
+}
+
+static void ray_trace_shadow_tra(Isect *is, ShadeInput *origshi, int depth, int traflag, float col[4])
+{
+	/* ray to lamp, find first face that intersects, check alpha properties,
+	 * if it has col[3]>0.0f and depth>0 continue recursively from the hit. so exit when alpha is full */
+	const float initial_dist = is->dist;
+
+	if (RE_rayobject_raycast(R.raytree, is)) {
+		/* Warning regarding initializing to zeros: this is not that nice,
+		 * and possibly a bit slow for every ray, however some variables are
+		 * not initialized properly unless shade_input_initialize(...) is
+		 * used, so we need to zero them here. */
+		ShadeInput shi= {NULL};
+		/* end warning! - Campbell */
+
+		ShadeResult shr;
+
+		/* we got a face */
+
+		shi.depth= origshi->depth + 1;					/* only used to indicate tracing */
+		shi.mask= origshi->mask;
+		shi.thread= origshi->thread;
+		shi.passflag= SCE_PASS_COMBINED;
+		shi.combinedflag= 0xFFFFFF;		 /* ray trace does all options */
+
+		shi.xs= origshi->xs;
+		shi.ys= origshi->ys;
+		shi.do_manage= origshi->do_manage;
+		shi.lay= origshi->lay;
+		shi.nodes= origshi->nodes;
+
+		RE_instance_rotate_ray_restore(origshi->obi, is);
+
+		shade_ray(is, &shi, &shr);
+		if (shi.mat->material_type == MA_TYPE_SURFACE) {
+			const float d = (shi.mat->mode & MA_RAYTRANSP) ?
+			                ((traflag & RAY_TRA) ? shade_by_transmission(is, &shi, &shr) : 1.0f) :
+			                0.0f;
+			/* mix colors based on shadfac (rgb + amount of light factor) */
+			addAlphaLight(col, shr.diff, shr.alpha, d*shi.mat->filter);
+		}
+		else if (shi.mat->material_type == MA_TYPE_VOLUME) {
+			const float a = col[3];
+
+			col[0] = a*col[0] + shr.alpha*shr.combined[0];
+			col[1] = a*col[1] + shr.alpha*shr.combined[1];
+			col[2] = a*col[2] + shr.alpha*shr.combined[2];
+
+			col[3] = (1.0f - shr.alpha)*a;
+		}
+
+		if (depth>0 && col[3]>0.0f) {
+
+			/* adapt isect struct: restart the ray at the shaded point, skipping the face just hit */
+			copy_v3_v3(is->start, shi.co);
+			is->dist = initial_dist-is->dist;	/* presumably the distance left beyond the hit - confirm is->dist holds the hit distance after the raycast */
+			is->orig.ob   = shi.obi;
+			is->orig.face = shi.vlr;
+
+			ray_trace_shadow_tra(is, origshi, depth-1, traflag | RAY_TRA, col);
+		}
+
+		RE_RC_MERGE(&origshi->raycounter, &shi.raycounter);
+	}
+}
+
+
+/* aolight: function to create random unit sphere vectors for total random sampling */
+
+/* calc distributed spherical energy: pushes 'vec' away from the 'tot' points in 'sphere', then renormalizes it (vec is modified in place) */
+static void DS_energy(float *sphere, int tot, float vec[3])
+{
+	float *fp, fac, force[3], res[3];
+	int a;
+
+	res[0]= res[1]= res[2]= 0.0f;
+
+	for (a=0, fp=sphere; a<tot; a++, fp+=3) {
+		sub_v3_v3v3(force, vec, fp);
+		fac = dot_v3v3(force, force);
+		if (fac!=0.0f) {	/* zero distance: the point itself (or a coincident one) is skipped */
+			fac= 1.0f/fac;
+			res[0]+= fac*force[0];
+			res[1]+= fac*force[1];
+			res[2]+= fac*force[2];
+		}
+	}
+
+	mul_v3_fl(res, 0.5);	/* apply only half of the summed push */
+	add_v3_v3(vec, res);
+	normalize_v3(vec);
+
+}
+
+/* called from convertBlenderScene.c */
+/* creates an equally distributed spherical sample pattern in wrld->aosphere */
+/* and allocates threadsafe memory (one table per render thread) in wrld->aotables */
+void init_ao_sphere(Render *re, World *wrld)
+{
+	/* fixed random */
+	const int num_threads = re->r.threads;
+	RNG *rng;
+	float *fp;
+	int a, tot, iter= 16;
+
+	/* we make twice the amount of samples, because only a hemisphere is used */
+	tot= 2*wrld->aosamp*wrld->aosamp;
+
+	wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
+	rng = BLI_rng_new_srandom(tot);	/* seed is the sample count, so the same aosamp gives the same pattern */
+
+	/* init */
+	fp= wrld->aosphere;
+	for (a=0; a<tot; a++, fp+= 3) {
+		BLI_rng_get_float_unit_v3(rng, fp);
+	}
+
+	while (iter--) {	/* 16 passes in which every point is pushed away from the others by DS_energy */
+		for (a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
+			DS_energy(wrld->aosphere, tot, fp);
+		}
+	}
+
+	/* tables: left uninitialized here, sphere_sampler fills them */
+	wrld->aotables= MEM_mallocN(num_threads*3*tot*sizeof(float), "AO tables");
+
+	BLI_rng_free(rng);
+}
+
+/* give per thread a table, we have to compare xs ys because of way OSA works... with 'test' set, returns NULL instead of claiming the table for a new xs/ys */
+static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
+{
+	static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
+	static int firsttime= 1;
+
+	if (firsttime) {	/* NOTE(review): this static one-time init is not synchronized between threads - confirm it cannot race */
+		memset(xso, 255, sizeof(xso));	/* every byte 0xFF, so no real xs/ys matches on the first call */
+		memset(yso, 255, sizeof(yso));
+		firsttime= 0;
+	}
+
+	if (xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
+	if (test) return NULL;
+	xso[thread]= xs; yso[thread]= ys;	/* remember which pixel this thread's table now belongs to */
+	return R.wrld.aotables+ thread*tot*3;
+}
+
+static float *sphere_sampler(int type, int resol, int thread, int xs, int ys, int reset)	/* returns this thread's table of 2*resol*resol direction vectors */
+{
+	int tot;
+	float *vec;
+
+	tot= 2*resol*resol;
+
+	if (type & WO_AORNDSMP) {
+		/* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
+		RNG *rng = BLI_rng_new(BLI_thread_rand(thread));
+		float *sphere;
+		int a;
+
+		/* always returns table */
+		sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
+
+		vec= sphere;
+		for (a=0; a<tot; a++, vec+=3) {
+			BLI_rng_get_float_unit_v3(rng, vec);
+		}
+
+		BLI_rng_free(rng);
+
+		return sphere;
+	}
+	else {
+		float *sphere;
+		float *vec1;
+
+		/* returns table if xs and ys were equal to last call, and not resetting */
+		sphere= (reset)? NULL: threadsafe_table_sphere(1, thread, xs, ys, tot);
+		if (sphere==NULL) {
+			float cosfi, sinfi, cost, sint;
+			float ang;
+			int a;
+
+			sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
+
+			/* random rotation; the two angles are the raw BLI_thread_frand values used as radians */
+			ang = BLI_thread_frand(thread);
+			sinfi = sinf(ang); cosfi = cosf(ang);
+			ang = BLI_thread_frand(thread);
+			sint = sinf(ang); cost = cosf(ang);
+
+			vec= R.wrld.aosphere;
+			vec1= sphere;
+			for (a=0; a<tot; a++, vec+=3, vec1+=3) {	/* write the rotated copy of the shared pattern into the thread table */
+				vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
+				vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
+				vec1[2]= -sint*vec[0] + cost*vec[2];
+			}
+		}
+		return sphere;
+	}
+}
+
+static void ray_ao_qmc(ShadeInput *shi, float ao[3], float env[3])	/* ambient occlusion with the halton/hammersley samplers; writes ao[0..2] and env[0..2] */
+{
+	Isect isec;
+	RayHint point_hint;
+	QMCSampler *qsa=NULL;
+	float samp3d[3];
+	float up[3], side[3], dir[3], nrm[3];
+
+	float maxdist = R.wrld.aodist;
+	float fac=0.0f, prev=0.0f;
+	float adapt_thresh = R.wrld.ao_adapt_thresh;
+	float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;
+
+	int samples=0;
+	int max_samples = R.wrld.aosamp*R.wrld.aosamp;
+
+	float dxyview[3], skyadded=0;
+	int envcolor;
+
+	RE_RC_INIT(isec, *shi);
+	isec.orig.ob   = shi->obi;
+	isec.orig.face = shi->vlr;
+	isec.check = RE_CHECK_VLR_NON_SOLID_MATERIAL;
+	isec.skip = RE_SKIP_VLR_NEIGHBOUR;
+	isec.hint = NULL;
+
+	isec.hit.ob   = NULL;
+	isec.hit.face = NULL;
+
+	isec.last_hit = NULL;
+
+	isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
+	isec.lay= -1;
+
+	copy_v3_v3(isec.start, shi->co);
+
+	RE_instance_rotate_ray_start(shi->obi, &isec);
+
+	RE_rayobject_hint_bb(R.raytree, &point_hint, isec.start, isec.start);
+	isec.hint = &point_hint;
+
+	zero_v3(ao);
+	zero_v3(env);
+
+	/* prevent sky colors to be added for only shadow (shadow becomes alpha) */
+	envcolor= R.wrld.aocolor;
+	if (shi->mat->mode & MA_ONLYSHADOW)
+		envcolor= WO_AOPLAIN;
+
+	if (envcolor == WO_AOSKYTEX) {	/* dxyview is only initialized, and only read, in the sky texture case */
+		dxyview[0]= 1.0f/(float)R.wrld.aosamp;
+		dxyview[1]= 1.0f/(float)R.wrld.aosamp;
+		dxyview[2]= 0.0f;
+	}
+
+	if (shi->vlr->flag & R_SMOOTH) {
+		copy_v3_v3(nrm, shi->vn);
+	}
+	else {
+		copy_v3_v3(nrm, shi->facenor);
+	}
+
+	ortho_basis_v3v3_v3(up, side, nrm);
+
+	/* sampling init */
+	if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
+		float speedfac;
+
+		speedfac = get_avg_speed(shi) * adapt_speed_fac;
+		CLAMP(speedfac, 1.0f, 1000.0f);
+		max_samples /= speedfac;	/* fewer samples for fast moving pixels, but never below 5 */
+		if (max_samples < 5) max_samples = 5;
+
+		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
+	}
+	else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
+		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
+
+	QMC_initPixel(qsa, shi->thread);	/* NOTE(review): qsa is still NULL here for any other ao_samp_method; ray_ao only calls this function for the two methods above */
+
+	while (samples < max_samples) {
+
+		/* sampling, returns quasi-random vector in unit hemisphere */
+		QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
+
+		dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
+		dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
+		dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
+
+		normalize_v3(dir);
+
+		isec.dir[0] = -dir[0];
+		isec.dir[1] = -dir[1];
+		isec.dir[2] = -dir[2];
+		isec.dist = maxdist;
+
+		RE_instance_rotate_ray_dir(shi->obi, &isec);
+
+		prev = fac;	/* occlusion sum before this sample, for the adaptive test below */
+
+		if (RE_rayobject_raycast(R.raytree, &isec)) {
+			if (R.wrld.aomode & WO_AODIST) fac+= expf(-isec.dist*R.wrld.aodistfac);
+			else fac+= 1.0f;
+		}
+		else if (envcolor!=WO_AOPLAIN) {
+			float skycol[4];
+			float view[3];
+
+			view[0]= -dir[0];
+			view[1]= -dir[1];
+			view[2]= -dir[2];
+			normalize_v3(view);
+
+			if (envcolor==WO_AOSKYCOL) {
+				const float skyfac= 0.5f * (1.0f + dot_v3v3(view, R.grvec));
+				env[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
+				env[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
+				env[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
+			}
+			else {	/* WO_AOSKYTEX */
+				shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
+				shadeSunView(skycol, shi->view);
+				env[0]+= skycol[0];
+				env[1]+= skycol[1];
+				env[2]+= skycol[2];
+			}
+			skyadded++;
+		}
+
+		samples++;
+
+		if (qsa && qsa->type == SAMP_TYPE_HALTON) {
+			/* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
+			if (adapt_thresh > 0.0f && (samples > max_samples/2) ) {
+
+				if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
+					break;
+				}
+			}
+		}
+	}
+
+	/* average color times distances/hits formula */
+	ao[0]= ao[1]= ao[2]= 1.0f - fac/(float)samples;
+
+	if (envcolor!=WO_AOPLAIN && skyadded)
+		mul_v3_fl(env, (1.0f - fac/(float)samples)/((float)skyadded));
+	else
+		copy_v3_v3(env, ao);
+
+	if (qsa)
+		release_thread_qmcsampler(&R, shi->thread, qsa);
+}
+
+/* ambient occlusion calculus with the pre-made sphere sample tables; static, called from ray_ao below */
+static void ray_ao_spheresamp(ShadeInput *shi, float ao[3], float env[3])
+{
+	Isect isec;
+	RayHint point_hint;
+	float *vec, *nrm, bias, sh=0.0f;
+	float maxdist = R.wrld.aodist;
+	float dxyview[3];
+	int j= -1, tot, actual=0, skyadded=0, envcolor, resol= R.wrld.aosamp;
+
+	RE_RC_INIT(isec, *shi);
+	isec.orig.ob   = shi->obi;
+	isec.orig.face = shi->vlr;
+	isec.check = RE_CHECK_VLR_RENDER;
+	isec.skip = RE_SKIP_VLR_NEIGHBOUR;
+	isec.hint = NULL;
+
+	isec.hit.ob   = NULL;
+	isec.hit.face = NULL;
+
+	isec.last_hit = NULL;
+
+	isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
+	isec.lay= -1;
+
+	copy_v3_v3(isec.start, shi->co);
+	RE_instance_rotate_ray_start(shi->obi, &isec);
+
+	RE_rayobject_hint_bb(R.raytree, &point_hint, isec.start, isec.start);
+	isec.hint = &point_hint;
+
+	zero_v3(ao);
+	zero_v3(env);
+
+	/* bias prevents smoothed faces to appear flat */
+	if (shi->vlr->flag & R_SMOOTH) {
+		bias= R.wrld.aobias;
+		nrm= shi->vn;
+	}
+	else {
+		bias= 0.0f;
+		nrm= shi->facenor;
+	}
+
+	/* prevent sky colors to be added for only shadow (shadow becomes alpha) */
+	envcolor= R.wrld.aocolor;
+	if (shi->mat->mode & MA_ONLYSHADOW)
+		envcolor= WO_AOPLAIN;
+
+	if (resol>32) resol= 32;
+
+	/* get sphere samples. for faces we get the same samples for sample x/y values,
+	 * for strand render we always require a new sampler because x/y are not set */
+	vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys, shi->strand != NULL);
+
+	/* warning: since we use full sphere now, and dotproduct is below, we do twice as much */
+	tot= 2*resol*resol;
+
+	if (envcolor == WO_AOSKYTEX) {	/* dxyview is only initialized, and only read, in the sky texture case */
+		dxyview[0]= 1.0f/(float)resol;
+		dxyview[1]= 1.0f/(float)resol;
+		dxyview[2]= 0.0f;
+	}
+
+	while (tot--) {
+
+		if (dot_v3v3(vec, nrm) > bias) {	/* only directions on the normal's side of the surface are traced */
+			/* only ao samples for mask */
+			if (R.r.mode & R_OSA) {
+				j++;
+				if (j==R.osa) j= 0;
+				if (!(shi->mask & (1<<j))) {
+					vec+=3;
+					continue;
+				}
+			}
+
+			actual++;
+
+			/* always set start/vec/dist */
+			isec.dir[0] = -vec[0];
+			isec.dir[1] = -vec[1];
+			isec.dir[2] = -vec[2];
+			isec.dist = maxdist;
+
+			RE_instance_rotate_ray_dir(shi->obi, &isec);
+
+			/* do the trace */
+			if (RE_rayobject_raycast(R.raytree, &isec)) {
+				if (R.wrld.aomode & WO_AODIST) sh+= expf(-isec.dist*R.wrld.aodistfac);
+				else sh+= 1.0f;
+			}
+			else if (envcolor!=WO_AOPLAIN) {
+				float skycol[4];
+				float view[3];
+
+				view[0]= -vec[0];
+				view[1]= -vec[1];
+				view[2]= -vec[2];
+				normalize_v3(view);
+
+				if (envcolor==WO_AOSKYCOL) {
+					const float fac = 0.5f * (1.0f + dot_v3v3(view, R.grvec));
+					env[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
+					env[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
+					env[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
+				}
+				else {	/* WO_AOSKYTEX */
+					shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
+					shadeSunView(skycol, shi->view);
+					env[0]+= skycol[0];
+					env[1]+= skycol[1];
+					env[2]+= skycol[2];
+				}
+				skyadded++;
+			}
+		}
+		/* samples */
+		vec+= 3;
+	}
+
+	if (actual==0) sh= 1.0f;	/* no sample was traced: treat the point as fully unoccluded */
+	else sh = 1.0f - sh/((float)actual);
+
+	/* average color times distances/hits formula */
+	ao[0]= ao[1]= ao[2]= sh;
+
+	if (envcolor!=WO_AOPLAIN && skyadded)
+		mul_v3_fl(env, sh/((float)skyadded));
+	else
+		copy_v3_v3(env, ao);
+}
+
+void ray_ao(ShadeInput *shi, float ao[3], float env[3])
+{
+	/* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
+	 * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
+	 * to reuse code between these two functions. This is the easiest way I can think of to do it
+	 * --broken. Note: for any other ao_samp_method neither branch runs and ao/env are left untouched */
+	if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
+		ray_ao_qmc(shi, ao, env);
+	else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
+		ray_ao_spheresamp(shi, ao, env);
+}
+
+static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
+{
+	/* magic numbers for reordering sample positions to give better
+	 * results with adaptive sample, when it usually only takes 4 samples */
+	int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
+	int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
+	int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
+	int count = count_mask(shi->mask);
+
+	/* for better antialiasing shadow samples are distributed over the subpixel
+	 * sample coordinates, this only works for raytracing depth 0 though */
+	if (!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
+		float xs, ys, zs, view[3];
+		int samp, ordsamp, tot= 0;
+
+		for (samp=0; samp<R.osa; samp++) {
+			if (R.osa == 8) ordsamp = order8[samp];
+			else if (R.osa == 11) ordsamp = order11[samp];
+			else if (R.osa == 16) ordsamp = order16[samp];
+			else ordsamp = samp;	/* other OSA levels keep the natural sample order */
+
+			if (shi->mask & (1<<ordsamp)) {
+				/* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
+				xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
+				ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
+				zs= shi->scanco[2];
+
+				shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
+				tot++;
+			}
+		}
+
+		*totjitco= tot;
+	}
+	else {	/* fallback: a single coordinate, the shading point itself */
+		copy_v3_v3(jitco[0], shi->co);
+		*totjitco= 1;
+	}
+}
+/* Shadow via QMC (Halton/Hammersley) sampling: traces up to max_samples shadow rays towards the lamp and stores the averaged result in shadfac. */
+static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, const float lampco[3], float shadfac[4], Isect *isec)
+{
+	QMCSampler *qsa=NULL;
+	int samples=0;
+	float samp3d[3];
+
+	float fac=0.0f, vec[3], end[3];
+	float colsq[4];
+	float adapt_thresh = lar->adapt_thresh;
+	int min_adapt_samples=4, max_samples = lar->ray_totsamp;
+	float start[3];
+	bool do_soft = true, full_osa = false;
+	int i;
+
+	float min[3], max[3];
+	RayHint bb_hint;
+
+	float jitco[RE_MAX_OSA][3];
+	int totjitco;
+
+	colsq[0] = colsq[1] = colsq[2] = 0.0; /* sums of squared sample colors, for the variance calc below */
+	if (isec->mode==RE_RAY_SHADOW_TRA) {
+		shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
+	}
+	else
+		shadfac[3]= 1.0f;
+
+	if (lar->ray_totsamp < 2) do_soft = false;
+	if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = true;
+
+	/* decide the maximum number of rays to trace for this shading point */
+	if (full_osa) {
+		if (do_soft) max_samples  = max_samples/R.osa + 1;
+		else max_samples = 1;
+	}
+	else {
+		if (do_soft) max_samples = lar->ray_totsamp;
+		else if (shi->depth == 0) max_samples = (R.osa > 4)?R.osa:5;
+		else max_samples = 1;
+	}
+
+	ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
+
+	/* sampling init */
+	if (lar->ray_samp_method==LA_SAMP_HALTON)
+		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
+	else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
+		qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
+
+	QMC_initPixel(qsa, shi->thread); /* NOTE(review): qsa is NULL for any other method; ray_shadow() only calls this for the two above */
+
+	/* bounding box of all start points, used as a hint for the raycasts */
+	INIT_MINMAX(min, max);
+	for (i = 0; i < totjitco; i++) {
+		minmax_v3v3_v3(min, max, jitco[i]);
+	}
+	if (shi->obi->flag & R_ENV_TRANSFORMED) {
+		mul_m4_v3(shi->obi->imat, min);
+		mul_m4_v3(shi->obi->imat, max);
+	}
+	RE_rayobject_hint_bb(R.raytree, &bb_hint, min, max);
+
+	isec->hint = &bb_hint;
+	isec->check = RE_CHECK_VLR_RENDER;
+	isec->skip = RE_SKIP_VLR_NEIGHBOUR;
+	copy_v3_v3(vec, lampco);
+
+	while (samples < max_samples) {
+
+		isec->orig.ob   = shi->obi;
+		isec->orig.face = shi->vlr;
+
+		/* manually jitter the start shading co-ord per sample
+		 * based on the pre-generated OSA texture sampling offsets,
+		 * for anti-aliasing sharp shadow edges. */
+		copy_v3_v3(start, jitco[samples % totjitco]); /* NOTE(review): assumes totjitco > 0 - confirm */
+
+		if (do_soft) {
+			/* sphere shadow source */
+			if (lar->type == LA_LOCAL) {
+				float ru[3], rv[3], v[3], s[3];
+
+				/* calc tangent plane vectors */
+				sub_v3_v3v3(v, start, lampco);
+				normalize_v3(v);
+				ortho_basis_v3v3_v3(ru, rv, v);
+
+				/* sampling, returns quasi-random vector in area_size disc */
+				QMC_sampleDisc(samp3d, qsa, shi->thread, samples, lar->area_size);
+
+				/* distribute disc samples across the tangent plane */
+				s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
+				s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
+				s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
+
+				copy_v3_v3(samp3d, s);
+			}
+			else {
+				/* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
+				QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
+
+				/* align samples to lamp vector */
+				mul_m3_v3(lar->mat, samp3d);
+			}
+			/* ray end point: lamp position displaced by the sample offset */
+			end[0] = vec[0]+samp3d[0];
+			end[1] = vec[1]+samp3d[1];
+			end[2] = vec[2]+samp3d[2];
+		}
+		else {
+			copy_v3_v3(end, vec);
+		}
+
+		if (shi->strand) {
+			/* bias away somewhat to avoid self intersection */
+			float jitbias= 0.5f*(len_v3(shi->dxco) + len_v3(shi->dyco));
+			float v[3];
+
+			sub_v3_v3v3(v, start, end);
+			normalize_v3(v);
+
+			start[0] -= jitbias*v[0];
+			start[1] -= jitbias*v[1];
+			start[2] -= jitbias*v[2];
+		}
+
+		copy_v3_v3(isec->start, start);
+		sub_v3_v3v3(isec->dir, end, start);
+		isec->dist = normalize_v3(isec->dir);
+
+		RE_instance_rotate_ray(shi->obi, isec);
+
+		/* trace the ray */
+		if (isec->mode==RE_RAY_SHADOW_TRA) {
+			float col[4] = {1.0f, 1.0f, 1.0f, 1.0f};
+
+			ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0, col);
+			shadfac[0] += col[0];
+			shadfac[1] += col[1];
+			shadfac[2] += col[2];
+			shadfac[3] += col[3];
+
+			/* for variance calc */
+			colsq[0] += col[0]*col[0];
+			colsq[1] += col[1]*col[1];
+			colsq[2] += col[2]*col[2];
+		}
+		else {
+			if ( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f; /* fac counts the rays for which the raycast reported a hit */
+		}
+
+		samples++;
+
+		if (lar->ray_samp_method == LA_SAMP_HALTON) {
+
+			/* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
+			if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0f) && (samples > max_samples / 3)) {
+				if (isec->mode==RE_RAY_SHADOW_TRA) {
+					if ((shadfac[3] / samples > (1.0f-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
+						break;
+					else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
+						break;
+				}
+				else {
+					if ((fac / samples > (1.0f-adapt_thresh)) || (fac / samples < adapt_thresh))
+						break;
+				}
+			}
+		}
+	}
+
+	/* average over the number of rays actually traced */
+	if (isec->mode==RE_RAY_SHADOW_TRA) {
+		shadfac[0] /= samples;
+		shadfac[1] /= samples;
+		shadfac[2] /= samples;
+		shadfac[3] /= samples;
+	}
+	else
+		shadfac[3]= 1.0f-fac/samples; /* fraction of rays without a hit */
+
+	if (qsa)
+		release_thread_qmcsampler(&R, shi->thread, qsa);
+}
+
+static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, const float lampco[3], float shadfac[4], Isect *isec)
+{
+	/* area soft shadow: traces rays from shi->co to lampco displaced by the offsets of give_jitter_plane() and averages them into shadfac */
+	const float *jitlamp;
+	float fac=0.0f, div=0.0f, vec[3];
+	int a, j= -1, mask;
+	RayHint point_hint;
+
+	if (isec->mode==RE_RAY_SHADOW_TRA) {
+		shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
+	}
+	else shadfac[3]= 1.0f;
+
+	fac= 0.0f;
+	jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
+
+	a= lar->ray_totsamp;
+
+	/* this correction to make sure we always take at least 1 sample */
+	mask= shi->mask;
+	if (a==4) mask |= (mask>>4)|(mask>>8);
+	else if (a==9) mask |= (mask>>9);
+
+	/* all rays share the same start point, so start and hint are set up once */
+	copy_v3_v3(isec->start, shi->co);
+	RE_instance_rotate_ray_start(shi->obi, isec);
+
+	isec->orig.ob   = shi->obi;
+	isec->orig.face = shi->vlr;
+	RE_rayobject_hint_bb(R.raytree, &point_hint, isec->start, isec->start);
+	isec->hint = &point_hint;
+
+	while (a--) {
+
+		if (R.r.mode & R_OSA) {
+			/* j cycles through the OSA sample indices; lamp samples whose mask bit is not set are skipped */
+			j++;
+			if (j>=R.osa) j= 0;
+			if (!(mask & (1<<j))) {
+				jitlamp+= 2; /* each jitter entry is an (x, y) pair */
+				continue;
+			}
+		}
+
+		vec[0]= jitlamp[0];
+		vec[1]= jitlamp[1];
+		vec[2]= 0.0f;
+		mul_m3_v3(lar->mat, vec);
+
+		/* set start and vec */
+		isec->dir[0] = vec[0]+lampco[0]-shi->co[0];
+		isec->dir[1] = vec[1]+lampco[1]-shi->co[1];
+		isec->dir[2] = vec[2]+lampco[2]-shi->co[2];
+
+		RE_instance_rotate_ray_dir(shi->obi, isec);
+
+		isec->dist = 1.0f; /* NOTE(review): dir is not normalized here; presumably 1.0 spans the full start-to-lamp vector - confirm */
+		isec->check = RE_CHECK_VLR_RENDER;
+		isec->skip = RE_SKIP_VLR_NEIGHBOUR;
+
+		if (isec->mode==RE_RAY_SHADOW_TRA) {
+			/* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
+			float col[4] = {1.0f, 1.0f, 1.0f, 1.0f};
+
+			ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0, col);
+			shadfac[0] += col[0];
+			shadfac[1] += col[1];
+			shadfac[2] += col[2];
+			shadfac[3] += col[3];
+		}
+		else if ( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
+
+		div+= 1.0f; /* number of rays actually traced */
+		jitlamp+= 2;
+	}
+
+	if (isec->mode==RE_RAY_SHADOW_TRA) {
+		shadfac[0] /= div; /* NOTE(review): div is 0 if the mask skipped every sample - confirm the correction above rules that out */
+		shadfac[1] /= div;
+		shadfac[2] /= div;
+		shadfac[3] /= div;
+	}
+	else {
+		/* sqrt makes nice umbra effect */
+		if (lar->ray_samp_type & LA_SAMP_UMBRA)
+			shadfac[3] = sqrtf(1.0f - fac / div);
+		else
+			shadfac[3] = 1.0f - fac / div;
+	}
+}
+/* extern call from shade_lamp_loop: raytraced shadow of lamp lar at shi; shadfac[3] gets the light factor (1.0=full light), shadfac[0..2] are only written in RE_RAY_SHADOW_TRA mode */
+void ray_shadow(ShadeInput *shi, LampRen *lar, float shadfac[4])
+{
+	Isect isec;
+	float lampco[3];
+
+	/* setup isec */
+	RE_RC_INIT(isec, *shi);
+	if (shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
+	else isec.mode= RE_RAY_SHADOW;
+	isec.hint = NULL;
+
+	if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
+		isec.lay= lar->lay;
+	else
+		isec.lay= -1; /* presumably a mask matching every layer - confirm */
+
+	/* only when not mir tracing, first hit optimization */
+	if (shi->depth==0) {
+		isec.last_hit = lar->last_hit[shi->thread];
+	}
+	else {
+		isec.last_hit = NULL;
+	}
+
+	if (lar->type==LA_SUN || lar->type==LA_HEMI) {
+		/* jitter and QMC sampling add a displace vector to the lamp position
+		 * that's incorrect because a SUN lamp does not have an exact position
+		 * and the displace should be done at the ray vector instead of the
+		 * lamp position.
+		 * This is easily verified by noticing that shadows of SUN lights change
+		 * with the scene BB.
+		 *
+		 * This was detected during SoC 2009 - Raytrace Optimization, but to keep
+		 * consistency with older render code it wasn't removed.
+		 *
+		 * If the render code goes through some recode/serious bug-fix then this
+		 * is something to consider!
+		 */
+		lampco[0]= shi->co[0] - R.maxdist*lar->vec[0]; /* stand-in position: R.maxdist from shi->co against lar->vec */
+		lampco[1]= shi->co[1] - R.maxdist*lar->vec[1];
+		lampco[2]= shi->co[2] - R.maxdist*lar->vec[2];
+	}
+	else {
+		copy_v3_v3(lampco, lar->co);
+	}
+
+	if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
+
+		ray_shadow_qmc(shi, lar, lampco, shadfac, &isec);
+
+	}
+	else {
+		if (lar->ray_totsamp<2) {
+			/* single sample: one ray straight from shi->co to the lamp position */
+
+			isec.orig.ob   = shi->obi;
+			isec.orig.face = shi->vlr;
+
+			shadfac[3]= 1.0f;  /* 1.0=full light */
+
+			/* set up isec.dir */
+			copy_v3_v3(isec.start, shi->co);
+			sub_v3_v3v3(isec.dir, lampco, isec.start);
+			isec.dist = normalize_v3(isec.dir);
+
+			RE_instance_rotate_ray(shi->obi, &isec);
+
+			if (isec.mode==RE_RAY_SHADOW_TRA) {
+				/* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
+				float col[4] = {1.0f, 1.0f, 1.0f, 1.0f};
+
+				ray_trace_shadow_tra(&isec, shi, DEPTH_SHADOW_TRA, 0, col);
+				copy_v4_v4(shadfac, col);
+			}
+			else if (RE_rayobject_raycast(R.raytree, &isec))
+				shadfac[3]= 0.0f;
+		}
+		else {
+			ray_shadow_jitter(shi, lar, lampco, shadfac, &isec);
+		}
+	}
+
+	/* for first hit optim, set last intersected shadow face */
+	if (shi->depth==0) {
+		lar->last_hit[shi->thread] = isec.last_hit;
+	}
+
+}
+
diff --git a/source/blender/render/intern/source/render_result.c b/source/blender/render/intern/source/render_result.c
index 5fd897219c4..e0cacdf4b8f 100644
--- a/source/blender/render/intern/source/render_result.c
+++ b/source/blender/render/intern/source/render_result.c
@@ -95,7 +95,7 @@ void render_result_free(RenderResult *res)
 		if (rl->acolrect) MEM_freeN(rl->acolrect);
 		if (rl->scolrect) MEM_freeN(rl->scolrect);
 		if (rl->display_buffer) MEM_freeN(rl->display_buffer);
-		
+
 		while (rl->passes.first) {
 			RenderPass *rpass = rl->passes.first;
 			if (rpass->rect) MEM_freeN(rpass->rect);
@@ -128,13 +128,13 @@ void render_result_free(RenderResult *res)
 void render_result_free_list(ListBase *lb, RenderResult *rr)
 {
 	RenderResult *rrnext;
-	
+
 	for (; rr; rr = rrnext) {
 		rrnext = rr->next;
-		
+
 		if (lb && lb->first)
 			BLI_remlink(lb, rr);
-		
+
 		render_result_free(rr);
 	}
 }
@@ -206,7 +206,7 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int
 	const int view_id = BLI_findstringindex(&rr->views, viewname, offsetof(RenderView, name));
 	RenderPass *rpass = MEM_callocN(sizeof(RenderPass), name);
 	size_t rectsize = ((size_t)rr->rectx) * rr->recty * channels;
-	
+
 	rpass->channels = channels;
 	rpass->rectx = rl->rectx;
 	rpass->recty = rl->recty;
@@ -216,7 +216,7 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int
 	BLI_strncpy(rpass->chan_id, chan_id, sizeof(rpass->chan_id));
 	BLI_strncpy(rpass->view, viewname, sizeof(rpass->view));
 	set_pass_full_name(rpass->fullname, rpass->name, -1, rpass->view, rpass->chan_id);
-	
+
 	if (rl->exrhandle) {
 		int a;
 		for (a = 0; a < channels; a++) {
@@ -227,13 +227,13 @@ static RenderPass *render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int
 	else {
 		float *rect;
 		int x;
-		
+
 		rpass->rect = MEM_mapallocN(sizeof(float) * rectsize, name);
 		if (rpass->rect == NULL) {
 			MEM_freeN(rpass);
 			return NULL;
 		}
-		
+
 		if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) {
 			/* initialize to max speed */
 			rect = rpass->rect;
@@ -267,13 +267,13 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
 	RenderLayer *rl;
 	RenderView *rv;
 	int rectx, recty;
-	
+
 	rectx = BLI_rcti_size_x(partrct);
 	recty = BLI_rcti_size_y(partrct);
-	
+
 	if (rectx <= 0 || recty <= 0)
 		return NULL;
-	
+
 	rr = MEM_callocN(sizeof(RenderResult), "new render result");
 	rr->rectx = rectx;
 	rr->recty = recty;
@@ -286,7 +286,7 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
 	rr->tilerect.xmax = partrct->xmax - re->disprect.xmin;
 	rr->tilerect.ymin = partrct->ymin - re->disprect.ymin;
 	rr->tilerect.ymax = partrct->ymax - re->disprect.ymin;
-	
+
 	if (savebuffers) {
 		rr->do_exr_tile = true;
 	}
@@ -304,14 +304,14 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
 
 		rl = MEM_callocN(sizeof(RenderLayer), "new render layer");
 		BLI_addtail(&rr->layers, rl);
-		
+
 		BLI_strncpy(rl->name, view_layer->name, sizeof(rl->name));
 		rl->layflag = view_layer->layflag;
 		rl->passflag = view_layer->passflag; /* for debugging: view_layer->passflag | SCE_PASS_RAYHITS; */
 		rl->pass_xor = view_layer->pass_xor;
 		rl->rectx = rectx;
 		rl->recty = recty;
-		
+
 		if (rr->do_exr_tile) {
 			rl->display_buffer = MEM_mapallocN((size_t)rectx * recty * sizeof(unsigned int),
 			                                   "Combined display space rgba");
@@ -412,7 +412,7 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
 	if (BLI_listbase_is_empty(&rr->layers) && !(layername && layername[0])) {
 		rl = MEM_callocN(sizeof(RenderLayer), "new render layer");
 		BLI_addtail(&rr->layers, rl);
-		
+
 		rl->rectx = rectx;
 		rl->recty = recty;
 
@@ -439,15 +439,15 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
 		/* note, this has to be in sync with scene.c */
 		rl->layflag = 0x7FFF;    /* solid ztra halo strand */
 		rl->passflag = SCE_PASS_COMBINED;
-		
+
 		re->active_view_layer = 0;
 	}
-	
+
 	/* border render; calculate offset for use in compositor. compo is centralized coords */
 	/* XXX obsolete? I now use it for drawing border render offset (ton) */
 	rr->xof = re->disprect.xmin + BLI_rcti_cent_x(&re->disprect) - (re->winx / 2);
 	rr->yof = re->disprect.ymin + BLI_rcti_cent_y(&re->disprect) - (re->winy / 2);
-	
+
 	return rr;
 }
 
@@ -554,7 +554,7 @@ static void *ml_addlayer_cb(void *base, const char *str)
 {
 	RenderResult *rr = base;
 	RenderLayer *rl;
-	
+
 	rl = MEM_callocN(sizeof(RenderLayer), "new render layer");
 	BLI_addtail(&rr->layers, rl);
 
@@ -676,7 +676,7 @@ RenderResult *render_result_new_from_exr(void *exrhandle, const char *colorspace
 
 	rr->rectx = rectx;
 	rr->recty = recty;
-	
+
 	IMB_exr_multilayer_convert(exrhandle, rr, ml_addview_cb, ml_addlayer_cb, ml_addpass_cb);
 
 	for (rl = rr->layers.first; rl; rl = rl->next) {
@@ -695,7 +695,7 @@ RenderResult *render_result_new_from_exr(void *exrhandle, const char *colorspace
 			}
 		}
 	}
-	
+
 	return rr;
 }
 
@@ -740,16 +740,16 @@ static void do_merge_tile(RenderResult *rr, RenderResult *rrpart, float *target,
 {
 	int y, tilex, tiley;
 	size_t ofs, copylen;
-	
+
 	copylen = tilex = rrpart->rectx;
 	tiley = rrpart->recty;
-	
+
 	if (rrpart->crop) { /* filters add pixel extra */
 		tile += pixsize * (rrpart->crop + ((size_t)rrpart->crop) * tilex);
-		
+
 		copylen = tilex - 2 * rrpart->crop;
 		tiley -= 2 * rrpart->crop;
-		
+
 		ofs = (((size_t)rrpart->tilerect.ymin) + rrpart->crop) * rr->rectx + (rrpart->tilerect.xmin + rrpart->crop);
 		target += pixsize * ofs;
 	}
@@ -776,7 +776,7 @@ void render_result_merge(RenderResult *rr, RenderResult *rrpart)
 {
 	RenderLayer *rl, *rlp;
 	RenderPass *rpass, *rpassp;
-	
+
 	for (rl = rr->layers.first; rl; rl = rl->next) {
 		rlp = RE_GetRenderLayer(rrpart, rl->name);
 		if (rlp) {
@@ -956,7 +956,7 @@ void render_result_single_layer_begin(Render *re)
 
 	/* officially pushed result should be NULL... error can happen with do_seq */
 	RE_FreeRenderResult(re->pushedresult);
-	
+
 	re->pushedresult = re->result;
 	re->result = NULL;
 }
@@ -980,10 +980,10 @@ void render_result_single_layer_end(Render *re)
 	if (re->pushedresult->rectx == re->result->rectx && re->pushedresult->recty == re->result->recty) {
 		/* find which layer in re->pushedresult should be replaced */
 		rl = re->result->layers.first;
-		
+
 		/* render result should be empty after this */
 		BLI_remlink(&re->result->layers, rl);
-		
+
 		/* reconstruct render result layers */
 		for (nr = 0, view_layer = re->view_layers.first; view_layer; view_layer = view_layer->next, nr++) {
 			if (nr == re->active_view_layer) {
@@ -1010,9 +1010,9 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart, cons
 	RenderLayer *rlp, *rl;
 	RenderPass *rpassp;
 	int offs, partx, party;
-	
+
 	BLI_thread_lock(LOCK_IMAGE);
-	
+
 	for (rlp = rrpart->layers.first; rlp; rlp = rlp->next) {
 		rl = RE_GetRenderLayer(rr, rlp->name);
 
@@ -1042,7 +1042,7 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart, cons
 				                    xstride, xstride * rrpart->rectx, rpassp->rect + a + xstride * offs);
 			}
 		}
-		
+
 	}
 
 	party = rrpart->tilerect.ymin + rrpart->crop;
@@ -1068,7 +1068,7 @@ void render_result_save_empty_result_tiles(Render *re)
 	RenderPart *pa;
 	RenderResult *rr;
 	RenderLayer *rl;
-	
+
 	for (rr = re->result; rr; rr = rr->next) {
 		for (rl = rr->layers.first; rl; rl = rl->next) {
 			for (pa = re->parts.first; pa; pa = pa->next) {
@@ -1112,7 +1112,7 @@ void render_result_exr_file_end(Render *re)
 
 		rr->do_exr_tile = false;
 	}
-	
+
 	render_result_free_list(&re->fullresult, re->result);
 	re->result = NULL;
 
@@ -1131,7 +1131,7 @@ void render_result_exr_file_path(Scene *scene, const char *layname, int sample,
 {
 	char name[FILE_MAXFILE + MAX_ID_NAME + MAX_ID_NAME + 100];
 	const char *fi = BLI_path_basename(BKE_main_blendfile_path_from_global());
-	
+
 	if (sample == 0) {
 		BLI_snprintf(name, sizeof(name), "%s_%s_%s.exr", fi, scene->id.name + 2, layname);
 	}
@@ -1194,7 +1194,7 @@ int render_result_exr_file_read_path(RenderResult *rr, RenderLayer *rl_single, c
 	for (rl = rr->layers.first; rl; rl = rl->next) {
 		if (rl_single && rl_single != rl)
 			continue;
-		
+
 		/* passes are allocated in sync */
 		for (rpass = rl->passes.first; rpass; rpass = rpass->next) {
 			const int xstride = rpass->channels;
@@ -1292,7 +1292,7 @@ ImBuf *render_result_rect_to_ibuf(RenderResult *rr, RenderData *rd, const int vi
 
 	/* float factor for random dither, imbuf takes care of it */
 	ibuf->dither = rd->dither_intensity;
-	
+
 	/* prepare to gamma correct to sRGB color space
 	 * note that sequence editor can generate 8bpc render buffers
 	 */
@@ -1333,7 +1333,7 @@ void RE_render_result_rect_from_ibuf(RenderResult *rr, RenderData *UNUSED(rd), I
 
 		if (!rv->rectf)
 			rv->rectf = MEM_mallocN(4 * sizeof(float) * rr->rectx * rr->recty, "render_seq rectf");
-		
+
 		memcpy(rv->rectf, ibuf->rect_float, 4 * sizeof(float) * rr->rectx * rr->recty);
 
 		/* TSK! Since sequence render doesn't free the *rr render result, the old rect32
diff --git a/source/blender/render/intern/source/render_texture.c b/source/blender/render/intern/source/render_texture.c
index 79d13ecab5b..99da5b3ca01 100644
--- a/source/blender/render/intern/source/render_texture.c
+++ b/source/blender/render/intern/source/render_texture.c
@@ -95,7 +95,7 @@ static void tex_normal_derivate(Tex *tex, TexResult *texres)
 		float col[4];
 		if (BKE_colorband_evaluate(tex->coba, texres->tin, col)) {
 			float fac0, fac1, fac2, fac3;
-			
+
 			fac0= (col[0]+col[1]+col[2]);
 			BKE_colorband_evaluate(tex->coba, texres->nor[0], col);
 			fac1= (col[0]+col[1]+col[2]);
@@ -103,11 +103,11 @@ static void tex_normal_derivate(Tex *tex, TexResult *texres)
 			fac2= (col[0]+col[1]+col[2]);
 			BKE_colorband_evaluate(tex->coba, texres->nor[2], col);
 			fac3= (col[0]+col[1]+col[2]);
-			
+
 			texres->nor[0]= (fac0 - fac1) / 3.0f;
 			texres->nor[1]= (fac0 - fac2) / 3.0f;
 			texres->nor[2]= (fac0 - fac3) / 3.0f;
-			
+
 			return;
 		}
 	}
@@ -173,7 +173,7 @@ static int blend(Tex *tex, const float texvec[3], TexResult *texres)
 static int clouds(Tex *tex, const float texvec[3], TexResult *texres)
 {
 	int rv = TEX_INT;
-	
+
 	texres->tin = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1], texvec[2], tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
 
 	if (texres->nor!=NULL) {
@@ -181,7 +181,7 @@ static int clouds(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->nor[0] = BLI_gTurbulence(tex->noisesize, texvec[0] + tex->nabla, texvec[1], texvec[2], tex->noisedepth,  (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
 		texres->nor[1] = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1] + tex->nabla, texvec[2], tex->noisedepth,  (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
 		texres->nor[2] = BLI_gTurbulence(tex->noisesize, texvec[0], texvec[1], texvec[2] + tex->nabla, tex->noisedepth,  (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -215,7 +215,7 @@ static float tex_sin(float a)
 static float tex_saw(float a)
 {
 	const float b = 2*M_PI;
-	
+
 	int n = (int)(a / b);
 	a -= n*b;
 	if (a < 0) a += b;
@@ -227,9 +227,9 @@ static float tex_tri(float a)
 {
 	const float b = 2*M_PI;
 	const float rmax = 1.0;
-	
+
 	a = rmax - 2.0f*fabsf(floorf((a*(1.0f/b))+0.5f) - (a*(1.0f/b)));
-	
+
 	return a;
 }
 
@@ -244,9 +244,9 @@ static float wood_int(Tex *tex, float x, float y, float z)
 	waveform[0] = tex_sin;			/* assign address of tex_sin() function to pointer array */
 	waveform[1] = tex_saw;
 	waveform[2] = tex_tri;
-	
+
 	if ((wf>TEX_TRI) || (wf<TEX_SIN)) wf=0; /* check to be sure noisebasis2 is initialized ahead of time */
-		
+
 	if (wt==TEX_BAND) {
 		wi = waveform[wf]((x + y + z)*10.0f);
 	}
@@ -261,7 +261,7 @@ static float wood_int(Tex *tex, float x, float y, float z)
 		wi = tex->turbul*BLI_gNoise(tex->noisesize, x, y, z, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
 		wi = waveform[wf](sqrtf(x*x + y*y + z*z)*20.0f + wi);
 	}
-	
+
 	return wi;
 }
 
@@ -275,7 +275,7 @@ static int wood(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->nor[0] = wood_int(tex, texvec[0] + tex->nabla, texvec[1], texvec[2]);
 		texres->nor[1] = wood_int(tex, texvec[0], texvec[1] + tex->nabla, texvec[2]);
 		texres->nor[2] = wood_int(tex, texvec[0], texvec[1], texvec[2] + tex->nabla);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -291,16 +291,16 @@ static float marble_int(Tex *tex, float x, float y, float z)
 	float n, mi;
 	short wf = tex->noisebasis2;	/* wave form:	TEX_SIN=0,  TEX_SAW=1,  TEX_TRI=2						*/
 	short mt = tex->stype;			/* marble type:	TEX_SOFT=0,	TEX_SHARP=1,TEX_SHAPER=2 					*/
-	
+
 	float (*waveform[3])(float);	/* create array of pointers to waveform functions */
 	waveform[0] = tex_sin;			/* assign address of tex_sin() function to pointer array */
 	waveform[1] = tex_saw;
 	waveform[2] = tex_tri;
-	
+
 	if ((wf>TEX_TRI) || (wf<TEX_SIN)) wf=0; /* check to be sure noisebasis2 isn't initialized ahead of time */
-	
+
 	n = 5.0f * (x + y + z);
-	
+
 	mi = n + tex->turbul * BLI_gTurbulence(tex->noisesize, x, y, z, tex->noisedepth, (tex->noisetype!=TEX_NOISESOFT),  tex->noisebasis);
 
 	if (mt>=TEX_SOFT) {  /* TEX_SOFT always true */
@@ -327,9 +327,9 @@ static int marble(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->nor[0] = marble_int(tex, texvec[0] + tex->nabla, texvec[1], texvec[2]);
 		texres->nor[1] = marble_int(tex, texvec[0], texvec[1] + tex->nabla, texvec[2]);
 		texres->nor[2] = marble_int(tex, texvec[0], texvec[1], texvec[2] + tex->nabla);
-		
+
 		tex_normal_derivate(tex, texres);
-		
+
 		rv |= TEX_NOR;
 	}
 
@@ -397,8 +397,8 @@ static int magic(Tex *tex, const float texvec[3], TexResult *texres)
 
 	if (turb!=0.0f) {
 		turb*= 2.0f;
-		x/= turb; 
-		y/= turb; 
+		x/= turb;
+		y/= turb;
 		z/= turb;
 	}
 	texres->tr = 0.5f - x;
@@ -406,10 +406,10 @@ static int magic(Tex *tex, const float texvec[3], TexResult *texres)
 	texres->tb = 0.5f - z;
 
 	texres->tin= (1.0f / 3.0f) * (texres->tr + texres->tg + texres->tb);
-	
+
 	BRICONTRGB;
 	texres->ta = 1.0f;
-	
+
 	return TEX_RGB;
 }
 
@@ -420,9 +420,9 @@ static int stucci(Tex *tex, const float texvec[3], TexResult *texres)
 {
 	float nor[3], b2, ofs;
 	int retval= TEX_INT;
-	
+
 	b2= BLI_gNoise(tex->noisesize, texvec[0], texvec[1], texvec[2], (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
-	
+
 	ofs= tex->turbul/200.0f;
 
 	if (tex->stype) ofs*=(b2*b2);
@@ -431,27 +431,27 @@ static int stucci(Tex *tex, const float texvec[3], TexResult *texres)
 	nor[2] = BLI_gNoise(tex->noisesize, texvec[0], texvec[1], texvec[2]+ofs, (tex->noisetype!=TEX_NOISESOFT), tex->noisebasis);
 
 	texres->tin= nor[2];
-	
+
 	if (texres->nor) {
-		
+
 		copy_v3_v3(texres->nor, nor);
 		tex_normal_derivate(tex, texres);
-		
+
 		if (tex->stype==TEX_WALLOUT) {
 			texres->nor[0]= -texres->nor[0];
 			texres->nor[1]= -texres->nor[1];
 			texres->nor[2]= -texres->nor[2];
 		}
-		
+
 		retval |= TEX_NOR;
 	}
-	
+
 	if (tex->stype==TEX_WALLOUT)
 		texres->tin= 1.0f-texres->tin;
-	
+
 	if (texres->tin<0.0f)
 		texres->tin= 0.0f;
-	
+
 	return retval;
 }
 
@@ -477,7 +477,7 @@ static float mg_mFractalOrfBmTex(Tex *tex, const float texvec[3], TexResult *tex
 		texres->nor[0] = tex->ns_outscale*mgravefunc(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis);
 		texres->nor[1] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis);
 		texres->nor[2] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->noisebasis);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -507,7 +507,7 @@ static float mg_ridgedOrHybridMFTex(Tex *tex, const float texvec[3], TexResult *
 		texres->nor[0] = tex->ns_outscale*mgravefunc(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis);
 		texres->nor[1] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis);
 		texres->nor[2] = tex->ns_outscale*mgravefunc(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->mg_gain, tex->noisebasis);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -532,7 +532,7 @@ static float mg_HTerrainTex(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->nor[0] = tex->ns_outscale*mg_HeteroTerrain(texvec[0] + offs, texvec[1], texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis);
 		texres->nor[1] = tex->ns_outscale*mg_HeteroTerrain(texvec[0], texvec[1] + offs, texvec[2], tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis);
 		texres->nor[2] = tex->ns_outscale*mg_HeteroTerrain(texvec[0], texvec[1], texvec[2] + offs, tex->mg_H, tex->mg_lacunarity, tex->mg_octaves, tex->mg_offset, tex->noisebasis);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -630,7 +630,7 @@ static float voronoiTex(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->nor[1] = sc * fabsf(tex->vn_w1*da[0] + tex->vn_w2*da[1] + tex->vn_w3*da[2] + tex->vn_w4*da[3]);
 		voronoi(texvec[0], texvec[1], texvec[2] + offs, da, pa, tex->vn_mexp,  tex->vn_distm);
 		texres->nor[2] = sc * fabsf(tex->vn_w1*da[0] + tex->vn_w2*da[1] + tex->vn_w3*da[2] + tex->vn_w4*da[3]);
-		
+
 		tex_normal_derivate(tex, texres);
 		rv |= TEX_NOR;
 	}
@@ -640,7 +640,7 @@ static float voronoiTex(Tex *tex, const float texvec[3], TexResult *texres)
 		texres->ta = 1.0;
 		return (rv | TEX_RGB);
 	}
-	
+
 	BRICONT;
 
 	return rv;
@@ -653,20 +653,20 @@ static int texnoise(Tex *tex, TexResult *texres, int thread)
 {
 	float div=3.0;
 	int val, ran, loop, shift = 29;
-	
+
 	ran=  BLI_rng_thread_rand(random_tex_array, thread);
-	
+
 	loop= tex->noisedepth;
 
 	/* start from top bits since they have more variance */
 	val= ((ran >> shift) & 3);
-	
+
 	while (loop--) {
-		shift -= 2;		
+		shift -= 2;
 		val *= ((ran >> shift) & 3);
 		div *= 3.0f;
 	}
-	
+
 	texres->tin= ((float)val)/div;
 
 	BRICONT;
@@ -679,7 +679,7 @@ static int cubemap_glob(const float n[3], float x, float y, float z, float *adr1
 {
 	float x1, y1, z1, nor[3];
 	int ret;
-	
+
 	if (n==NULL) {
 		nor[0]= x; nor[1]= y; nor[2]= z;	/* use local render coord */
 	}
@@ -690,7 +690,7 @@ static int cubemap_glob(const float n[3], float x, float y, float z, float *adr1
 	x1 = fabsf(nor[0]);
 	y1 = fabsf(nor[1]);
 	z1 = fabsf(nor[2]);
-	
+
 	if (z1>=x1 && z1>=y1) {
 		*adr1 = (x + 1.0f) / 2.0f;
 		*adr2 = (y + 1.0f) / 2.0f;
@@ -719,13 +719,13 @@ static void do_2d_mapping(
 	Tex *tex;
 	float fx, fy, fac1, area[8];
 	int ok, proj, areaflag= 0, wrap;
-	
+
 	/* mtex variables localized, only cubemap doesn't cooperate yet... */
 	wrap= mtex->mapping;
 	tex= mtex->tex;
 
 	if (!(dxt && dyt)) {
-		
+
 		if (wrap==MTEX_FLAT) {
 			fx = (texvec[0] + 1.0f) / 2.0f;
 			fy = (texvec[1] + 1.0f) / 2.0f;
@@ -735,15 +735,15 @@ static void do_2d_mapping(
 		else {
 			cubemap_glob(n, texvec[0], texvec[1], texvec[2], &fx, &fy);
 		}
-		
+
 		/* repeat */
 		if (tex->extend==TEX_REPEAT) {
 			if (tex->xrepeat>1) {
 				float origf= fx *= tex->xrepeat;
-				
+
 				if (fx>1.0f) fx -= (int)(fx);
 				else if (fx<0.0f) fx+= 1-(int)(fx);
-				
+
 				if (tex->flag & TEX_REPEAT_XMIR) {
 					int orig= (int)floor(origf);
 					if (orig & 1)
@@ -752,10 +752,10 @@ static void do_2d_mapping(
 			}
 			if (tex->yrepeat>1) {
 				float origf= fy *= tex->yrepeat;
-				
+
 				if (fy>1.0f) fy -= (int)(fy);
 				else if (fy<0.0f) fy+= 1-(int)(fy);
-				
+
 				if (tex->flag & TEX_REPEAT_YMIR) {
 					int orig= (int)floor(origf);
 					if (orig & 1)
@@ -777,7 +777,7 @@ static void do_2d_mapping(
 		texvec[1]= fy;
 	}
 	else {
-		
+
 		if (wrap==MTEX_FLAT) {
 			fx= (texvec[0] + 1.0f) / 2.0f;
 			fy= (texvec[1] + 1.0f) / 2.0f;
@@ -854,55 +854,55 @@ static void do_2d_mapping(
 			dyt[2] *= 0.5f;
 
 		}
-		
+
 		/* if area, then reacalculate dxt[] and dyt[] */
 		if (areaflag) {
-			fx= area[0]; 
+			fx= area[0];
 			fy= area[1];
 			dxt[0]= area[2]-fx;
 			dxt[1]= area[3]-fy;
 			dyt[0]= area[4]-fx;
 			dyt[1]= area[5]-fy;
 		}
-		
+
 		/* repeat */
 		if (tex->extend==TEX_REPEAT) {
 			float max= 1.0f;
 			if (tex->xrepeat>1) {
 				float origf= fx *= tex->xrepeat;
-				
+
 				/* TXF: omit mirror here, see comments in do_material_tex() after do_2d_mapping() call */
 				if (tex->texfilter == TXF_BOX) {
 					if (fx>1.0f) fx -= (int)(fx);
 					else if (fx<0.0f) fx+= 1-(int)(fx);
-				
+
 					if (tex->flag & TEX_REPEAT_XMIR) {
 						int orig= (int)floor(origf);
 						if (orig & 1)
 							fx= 1.0f-fx;
 					}
 				}
-				
+
 				max= tex->xrepeat;
-				
+
 				dxt[0]*= tex->xrepeat;
 				dyt[0]*= tex->xrepeat;
 			}
 			if (tex->yrepeat>1) {
 				float origf= fy *= tex->yrepeat;
-				
+
 				/* TXF: omit mirror here, see comments in do_material_tex() after do_2d_mapping() call */
 				if (tex->texfilter == TXF_BOX) {
 					if (fy>1.0f) fy -= (int)(fy);
 					else if (fy<0.0f) fy+= 1-(int)(fy);
-				
+
 					if (tex->flag & TEX_REPEAT_YMIR) {
 						int orig= (int)floor(origf);
 						if (orig & 1)
 							fy= 1.0f-fy;
 					}
 				}
-				
+
 				if (max<tex->yrepeat)
 					max= tex->yrepeat;
 
@@ -913,7 +913,7 @@ static void do_2d_mapping(
 				dxt[2]*= max;
 				dyt[2]*= max;
 			}
-			
+
 		}
 		/* crop */
 		if (tex->cropxmin!=0.0f || tex->cropxmax!=1.0f) {
@@ -928,7 +928,7 @@ static void do_2d_mapping(
 			dxt[1]*= fac1;
 			dyt[1]*= fac1;
 		}
-		
+
 		texvec[0]= fx;
 		texvec[1]= fy;
 
@@ -953,7 +953,7 @@ static int multitex(Tex *tex,
 	int retval = 0; /* return value, int:0, col:1, nor:2, everything:3 */
 
 	texres->talpha = false;  /* is set when image texture returns alpha (considered premul) */
-	
+
 	if (use_nodes && tex->use_nodes && tex->nodetree) {
 		const float cfra = 1.0f; /* This was only set for Blender Internal render before. */
 		retval = ntreeTexExecTree(tex->nodetree, texres, texvec, dxt, dyt, osatex, thread,
@@ -1072,7 +1072,7 @@ static int multitex_nodes_intern(Tex *tex,
 
 	if (mtex)
 		which_output= mtex->which_output;
-	
+
 	if (tex->type==TEX_IMAGE) {
 		int rgbnor;
 
@@ -1093,7 +1093,7 @@ static int multitex_nodes_intern(Tex *tex,
 
 			if (mtex->mapto & (MAP_COL)) {
 				ImBuf *ibuf = BKE_image_pool_acquire_ibuf(tex->ima, &tex->iuser, pool);
-				
+
 				/* don't linearize float buffers, assumed to be linear */
 				if (ibuf != NULL &&
 				    ibuf->rect_float == NULL &&
@@ -1110,12 +1110,12 @@ static int multitex_nodes_intern(Tex *tex,
 			/* we don't have mtex, do default flat 2d projection */
 			MTex localmtex;
 			float texvec_l[3], dxt_l[3], dyt_l[3];
-			
+
 			localmtex.mapping= MTEX_FLAT;
 			localmtex.tex= tex;
 			localmtex.object= NULL;
 			localmtex.texco= TEXCO_ORCO;
-			
+
 			copy_v3_v3(texvec_l, texvec);
 			if (dxt && dyt) {
 				copy_v3_v3(dxt_l, dxt);
@@ -1125,7 +1125,7 @@ static int multitex_nodes_intern(Tex *tex,
 				zero_v3(dxt_l);
 				zero_v3(dyt_l);
 			}
-			
+
 			do_2d_mapping(&localmtex, texvec_l, NULL, dxt_l, dyt_l);
 			rgbnor = multitex(tex,
 			                  texvec_l,
@@ -1244,7 +1244,7 @@ int multitex_ext_safe(Tex *tex, float texvec[3], TexResult *texres, struct Image
 void texture_rgb_blend(float in[3], const float tex[3], const float out[3], float fact, float facg, int blendtype)
 {
 	float facm;
-	
+
 	switch (blendtype) {
 	case MTEX_BLEND:
 		fact*= facg;
@@ -1254,7 +1254,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 		in[1]= (fact*tex[1] + facm*out[1]);
 		in[2]= (fact*tex[2] + facm*out[2]);
 		break;
-		
+
 	case MTEX_MUL:
 		fact*= facg;
 		facm= 1.0f-fact;
@@ -1274,7 +1274,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 	case MTEX_OVERLAY:
 		fact*= facg;
 		facm= 1.0f-fact;
-		
+
 		if (out[0] < 0.5f)
 			in[0] = out[0] * (facm + 2.0f*fact*tex[0]);
 		else
@@ -1288,7 +1288,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 		else
 			in[2] = 1.0f - (facm + 2.0f*fact*(1.0f - tex[2])) * (1.0f - out[2]);
 		break;
-		
+
 	case MTEX_SUB:
 		fact= -fact;
 		ATTR_FALLTHROUGH;
@@ -1302,7 +1302,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 	case MTEX_DIV:
 		fact*= facg;
 		facm= 1.0f-fact;
-		
+
 		if (tex[0]!=0.0f)
 			in[0]= facm*out[0] + fact*out[0]/tex[0];
 		if (tex[1]!=0.0f)
@@ -1323,7 +1323,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 	case MTEX_DARK:
 		fact*= facg;
 		facm= 1.0f-fact;
-		
+
 		in[0] = min_ff(out[0], tex[0])*fact + out[0]*facm;
 		in[1] = min_ff(out[1], tex[1])*fact + out[1]*facm;
 		in[2] = min_ff(out[2], tex[2])*fact + out[2]*facm;
@@ -1336,7 +1336,7 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 		in[1] = max_ff(fact * tex[1], out[1]);
 		in[2] = max_ff(fact * tex[2], out[2]);
 		break;
-		
+
 	case MTEX_BLEND_HUE:
 		fact*= facg;
 		copy_v3_v3(in, out);
@@ -1357,16 +1357,16 @@ void texture_rgb_blend(float in[3], const float tex[3], const float out[3], floa
 		copy_v3_v3(in, out);
 		ramp_blend(MA_RAMP_COLOR, in, fact, tex);
 		break;
-	case MTEX_SOFT_LIGHT: 
-		fact*= facg; 
+	case MTEX_SOFT_LIGHT:
+		fact*= facg;
 		copy_v3_v3(in, out);
 		ramp_blend(MA_RAMP_SOFT, in, fact, tex);
-		break; 
-	case MTEX_LIN_LIGHT: 
-		fact*= facg; 
+		break;
+	case MTEX_LIN_LIGHT:
+		fact*= facg;
 		copy_v3_v3(in, out);
 		ramp_blend(MA_RAMP_LINEAR, in, fact, tex);
-		break; 
+		break;
 	}
 }
 
@@ -1376,7 +1376,7 @@ float texture_value_blend(float tex, float out, float fact, float facg, int blen
 	int flip= (facg < 0.0f);
 
 	facg= fabsf(facg);
-	
+
 	fact*= facg;
 	facm= 1.0f-fact;
 	if (flip) SWAP(float, fact, facm);
@@ -1429,19 +1429,19 @@ float texture_value_blend(float tex, float out, float fact, float facg, int blen
 		if (col > out) in= col; else in= out;
 		break;
 
-	case MTEX_SOFT_LIGHT: 
+	case MTEX_SOFT_LIGHT:
 		scf=1.0f - (1.0f - tex) * (1.0f - out);
 		in= facm*out + fact * ((1.0f - out) * tex * out) + (out * scf);
-		break;       
+		break;
 
-	case MTEX_LIN_LIGHT: 
+	case MTEX_LIN_LIGHT:
 		if (tex > 0.5f)
 			in = out + fact*(2.0f*(tex - 0.5f));
-		else 
+		else
 			in = out + fact*(2.0f*tex - 1.0f);
 		break;
 	}
-	
+
 	return in;
 }
 
@@ -1459,26 +1459,26 @@ int externtex(const MTex *mtex,
 	TexResult texr;
 	float dxt[3], dyt[3], texvec[3];
 	int rgb;
-	
+
 	tex= mtex->tex;
 	if (tex==NULL) return 0;
 	texr.nor= NULL;
-	
+
 	/* placement */
 	if (mtex->projx) texvec[0]= mtex->size[0]*(vec[mtex->projx-1]+mtex->ofs[0]);
 	else texvec[0]= mtex->size[0]*(mtex->ofs[0]);
-	
+
 	if (mtex->projy) texvec[1]= mtex->size[1]*(vec[mtex->projy-1]+mtex->ofs[1]);
 	else texvec[1]= mtex->size[1]*(mtex->ofs[1]);
-	
+
 	if (mtex->projz) texvec[2]= mtex->size[2]*(vec[mtex->projz-1]+mtex->ofs[2]);
 	else texvec[2]= mtex->size[2]*(mtex->ofs[2]);
-	
+
 	/* texture */
 	if (tex->type==TEX_IMAGE) {
 		do_2d_mapping(mtex, texvec, NULL, dxt, dyt);
 	}
-	
+
 	rgb = multitex(tex,
 	               texvec,
 	               dxt, dyt,
@@ -1489,7 +1489,7 @@ int externtex(const MTex *mtex,
 	               skip_load_image,
 	               texnode_preview,
 	               true);
-	
+
 	if (rgb) {
 		texr.tin = IMB_colormanagement_get_luminance(&texr.tr);
 	}
@@ -1498,7 +1498,7 @@ int externtex(const MTex *mtex,
 		texr.tg= mtex->g;
 		texr.tb= mtex->b;
 	}
-	
+
 	*tin= texr.tin;
 	*tr= texr.tr;
 	*tg= texr.tg;
diff --git a/source/blender/render/intern/source/rendercore.c b/source/blender/render/intern/source/rendercore.c
new file mode 100644
index 00000000000..99d2436d4bc
--- /dev/null
+++ b/source/blender/render/intern/source/rendercore.c
@@ -0,0 +1,2030 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributors: Hos, Robert Wenzlaff.
+ * Contributors: 2004/2005/2006 Blender Foundation, full recode
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/rendercore.c
+ *  \ingroup render
+ */
+
+
+/* system includes */
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+#include <string.h>
+#include <assert.h>
+
+/* External modules: */
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_rand.h"
+#include "BLI_threads.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_image_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_material_types.h"
+#include "DNA_group_types.h"
+
+/* local include */
+#include "renderpipeline.h"
+#include "render_result.h"
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "occlusion.h"
+#include "pixelblending.h"
+#include "pixelshading.h"
+#include "shadbuf.h"
+#include "shading.h"
+#include "sss.h"
+#include "zbuf.h"
+
+/* own include */
+#include "rendercore.h"
+
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is a hard copy of the active, dynamically allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* Fill view[] with the view vector for pixel (x, y); x and y are current pixels in rect to be rendered */
+/* do not normalize! (the result is left unnormalized) */
+void calc_view_vector(float view[3], float x, float y)
+{
+	/* z component: minus the absolute value of R.clipsta */
+	view[2]= -ABS(R.clipsta);
+
+	if (R.r.mode & R_ORTHO) {
+		view[0]= view[1]= 0.0f;
+	}
+	else {
+		/* panorama: shift x by R.panodxp before mapping it onto the viewplane */
+		if (R.r.mode & R_PANORAMA) {
+			x-= R.panodxp;
+		}
+
+		/* move x and y to real viewplane coords */
+		x = (x / (float)R.winx);
+		view[0] = R.viewplane.xmin + x * BLI_rctf_size_x(&R.viewplane);
+
+		y = (y / (float)R.winy);
+		view[1] = R.viewplane.ymin + y * BLI_rctf_size_y(&R.viewplane);
+
+//		if (R.flag & R_SEC_FIELD) {
+//			if (R.r.mode & R_ODDFIELD) view[1]= (y+R.ystart)*R.ycor;
+//			else view[1]= (y+R.ystart+1.0)*R.ycor;
+//		}
+//		else view[1]= (y+R.ystart+R.bluroffsy+0.5)*R.ycor;
+		/* panorama: offset x by R.panodxv, then rotate (x, z) with the R.panoco / R.panosi pair */
+		if (R.r.mode & R_PANORAMA) {
+			float u= view[0] + R.panodxv; float v= view[2];
+			view[0]= R.panoco*u + R.panosi*v;
+			view[2]= -R.panosi*u + R.panoco*v;
+		}
+	}
+}
+
+void calc_renderco_ortho(float co[3], float x, float y, int z)
+{
+	/* Writes to co[] the 3d coordinate for pixel (x, y) with integer z value z; x and y 3d coordinate can be derived from pixel coord and winmat */
+	float fx= 2.0f/(R.winx*R.winmat[0][0]);
+	float fy= 2.0f/(R.winy*R.winmat[1][1]);
+	float zco;
+	/* pixel offset from the window center, scaled by fx/fy, minus the winmat[3][0..1] term divided by the matching scale */
+	co[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
+	co[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
+	/* z: divide the integer z by 2147483647, then use the same expression as calc_renderco_zbuf() */
+	zco= ((float)z)/2147483647.0f;
+	co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] );
+}
+
+void calc_renderco_zbuf(float co[3], const float view[3], int z)
+{
+	float fac, zco;
+	/* Writes to co[] the point along view[] whose z is reconstructed from the integer zbuffer value z */
+	/* inverse of zbuf calc: zbuf = MAXZ*hoco_z/hoco_w */
+	zco= ((float)z)/2147483647.0f;
+	co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] );
+	/* scale the view vector so that its z equals co[2]; x and y follow from that factor */
+	fac= co[2]/view[2];
+	co[0]= fac*view[0];
+	co[1]= fac*view[1];
+}
+
+/* Adds the R.samples->cmask[] entries for the low and high byte of mask (presumably the number of set sample bits -- confirm where cmask is filled); returns 0 when R.samples is NULL. Also used in zbuf.c and shadbuf.c */
+int count_mask(unsigned short mask)
+{
+	if (R.samples)
+		return (R.samples->cmask[mask & 255]+R.samples->cmask[mask>>8]);
+	return 0;
+}
+
+static int calchalo_z(HaloRen *har, int zz)
+{
+	/* Returns the z used for halo tests: HA_ONLYSKY halos get -0x7FFFFF for any zz below 0x7FFFFFF0, other halos get zz shifted right by 8 */
+	if (har->type & HA_ONLYSKY) {
+		if (zz < 0x7FFFFFF0) zz= - 0x7FFFFF;	/* edge render messes zvalues */
+	}
+	else {
+		zz= (zz>>8);
+	}
+	return zz;
+}
+
+
+/* Blends halo 'har' into the Combined pass at pixel offset 'od' for a pixel that carries a PixStr list 'ps'; rlpp[] holds 'totsample' layers */
+static void halo_pixelstruct(HaloRen *har, RenderLayer **rlpp, int totsample, int od, float dist, float xn, float yn, PixStr *ps)
+{
+	float col[4], accol[4], fac;
+	int amount, amountm, zz, flarec, sample, fullsample, mask=0;
+
+	fullsample= (totsample > 1);
+	amount= 0;
+	accol[0] = accol[1] = accol[2] = accol[3]= 0.0f;
+	col[0] = col[1] = col[2] = col[3]= 0.0f;
+	flarec= har->flarec;
+
+	while (ps) {
+		amountm= count_mask(ps->mask);
+		amount+= amountm;
+		/* test the halo against this entry's z; halos whose material has MA_HALO_SOFT are shaded regardless of the z test */
+		zz= calchalo_z(har, ps->z);
+		if ((zz> har->zs) || (har->mat && (har->mat->mode & MA_HALO_SOFT))) {
+			if (shadeHaloFloat(har, col, zz, dist, xn, yn, flarec)) {
+				flarec= 0;
+				/* full sample: add to every layer whose bit is set in this entry's mask; otherwise accumulate weighted by amountm/R.osa */
+				if (fullsample) {
+					for (sample=0; sample<totsample; sample++) {
+						if (ps->mask & (1 << sample)) {
+							float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+							addalphaAddfacFloat(pass + od*4, col, har->add);
+						}
+					}
+				}
+				else {
+					fac= ((float)amountm)/(float)R.osa;
+					accol[0]+= fac*col[0];
+					accol[1]+= fac*col[1];
+					accol[2]+= fac*col[2];
+					accol[3]+= fac*col[3];
+				}
+			}
+		}
+
+		mask |= ps->mask;
+		ps= ps->next;
+	}
+
+	/* now do the sky sub-pixels */
+	amount= R.osa-amount;
+	if (amount) {
+		if (shadeHaloFloat(har, col, 0x7FFFFF, dist, xn, yn, flarec)) {
+			if (!fullsample) {
+				fac= ((float)amount)/(float)R.osa;
+				accol[0]+= fac*col[0];
+				accol[1]+= fac*col[1];
+				accol[2]+= fac*col[2];
+				accol[3]+= fac*col[3];
+			}
+		}
+	}
+	/* NOTE(review): in the full-sample case col is added below even if the sky shadeHaloFloat() call above was skipped or returned 0 -- confirm col is meant to carry over */
+	if (fullsample) {
+		for (sample=0; sample<totsample; sample++) {
+			if (!(mask & (1 << sample))) {
+				float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+				addalphaAddfacFloat(pass + od*4, col, har->add);
+			}
+		}
+	}
+	else {
+		col[0]= accol[0];
+		col[1]= accol[1];
+		col[2]= accol[2];
+		col[3]= accol[3];
+		/* not full sample: add the coverage-weighted sum to each layer in rlpp[] */
+		for (sample=0; sample<totsample; sample++) {
+			float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+			addalphaAddfacFloat(pass + od*4, col, har->add);
+		}
+	}
+}
+
+static void halo_tile(RenderPart *pa, RenderLayer *rl)
+{
+	RenderLayer *rlpp[RE_MAX_OSA];
+	HaloRen *har;
+	rcti disprect= pa->disprect, testrect= pa->disprect;
+	float dist, xsq, ysq, xn, yn;
+	float col[4];
+	intptr_t *rd= NULL;
+	int a, *rz, zz, y, sample, totsample, od;
+	int minx, maxx, miny, maxy, x;  /* int, not short: halo bounds come from floor()/ceil() and int clamps and may exceed the short range */
+	unsigned int lay= rl->lay;
+	/* Renders all sorted halos (R.sortedhalos) that touch this part into the Combined pass of rl / its sample layers */
+	/* we don't render halos in the cropped area, gives errors in flare counter */
+	if (pa->crop) {
+		testrect.xmin+= pa->crop;
+		testrect.xmax-= pa->crop;
+		testrect.ymin+= pa->crop;
+		testrect.ymax-= pa->crop;
+	}
+
+	totsample= get_sample_layers(pa, rl, rlpp);
+
+	for (a=0; a<R.tothalo; a++) {
+		har= R.sortedhalos[a];
+
+		/* layer test, clip halo with y */
+		if ((har->lay & lay) == 0) {
+			/* pass */
+		}
+		else if (testrect.ymin > har->maxy) {
+			/* pass */
+		}
+		else if (testrect.ymax < har->miny) {
+			/* pass */
+		}
+		else {
+			/* horizontal extent of the halo, then clip halo with x */
+			minx= floor(har->xs-har->rad);
+			maxx= ceil(har->xs+har->rad);
+
+			if (testrect.xmin > maxx) {
+				/* pass */
+			}
+			else if (testrect.xmax < minx) {
+				/* pass */
+			}
+			else {
+				/* restrict the halo rectangle to the (cropped) part rectangle */
+				minx = max_ii(minx, testrect.xmin);
+				maxx = min_ii(maxx, testrect.xmax);
+
+				miny = max_ii(har->miny, testrect.ymin);
+				maxy = min_ii(har->maxy, testrect.ymax);
+
+				for (y=miny; y<maxy; y++) {
+					int rectofs= (y-disprect.ymin)*pa->rectx + (minx - disprect.xmin);
+					rz= pa->rectz + rectofs;
+					od= rectofs;
+					/* rd stays NULL when the part has no rectdaps buffer */
+					if (pa->rectdaps)
+						rd= pa->rectdaps + rectofs;
+
+					yn= (y-har->ys)*R.ycor;
+					ysq= yn*yn;
+
+					for (x=minx; x<maxx; x++, rz++, od++) {
+						xn= x- har->xs;
+						xsq= xn*xn;
+						dist= xsq+ysq;
+						if (dist<har->radsq) {
+							if (rd && *rd) {
+								halo_pixelstruct(har, rlpp, totsample, od, dist, xn, yn, (PixStr *)*rd);
+							}
+							else {
+								zz= calchalo_z(har, *rz);
+								if ((zz> har->zs) || (har->mat && (har->mat->mode & MA_HALO_SOFT))) {
+									if (shadeHaloFloat(har, col, zz, dist, xn, yn, har->flarec)) {
+										for (sample=0; sample<totsample; sample++) {
+											float * rect= RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+											addalphaAddfacFloat(rect + od*4, col, har->add);
+										}
+									}
+								}
+							}
+						}
+						if (rd) rd++;
+					}
+				}
+			}
+		}
+		if (R.test_break(R.tbh) ) break;
+	}
+}
+
+static void lamphalo_tile(RenderPart *pa, RenderLayer *rl)
+{
+	RenderLayer *rlpp[RE_MAX_OSA];
+	ShadeInput shi;
+	float *pass;
+	float fac, col[4];
+	intptr_t *rd= pa->rectdaps;
+	const int *rz= pa->rectz;
+	int x, y, sample, totsample, fullsample, od;
+	/* Adds the renderspothalo() colour to the Combined pass for every pixel of the part's disprect */
+	totsample= get_sample_layers(pa, rl, rlpp);
+	fullsample= (totsample > 1);
+
+	shade_input_initialize(&shi, pa, rl, 0); /* this zeroes ShadeInput for us */
+
+	for (od=0, y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
+		for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, rz++, od++) {
+
+			calc_view_vector(shi.view, x, y);
+			/* pixel with a PixStr list: shade once per list entry, at that entry's z */
+			if (rd && *rd) {
+				PixStr *ps= (PixStr *)*rd;
+				int count, totsamp= 0, mask= 0;
+
+				while (ps) {
+					if (R.r.mode & R_ORTHO)
+						calc_renderco_ortho(shi.co, (float)x, (float)y, ps->z);
+					else
+						calc_renderco_zbuf(shi.co, shi.view, ps->z);
+
+					totsamp+= count= count_mask(ps->mask);
+					mask |= ps->mask;
+
+					col[0]= col[1]= col[2]= col[3]= 0.0f;
+					renderspothalo(&shi, col, 1.0f);
+					/* full sample: add to each layer set in the entry's mask; otherwise add weighted by count/R.osa; alpha is capped at 1 */
+					if (fullsample) {
+						for (sample=0; sample<totsample; sample++) {
+							if (ps->mask & (1 << sample)) {
+								pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+								pass += od * 4;
+								pass[0]+= col[0];
+								pass[1]+= col[1];
+								pass[2]+= col[2];
+								pass[3]+= col[3];
+								if (pass[3]>1.0f) pass[3]= 1.0f;
+							}
+						}
+					}
+					else {
+						fac= ((float)count)/(float)R.osa;
+						pass = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname);
+						pass += od * 4;
+						pass[0]+= fac*col[0];
+						pass[1]+= fac*col[1];
+						pass[2]+= fac*col[2];
+						pass[3]+= fac*col[3];
+						if (pass[3]>1.0f) pass[3]= 1.0f;
+					}
+
+					ps= ps->next;
+				}
+				/* samples not covered by any entry are shaded once more, with shi.co[2] set to 0 */
+				if (totsamp<R.osa) {
+					shi.co[2]= 0.0f;
+
+					col[0]= col[1]= col[2]= col[3]= 0.0f;
+					renderspothalo(&shi, col, 1.0f);
+
+					if (fullsample) {
+						for (sample=0; sample<totsample; sample++) {
+							if (!(mask & (1 << sample))) {
+
+								pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+								pass += od * 4;
+								pass[0]+= col[0];
+								pass[1]+= col[1];
+								pass[2]+= col[2];
+								pass[3]+= col[3];
+								if (pass[3]>1.0f) pass[3]= 1.0f;
+							}
+						}
+					}
+					else {
+						fac= ((float)R.osa-totsamp)/(float)R.osa;
+						pass = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname);
+						pass += od * 4;
+						pass[0]+= fac*col[0];
+						pass[1]+= fac*col[1];
+						pass[2]+= fac*col[2];
+						pass[3]+= fac*col[3];
+						if (pass[3]>1.0f) pass[3]= 1.0f;
+					}
+				}
+			}
+			else {
+				if (R.r.mode & R_ORTHO)
+					calc_renderco_ortho(shi.co, (float)x, (float)y, *rz);
+				else
+					calc_renderco_zbuf(shi.co, shi.view, *rz);
+				/* no PixStr list: shade once at the zbuffer depth and add to every sample layer */
+				col[0]= col[1]= col[2]= col[3]= 0.0f;
+				renderspothalo(&shi, col, 1.0f);
+
+				for (sample=0; sample<totsample; sample++) {
+					pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+					pass += od * 4;
+					pass[0]+= col[0];
+					pass[1]+= col[1];
+					pass[2]+= col[2];
+					pass[3]+= col[3];
+					if (pass[3]>1.0f) pass[3]= 1.0f;
+				}
+			}
+			/* rd only advances when the part has a rectdaps buffer; rz and od advance in the loop header */
+			if (rd) rd++;
+		}
+		if (y&1)
+			if (R.test_break(R.tbh)) break;
+	}
+}
+
+
+/* ********************* MAINLOOPS ******************** */
+
+/* osa version: writes one shaded sample (shi/shr) with sample mask curmask into every pass of rl at pixel 'offset' */
+static void add_filt_passes(RenderLayer *rl, int curmask, int rectx, int offset, ShadeInput *shi, ShadeResult *shr)
+{
+	RenderPass *rpass;
+
+	for (rpass= rl->passes.first; rpass; rpass= rpass->next) {
+		float *fp, *col= NULL;
+		int pixsize= 3;
+		/* branches that only set 'col' (and 'pixsize') are written by the shared add_filt_fmask_pixsize() call at the end of the loop body */
+		if (STREQ(rpass->name, RE_PASSNAME_COMBINED)) {
+			add_filt_fmask(curmask, shr->combined, rpass->rect + 4*offset, rectx);
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_Z)) {
+			fp = rpass->rect + offset;
+			*fp = shr->z;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_RGBA)) {
+			col = shr->col;
+			pixsize = 4;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_EMIT)) {
+			col = shr->emit;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_DIFFUSE)) {
+			col = shr->diff;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_SPEC)) {
+			col = shr->spec;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_SHADOW)) {
+			col = shr->shad;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_AO)) {
+			col = shr->ao;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_ENVIRONMENT)) {
+			col = shr->env;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDIRECT)) {
+			col = shr->indirect;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_REFLECT)) {
+			col = shr->refl;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_REFRACT)) {
+			col = shr->refr;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_NORMAL)) {
+			col = shr->nor;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_UV)) {
+			/* box filter only, gauss will screw up UV too much */
+			if (shi->totuv) {
+				float mult = (float)count_mask(curmask)/(float)R.osa;
+				fp = rpass->rect + 3*offset;
+				fp[0]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[0]);
+				fp[1]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[1]);
+				fp[2]+= mult;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDEXOB)) {
+			/* no filter; only written while the pixel still holds 0.0f */
+			if (shi->vlr) {
+				fp = rpass->rect + offset;
+				if (*fp==0.0f)
+					*fp = (float)shi->obr->ob->index;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDEXMA)) {
+			/* no filter; only written while the pixel still holds 0.0f */
+			if (shi->vlr) {
+					fp = rpass->rect + offset;
+					if (*fp==0.0f)
+							*fp = (float)shi->mat->index;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_MIST)) {
+			/* single float, filtered by the shared call below */
+			col = &shr->mist;
+			pixsize = 1;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) {
+			/* add minimum speed in pixel, no filter */
+			fp = rpass->rect + 4*offset;
+			if ( (ABS(shr->winspeed[0]) + ABS(shr->winspeed[1]))< (ABS(fp[0]) + ABS(fp[1])) ) {
+				fp[0] = shr->winspeed[0];
+				fp[1] = shr->winspeed[1];
+			}
+			if ( (ABS(shr->winspeed[2]) + ABS(shr->winspeed[3]))< (ABS(fp[2]) + ABS(fp[3])) ) {
+				fp[2] = shr->winspeed[2];
+				fp[3] = shr->winspeed[3];
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_RAYHITS)) {
+			/* four floats, filtered by the shared call below */
+			col = shr->rayhits;
+			pixsize= 4;
+		}
+
+		if (col) {
+			fp= rpass->rect + pixsize*offset;
+			add_filt_fmask_pixsize(curmask, col, fp, rectx, pixsize);
+		}
+	}
+}
+
+/* non-osa version: copies one shaded sample (shi/shr) straight into every pass of rl at pixel 'offset', without filtering */
+static void add_passes(RenderLayer *rl, int offset, ShadeInput *shi, ShadeResult *shr)
+{
+	RenderPass *rpass;
+	float *fp;
+
+	for (rpass= rl->passes.first; rpass; rpass= rpass->next) {
+		float *col= NULL, uvcol[3];
+		int a, pixsize= 3;
+		/* branches that only set 'col' (and 'pixsize') are copied by the shared loop at the end of the loop body */
+		if (STREQ(rpass->name, RE_PASSNAME_COMBINED)) {
+			/* copy combined to use for preview */
+			copy_v4_v4(rpass->rect + 4*offset, shr->combined);
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_Z)) {
+			fp = rpass->rect + offset;
+			*fp = shr->z;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_RGBA)) {
+			col = shr->col;
+			pixsize = 4;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_EMIT)) {
+			col = shr->emit;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_DIFFUSE)) {
+			col = shr->diff;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_SPEC)) {
+			col = shr->spec;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_SHADOW)) {
+			col = shr->shad;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_AO)) {
+			col = shr->ao;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_ENVIRONMENT)) {
+			col = shr->env;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDIRECT)) {
+			col = shr->indirect;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_REFLECT)) {
+			col = shr->refl;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_REFRACT)) {
+			col = shr->refr;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_NORMAL)) {
+			col = shr->nor;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_UV)) {
+			if (shi->totuv) {
+				uvcol[0] = 0.5f + 0.5f*shi->uv[shi->actuv].uv[0];
+				uvcol[1] = 0.5f + 0.5f*shi->uv[shi->actuv].uv[1];
+				uvcol[2] = 1.0f;
+				col = uvcol;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_VECTOR)) {
+			col = shr->winspeed;
+			pixsize = 4;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDEXOB)) {
+			if (shi->vlr) {
+				fp = rpass->rect + offset;
+				*fp = (float)shi->obr->ob->index;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_INDEXMA)) {
+			if (shi->vlr) {
+				fp = rpass->rect + offset;
+				*fp = (float)shi->mat->index;
+			}
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_MIST)) {
+			fp = rpass->rect + offset;
+			*fp = shr->mist;
+		}
+		else if (STREQ(rpass->name, RE_PASSNAME_RAYHITS)) {
+			col = shr->rayhits;
+			pixsize = 4;
+		}
+
+		if (col) {
+			fp = rpass->rect + pixsize*offset;
+			for (a=0; a<pixsize; a++)
+				fp[a] = col[a];
+		}
+	}
+}
+
+int get_sample_layers(RenderPart *pa, RenderLayer *rl, RenderLayer **rlpp)
+{
+	/* Fills rlpp[] and returns the number of entries: R.osa layers when pa->fullresult is non-empty, otherwise only rl */
+	if (pa->fullresult.first) {
+		int sample, nr= BLI_findindex(&pa->result->layers, rl);
+		/* for each sample, take the layer at the same list index as rl from that sample's RenderResult */
+		for (sample=0; sample<R.osa; sample++) {
+			RenderResult *rr= BLI_findlink(&pa->fullresult, sample);
+
+			rlpp[sample]= BLI_findlink(&rr->layers, nr);
+		}
+		return R.osa;
+	}
+	else {
+		rlpp[0]= rl;
+		return 1;
+	}
+}
+
+
+/* only do sky (for pixels whose Combined alpha is below 1, and only when alphamode is R_ADDSKY), is default in the solid layer (shade_tile) btw */
+static void sky_tile(RenderPart *pa, RenderLayer *rl)
+{
+	RenderLayer *rlpp[RE_MAX_OSA];
+	int x, y, od=0, totsample;
+
+	if (R.r.alphamode!=R_ADDSKY)
+		return;
+
+	totsample= get_sample_layers(pa, rl, rlpp);
+
+	for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
+		for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, od+=4) {
+			float col[4];
+			int sample;
+			bool done = false;
+
+			for (sample= 0; sample<totsample; sample++) {
+				float *pass = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+				pass += od;
+
+				if (pass[3]<1.0f) {
+					/* the sky colour is computed at most once per pixel and reused for the other samples */
+					if (done==0) {
+						shadeSkyPixel(col, x, y, pa->thread);
+						done = true;
+					}
+					/* an empty pixel takes the sky colour directly, otherwise the sky goes under the existing colour; alpha ends at 1 either way */
+					if (pass[3]==0.0f) {
+						copy_v4_v4(pass, col);
+						pass[3] = 1.0f;
+					}
+					else {
+						addAlphaUnderFloat(pass, col);
+						pass[3] = 1.0f;
+					}
+				}
+			}
+		}
+
+		if (y&1)
+			if (R.test_break(R.tbh)) break;
+	}
+}
+
+static void atm_tile(RenderPart *pa, RenderLayer *rl)
+{
+	RenderPass *zpass;
+	GroupObject *go;
+	LampRen *lar;
+	RenderLayer *rlpp[RE_MAX_OSA];
+	int totsample;
+	int x, y, od= 0;
+	/* Applies shadeAtmPixel() from sun lamps with the LA_SUN_EFFECT_AP flag to the RGB of the Combined pass, using the Z pass */
+	totsample= get_sample_layers(pa, rl, rlpp);
+
+	/* needs the part's z buffer and a Z pass on the layer, otherwise do nothing */
+	if (pa->rectz==NULL) return;
+	for (zpass= rl->passes.first; zpass; zpass= zpass->next)
+		if (STREQ(zpass->name, RE_PASSNAME_Z))
+			break;
+
+	if (zpass==NULL) return;
+
+	/* check for at least one sun lamp with its atmosphere flag enabled */
+	for (go=R.lights.first; go; go= go->next) {
+		lar= go->lampren;
+		if (lar->type==LA_SUN && lar->sunsky && (lar->sunsky->effect_type & LA_SUN_EFFECT_AP))
+			break;
+	}
+	/* do nothing and return if there is no such sun lamp */
+	if (go==NULL)
+		return;
+
+	/* for each x,y and each sample, and each sun lamp */
+	for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
+		for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, od++) {
+			int sample;
+
+			for (sample=0; sample<totsample; sample++) {
+				const float *zrect = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_Z, R.viewname) + od;
+				float *rgbrect = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname) + 4*od;
+				float rgb[3] = {0};
+				bool done = false;
+
+				for (go=R.lights.first; go; go= go->next) {
+
+
+					lar= go->lampren;
+					if (lar->type==LA_SUN &&	lar->sunsky) {
+
+						/* if it's sky continue and don't apply atmosphere effect on it */
+						if (*zrect >= 9.9e10f || rgbrect[3]==0.0f) {
+							continue;
+						}
+
+						if ((lar->sunsky->effect_type & LA_SUN_EFFECT_AP)) {
+							float tmp_rgb[3];
+
+							/* skip if worldspace lamp vector is below horizon */
+							if (go->ob->obmat[2][2] < 0.f) {
+								continue;
+							}
+
+							copy_v3_v3(tmp_rgb, rgbrect);
+							if (rgbrect[3]!=1.0f) {	/* de-premul */
+								mul_v3_fl(tmp_rgb, 1.0f/rgbrect[3]);
+							}
+							shadeAtmPixel(lar->sunsky, tmp_rgb, x, y, *zrect);
+							if (rgbrect[3]!=1.0f) {	/* premul */
+								mul_v3_fl(tmp_rgb, rgbrect[3]);
+							}
+							/* the first contributing lamp sets the result; each later lamp is blended 50/50 with the running result */
+							if (done==0) {
+								copy_v3_v3(rgb, tmp_rgb);
+								done = true;
+							}
+							else {
+								rgb[0] = 0.5f*rgb[0] + 0.5f*tmp_rgb[0];
+								rgb[1] = 0.5f*rgb[1] + 0.5f*tmp_rgb[1];
+								rgb[2] = 0.5f*rgb[2] + 0.5f*tmp_rgb[2];
+							}
+						}
+					}
+				}
+
+				/* if aerial perspective was applied for at least one sun lamp */
+				if (done) {
+					copy_v3_v3(rgbrect, rgb);
+				}
+			}
+		}
+	}
+}
+
+static void shadeDA_tile(RenderPart *pa, RenderLayer *rl)
+{
+	RenderResult *rr= pa->result;
+	ShadeSample ssamp;
+	intptr_t *rd, *rectdaps= pa->rectdaps;
+	int samp;
+	int x, y, seed, crop=0, offs=0, od;
+	/* Shades every pixel of the part that has a PixStr list and writes the results to the passes of rl, or to the per-sample layers for full sample */
+	if (R.test_break(R.tbh)) return;
+
+	/* irregular shadow buffer creation */
+	if (R.r.mode & R_SHADOW)
+		ISB_create(pa, NULL);
+
+	/* we set per pixel a fixed seed, for random AO and shadow samples */
+	seed= pa->rectx*pa->disprect.ymin;
+
+	/* general shader info, passes */
+	shade_sample_initialize(&ssamp, pa, rl);
+
+	/* occlusion caching */
+	if (R.occlusiontree)
+		cache_occ_samples(&R, pa, &ssamp);
+
+	/* filtered render, for now we assume only 1 filter size: skip a one pixel border of the part */
+	if (pa->crop) {
+		crop= 1;
+		rectdaps+= pa->rectx + 1;
+		offs= pa->rectx + 1;
+	}
+
+	/* scanline updates have to be 2 lines behind */
+	rr->renrect.ymin = 0;
+	rr->renrect.ymax = -2*crop;
+	rr->renlay= rl;
+
+	for (y=pa->disprect.ymin+crop; y<pa->disprect.ymax-crop; y++, rr->renrect.ymax++) {
+		rd= rectdaps;
+		od= offs;
+
+		for (x=pa->disprect.xmin+crop; x<pa->disprect.xmax-crop; x++, rd++, od++) {
+			BLI_thread_srandom(pa->thread, seed++);
+
+			if (*rd) {
+				if (shade_samples(&ssamp, (PixStr *)(*rd), x, y)) {
+					/* full sample: each shaded sample goes unfiltered into the layer of every OSA sample set in its mask */
+					/* multisample buffers or filtered mask filling? */
+					if (pa->fullresult.first) {
+						int a;
+						for (samp=0; samp<ssamp.tot; samp++) {
+							int smask= ssamp.shi[samp].mask;
+							for (a=0; a<R.osa; a++) {
+								int mask= 1<<a;
+								if (smask & mask)
+									add_passes(ssamp.rlpp[a], od, &ssamp.shi[samp], &ssamp.shr[samp]);
+							}
+						}
+					}
+					else {
+						for (samp=0; samp<ssamp.tot; samp++)
+							add_filt_passes(rl, ssamp.shi[samp].mask, pa->rectx, od, &ssamp.shi[samp], &ssamp.shr[samp]);
+					}
+				}
+			}
+		}
+		/* advance the row start pointers by one full part row */
+		rectdaps+= pa->rectx;
+		offs+= pa->rectx;
+
+		if (y&1) if (R.test_break(R.tbh)) break;
+	}
+
+	/* disable scanline updating */
+	rr->renlay= NULL;
+
+	if (R.r.mode & R_SHADOW)
+		ISB_free(pa);
+
+	if (R.occlusiontree)
+		free_occ_samples(&R, pa);
+}
+
+/* ************* pixel struct ******** */
+
+/* Appends a new PixStrMain with an uninitialized array of 4096 PixStr to lb, resets its counter and returns it */
+static PixStrMain *addpsmain(ListBase *lb)
+{
+	PixStrMain *psm;
+
+	psm= (PixStrMain *)MEM_mallocN(sizeof(PixStrMain), "pixstrMain");
+	BLI_addtail(lb, psm);
+
+	psm->ps= (PixStr *)MEM_mallocN(4096*sizeof(PixStr), "pixstr");
+	psm->counter= 0;
+
+	return psm;
+}
+
+static void freeps(ListBase *lb)
+{
+	PixStrMain *psm, *psmnext;
+	/* Frees every PixStrMain in lb together with its PixStr array, then clears the list; next is read before the node is freed */
+	for (psm= lb->first; psm; psm= psmnext) {
+		psmnext= psm->next;
+		if (psm->ps)
+			MEM_freeN(psm->ps);
+		MEM_freeN(psm);
+	}
+	BLI_listbase_clear(lb);
+}
+
+static void addps(ListBase *lb, intptr_t *rd, int obi, int facenr, int z, int maskz, unsigned short mask)
+{
+	PixStrMain *psm;
+	PixStr *ps, *last= NULL;
+	/* Records a hit (obi, facenr) in the PixStr list stored at *rd: ORs mask into an existing entry with the same obi/facenr, else appends a new entry */
+	if (*rd) {
+		ps= (PixStr *)(*rd);
+
+		while (ps) {
+			if ( ps->obi == obi && ps->facenr == facenr ) {
+				ps->mask |= mask;
+				return;
+			}
+			last= ps;
+			ps= ps->next;
+		}
+	}
+
+	/* make new PS (pixel struct) */
+	psm= lb->last;
+	/* move to a fresh chunk once the counter of the last one reaches 4095; assumes lb already holds at least one chunk -- confirm with callers */
+	if (psm->counter==4095)
+		psm= addpsmain(lb);
+
+	ps= psm->ps + psm->counter++;
+	/* link after the last entry of this pixel, or make it the first entry by storing the pointer in *rd */
+	if (last) last->next= ps;
+	else *rd= (intptr_t)ps;
+
+	ps->next= NULL;
+	ps->obi= obi;
+	ps->facenr= facenr;
+	ps->z= z;
+	ps->maskz= maskz;
+	ps->mask = mask;
+	ps->shadfac= 0;
+}
+
+static void edge_enhance_add(RenderPart *pa, float *rectf, float *arect)
+{
+	float addcol[4];
+	int pix;
+	/* Puts the edge buffer arect (one float per pixel) over the 4-float-per-pixel buffer rectf, coloured with R.r.edgeR/G/B; does nothing when arect is NULL */
+	if (arect==NULL)
+		return;
+	/* only pixels with a non-zero edge value are touched; the edge value is used as alpha and as colour multiplier */
+	for (pix= pa->rectx*pa->recty; pix>0; pix--, arect++, rectf+=4) {
+		if (*arect != 0.0f) {
+			addcol[0]= *arect * R.r.edgeR;
+			addcol[1]= *arect * R.r.edgeG;
+			addcol[2]= *arect * R.r.edgeB;
+			addcol[3]= *arect;
+			addAlphaOverFloat(rectf, addcol);
+		}
+	}
+}
+
+/* clamp alpha to 0..1 and RGB to 0..inf in the Combined pass, can go outside due to filter */
+static void clamp_alpha_rgb_range(RenderPart *pa, RenderLayer *rl)
+{
+	RenderLayer *rlpp[RE_MAX_OSA];
+	int y, sample, totsample;
+
+	totsample= get_sample_layers(pa, rl, rlpp);
+
+	/* not for full sample, there we clamp after compositing */
+	if (totsample > 1)
+		return;
+
+	for (sample= 0; sample<totsample; sample++) {
+		float *rectf = RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_COMBINED, R.viewname);
+		/* y counts down over all rectx*recty pixels here, it is not a row index */
+		for (y= pa->rectx*pa->recty; y>0; y--, rectf+=4) {
+			rectf[0] = MAX2(rectf[0], 0.0f);
+			rectf[1] = MAX2(rectf[1], 0.0f);
+			rectf[2] = MAX2(rectf[2], 0.0f);
+			CLAMP(rectf[3], 0.0f, 1.0f);
+		}
+	}
+}
+
+/* adds only alpha values: writes one edge intensity float per pixel into rectf, derived from the integer z values in rectz */
+static void edge_enhance_tile(RenderPart *pa, float *rectf, int *rectz)
+{
+	/* use zbuffer to define edges, add it to the image */
+	int y, x, col, *rz, *rz1, *rz2, *rz3;
+	int zval1, zval2, zval3;
+	float *rf;
+
+	/* shift values in zbuffer 4 bits to the right (against overflow), the filter below sums neighbours with a total weight of 12 */
+	rz= rectz;
+	if (rz==NULL) return;
+
+	for (y=0; y<pa->recty; y++)
+		for (x=0; x<pa->rectx; x++, rz++) (*rz)>>= 4;
+	/* rz1/rz2/rz3 walk three consecutive rows; the 3x3 window is centered on rz2[1] */
+	rz1= rectz;
+	rz2= rz1+pa->rectx;
+	rz3= rz2+pa->rectx;
+	/* output starts one row and one column in, matching the window center */
+	rf= rectf+pa->rectx+1;
+
+	for (y=0; y<pa->recty-2; y++) {
+		for (x=0; x<pa->rectx-2; x++, rz1++, rz2++, rz3++, rf++) {
+
+			/* prevent overflow with sky z values */
+			zval1=   rz1[0] + 2*rz1[1] +   rz1[2];
+			zval2=  2*rz2[0]           + 2*rz2[2];
+			zval3=   rz3[0] + 2*rz3[1] +   rz3[2];
+			/* 4x the center value against the weighted neighbour sum divided by 3, taken as absolute value */
+			col= ( 4*rz2[1] - (zval1 + zval2 + zval3)/3 );
+			if (col<0) col= -col;
+			/* scale down, then either cap at 1<<16 or apply the R.r.edgeint intensity */
+			col >>= 5;
+			if (col > (1<<16)) col= (1<<16);
+			else col= (R.r.edgeint*col)>>8;
+
+			if (col>0) {
+				float fcol;
+
+				if (col>255) fcol= 1.0f;
+				else fcol= (float)col/255.0f;
+				/* with OSA each call adds 1/R.osa of the value, otherwise the value is stored directly */
+				if (R.osa)
+					*rf+= fcol/(float)R.osa;
+				else
+					*rf= fcol;
+			}
+		}
+		rz1+= 2;
+		rz2+= 2;
+		rz3+= 2;
+		rf+= 2;
+	}
+
+	/* shift back zbuf values, we might need it still */
+	rz= rectz;
+	for (y=0; y<pa->recty; y++)
+		for (x=0; x<pa->rectx; x++, rz++) (*rz)<<= 4;
+
+}
+
+static void reset_sky_speed(RenderPart *pa, RenderLayer *rl)
+{
+	/* vector pass values that are still at PASS_VECTOR_MAX get replaced by zero */
+	RenderLayer *rlpp[RE_MAX_OSA];
+	const int totsample= get_sample_layers(pa, rl, rlpp);
+	const int totfloat= 4*pa->rectx*pa->recty;
+	int sample, i;
+
+	for (sample= 0; sample<totsample; sample++) {
+		float *speed= RE_RenderLayerGetPass(rlpp[sample], RE_PASSNAME_VECTOR, R.viewname);
+
+		if (speed==NULL) break;	/* a sample layer without vector pass ends the whole loop */
+
+		for (i= 0; i<totfloat; i++)
+			if (speed[i] == PASS_VECTOR_MAX) speed[i]= 0.0f;
+	}
+}
+
+static unsigned short *make_solid_mask(RenderPart *pa)
+{
+	/* per pixel: OR together the masks of all PixStr linked from pa->rectdaps, caller frees the result */
+	const intptr_t *pixlist= pa->rectdaps;
+	unsigned short *solidmask;
+	int totpix, i;
+
+	if (pixlist==NULL)
+		return NULL;
+
+	totpix= pa->rectx*pa->recty;
+	solidmask= MEM_mallocN(sizeof(short)*pa->rectx*pa->recty, "solidmask");
+
+	for (i= 0; i<totpix; i++) {
+		unsigned short mask= 0;	/* stays zero for a pixel without pixel structs */
+		PixStr *ps;
+
+		for (ps= (PixStr *)pixlist[i]; ps; ps= ps->next)
+			mask |= ps->mask;
+		solidmask[i]= mask;
+	}
+
+	return solidmask;
+}
+
+static void addAlphaOverFloatMask(float *dest, float *source, unsigned short dmask, unsigned short smask)
+{
+	/* blend RGBA source into dest, the two sample masks decide between 'add' and 'over' */
+	const unsigned short shared= dmask & smask;
+	float mul= 1.0f - source[3];
+
+	/* both masks set without a common sample: plain add, works for premul only */
+	if (shared==0 && dmask && smask) {
+		dest[0]+= source[0];
+		dest[1]+= source[1];
+		dest[2]+= source[2];
+		dest[3]+= source[3];
+
+		return;
+	}
+
+	/* overlapping masks that differ from dmask: make a mixture of 'add' and 'over' */
+	if (shared && shared!=dmask) {
+		const float shared_bits= (float)count_mask(shared);	/* alpha over */
+		const float tot_bits= (float)count_mask(smask|dmask);	/* alpha add */
+		const float add= (tot_bits - shared_bits)/tot_bits;	/* add level */
+
+		mul= add + (1.0f-add)*mul;
+	}
+
+	/* alpha over with the resulting factor for dest */
+	dest[0]= (mul*dest[0]) + source[0];
+	dest[1]= (mul*dest[1]) + source[1];
+	dest[2]= (mul*dest[2]) + source[2];
+	dest[3]= (mul*dest[3]) + source[3];
+}
+
+typedef struct ZbufSolidData {	/* user data that zbufshadeDA_tile() hands to make_pixelstructs() */
+	RenderLayer *rl;	/* layer being rendered, its layflag is tested for SCE_LAY_EDGE */
+	ListBase *psmlist;	/* list passed on to addps() for allocating pixel structs */
+	float *edgerect;	/* edge buffer passed to edge_enhance_tile(), NULL when edges are off */
+} ZbufSolidData;
+
+static void make_pixelstructs(RenderPart *pa, ZSpan *zspan, int sample, void *data)
+{
+	ZbufSolidData *sdata= data;
+	const int mask= 1<<sample;
+	int x, y, offs= 0;
+
+	/* every pixel with a face number in the zspan buffers gets a PixStr for this sample */
+	for (y= 0; y<pa->recty; y++) {
+		for (x= 0; x<pa->rectx; x++, offs++) {
+			int maskz= 0;
+
+			if (zspan->rectp[offs] == 0)
+				continue;
+			if (zspan->rectmask)
+				maskz= zspan->rectmask[offs];
+
+			addps(sdata->psmlist, pa->rectdaps + offs, zspan->recto[offs], zspan->rectp[offs], zspan->rectz[offs], maskz, mask);
+		}
+	}
+
+	/* edge detection needs the layer flag as well as R_EDGE in the render mode */
+	if ((sdata->rl->layflag & SCE_LAY_EDGE) && (R.r.mode & R_EDGE))
+		edge_enhance_tile(pa, sdata->edgerect, zspan->rectz);
+}
+
+/* main call for shading Delta Accum, for OSA: renders all layers of pa->result for one tile */
+/* supposed to be fully threadable! */
+void zbufshadeDA_tile(RenderPart *pa)
+{
+	RenderResult *rr= pa->result;
+	RenderLayer *rl;
+	ListBase psmlist= {NULL, NULL};
+	float *edgerect= NULL;
+
+	/* allocate the object/face/z buffers once, they are reused for every layer and freed at the end */
+	/* zbuffer inits these rects */
+	pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
+	pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
+	pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
+	for (rl= rr->layers.first; rl; rl= rl->next) {
+		float *rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname);
+		/* the mask buffer is only allocated when the layer has both zmask flags set */
+		if ((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK))
+			pa->rectmask= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectmask");
+
+		/* initialize pixelstructs and edge buffer */
+		addpsmain(&psmlist);
+		pa->rectdaps= MEM_callocN(sizeof(intptr_t)*pa->rectx*pa->recty+4, "zbufDArectd");
+
+		if (rl->layflag & SCE_LAY_EDGE)
+			if (R.r.mode & R_EDGE)
+				edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge");
+
+		/* always fill visibility, pa->sample advances in steps of 4 and make_pixelstructs() is the callback */
+		for (pa->sample=0; pa->sample<R.osa; pa->sample+=4) {
+			ZbufSolidData sdata;
+
+			sdata.rl= rl;
+			sdata.psmlist= &psmlist;
+			sdata.edgerect= edgerect;
+			zbuffer_solid(pa, rl, make_pixelstructs, &sdata);
+			if (R.test_break(R.tbh)) break;
+		}
+
+		/* shades solid */
+		if (rl->layflag & SCE_LAY_SOLID)
+			shadeDA_tile(pa, rl);
+
+		/* lamphalo after solid, before ztra, looks nicest because ztra does own halo */
+		if (R.flag & R_LAMPHALO)
+			if (rl->layflag & SCE_LAY_HALO)
+				lamphalo_tile(pa, rl);
+
+		/* halo before ztra, because ztra fills in zbuffer now */
+		if (R.flag & R_HALO)
+			if (rl->layflag & SCE_LAY_HALO)
+				halo_tile(pa, rl);
+
+		/* transp layer */
+		if (R.flag & R_ZTRA || R.totstrand) {
+			if (rl->layflag & (SCE_LAY_ZTRA|SCE_LAY_STRAND)) {
+				if (pa->fullresult.first) {
+					zbuffer_transp_shade(pa, rl, rect, &psmlist);	/* NOTE(review): return value ignored here, confirm no mask is allocated in this case */
+				}
+				else {
+					unsigned short *ztramask, *solidmask= NULL; /* 16 bits, MAX_OSA */
+
+					/* allocate, but not free here, for asynchronous display of this rect in main thread */
+					rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer");
+
+					/* swap for live updates, and it is used in zbuf.c!!! */
+					SWAP(float *, rl->acolrect, rect);
+					ztramask = zbuffer_transp_shade(pa, rl, rect, &psmlist);
+					SWAP(float *, rl->acolrect, rect);
+
+					/* zbuffer transp only returns ztramask if there's solid rendered */
+					if (ztramask)
+						solidmask= make_solid_mask(pa);
+					/* with both masks: a full solid mask does plain alpha-over, otherwise the masks are compared */
+					if (ztramask && solidmask) {
+						unsigned short *sps= solidmask, *spz= ztramask;
+						unsigned short fullmask= (1<<R.osa)-1;
+						float *fcol= rect;
+						float *acol= rl->acolrect;
+						int x;
+
+						for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4, sps++, spz++) {
+							if (*sps == fullmask)
+								addAlphaOverFloat(fcol, acol);
+							else
+								addAlphaOverFloatMask(fcol, acol, *sps, *spz);
+						}
+					}
+					else {
+						float *fcol= rect;
+						float *acol= rl->acolrect;
+						int x;
+						for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) {
+							addAlphaOverFloat(fcol, acol);
+						}
+					}
+					if (solidmask) MEM_freeN(solidmask);
+					if (ztramask) MEM_freeN(ztramask);
+				}
+			}
+		}
+
+		/* sun/sky */
+		if (rl->layflag & SCE_LAY_SKY)
+			atm_tile(pa, rl);
+
+		/* sky before edge */
+		if (rl->layflag & SCE_LAY_SKY)
+			sky_tile(pa, rl);
+
+		/* extra layers */
+		if (rl->layflag & SCE_LAY_EDGE)
+			if (R.r.mode & R_EDGE)
+				edge_enhance_add(pa, rect, edgerect);
+
+		if (rl->passflag & SCE_PASS_VECTOR)
+			reset_sky_speed(pa, rl);
+
+		/* clamp alpha to 0..1 range, can go outside due to filter */
+		clamp_alpha_rgb_range(pa, rl);
+
+		/* free stuff within loop! */
+		MEM_freeN(pa->rectdaps); pa->rectdaps= NULL;
+		freeps(&psmlist);
+
+		if (edgerect) MEM_freeN(edgerect);
+		edgerect= NULL;
+
+		if (pa->rectmask) {
+			MEM_freeN(pa->rectmask);
+			pa->rectmask= NULL;
+		}
+	}
+
+	/* free all */
+	MEM_freeN(pa->recto); pa->recto= NULL;
+	MEM_freeN(pa->rectp); pa->rectp= NULL;
+	MEM_freeN(pa->rectz); pa->rectz= NULL;
+
+	/* display active layer */
+	rr->renrect.ymin=rr->renrect.ymax = 0;
+	rr->renlay= render_get_active_layer(&R, rr);
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+/* non OSA case, full tile render: renders all layers of pa->result for one tile */
+/* supposed to be fully threadable! */
+void zbufshade_tile(RenderPart *pa)
+{
+	ShadeSample ssamp;
+	RenderResult *rr= pa->result;
+	RenderLayer *rl;
+	PixStr ps;
+	float *edgerect= NULL;
+
+	/* fake pixel struct, to comply to osa render; obi, facenr and z are filled in per pixel below */
+	ps.next= NULL;
+	ps.mask= 0xFFFF;
+
+	/* zbuffer code clears/inits rects */
+	pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
+	pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
+	pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
+
+	for (rl= rr->layers.first; rl; rl= rl->next) {
+		float *rect= RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname);
+		if ((rl->layflag & SCE_LAY_ZMASK) && (rl->layflag & SCE_LAY_NEG_ZMASK))
+			pa->rectmask= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectmask");
+
+		/* general shader info, passes */
+		shade_sample_initialize(&ssamp, pa, rl);
+		/* no callback is passed, the shading loop below reads pa->recto/rectp/rectz directly */
+		zbuffer_solid(pa, rl, NULL, NULL);
+
+		if (!R.test_break(R.tbh)) {	/* NOTE: this if () is not consistent */
+
+			/* edges only for solid part, ztransp doesn't support it yet anti-aliased */
+			if (rl->layflag & SCE_LAY_EDGE) {
+				if (R.r.mode & R_EDGE) {
+					edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge");
+					edge_enhance_tile(pa, edgerect, pa->rectz);
+				}
+			}
+
+			/* initialize scanline updates for main thread */
+			rr->renrect.ymin = 0;
+			rr->renlay= rl;
+
+			if (rl->layflag & SCE_LAY_SOLID) {
+				const float *fcol = rect;
+				const int *ro= pa->recto, *rp= pa->rectp, *rz= pa->rectz;
+				int x, y, offs=0, seed;
+
+				/* we set per pixel a fixed seed, for random AO and shadow samples */
+				seed= pa->rectx*pa->disprect.ymin;
+
+				/* irregular shadowb buffer creation */
+				if (R.r.mode & R_SHADOW)
+					ISB_create(pa, NULL);
+
+				if (R.occlusiontree)
+					cache_occ_samples(&R, pa, &ssamp);
+				/* fcol is advanced along with the other pointers but is not read inside this loop */
+				for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) {
+					for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rz++, rp++, fcol+=4, offs++) {
+						/* per pixel fixed seed */
+						BLI_thread_srandom(pa->thread, seed++);
+						/* pixels with face number zero are skipped */
+						if (*rp) {
+							ps.obi= *ro;
+							ps.facenr= *rp;
+							ps.z= *rz;
+							if (shade_samples(&ssamp, &ps, x, y)) {
+								/* combined and passes */
+								add_passes(rl, offs, ssamp.shi, ssamp.shr);
+							}
+						}
+					}
+					if (y&1)
+						if (R.test_break(R.tbh)) break;
+				}
+
+				if (R.occlusiontree)
+					free_occ_samples(&R, pa);
+
+				if (R.r.mode & R_SHADOW)
+					ISB_free(pa);
+			}
+
+			/* disable scanline updating */
+			rr->renlay= NULL;
+		}
+
+		/* lamphalo after solid, before ztra, looks nicest because ztra does own halo */
+		if (R.flag & R_LAMPHALO)
+			if (rl->layflag & SCE_LAY_HALO)
+				lamphalo_tile(pa, rl);
+
+		/* halo before ztra, because ztra fills in zbuffer now */
+		if (R.flag & R_HALO)
+			if (rl->layflag & SCE_LAY_HALO)
+				halo_tile(pa, rl);
+
+		if (R.flag & R_ZTRA || R.totstrand) {
+			if (rl->layflag & (SCE_LAY_ZTRA|SCE_LAY_STRAND)) {
+				float *fcol, *acol;
+				int x;
+
+				/* allocate, but not free here, for asynchronous display of this rect in main thread */
+				rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer");
+
+				/* swap for live updates */
+				SWAP(float *, rl->acolrect, rect);
+				zbuffer_transp_shade(pa, rl, rect, NULL);
+				SWAP(float *, rl->acolrect, rect);
+				/* the transparent result in acolrect goes alpha-over the combined pass */
+				fcol= rect; acol= rl->acolrect;
+				for (x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) {
+					addAlphaOverFloat(fcol, acol);
+				}
+			}
+		}
+
+		/* sun/sky */
+		if (rl->layflag & SCE_LAY_SKY)
+			atm_tile(pa, rl);
+
+		/* sky before edge */
+		if (rl->layflag & SCE_LAY_SKY)
+			sky_tile(pa, rl);
+
+		if (!R.test_break(R.tbh)) {
+			if (rl->layflag & SCE_LAY_EDGE)
+				if (R.r.mode & R_EDGE)
+					edge_enhance_add(pa, rect, edgerect);
+		}
+
+		if (rl->passflag & SCE_PASS_VECTOR)
+			reset_sky_speed(pa, rl);
+		/* per layer buffers are released before the next layer starts */
+		if (edgerect) MEM_freeN(edgerect);
+		edgerect= NULL;
+
+		if (pa->rectmask) {
+			MEM_freeN(pa->rectmask);
+			pa->rectmask= NULL;
+		}
+	}
+
+	/* display active layer */
+	rr->renrect.ymin=rr->renrect.ymax = 0;
+	rr->renlay= render_get_active_layer(&R, rr);
+
+	MEM_freeN(pa->recto); pa->recto= NULL;
+	MEM_freeN(pa->rectp); pa->rectp= NULL;
+	MEM_freeN(pa->rectz); pa->rectz= NULL;
+}
+
+/* SSS preprocess tile render, fully threadable */
+typedef struct ZBufSSSHandle {
+	RenderPart *pa;		/* tile whose buffers addps_sss() writes to */
+	ListBase psmlist;	/* passed to addps() by addps_sss() when pa->rectall is set */
+	int totps;		/* counter increased by addps_sss(), sizes the point arrays in zbufshade_sss_tile() */
+} ZBufSSSHandle;
+
+static void addps_sss(void *cb_handle, int obi, int facenr, int x, int y, int z)
+{
+	/* stores one sample at tile pixel (x, y) in whichever of the tile buffers exist */
+	ZBufSSSHandle *handle= cb_handle;
+	RenderPart *pa= handle->pa;
+	const bool inside_x= (x>=pa->crop && x<pa->rectx-pa->crop);
+	const bool inside_y= (y>=pa->crop && y<pa->recty-pa->crop);
+	int offs;
+
+	/* extra border for filter gives double samples on part edges,
+	 * don't use those */
+	if (!inside_x || !inside_y)
+		return;
+
+	offs= pa->rectx*y + x;
+
+	/* pixel struct list: every sample is added and counted */
+	if (pa->rectall) {
+		addps(&handle->psmlist, pa->rectall + offs, obi, facenr, z, 0, 0);
+		handle->totps++;
+	}
+
+	/* front buffers: a sample with smaller z replaces the stored one,
+	 * the pixel is counted when no face was stored on it yet */
+	if (pa->rectz && z < pa->rectz[offs]) {
+		if (pa->rectp[offs] == 0)
+			handle->totps++;
+
+		pa->rectz[offs]= z;
+		pa->rectp[offs]= facenr;
+		pa->recto[offs]= obi;
+	}
+
+	/* back buffers: a sample with larger or equal z replaces the stored one,
+	 * counted the same way as the front buffers */
+	if (pa->rectbackz && z >= pa->rectbackz[offs]) {
+		if (pa->rectbackp[offs] == 0)
+			handle->totps++;
+
+		pa->rectbackz[offs]= z;
+		pa->rectbackp[offs]= facenr;
+		pa->rectbacko[offs]= obi;
+	}
+}
+
+static void shade_sample_sss(ShadeSample *ssamp, Material *mat, ObjectInstanceRen *obi, VlakRen *vlr, int quad, float x, float y, float z, float *co, float color[3], float *area)
+{
+	ShadeInput *shi= ssamp->shi;
+	ShadeResult shr;
+	float /* texfac,*/ /* UNUSED */ orthoarea, nor[3], alpha, sx, sy;
+	/* shades one sample; outputs: co (shi->co), color (combined RGB), area (clamped estimate times alpha) */
+	/* cache for shadow */
+	shi->samplenr= R.shadowsamplenr[shi->thread]++;
+	/* a quad uses its second triangle (verts 0, 2, 3), otherwise verts 0, 1, 2 */
+	if (quad)
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3);
+	else
+		shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
+
+	/* center pixel */
+	sx = x + 0.5f;
+	sy = y + 0.5f;
+
+	/* we estimate the area here using shi->dxco and shi->dyco. we need to
+	 * enable shi->osatex so these are filled. we compute two areas, one with
+	 * the normal pointed at the camera and one with the original normal, and
+	 * then clamp to avoid a too large contribution from a single pixel */
+	shi->osatex= 1;
+
+	copy_v3_v3(nor, shi->facenor);
+	calc_view_vector(shi->facenor, sx, sy);
+	normalize_v3(shi->facenor);
+	shade_input_set_viewco(shi, x, y, sx, sy, z);
+	orthoarea= len_v3(shi->dxco)*len_v3(shi->dyco);
+	/* restore the original face normal; the second area is limited to twice the first one */
+	copy_v3_v3(shi->facenor, nor);
+	shade_input_set_viewco(shi, x, y, sx, sy, z);
+	*area = min_ff(len_v3(shi->dxco) * len_v3(shi->dyco), 2.0f * orthoarea);
+
+	shade_input_set_uv(shi);
+	shade_input_set_normals(shi);
+
+	/* we don't want flipped normals, they screw up back scattering */
+	if (shi->flippednor)
+		shade_input_flip_normals(shi);
+
+	/* not a pretty solution, but fixes common cases */
+	if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
+		negate_v3(shi->vn);
+		negate_v3(shi->vno);
+		negate_v3(shi->nmapnorm);
+	}
+
+	/* if nodetree, use the material that we are currently preprocessing
+	 * instead of the node material */
+	if (shi->mat->nodetree && shi->mat->use_nodes)
+		shi->mat= mat;
+
+	/* init material vars */
+	shade_input_init_material(shi);
+
+	/* render */
+	shade_input_set_shade_texco(shi);
+
+	shade_samples_do_AO(ssamp);
+	shade_material_loop(shi, &shr);
+
+	copy_v3_v3(co, shi->co);
+	copy_v3_v3(color, shr.combined);
+
+	/* texture blending */
+	/* texfac= shi->mat->sss_texfac; */ /* UNUSED */
+	/* the area estimate is weighted by the alpha of the shaded result */
+	alpha= shr.combined[3];
+	*area *= alpha;
+}
+
+static void zbufshade_sss_free(RenderPart *pa)
+{
+#if 0
+	MEM_freeN(pa->rectall); pa->rectall= NULL;
+	freeps(&handle.psmlist);
+#else
+	/* front buffers first, then the back ones; every pointer is reset after freeing */
+	int **buffers[6]= {&pa->rectz, &pa->rectp, &pa->recto, &pa->rectbackz, &pa->rectbackp, &pa->rectbacko};
+	int a;
+	for (a= 0; a<6; a++) {
+		MEM_freeN(*buffers[a]); *buffers[a]= NULL;
+	}
+#endif
+}
+
+void zbufshade_sss_tile(RenderPart *pa)
+{
+	Render *re= &R;
+	ShadeSample ssamp;
+	ZBufSSSHandle handle;
+	RenderResult *rr= pa->result;
+	RenderLayer *rl;
+	VlakRen *vlr;
+	Material *mat= re->sss_mat;
+	float (*co)[3], (*color)[3], *area, *fcol;
+	int x, y, seed, quad, totpoint;
+	const bool display = (re->r.scemode & (R_BUTS_PREVIEW | R_VIEWPORT_PREVIEW)) == 0;
+	int *ro, *rz, *rp, *rbo, *rbz, *rbp, lay;
+#if 0
+	PixStr *ps;
+	intptr_t *rs;
+	int z;
+#endif
+	/* shades the front and back samples of material re->sss_mat in this tile and hands the points to sss_add_points() */
+	/* setup pixelstr list and buffer for zbuffering */
+	handle.pa= pa;
+	handle.totps= 0;
+
+#if 0
+	handle.psmlist.first= handle.psmlist.last= NULL;
+	addpsmain(&handle.psmlist);
+
+	pa->rectall= MEM_callocN(sizeof(intptr_t)*pa->rectx*pa->recty+4, "rectall");
+#else
+	pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
+	pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
+	pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
+	pa->rectbacko= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbacko");
+	pa->rectbackp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackp");
+	pa->rectbackz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackz");
+#endif
+
+	/* setup shade sample with correct passes */
+	memset(&ssamp, 0, sizeof(ssamp));
+	shade_sample_initialize(&ssamp, pa, rr->layers.first);
+	ssamp.tot= 1;
+	/* the flags of all layers are merged into the single shade input */
+	for (rl=rr->layers.first; rl; rl=rl->next) {
+		ssamp.shi[0].lay |= rl->lay;
+		ssamp.shi[0].layflag |= rl->layflag;
+		ssamp.shi[0].passflag |= rl->passflag;
+		ssamp.shi[0].combinedflag |= ~rl->pass_xor;
+	}
+	/* from here on rl is the first layer, its combined pass receives the shaded colors */
+	rl= rr->layers.first;
+	ssamp.shi[0].passflag |= SCE_PASS_RGBA|SCE_PASS_COMBINED;
+	ssamp.shi[0].combinedflag &= ~(SCE_PASS_SPEC);
+	ssamp.shi[0].mat_override= NULL;
+	ssamp.shi[0].light_override= NULL;
+	lay= ssamp.shi[0].lay;
+
+	/* create the pixelstrs to be used later */
+	zbuffer_sss(pa, lay, &handle, addps_sss);
+	/* addps_sss() counted no sample at all: free the buffers and leave */
+	if (handle.totps==0) {
+		zbufshade_sss_free(pa);
+		return;
+	}
+
+	fcol= RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, R.viewname);
+	/* NOTE(review): arrays are sized by handle.totps, assumes the loop below never makes more points - confirm */
+	co= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSCo");
+	color= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSColor");
+	area= MEM_mallocN(sizeof(float)*handle.totps, "SSSArea");
+
+#if 0
+	/* create ISB (does not work currently!) */
+	if (re->r.mode & R_SHADOW)
+		ISB_create(pa, NULL);
+#endif
+
+	if (display) {
+		/* initialize scanline updates for main thread */
+		rr->renrect.ymin = 0;
+		rr->renlay= rl;
+	}
+
+	seed= pa->rectx*pa->disprect.ymin;
+#if 0
+	rs= pa->rectall;
+#else
+	rz= pa->rectz;
+	rp= pa->rectp;
+	ro= pa->recto;
+	rbz= pa->rectbackz;
+	rbp= pa->rectbackp;
+	rbo= pa->rectbacko;
+#endif
+	totpoint= 0;
+
+	for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) {
+		for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, fcol+=4) {
+			/* per pixel fixed seed */
+			BLI_thread_srandom(pa->thread, seed++);
+
+#if 0
+			if (rs) {
+				/* for each sample in this pixel, shade it */
+				for (ps = (PixStr *)(*rs); ps; ps=ps->next) {
+					ObjectInstanceRen *obi= &re->objectinstance[ps->obi];
+					ObjectRen *obr= obi->obr;
+					vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
+					quad= (ps->facenr & RE_QUAD_OFFS);
+					z= ps->z;
+
+					shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, z,
+						co[totpoint], color[totpoint], &area[totpoint]);
+
+					totpoint++;
+
+					add_v3_v3(fcol, color);
+					fcol[3]= 1.0f;
+				}
+
+				rs++;
+			}
+#else
+			if (rp) {
+				if (*rp != 0) {
+					ObjectInstanceRen *obi= &re->objectinstance[*ro];
+					ObjectRen *obr= obi->obr;
+
+					/* shade front */
+					vlr= RE_findOrAddVlak(obr, (*rp-1) & RE_QUAD_MASK);
+					quad= ((*rp) & RE_QUAD_OFFS);
+
+					shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rz,
+						co[totpoint], color[totpoint], &area[totpoint]);
+
+					add_v3_v3(fcol, color[totpoint]);
+					fcol[3]= 1.0f;
+					totpoint++;
+				}
+
+				rp++; rz++; ro++;
+			}
+			/* rp and ro were advanced above, so rp-1/ro-1 is this pixel's front sample: an identical back sample is skipped */
+			if (rbp) {
+				if (*rbp != 0 && !(*rbp == *(rp-1) && *rbo == *(ro-1))) {
+					ObjectInstanceRen *obi= &re->objectinstance[*rbo];
+					ObjectRen *obr= obi->obr;
+
+					/* shade back */
+					vlr= RE_findOrAddVlak(obr, (*rbp-1) & RE_QUAD_MASK);
+					quad= ((*rbp) & RE_QUAD_OFFS);
+
+					shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rbz,
+						co[totpoint], color[totpoint], &area[totpoint]);
+
+					/* to indicate this is a back sample */
+					area[totpoint]= -area[totpoint];
+
+					add_v3_v3(fcol, color[totpoint]);
+					fcol[3]= 1.0f;
+					totpoint++;
+				}
+
+				rbz++; rbp++; rbo++;
+			}
+#endif
+		}
+
+		if (y&1)
+			if (re->test_break(re->tbh)) break;
+	}
+
+	/* note: after adding we do not free these arrays, sss keeps them */
+	if (totpoint > 0) {
+		sss_add_points(re, co, color, area, totpoint);
+	}
+	else {
+		MEM_freeN(co);
+		MEM_freeN(color);
+		MEM_freeN(area);
+	}
+
+#if 0
+	if (re->r.mode & R_SHADOW)
+		ISB_free(pa);
+#endif
+
+	if (display) {
+		/* display active layer */
+		rr->renrect.ymin=rr->renrect.ymax = 0;
+		rr->renlay= render_get_active_layer(&R, rr);
+	}
+
+	zbufshade_sss_free(pa);
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* draw one halo into rectf, which is indexed as RGBA rows of rr->rectx pixels.
+ * har->xs/ys get R.disprect subtracted to map them into that buffer, and
+ * har->miny/maxy are set to the unclipped vertical extent of the halo.
+ * The row loop stops as soon as R.test_break() returns true. */
+static void renderhalo_post(RenderResult *rr, float *rectf, HaloRen *har)	/* postprocess version */
+{
+	float halo_x, halo_y, col[4], *row;
+	int xmin, xmax, ymin, ymax, x, y;
+
+	/* calculate the disprect mapped coordinate for halo. note: rectx is disprect corrected */
+	halo_x= har->xs - R.disprect.xmin;
+	halo_y= har->ys - R.disprect.ymin;
+
+	/* vertical extent, converted to int before it is stored in the halo */
+	ymin= halo_y - har->rad/R.ycor;
+	har->miny= ymin;
+	ymax= halo_y + har->rad/R.ycor;
+	har->maxy= ymax;
+
+	/* no row of the image is touched */
+	if (ymax < 0 || rr->recty < ymin)
+		return;
+
+	/* horizontal extent, rounded outwards */
+	xmin= floor(halo_x - har->rad);
+	xmax= ceil(halo_x + har->rad);
+
+	/* no column of the image is touched */
+	if (xmax < 0 || rr->rectx < xmin)
+		return;
+
+	/* clip to the image, xmax stays inclusive while ymax is exclusive */
+	if (xmin<0) xmin= 0;
+	if (xmax>=rr->rectx) xmax= rr->rectx-1;
+	if (ymin<0) ymin= 0;
+	if (ymax>rr->recty) ymax= rr->recty;
+
+	/* first row to draw */
+	row= rectf + 4*rr->rectx*ymin;
+
+	for (y= ymin; y<ymax; y++, row+= 4*rr->rectx) {
+		/* vertical distance to the halo center is multiplied by R.ycor before squaring */
+		float *pix= row + 4*xmin;
+		const float yn= (y - halo_y)*R.ycor;
+		const float ysq= yn*yn;
+
+		for (x= xmin; x<=xmax; x++, pix+= 4) {
+			const float xn= x - halo_x;
+			const float xsq= xn*xn;
+			const float dist= xsq+ysq;
+
+			/* only pixels inside the halo radius get shaded */
+			if (!(dist<har->radsq))
+				continue;
+
+			if (shadeHaloFloat(har, col, 0x7FFFFF, dist, xn, yn, har->flarec))
+				addalphaAddfacFloat(pix, col, har->add);
+		}
+
+		/* the break test runs once per row */
+		if (R.test_break(R.tbh)) break;
+	}
+}
+/* ------------------------------------------------------------------------ */
+
+static void renderflare(RenderResult *rr, float *rectf, HaloRen *har)
+{
+	extern const float hashvectf[];
+	HaloRen fla;
+	Material *ma;
+	const float *rc;
+	float rad, alfa, visifac, vec[3];
+	int b, type;
+	/* draws the main halo from har itself, then har->flarec-1 secondary flares (two passes each) from the copy fla */
+	fla= *har;
+	fla.linec= fla.ringc= fla.flarec= 0;
+
+	rad= har->rad;
+	alfa= har->alfa;
+
+	visifac= R.ycor*(har->pixels);
+	/* all radials added / r^3 == 1.0f! */
+	visifac /= (har->rad*har->rad*har->rad);
+	visifac*= visifac;
+
+	ma= har->mat;
+	/* NOTE(review): har->rad, radsq, zs and alfa are overwritten below and not restored in this function */
+	/* first halo: just do */
+
+	har->rad= rad*ma->flaresize*visifac;
+	har->radsq= har->rad*har->rad;
+	har->zs= fla.zs= 0;
+
+	har->alfa= alfa*visifac;
+
+	renderhalo_post(rr, rectf, har);
+
+	/* next halo's: the flares */
+	rc= hashvectf + ma->seed2;
+
+	for (b=1; b<har->flarec; b++) {
+		/* color, alpha and hardness of this flare come from the hash values at rc */
+		fla.r = fabsf(rc[0]);
+		fla.g = fabsf(rc[1]);
+		fla.b = fabsf(rc[2]);
+		fla.alfa= ma->flareboost*fabsf(alfa*visifac*rc[3]);
+		fla.hard= 20.0f + fabsf(70.0f*rc[7]);
+		fla.tex= 0;
+
+		type= (int)(fabsf(3.9f*rc[6]));
+
+		fla.rad = ma->subsize * sqrtf(fabsf(2.0f * har->rad * rc[4]));
+
+		if (type==3) {
+			fla.rad*= 3.0f;
+			fla.rad+= R.rectx/10;
+		}
+
+		fla.radsq= fla.rad*fla.rad;
+		/* vec is the scaled offset of the halo from the window center (R.winx/2, R.winy/2) */
+		vec[0]= 1.4f*rc[5]*(har->xs-R.winx/2);
+		vec[1]= 1.4f*rc[5]*(har->ys-R.winy/2);
+		vec[2]= 32.0f*sqrtf(vec[0]*vec[0] + vec[1]*vec[1] + 1.0f);
+
+		fla.xs= R.winx/2 + vec[0] + (1.2f+rc[8])*R.rectx*vec[0]/vec[2];
+		fla.ys= R.winy/2 + vec[1] + (1.2f+rc[8])*R.rectx*vec[1]/vec[2];
+
+		if (R.flag & R_SEC_FIELD) {
+			if (R.r.mode & R_ODDFIELD) fla.ys += 0.5f;
+			else fla.ys -= 0.5f;
+		}
+		if (type & 1) fla.type= HA_FLARECIRC;
+		else fla.type= 0;
+		renderhalo_post(rr, rectf, &fla);
+		/* second pass at half alpha, bit 2 of type selects the circle type this time */
+		fla.alfa*= 0.5f;
+		if (type & 2) fla.type= HA_FLARECIRC;
+		else fla.type= 0;
+		renderhalo_post(rr, rectf, &fla);
+		/* the next flare starts 7 floats further, while rc[7] and rc[8] are read above as well */
+		rc+= 7;
+	}
+}
+
+/* needs recode... integrate this better! */
+void add_halo_flare(Render *re)
+{
+	RenderResult *rr= re->result;
+	RenderLayer *rl;
+
+	/* every render layer with halos enabled and a combined pass gets its flares drawn */
+	for (rl= rr->layers.first; rl; rl= rl->next) {
+		float *rect;
+		int a, prev_mode;
+		bool drawn= false;
+
+		if (!(rl->layflag & SCE_LAY_HALO))
+			continue;
+
+		rect= RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, re->viewname);
+		if (rect==NULL)
+			continue;
+
+		/* R_PANORAMA is cleared around project_renderdata(), the mode is put back below */
+		prev_mode= R.r.mode;
+		R.r.mode &= ~R_PANORAMA;
+
+		project_renderdata(&R, projectverto, 0, 0, 0);
+
+		for (a= 0; a<R.tothalo; a++) {
+			HaloRen *har= R.sortedhalos[a];
+
+			if (har->flarec==0 || (har->lay & rl->lay)==0)
+				continue;
+
+			renderflare(rr, rect, har);
+			drawn= true;
+		}
+
+		/* weak... the display callback wants an active renderlayer pointer... */
+		if (drawn) {
+			rr->renlay= rl;
+			re->display_update(re->duh, rr, NULL);
+		}
+
+		R.r.mode= prev_mode;
+	}
+}
+
+void render_internal_update_passes(RenderEngine *engine, Scene *scene, SceneRenderLayer *srl)
+{
+	/* combined is always registered, every other pass only when its flag is set in srl->passflag */
+
+	RE_engine_register_pass(engine, scene, srl, RE_PASSNAME_COMBINED, 4, "RGBA", SOCK_RGBA);
+
+	/* the socket type follows the channel count: 4 gives RGBA, 3 gives vector, anything else float */
+#define CHECK_PASS(name, channels, chanid) \
+	if (srl->passflag & (SCE_PASS_ ## name)) { \
+		const int type= ((channels) == 4) ? SOCK_RGBA : \
+			((channels) == 3) ? SOCK_VECTOR : SOCK_FLOAT; \
+		RE_engine_register_pass(engine, scene, srl, RE_PASSNAME_ ## name, channels, chanid, type); \
+	} (void)0
+
+	CHECK_PASS(Z,           1, "Z");
+	CHECK_PASS(VECTOR,      4, "XYZW");
+	CHECK_PASS(NORMAL,      3, "XYZ");
+	CHECK_PASS(UV,          3, "UVA");
+	CHECK_PASS(RGBA,        4, "RGBA");
+	CHECK_PASS(EMIT,        3, "RGB");
+	CHECK_PASS(DIFFUSE,     3, "RGB");
+	CHECK_PASS(SPEC,        3, "RGB");
+	CHECK_PASS(AO,          3, "RGB");
+	CHECK_PASS(ENVIRONMENT, 3, "RGB");
+	CHECK_PASS(INDIRECT,    3, "RGB");
+	CHECK_PASS(SHADOW,      3, "RGB");
+	CHECK_PASS(REFLECT,     3, "RGB");
+	CHECK_PASS(REFRACT,     3, "RGB");
+	CHECK_PASS(INDEXOB,     1, "X");
+	CHECK_PASS(INDEXMA,     1, "X");
+	CHECK_PASS(MIST,        1, "Z");
+
+#undef CHECK_PASS
+}
diff --git a/source/blender/render/intern/source/renderdatabase.c b/source/blender/render/intern/source/renderdatabase.c
new file mode 100644
index 00000000000..67bfd1bfdc7
--- /dev/null
+++ b/source/blender/render/intern/source/renderdatabase.c
@@ -0,0 +1,1603 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): 2004-2006, Blender Foundation, full recode
+ *
+ * ***** END GPL/BL DUAL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/renderdatabase.c
+ *  \ingroup render
+ */
+
+
+/*
+ * Storage, retrieval and query of render specific data.
+ *
+ * All data from a Blender scene is converted by the renderconverter/
+ * into a special format that is used by the render module to make
+ * images out of. These functions interface to the render-specific
+ * database.
+ *
+ * The blo{ha/ve/vl} arrays store pointers to blocks of 256 data
+ * entries each.
+ *
+ * The index of an entry is shifted right by 8 (keeping the highest 24
+ * bits) to select its 256-entry block; the lowest 8 bits of the index
+ * give the offset of the entry within that block.
+ *
+ * - If the 256-entry block entry has an entry in the
+ * vertnodes/vlaknodes/bloha array of the current block, the i-th entry in
+ * that block is allocated to this entry.
+ *
+ * - If the entry has no block allocated for it yet, memory is
+ * allocated.
+ *
+ * The pointer to the correct entry is returned. Memory is guaranteed
+ * to exist (as long as the malloc does not break). Since guarded
+ * allocation is used, memory _must_ be available. Otherwise, an
+ * exit(0) would occur.
+ *
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+#include "BLI_hash.h"
+
+#include "DNA_material_types.h"
+#include "DNA_meshdata_types.h"
+#include "DNA_texture_types.h"
+#include "DNA_listBase.h"
+#include "DNA_particle_types.h"
+
+#include "BKE_customdata.h"
+#include "BKE_DerivedMesh.h"
+
+#include "RE_render_ext.h"	/* externtex */
+
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "zbuf.h"
+
+/* ------------------------------------------------------------------------- */
+
+/* More dynamic allocation of options for render vertices and faces, so we don't
+ * have to reserve this space inside vertices.
+ * Important: vertices and faces should have been created already (to get the
+ * tables checked); that is the reason why the calls demand a VertRen/VlakRen *
+ * as argument, not the index. */
+
+/* NOTE! the hardcoded table size 256 is still used in code for going quickly over vertices/faces */
+
+/* Number of elements stored per entry in each optional data table.
+ * The accessors in this file allocate 256 entries of this many elements per
+ * table block and use the same factor to step to the requested entry. */
+#define RE_STRESS_ELEMS		1
+#define RE_RAD_ELEMS		4
+#define RE_STRAND_ELEMS		1
+#define RE_TANGENT_ELEMS	3
+#define RE_WINSPEED_ELEMS	4
+#define RE_MTFACE_ELEMS		1
+#define RE_MCOL_ELEMS		4
+#define RE_UV_ELEMS			2
+#define RE_VLAK_ORIGINDEX_ELEMS	1
+#define RE_VERT_ORIGINDEX_ELEMS	1
+#define RE_SURFNOR_ELEMS	3
+#define RE_RADFACE_ELEMS	1
+#define RE_SIMPLIFY_ELEMS	2
+#define RE_FACE_ELEMS		1
+#define RE_NMAP_TANGENT_ELEMS	16
+
+/* Return the stress value slot of vertex 'ver'.
+ * When the table block does not exist yet it is allocated (uninitialized)
+ * if 'verify' is set, otherwise NULL is returned. */
+float *RE_vertren_get_stress(ObjectRen *obr, VertRen *ver, int verify)
+{
+	VertTableNode *node = &obr->vertnodes[ver->index >> 8];
+
+	if (node->stress == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->stress = MEM_mallocN(256*RE_STRESS_ELEMS*sizeof(float), "stress table");
+	}
+
+	return node->stress + (ver->index & 255) * RE_STRESS_ELEMS;
+}
+
+/* Return the RE_RAD_ELEMS floats stored for vertex 'ver'.
+ * When the table block does not exist yet it is allocated with calloc
+ * (so it starts out zeroed) if 'verify' is set, otherwise NULL is returned. */
+float *RE_vertren_get_rad(ObjectRen *obr, VertRen *ver, int verify)
+{
+	VertTableNode *node = &obr->vertnodes[ver->index >> 8];
+
+	if (node->rad == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->rad = MEM_callocN(256*RE_RAD_ELEMS*sizeof(float), "rad table");
+	}
+
+	return node->rad + (ver->index & 255) * RE_RAD_ELEMS;
+}
+
+/* Return the strand value slot of vertex 'ver'.
+ * When the table block does not exist yet it is allocated (uninitialized)
+ * if 'verify' is set, otherwise NULL is returned. */
+float *RE_vertren_get_strand(ObjectRen *obr, VertRen *ver, int verify)
+{
+	VertTableNode *node = &obr->vertnodes[ver->index >> 8];
+
+	if (node->strand == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->strand = MEM_mallocN(256*RE_STRAND_ELEMS*sizeof(float), "strand table");
+	}
+
+	return node->strand + (ver->index & 255) * RE_STRAND_ELEMS;
+}
+
+/* Return the RE_TANGENT_ELEMS floats stored for vertex 'ver'.
+ * The table block needs calloc, so a freshly created block is zeroed.
+ * Without 'verify' a missing block yields NULL instead of being created. */
+float *RE_vertren_get_tangent(ObjectRen *obr, VertRen *ver, int verify)
+{
+	VertTableNode *node = &obr->vertnodes[ver->index >> 8];
+
+	if (node->tangent == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->tangent = MEM_callocN(256*RE_TANGENT_ELEMS*sizeof(float), "tangent table");
+	}
+
+	return node->tangent + (ver->index & 255) * RE_TANGENT_ELEMS;
+}
+
+/* Return the RE_WINSPEED_ELEMS floats of window speed for vertex 'ver'.
+ * Winspeed is the exception among the vertex tables: it is stored per
+ * instance in one flat array (obi->vectors) with room for all vertices
+ * followed by all strands. The array needs calloc, because not all render
+ * verts have a speed. Without 'verify' a missing array yields NULL. */
+float *RE_vertren_get_winspeed(ObjectInstanceRen *obi, VertRen *ver, int verify)
+{
+	if (obi->vectors == NULL) {
+		int totvector;
+
+		if (!verify) {
+			return NULL;
+		}
+
+		totvector = obi->obr->totvert + obi->obr->totstrand;
+		obi->vectors = MEM_callocN(totvector*RE_WINSPEED_ELEMS*sizeof(float), "winspeed table");
+	}
+
+	return obi->vectors + ver->index * RE_WINSPEED_ELEMS;
+}
+
+/* Return the original-index slot of vertex 'ver'.
+ * When the table block does not exist yet it is allocated (uninitialized)
+ * if 'verify' is set, otherwise NULL is returned. */
+int *RE_vertren_get_origindex(ObjectRen *obr, VertRen *ver, int verify)
+{
+	VertTableNode *node = &obr->vertnodes[ver->index >> 8];
+
+	if (node->origindex == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->origindex = MEM_mallocN(256*RE_VERT_ORIGINDEX_ELEMS*sizeof(int), "origindex table");
+	}
+
+	return node->origindex + (ver->index & 255) * RE_VERT_ORIGINDEX_ELEMS;
+}
+
+/* Append a duplicate of 'ver' to 'obr' and return it.
+ * The new vertex takes the next free slot (obr->totvert is incremented) and
+ * keeps that slot's index; each optional table entry that exists for 'ver'
+ * (stress, rad, strand, tangent, origindex) is copied as well. */
+VertRen *RE_vertren_copy(ObjectRen *obr, VertRen *ver)
+{
+	VertRen *copy = RE_findOrAddVert(obr, obr->totvert++);
+	const int copy_index = copy->index;
+	float *src_fl, *dst_fl;
+	int *src_int, *dst_int;
+
+	/* struct copy overwrites the index, so put the slot's own index back */
+	*copy = *ver;
+	copy->index = copy_index;
+
+	if ((src_fl = RE_vertren_get_stress(obr, ver, 0)) != NULL) {
+		dst_fl = RE_vertren_get_stress(obr, copy, 1);
+		memcpy(dst_fl, src_fl, RE_STRESS_ELEMS*sizeof(float));
+	}
+
+	if ((src_fl = RE_vertren_get_rad(obr, ver, 0)) != NULL) {
+		dst_fl = RE_vertren_get_rad(obr, copy, 1);
+		memcpy(dst_fl, src_fl, RE_RAD_ELEMS*sizeof(float));
+	}
+
+	if ((src_fl = RE_vertren_get_strand(obr, ver, 0)) != NULL) {
+		dst_fl = RE_vertren_get_strand(obr, copy, 1);
+		memcpy(dst_fl, src_fl, RE_STRAND_ELEMS*sizeof(float));
+	}
+
+	if ((src_fl = RE_vertren_get_tangent(obr, ver, 0)) != NULL) {
+		dst_fl = RE_vertren_get_tangent(obr, copy, 1);
+		memcpy(dst_fl, src_fl, RE_TANGENT_ELEMS*sizeof(float));
+	}
+
+	if ((src_int = RE_vertren_get_origindex(obr, ver, 0)) != NULL) {
+		dst_int = RE_vertren_get_origindex(obr, copy, 1);
+		memcpy(dst_int, src_int, RE_VERT_ORIGINDEX_ELEMS*sizeof(int));
+	}
+
+	return copy;
+}
+
+/* Return the render vertex with index 'nr', creating its storage on demand.
+ *
+ * Vertices live in blocks of 256; obr->vertnodes holds one node per block.
+ * The node table is grown in steps of TABLEINITSIZE, always far enough to
+ * contain the requested block plus a trailing zeroed node (the free loop
+ * relies on that terminator). A newly allocated block is zeroed and every
+ * vertex in it gets its own index assigned.
+ *
+ * Returns NULL (after printing an error) when 'nr' is negative. */
+VertRen *RE_findOrAddVert(ObjectRen *obr, int nr)
+{
+	VertRen *v;
+	int a;
+
+	if (nr<0) {
+		printf("error in findOrAddVert: %d\n", nr);
+		return NULL;
+	}
+	a= nr>>8;
+
+	if (a>=obr->vertnodeslen-1) {  /* Need to allocate more columns..., and keep last element NULL for free loop */
+		VertTableNode *temp= obr->vertnodes;
+		int newlen= obr->vertnodeslen;
+
+		/* A single step is not enough when 'nr' lies more than one step
+		 * beyond the current table, keep growing until block 'a' fits. */
+		while (a>=newlen-1)
+			newlen+= TABLEINITSIZE;
+
+		obr->vertnodes= MEM_mallocN(sizeof(VertTableNode)*newlen, "vertnodes");
+		if (temp) memcpy(obr->vertnodes, temp, obr->vertnodeslen*sizeof(VertTableNode));
+		memset(obr->vertnodes+obr->vertnodeslen, 0, (newlen-obr->vertnodeslen)*sizeof(VertTableNode));
+
+		obr->vertnodeslen= newlen;
+		if (temp) MEM_freeN(temp);
+	}
+
+	v= obr->vertnodes[a].vert;
+	if (v==NULL) {
+		int i;
+
+		v= (VertRen *)MEM_callocN(256*sizeof(VertRen), "findOrAddVert");
+		obr->vertnodes[a].vert= v;
+
+		/* number the whole block, starting at the first index of the block */
+		for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++) {
+			v[a].index= i;
+		}
+	}
+	v+= (nr & 255);
+	return v;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Return the MTFace of UV layer 'n' for face 'vlr'.
+ *
+ * With 'verify' set, the per-block table is grown (zero filled, existing
+ * layers preserved) so that layer 'n' exists. Without it, NULL is returned
+ * when layer 'n' does not exist, and otherwise the layer name is stored in
+ * 'name' (when 'name' is not NULL). */
+MTFace *RE_vlakren_get_tface(ObjectRen *obr, VlakRen *vlr, int n, char **name, int verify)
+{
+	VlakTableNode *node = &obr->vlaknodes[vlr->index >> 8];
+	const int slot = (n << 8) + (vlr->index & 255);
+
+	if (!verify) {
+		/* lookup only: the layer has to exist already */
+		if (n >= node->totmtface) {
+			return NULL;
+		}
+		if (name) {
+			*name = obr->mtface[n];
+		}
+	}
+	else if (n >= node->totmtface) {
+		/* make room for (n + 1) layers of 256 faces each */
+		MTFace *old = node->mtface;
+
+		node->mtface = MEM_callocN((n + 1) * 256 * sizeof(MTFace), "Vlak mtface");
+		if (old) {
+			memcpy(node->mtface, old, node->totmtface * 256 * sizeof(MTFace));
+			MEM_freeN(old);
+		}
+		node->totmtface = n + 1;
+	}
+
+	return node->mtface + slot;
+}
+
+/* Return the RE_MCOL_ELEMS colors of color layer 'n' for face 'vlr'.
+ *
+ * With 'verify' set, the per-block table is grown (zero filled, existing
+ * layers preserved) so that layer 'n' exists. Without it, NULL is returned
+ * when layer 'n' does not exist, and otherwise the layer name is stored in
+ * 'name' (when 'name' is not NULL). */
+MCol *RE_vlakren_get_mcol(ObjectRen *obr, VlakRen *vlr, int n, char **name, int verify)
+{
+	VlakTableNode *node = &obr->vlaknodes[vlr->index >> 8];
+	const int slot = (n << 8) + (vlr->index & 255);
+
+	if (!verify) {
+		/* lookup only: the layer has to exist already */
+		if (n >= node->totmcol) {
+			return NULL;
+		}
+		if (name) {
+			*name = obr->mcol[n];
+		}
+	}
+	else if (n >= node->totmcol) {
+		/* make room for (n + 1) layers of 256 faces each */
+		MCol *old = node->mcol;
+
+		node->mcol = MEM_callocN((n + 1) * 256 * sizeof(MCol) * RE_MCOL_ELEMS, "Vlak mcol");
+		if (old) {
+			memcpy(node->mcol, old, node->totmcol * 256 * sizeof(MCol) * RE_MCOL_ELEMS);
+			MEM_freeN(old);
+		}
+		node->totmcol = n + 1;
+	}
+
+	return node->mcol + slot * RE_MCOL_ELEMS;
+}
+
+/* Return the original-index slot of face 'vlak'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+int *RE_vlakren_get_origindex(ObjectRen *obr, VlakRen *vlak, int verify)
+{
+	VlakTableNode *node = &obr->vlaknodes[vlak->index >> 8];
+
+	if (node->origindex == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->origindex = MEM_callocN(256*RE_VLAK_ORIGINDEX_ELEMS*sizeof(int), "origindex table");
+	}
+
+	return node->origindex + (vlak->index & 255) * RE_VLAK_ORIGINDEX_ELEMS;
+}
+
+/* Return the RE_SURFNOR_ELEMS floats of surface normal stored for face 'vlak'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+float *RE_vlakren_get_surfnor(ObjectRen *obr, VlakRen *vlak, int verify)
+{
+	VlakTableNode *node = &obr->vlaknodes[vlak->index >> 8];
+
+	if (node->surfnor == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->surfnor = MEM_callocN(256*RE_SURFNOR_ELEMS*sizeof(float), "surfnor table");
+	}
+
+	return node->surfnor + (vlak->index & 255) * RE_SURFNOR_ELEMS;
+}
+
+/* Return the RE_NMAP_TANGENT_ELEMS floats of normal-map tangents that
+ * tangent layer 'index' stores for face 'vlak'.
+ *
+ * A negative 'index' is treated as layer 0; an 'index' of MAX_MTFACE or more
+ * yields NULL. The upper bound is the same MAX_MTFACE that the copy and free
+ * loops over 'tangent_arrays' use, rather than a separate hard-coded number.
+ * When the layer's table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+float *RE_vlakren_get_nmap_tangent(ObjectRen *obr, VlakRen *vlak, int index, bool verify)
+{
+	float **tangents;
+	int nr= vlak->index>>8;
+
+	/* compare directly, 'index + 1' would overflow for INT_MAX */
+	if (index >= MAX_MTFACE) {
+		return NULL;
+	}
+
+	index = index < 0 ? 0: index;
+
+	tangents = obr->vlaknodes[nr].tangent_arrays;
+
+	if (tangents[index] == NULL) {
+		if (verify) {
+			tangents[index] = MEM_callocN(256*RE_NMAP_TANGENT_ELEMS*sizeof(float), "tangent table");
+		}
+		else
+			return NULL;
+	}
+
+	return tangents[index] + (vlak->index & 255)*RE_NMAP_TANGENT_ELEMS;
+}
+
+/* Return the RadFace pointer slot of face 'vlak'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+RadFace **RE_vlakren_get_radface(ObjectRen *obr, VlakRen *vlak, int verify)
+{
+	VlakTableNode *node = &obr->vlaknodes[vlak->index >> 8];
+
+	if (node->radface == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->radface = MEM_callocN(256 * RE_RADFACE_ELEMS * sizeof(void *), "radface table");
+	}
+
+	return node->radface + (vlak->index & 255) * RE_RADFACE_ELEMS;
+}
+
+/* Append a duplicate of face 'vlr' to 'obr' and return it.
+ * The new face takes the next free slot (obr->totvlak is incremented) and
+ * keeps that slot's index. Every optional table entry that exists for 'vlr'
+ * is copied too: all UV and color layers, origindex, surface normal,
+ * normal-map tangents and the radface pointer. */
+VlakRen *RE_vlakren_copy(ObjectRen *obr, VlakRen *vlr)
+{
+	VlakRen *copy = RE_findOrAddVlak(obr, obr->totvlak++);
+	const int copy_index = copy->index;
+	MTFace *src_tface, *dst_tface;
+	MCol *src_mcol, *dst_mcol;
+	float *src_fl, *dst_fl;
+	int *src_orig, *dst_orig;
+	RadFace **src_rad, **dst_rad;
+	char *name;
+	int layer;
+
+	/* struct copy overwrites the index, so put the slot's own index back */
+	*copy = *vlr;
+	copy->index = copy_index;
+
+	/* UV layers, until the first one that does not exist */
+	layer = 0;
+	while ((src_tface = RE_vlakren_get_tface(obr, vlr, layer, &name, 0)) != NULL) {
+		dst_tface = RE_vlakren_get_tface(obr, copy, layer, &name, 1);
+		memcpy(dst_tface, src_tface, sizeof(MTFace)*RE_MTFACE_ELEMS);
+		layer++;
+	}
+
+	/* color layers, until the first one that does not exist */
+	layer = 0;
+	while ((src_mcol = RE_vlakren_get_mcol(obr, vlr, layer, &name, 0)) != NULL) {
+		dst_mcol = RE_vlakren_get_mcol(obr, copy, layer, &name, 1);
+		memcpy(dst_mcol, src_mcol, sizeof(MCol)*RE_MCOL_ELEMS);
+		layer++;
+	}
+
+	src_orig = RE_vlakren_get_origindex(obr, vlr, 0);
+	if (src_orig != NULL) {
+		dst_orig = RE_vlakren_get_origindex(obr, copy, 1);
+		/* Just an int, but memcpy for consistency. */
+		memcpy(dst_orig, src_orig, sizeof(int)*RE_VLAK_ORIGINDEX_ELEMS);
+	}
+
+	src_fl = RE_vlakren_get_surfnor(obr, vlr, 0);
+	if (src_fl != NULL) {
+		dst_fl = RE_vlakren_get_surfnor(obr, copy, 1);
+		copy_v3_v3(dst_fl, src_fl);
+	}
+
+	/* tangent layers may have gaps, so visit every possible layer */
+	for (layer = 0; layer < MAX_MTFACE; layer++) {
+		src_fl = RE_vlakren_get_nmap_tangent(obr, vlr, layer, false);
+		if (src_fl != NULL) {
+			dst_fl = RE_vlakren_get_nmap_tangent(obr, copy, layer, true);
+			memcpy(dst_fl, src_fl, sizeof(float)*RE_NMAP_TANGENT_ELEMS);
+		}
+	}
+
+	src_rad = RE_vlakren_get_radface(obr, vlr, 0);
+	if (src_rad != NULL) {
+		dst_rad = RE_vlakren_get_radface(obr, copy, 1);
+		*dst_rad = *src_rad;
+	}
+
+	return copy;
+}
+
+/* Write the normal of face 'vlr', as seen through instance 'obi', to 'r_nor'.
+ * For an instance flagged R_TRANSFORMED the face normal is multiplied by the
+ * instance normal matrix and normalized; otherwise it is copied unchanged. */
+void RE_vlakren_get_normal(Render *UNUSED(re), ObjectInstanceRen *obi, VlakRen *vlr, float r_nor[3])
+{
+	if ((obi->flag & R_TRANSFORMED) == 0) {
+		copy_v3_v3(r_nor, vlr->n);
+		return;
+	}
+
+	mul_v3_m3v3(r_nor, obi->nmat, vlr->n);
+	normalize_v3(r_nor);
+}
+
+/* Store the names of the MTFace and MCol layers of 'data' on 'obr', together
+ * with the active render layer of each type and the active MTFace layer
+ * (kept as bakemtface).
+ *
+ * CustomData layer names are stored per object here, because the
+ * DerivedMesh which stores the layers is freed. */
+void RE_set_customdata_names(ObjectRen *obr, CustomData *data)
+{
+	int tot_tface = 0, tot_mcol = 0;
+	int tface_nr = 0, mcol_nr = 0;
+	int i;
+
+	/* name arrays are only allocated for layer types that are present */
+	if (CustomData_has_layer(data, CD_MTFACE)) {
+		tot_tface = CustomData_number_of_layers(data, CD_MTFACE);
+		obr->mtface = MEM_callocN(sizeof(*obr->mtface)*tot_tface, "mtfacenames");
+	}
+
+	if (CustomData_has_layer(data, CD_MCOL)) {
+		tot_mcol = CustomData_number_of_layers(data, CD_MCOL);
+		obr->mcol = MEM_callocN(sizeof(*obr->mcol)*tot_mcol, "mcolnames");
+	}
+
+	for (i = 0; i < data->totlayer; i++) {
+		CustomDataLayer *layer = &data->layers[i];
+
+		if (layer->type == CD_MTFACE) {
+			BLI_strncpy(obr->mtface[tface_nr], layer->name, sizeof(layer->name));
+			tface_nr++;
+			obr->actmtface = CLAMPIS(layer->active_rnd, 0, tot_tface);
+			obr->bakemtface = layer->active;
+		}
+		else if (layer->type == CD_MCOL) {
+			BLI_strncpy(obr->mcol[mcol_nr], layer->name, sizeof(layer->name));
+			mcol_nr++;
+			obr->actmcol = CLAMPIS(layer->active_rnd, 0, tot_mcol);
+		}
+	}
+}
+
+/* Return the render face with index 'nr', creating its storage on demand.
+ *
+ * Faces live in blocks of 256; obr->vlaknodes holds one node per block.
+ * The node table is grown in steps of TABLEINITSIZE, always far enough to
+ * contain the requested block plus a trailing zeroed node (the free loop
+ * relies on that terminator). A newly allocated block is zeroed and every
+ * face in it gets its own index assigned.
+ *
+ * For a negative 'nr' an error is printed and the first face block is
+ * returned, or NULL when no node table exists yet. */
+VlakRen *RE_findOrAddVlak(ObjectRen *obr, int nr)
+{
+	VlakRen *v;
+	int a;
+
+	if (nr<0) {
+		printf("error in findOrAddVlak: %d\n", nr);
+		/* the table may not have been allocated yet */
+		return obr->vlaknodes ? obr->vlaknodes[0].vlak : NULL;
+	}
+	a= nr>>8;
+
+	if (a>=obr->vlaknodeslen-1) {  /* Need to allocate more columns..., and keep last element NULL for free loop */
+		VlakTableNode *temp= obr->vlaknodes;
+		int newlen= obr->vlaknodeslen;
+
+		/* A single step is not enough when 'nr' lies more than one step
+		 * beyond the current table, keep growing until block 'a' fits. */
+		while (a>=newlen-1)
+			newlen+= TABLEINITSIZE;
+
+		obr->vlaknodes= MEM_mallocN(sizeof(VlakTableNode)*newlen, "vlaknodes");
+		if (temp) memcpy(obr->vlaknodes, temp, obr->vlaknodeslen*sizeof(VlakTableNode));
+		memset(obr->vlaknodes+obr->vlaknodeslen, 0, (newlen-obr->vlaknodeslen)*sizeof(VlakTableNode));
+
+		obr->vlaknodeslen= newlen;
+		if (temp) MEM_freeN(temp);
+	}
+
+	v= obr->vlaknodes[a].vlak;
+
+	if (v==NULL) {
+		int i;
+
+		v= (VlakRen *)MEM_callocN(256*sizeof(VlakRen), "findOrAddVlak");
+		obr->vlaknodes[a].vlak= v;
+
+		/* number the whole block, starting at the first index of the block */
+		for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++)
+			v[a].index= i;
+	}
+	v+= (nr & 255);
+	return v;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Return the RE_SURFNOR_ELEMS floats of surface normal stored for 'strand'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+float *RE_strandren_get_surfnor(ObjectRen *obr, StrandRen *strand, int verify)
+{
+	StrandTableNode *node = &obr->strandnodes[strand->index >> 8];
+
+	if (node->surfnor == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->surfnor = MEM_callocN(256*RE_SURFNOR_ELEMS*sizeof(float), "surfnor strand table");
+	}
+
+	return node->surfnor + (strand->index & 255) * RE_SURFNOR_ELEMS;
+}
+
+/* Return the RE_UV_ELEMS floats of UV layer 'n' for 'strand'.
+ *
+ * With 'verify' set, the per-block table is grown (zero filled, existing
+ * layers preserved) so that layer 'n' exists. Without it, NULL is returned
+ * when layer 'n' does not exist, and otherwise the MTFace layer name is
+ * stored in 'name' (when 'name' is not NULL). */
+float *RE_strandren_get_uv(ObjectRen *obr, StrandRen *strand, int n, char **name, int verify)
+{
+	StrandTableNode *node = &obr->strandnodes[strand->index >> 8];
+	const int slot = (n << 8) + (strand->index & 255);
+
+	if (!verify) {
+		/* lookup only: the layer has to exist already */
+		if (n >= node->totuv) {
+			return NULL;
+		}
+		if (name) {
+			*name = obr->mtface[n];
+		}
+	}
+	else if (n >= node->totuv) {
+		/* make room for (n + 1) layers of 256 strands each */
+		float *old = node->uv;
+
+		node->uv = MEM_callocN((n + 1) * 256 * sizeof(float) * RE_UV_ELEMS, "strand uv table");
+		if (old) {
+			memcpy(node->uv, old, node->totuv * 256 * sizeof(float) * RE_UV_ELEMS);
+			MEM_freeN(old);
+		}
+		node->totuv = n + 1;
+	}
+
+	return node->uv + slot * RE_UV_ELEMS;
+}
+
+/* Return the RE_MCOL_ELEMS colors of color layer 'n' for 'strand'.
+ *
+ * With 'verify' set, the per-block table is grown (zero filled, existing
+ * layers preserved) so that layer 'n' exists. Without it, NULL is returned
+ * when layer 'n' does not exist, and otherwise the layer name is stored in
+ * 'name' (when 'name' is not NULL). */
+MCol *RE_strandren_get_mcol(ObjectRen *obr, StrandRen *strand, int n, char **name, int verify)
+{
+	StrandTableNode *node = &obr->strandnodes[strand->index >> 8];
+	const int slot = (n << 8) + (strand->index & 255);
+
+	if (!verify) {
+		/* lookup only: the layer has to exist already */
+		if (n >= node->totmcol) {
+			return NULL;
+		}
+		if (name) {
+			*name = obr->mcol[n];
+		}
+	}
+	else if (n >= node->totmcol) {
+		/* make room for (n + 1) layers of 256 strands each */
+		MCol *old = node->mcol;
+
+		node->mcol = MEM_callocN((n + 1) * 256 * sizeof(MCol) * RE_MCOL_ELEMS, "strand mcol table");
+		if (old) {
+			memcpy(node->mcol, old, node->totmcol * 256 * sizeof(MCol) * RE_MCOL_ELEMS);
+			MEM_freeN(old);
+		}
+		node->totmcol = n + 1;
+	}
+
+	return node->mcol + slot * RE_MCOL_ELEMS;
+}
+
+/* Return the RE_SIMPLIFY_ELEMS floats of simplify data stored for 'strand'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+float *RE_strandren_get_simplify(struct ObjectRen *obr, struct StrandRen *strand, int verify)
+{
+	StrandTableNode *node = &obr->strandnodes[strand->index >> 8];
+
+	if (node->simplify == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->simplify = MEM_callocN(256*RE_SIMPLIFY_ELEMS*sizeof(float), "simplify strand table");
+	}
+
+	return node->simplify + (strand->index & 255) * RE_SIMPLIFY_ELEMS;
+}
+
+/* Return the face slot (one int) stored for 'strand'.
+ * When the table block does not exist yet it is allocated zeroed if
+ * 'verify' is set, otherwise NULL is returned. */
+int *RE_strandren_get_face(ObjectRen *obr, StrandRen *strand, int verify)
+{
+	StrandTableNode *node = &obr->strandnodes[strand->index >> 8];
+
+	if (node->face == NULL) {
+		if (!verify) {
+			return NULL;
+		}
+		node->face = MEM_callocN(256*RE_FACE_ELEMS*sizeof(int), "face strand table");
+	}
+
+	return node->face + (strand->index & 255) * RE_FACE_ELEMS;
+}
+
+/* Return the RE_WINSPEED_ELEMS floats of window speed for 'strand'.
+ * Winspeed is the exception among the strand tables: it is stored per
+ * instance in one flat zeroed array (obi->vectors) in which the strand
+ * entries follow the obr->totvert vertex entries.
+ * Without 'verify' a missing array yields NULL instead of being created. */
+float *RE_strandren_get_winspeed(ObjectInstanceRen *obi, StrandRen *strand, int verify)
+{
+	if (obi->vectors == NULL) {
+		int totvector;
+
+		if (!verify) {
+			return NULL;
+		}
+
+		totvector = obi->obr->totvert + obi->obr->totstrand;
+		obi->vectors = MEM_callocN(totvector*RE_WINSPEED_ELEMS*sizeof(float), "winspeed strand table");
+	}
+
+	/* skip past the vertex part of the array */
+	return obi->vectors + (obi->obr->totvert + strand->index) * RE_WINSPEED_ELEMS;
+}
+
+/* Return the render strand with index 'nr', creating its storage on demand.
+ *
+ * Strands live in blocks of 256; obr->strandnodes holds one node per block.
+ * The node table is grown in steps of TABLEINITSIZE, always far enough to
+ * contain the requested block plus a trailing zeroed node (the free loop
+ * relies on that terminator). A newly allocated block is zeroed and every
+ * strand in it gets its own index assigned.
+ *
+ * For a negative 'nr' an error is printed and the first strand block is
+ * returned, or NULL when no node table exists yet. */
+StrandRen *RE_findOrAddStrand(ObjectRen *obr, int nr)
+{
+	StrandRen *v;
+	int a;
+
+	if (nr<0) {
+		printf("error in findOrAddStrand: %d\n", nr);
+		/* the table may not have been allocated yet */
+		return obr->strandnodes ? obr->strandnodes[0].strand : NULL;
+	}
+	a= nr>>8;
+
+	if (a>=obr->strandnodeslen-1) {  /* Need to allocate more columns..., and keep last element NULL for free loop */
+		StrandTableNode *temp= obr->strandnodes;
+		int newlen= obr->strandnodeslen;
+
+		/* A single step is not enough when 'nr' lies more than one step
+		 * beyond the current table, keep growing until block 'a' fits. */
+		while (a>=newlen-1)
+			newlen+= TABLEINITSIZE;
+
+		obr->strandnodes= MEM_mallocN(sizeof(StrandTableNode)*newlen, "strandnodes");
+		if (temp) memcpy(obr->strandnodes, temp, obr->strandnodeslen*sizeof(StrandTableNode));
+		memset(obr->strandnodes+obr->strandnodeslen, 0, (newlen-obr->strandnodeslen)*sizeof(StrandTableNode));
+
+		obr->strandnodeslen= newlen;
+		if (temp) MEM_freeN(temp);
+	}
+
+	v= obr->strandnodes[a].strand;
+
+	if (v==NULL) {
+		int i;
+
+		v= (StrandRen *)MEM_callocN(256*sizeof(StrandRen), "findOrAddStrand");
+		obr->strandnodes[a].strand= v;
+
+		/* number the whole block, starting at the first index of the block */
+		for (i= (nr & 0xFFFFFF00), a=0; a<256; a++, i++)
+			v[a].index= i;
+	}
+	v+= (nr & 255);
+	return v;
+}
+
+/* Allocate a zeroed StrandBuffer with room for 'totvert' strand vertices,
+ * link it to 'obr' in both directions and return it. */
+StrandBuffer *RE_addStrandBuffer(ObjectRen *obr, int totvert)
+{
+	StrandBuffer *buf = MEM_callocN(sizeof(StrandBuffer), "StrandBuffer");
+
+	buf->vert = MEM_callocN(sizeof(StrandVert)*totvert, "StrandVert");
+	buf->totvert = totvert;
+
+	/* buffer and render object point at each other */
+	buf->obr = obr;
+	obr->strandbuf = buf;
+
+	return buf;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Allocate a zeroed ObjectRen, fill in the given object, parent, indices and
+ * layer, append it to re->objecttable and return it. */
+ObjectRen *RE_addRenderObject(Render *re, Object *ob, Object *par, int index, int psysindex, int lay)
+{
+	ObjectRen *new_obr = MEM_callocN(sizeof(ObjectRen), "object render struct");
+
+	new_obr->ob = ob;
+	new_obr->par = par;
+	new_obr->index = index;
+	new_obr->psysindex = psysindex;
+	new_obr->lay = lay;
+
+	BLI_addtail(&re->objecttable, new_obr);
+
+	return new_obr;
+}
+
+/* Free a vertex node table: every vertex block with the optional tables
+ * hanging off its node, then the node array itself.
+ * The walk stops at the first node without a vertex block. */
+void free_renderdata_vertnodes(VertTableNode *vertnodes)
+{
+	VertTableNode *node;
+
+	if (vertnodes == NULL) {
+		return;
+	}
+
+	for (node = vertnodes; node->vert != NULL; node++) {
+		MEM_freeN(node->vert);
+
+		/* optional tables, only present when they were requested */
+		if (node->rad != NULL) {
+			MEM_freeN(node->rad);
+		}
+		if (node->strand != NULL) {
+			MEM_freeN(node->strand);
+		}
+		if (node->tangent != NULL) {
+			MEM_freeN(node->tangent);
+		}
+		if (node->stress != NULL) {
+			MEM_freeN(node->stress);
+		}
+		if (node->winspeed != NULL) {
+			MEM_freeN(node->winspeed);
+		}
+		if (node->origindex != NULL) {
+			MEM_freeN(node->origindex);
+		}
+	}
+
+	MEM_freeN(vertnodes);
+}
+
+/* Free a face node table: every face block with the optional tables hanging
+ * off its node, then the node array itself.
+ * The walk stops at the first node without a face block. */
+void free_renderdata_vlaknodes(VlakTableNode *vlaknodes)
+{
+	VlakTableNode *node;
+
+	if (vlaknodes == NULL) {
+		return;
+	}
+
+	for (node = vlaknodes; node->vlak != NULL; node++) {
+		int layer;
+
+		MEM_freeN(node->vlak);
+
+		/* optional tables, only present when they were requested */
+		if (node->mtface != NULL) {
+			MEM_freeN(node->mtface);
+		}
+		if (node->mcol != NULL) {
+			MEM_freeN(node->mcol);
+		}
+		if (node->origindex != NULL) {
+			MEM_freeN(node->origindex);
+		}
+		if (node->surfnor != NULL) {
+			MEM_freeN(node->surfnor);
+		}
+		for (layer = 0; layer < MAX_MTFACE; layer++) {
+			if (node->tangent_arrays[layer] != NULL) {
+				MEM_freeN(node->tangent_arrays[layer]);
+			}
+		}
+		if (node->radface != NULL) {
+			MEM_freeN(node->radface);
+		}
+	}
+
+	MEM_freeN(vlaknodes);
+}
+
+/* Free a strand node table: every strand block with the optional tables
+ * hanging off its node, then the node array itself.
+ * The walk stops at the first node without a strand block. */
+static void free_renderdata_strandnodes(StrandTableNode *strandnodes)
+{
+	StrandTableNode *node;
+
+	if (strandnodes == NULL) {
+		return;
+	}
+
+	for (node = strandnodes; node->strand != NULL; node++) {
+		MEM_freeN(node->strand);
+
+		/* optional tables, only present when they were requested */
+		if (node->uv != NULL) {
+			MEM_freeN(node->uv);
+		}
+		if (node->mcol != NULL) {
+			MEM_freeN(node->mcol);
+		}
+		if (node->winspeed != NULL) {
+			MEM_freeN(node->winspeed);
+		}
+		if (node->surfnor != NULL) {
+			MEM_freeN(node->surfnor);
+		}
+		if (node->simplify != NULL) {
+			MEM_freeN(node->simplify);
+		}
+		if (node->face != NULL) {
+			MEM_freeN(node->face);
+		}
+	}
+
+	MEM_freeN(strandnodes);
+}
+
+/* Free the render database of 're': the tables of every render object, the
+ * per-instance data, the sorted halo array and finally the customdata name,
+ * object and instance lists themselves. */
+void free_renderdata_tables(Render *re)
+{
+	ObjectRen *obr;
+
+	/* tables owned by each render object */
+	for (obr = re->objecttable.first; obr != NULL; obr = obr->next) {
+		StrandBuffer *strandbuf;
+
+		if (obr->vertnodes != NULL) {
+			free_renderdata_vertnodes(obr->vertnodes);
+			obr->vertnodes = NULL;
+			obr->vertnodeslen = 0;
+		}
+
+		if (obr->vlaknodes != NULL) {
+			free_renderdata_vlaknodes(obr->vlaknodes);
+			obr->vlaknodes = NULL;
+			obr->vlaknodeslen = 0;
+			obr->totvlak = 0;
+		}
+
+		if (obr->bloha != NULL) {
+			HaloRen **block;
+
+			/* halo blocks, the table ends with a NULL entry */
+			for (block = obr->bloha; *block != NULL; block++) {
+				MEM_freeN(*block);
+			}
+
+			MEM_freeN(obr->bloha);
+			obr->bloha = NULL;
+			obr->blohalen = 0;
+		}
+
+		if (obr->strandnodes != NULL) {
+			free_renderdata_strandnodes(obr->strandnodes);
+			obr->strandnodes = NULL;
+			obr->strandnodeslen = 0;
+		}
+
+		strandbuf = obr->strandbuf;
+		if (strandbuf != NULL) {
+			if (strandbuf->vert != NULL) {
+				MEM_freeN(strandbuf->vert);
+			}
+			if (strandbuf->bound != NULL) {
+				MEM_freeN(strandbuf->bound);
+			}
+			MEM_freeN(strandbuf);
+		}
+
+		/* customdata layer names */
+		if (obr->mtface != NULL) {
+			MEM_freeN(obr->mtface);
+		}
+		if (obr->mcol != NULL) {
+			MEM_freeN(obr->mcol);
+		}
+
+		/* raytrace data */
+		if (obr->rayfaces != NULL) {
+			MEM_freeN(obr->rayfaces);
+			obr->rayfaces = NULL;
+		}
+		if (obr->rayprimitives != NULL) {
+			MEM_freeN(obr->rayprimitives);
+			obr->rayprimitives = NULL;
+		}
+		if (obr->raytree != NULL) {
+			RE_rayobject_free(obr->raytree);
+			obr->raytree = NULL;
+		}
+	}
+
+	if (re->objectinstance != NULL) {
+		ObjectInstanceRen *obi;
+
+		for (obi = re->instancetable.first; obi != NULL; obi = obi->next) {
+			if (obi->vectors != NULL) {
+				MEM_freeN(obi->vectors);
+			}
+			if (obi->raytree != NULL) {
+				RE_rayobject_free(obi->raytree);
+			}
+		}
+
+		/* the instance list is emptied here, right after the array is freed */
+		MEM_freeN(re->objectinstance);
+		re->objectinstance = NULL;
+		re->totinstance = 0;
+		re->instancetable.first = NULL;
+		re->instancetable.last = NULL;
+	}
+
+	if (re->sortedhalos != NULL) {
+		MEM_freeN(re->sortedhalos);
+		re->sortedhalos = NULL;
+	}
+
+	BLI_freelistN(&re->customdata_names);
+	BLI_freelistN(&re->objecttable);
+	BLI_freelistN(&re->instancetable);
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Return the halo with index 'nr', creating its storage on demand.
+ *
+ * Halos live in blocks of 256; obr->bloha holds one pointer per block.
+ * The pointer table is grown in steps of TABLEINITSIZE, always far enough to
+ * contain the requested block plus a trailing NULL entry (the free loop
+ * relies on that terminator). A newly allocated block is zeroed.
+ *
+ * Returns NULL (after printing an error) when 'nr' is negative. */
+HaloRen *RE_findOrAddHalo(ObjectRen *obr, int nr)
+{
+	HaloRen *h;
+	int a;
+
+	if (nr<0) {
+		printf("error in findOrAddHalo: %d\n", nr);
+		return NULL;
+	}
+	a= nr>>8;
+
+	if (a>=obr->blohalen-1) {  /* Need to allocate more columns..., and keep last element NULL for free loop */
+		HaloRen **temp= obr->bloha;
+		int newlen= obr->blohalen;
+
+		/* A single step is not enough when 'nr' lies more than one step
+		 * beyond the current table, keep growing until block 'a' fits. */
+		while (a>=newlen-1)
+			newlen+= TABLEINITSIZE;
+
+		/* calloc: the entries past the copied part are already NULL */
+		obr->bloha= (HaloRen **)MEM_callocN(sizeof(void *)*newlen, "Bloha");
+		if (temp) {
+			memcpy(obr->bloha, temp, obr->blohalen*sizeof(void *));
+			MEM_freeN(temp);
+		}
+		obr->blohalen= newlen;
+	}
+
+	h= obr->bloha[a];
+	if (h==NULL) {
+		h= (HaloRen *)MEM_callocN(256*sizeof(HaloRen), "findOrAdHalo");
+		obr->bloha[a]= h;
+	}
+	h+= (nr & 255);
+	return h;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Add a halo at position 'vec' to 'obr' and initialize it from material 'ma'.
+ *
+ * - vec1: optional second point; when given the halo becomes a vector halo
+ *   (HA_VECT) oriented along the projected line from vec1 to vec.
+ * - orco: optional coordinates used as texture coordinates, unless the
+ *   texture maps on normal or object coordinates.
+ * - hasize: halo size, a size of zero creates no halo.
+ * - vectsize: blend factor between 'hasize' (0) and the distance between
+ *   vec and vec1 (1) for the size of a vector halo.
+ * - seed: stored modulo 256 in the halo.
+ *
+ * Only the first texture slot of the material is looked at.
+ *
+ * Returns the new halo, or NULL when 'hasize' is zero or when a point
+ * projects to a homogeneous coordinate with w == 0. */
+HaloRen *RE_inithalo(Render *re, ObjectRen *obr, Material *ma,
+                     const float vec[3], const float vec1[3],
+                     const float *orco, float hasize, float vectsize, int seed)
+{
+	const bool skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0;
+	const bool texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0;
+	HaloRen *har;
+	MTex *mtex;
+	float tin, tr, tg, tb, ta;
+	float xn, yn, zn, texvec[3], hoco[4], hoco1[4];
+
+	if (hasize==0.0f) return NULL;
+
+	/* reject points that can not be divided by w, before a halo is added */
+	projectverto(vec, re->winmat, hoco);
+	if (hoco[3]==0.0f) return NULL;
+	if (vec1) {
+		projectverto(vec1, re->winmat, hoco1);
+		if (hoco1[3]==0.0f) return NULL;
+	}
+
+	har= RE_findOrAddHalo(obr, obr->tothalo++);
+	copy_v3_v3(har->co, vec);
+	har->hasize= hasize;
+
+	/* actual projectvert is done in function project_renderdata() because of parts/border/pano */
+	/* we do it here for sorting of halos */
+	zn= hoco[3];
+	har->xs= 0.5f*re->winx*(hoco[0]/zn);
+	har->ys= 0.5f*re->winy*(hoco[1]/zn);
+	har->zs= 0x7FFFFF*(hoco[2]/zn);
+
+	har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn);
+
+	/* halovect */
+	if (vec1) {
+
+		har->type |= HA_VECT;
+
+		/* angle of the screen space line between both projected points */
+		xn=  har->xs - 0.5f*re->winx*(hoco1[0]/hoco1[3]);
+		yn=  har->ys - 0.5f*re->winy*(hoco1[1]/hoco1[3]);
+		if (yn == 0.0f && xn >= 0.0f) zn = 0.0f;
+		else zn = atan2f(yn, xn);
+
+		har->sin = sinf(zn);
+		har->cos = cosf(zn);
+		zn= len_v3v3(vec1, vec);
+
+		/* size is a blend between the given size and the vector length */
+		har->hasize= vectsize*zn + (1.0f-vectsize)*hasize;
+
+		sub_v3_v3v3(har->no, vec, vec1);
+		normalize_v3(har->no);
+	}
+
+	if (ma->mode & MA_HALO_XALPHA) har->type |= HA_XALPHA;
+
+	/* material settings */
+	har->alfa= ma->alpha;
+	har->r= ma->r;
+	har->g= ma->g;
+	har->b= ma->b;
+	har->add= (255.0f*ma->add);
+	har->mat= ma;
+	har->hard= ma->har;
+	har->seed= seed % 256;
+
+	if (ma->mode & MA_STAR) har->starpoints= ma->starc;
+	if (ma->mode & MA_HALO_LINES) har->linec= ma->linec;
+	if (ma->mode & MA_HALO_RINGS) har->ringc= ma->ringc;
+	if (ma->mode & MA_HALO_FLARE) har->flarec= ma->flarec;
+
+
+	if (ma->mtex[0]) {
+
+		if (ma->mode & MA_HALOTEX) {
+			/* only flag the halo as textured, no texture is evaluated here */
+			har->tex = 1;
+		}
+		else if (har->mat->septex & (1 << 0)) {
+			/* only 1 level textures */
+		}
+		else {
+			/* evaluate the texture once and bake the result into the halo */
+			mtex= ma->mtex[0];
+			copy_v3_v3(texvec, vec);
+
+			if (mtex->texco & TEXCO_NORM) {
+				;
+			}
+			else if (mtex->texco & TEXCO_OBJECT) {
+				/* texvec[0]+= imatbase->ivec[0]; */
+				/* texvec[1]+= imatbase->ivec[1]; */
+				/* texvec[2]+= imatbase->ivec[2]; */
+				/* mul_m3_v3(imatbase->imat, texvec); */
+			}
+			else {
+				if (orco) {
+					copy_v3_v3(texvec, orco);
+				}
+			}
+
+			externtex(mtex,
+			          texvec,
+			          &tin, &tr, &tg, &tb, &ta,
+			          0,
+			          re->pool,
+			          skip_load_image,
+			          texnode_preview);
+
+			yn= tin*mtex->colfac;
+			//zn= tin*mtex->alphafac;
+
+			if (mtex->mapto & MAP_COL) {
+				/* linear blend between material color and texture color */
+				zn= 1.0f-yn;
+				har->r= (yn*tr+ zn*ma->r);
+				har->g= (yn*tg+ zn*ma->g);
+				har->b= (yn*tb+ zn*ma->b);
+			}
+			if (mtex->texco & TEXCO_UV) {
+				har->alfa= tin;
+			}
+			if (mtex->mapto & MAP_ALPHA)
+				har->alfa= tin;
+		}
+	}
+
+	/* settings stored on the halo itself, taken from the render */
+	har->pool = re->pool;
+	har->skip_load_image = skip_load_image;
+	har->texnode_preview = texnode_preview;
+
+	return har;
+}
+
+/* Add a halo for a particle at position 'vec' to 'obr' and initialize it
+ * from material 'ma'.
+ *
+ * - dm: mesh whose face data is searched for the MTFace layer of a UV mapped
+ *   texture; only accessed when 'uvco' is given.
+ * - vec1: optional second point; when given the halo becomes a vector halo
+ *   (HA_VECT) oriented along the projected line from vec1 to vec.
+ * - orco: optional coordinates, used as texture coordinates when no other
+ *   mapping applies.
+ * - uvco: optional UV coordinates, two floats per MTFace layer.
+ * - hasize: halo size, a size of zero creates no halo.
+ * - vectsize: blend factor between 'hasize' (0) and half the distance
+ *   between vec and vec1 (1) for the size of a vector halo.
+ * - seed: stored modulo 256 in the halo.
+ * - pa_co: particle coordinates, used for textures mapped on particle
+ *   coordinates.
+ *   NOTE(review): unlike 'uvco' and 'orco' this pointer is not NULL checked
+ *   before use - confirm that callers always pass it.
+ *
+ * All texture slots of the material that are not disabled by 'septex' are
+ * evaluated in order, each one blending into the result of the previous.
+ *
+ * Returns the new halo, or NULL when 'hasize' is zero or when a point
+ * projects to a homogeneous coordinate with w == 0. */
+HaloRen *RE_inithalo_particle(Render *re, ObjectRen *obr, DerivedMesh *dm, Material *ma,
+                              const float vec[3], const float vec1[3],
+                              const float *orco, const float *uvco, float hasize, float vectsize, int seed, const float pa_co[3])
+{
+	const bool skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0;
+	const bool texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0;
+	HaloRen *har;
+	MTex *mtex;
+	float tin, tr, tg, tb, ta;
+	float xn, yn, zn, texvec[3], hoco[4], hoco1[4], in[3], tex[3], out[3];
+	int i, hasrgb;
+
+	if (hasize==0.0f) return NULL;
+
+	/* reject points that can not be divided by w, before a halo is added */
+	projectverto(vec, re->winmat, hoco);
+	if (hoco[3]==0.0f) return NULL;
+	if (vec1) {
+		projectverto(vec1, re->winmat, hoco1);
+		if (hoco1[3]==0.0f) return NULL;
+	}
+
+	har= RE_findOrAddHalo(obr, obr->tothalo++);
+	copy_v3_v3(har->co, vec);
+	har->hasize= hasize;
+
+	/* actual projectvert is done in function project_renderdata() because of parts/border/pano */
+	/* we do it here for sorting of halos */
+	zn= hoco[3];
+	har->xs= 0.5f*re->winx*(hoco[0]/zn);
+	har->ys= 0.5f*re->winy*(hoco[1]/zn);
+	har->zs= 0x7FFFFF*(hoco[2]/zn);
+
+	har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn);
+
+	/* halovect */
+	if (vec1) {
+
+		har->type |= HA_VECT;
+
+		/* angle of the screen space line between both projected points */
+		xn=  har->xs - 0.5f*re->winx*(hoco1[0]/hoco1[3]);
+		yn=  har->ys - 0.5f*re->winy*(hoco1[1]/hoco1[3]);
+		if (yn == 0.0f && xn >= 0.0f) zn = 0.0f;
+		else zn = atan2f(yn, xn);
+
+		har->sin = sinf(zn);
+		har->cos = cosf(zn);
+		/* half the distance between both points */
+		zn= len_v3v3(vec1, vec)*0.5f;
+
+		/* size is a blend between the given size and that half length */
+		har->hasize= vectsize*zn + (1.0f-vectsize)*hasize;
+
+		sub_v3_v3v3(har->no, vec, vec1);
+		normalize_v3(har->no);
+	}
+
+	if (ma->mode & MA_HALO_XALPHA) har->type |= HA_XALPHA;
+
+	/* material settings */
+	har->alfa= ma->alpha;
+	har->r= ma->r;
+	har->g= ma->g;
+	har->b= ma->b;
+	har->add= (255.0f*ma->add);
+	har->mat= ma;
+	har->hard= ma->har;
+	har->seed= seed % 256;
+
+	if (ma->mode & MA_STAR) har->starpoints= ma->starc;
+	if (ma->mode & MA_HALO_LINES) har->linec= ma->linec;
+	if (ma->mode & MA_HALO_RINGS) har->ringc= ma->ringc;
+	if (ma->mode & MA_HALO_FLARE) har->flarec= ma->flarec;
+
+	if ((ma->mode & MA_HALOTEX) && ma->mtex[0])
+		har->tex= 1;
+
+	/* evaluate every enabled texture slot and bake the result into the halo */
+	for (i=0; i<MAX_MTEX; i++)
+		if (ma->mtex[i] && (ma->septex & (1<<i))==0) {
+			mtex= ma->mtex[i];
+			copy_v3_v3(texvec, vec);
+
+			/* texture coordinates, 'vec' is kept when nothing else applies */
+			if (mtex->texco & TEXCO_NORM) {
+				;
+			}
+			else if (mtex->texco & TEXCO_OBJECT) {
+				if (mtex->object)
+					mul_m4_v3(mtex->object->imat_ren, texvec);
+			}
+			else if (mtex->texco & TEXCO_GLOB) {
+				copy_v3_v3(texvec, vec);
+			}
+			else if (mtex->texco & TEXCO_UV && uvco) {
+				/* named layer, with the active layer as fallback */
+				int uv_index=CustomData_get_named_layer_index(&dm->faceData, CD_MTFACE, mtex->uvname);
+				if (uv_index<0)
+					uv_index=CustomData_get_active_layer_index(&dm->faceData, CD_MTFACE);
+
+				/* make the index relative to the first MTFace layer */
+				uv_index-=CustomData_get_layer_index(&dm->faceData, CD_MTFACE);
+
+				/* remap the UV with 2*x - 1 */
+				texvec[0]=2.0f*uvco[2*uv_index]-1.0f;
+				texvec[1]=2.0f*uvco[2*uv_index+1]-1.0f;
+				texvec[2]=0.0f;
+			}
+			else if (mtex->texco & TEXCO_PARTICLE) {
+				/* particle coordinates in range [0, 1] */
+				texvec[0] = 2.f * pa_co[0] - 1.f;
+				texvec[1] = 2.f * pa_co[1] - 1.f;
+				texvec[2] = pa_co[2];
+			}
+			else if (orco) {
+				copy_v3_v3(texvec, orco);
+			}
+
+			hasrgb = externtex(mtex,
+			                   texvec,
+			                   &tin, &tr, &tg, &tb, &ta,
+			                   0,
+			                   re->pool,
+			                   skip_load_image,
+			                   texnode_preview);
+
+			//yn= tin*mtex->colfac;
+			//zn= tin*mtex->alphafac;
+			if (mtex->mapto & MAP_COL) {
+				/* blend the texture color over the current halo color */
+				tex[0]=tr;
+				tex[1]=tg;
+				tex[2]=tb;
+				out[0]=har->r;
+				out[1]=har->g;
+				out[2]=har->b;
+
+				texture_rgb_blend(in, tex, out, tin, mtex->colfac, mtex->blendtype);
+			//	zn= 1.0-yn;
+				//har->r= (yn*tr+ zn*ma->r);
+				//har->g= (yn*tg+ zn*ma->g);
+				//har->b= (yn*tb+ zn*ma->b);
+				har->r= in[0];
+				har->g= in[1];
+				har->b= in[2];
+			}
+
+			/* alpha returned, so let's use it instead of intensity */
+			if (hasrgb)
+				tin = ta;
+
+			if (mtex->mapto & MAP_ALPHA)
+				har->alfa = texture_value_blend(mtex->def_var, har->alfa, tin, mtex->alphafac, mtex->blendtype);
+			/* hardness is blended as har->hard / 127 and written back as 1 + 126 * result */
+			if (mtex->mapto & MAP_HAR)
+				har->hard = 1.0f+126.0f*texture_value_blend(mtex->def_var, ((float)har->hard)/127.0f, tin, mtex->hardfac, mtex->blendtype);
+			/* the ray mirror channel drives the halo size here, in units of 100 */
+			if (mtex->mapto & MAP_RAYMIRR)
+				har->hasize = 100.0f*texture_value_blend(mtex->def_var, har->hasize/100.0f, tin, mtex->raymirrfac, mtex->blendtype);
+			/* the translucency channel drives the halo 'add' value here */
+			if (mtex->mapto & MAP_TRANSLU) {
+				float add = texture_value_blend(mtex->def_var, (float)har->add/255.0f, tin, mtex->translfac, mtex->blendtype);
+				CLAMP(add, 0.f, 1.f);
+				har->add = 255.0f*add;
+			}
+			/* now what on earth is this good for?? */
+			//if (mtex->texco & 16) {
+			//	har->alfa= tin;
+			//}
+		}
+
+	/* settings stored on the halo itself, taken from the render */
+	har->pool = re->pool;
+	har->skip_load_image = (re->r.scemode & R_NO_IMAGE_LOAD) != 0;
+	har->texnode_preview = (re->r.scemode & R_TEXNODE_PREVIEW) != 0;
+
+	return har;
+}
+
+/* -------------------------- operations on entire database ----------------------- */
+
+/* halo clip test; for panorama renders the X range is widened to cover all horizontal parts */
+static int panotestclip(Render *re, bool do_pano, float v[4])
+{
+	/* part size must already be clamped (RE_parts_clamp has to run first) */
+	BLI_assert(re->partx == min_ii(re->r.tilex, re->rectx));
+	BLI_assert(re->party == min_ii(re->r.tiley, re->recty));
+
+	/* regular (non-panorama) renders use the standard clip test */
+	if (!do_pano) {
+		return testclip(v);
+	}
+	else {
+		/* panorama: build the clip code here, X is tested against the full strip of parts */
+		const int xparts = (re->rectx + re->partx - 1) / re->partx;
+		const float w_abs = fabsf(v[3]);
+		const float w_abs_x = w_abs * xparts;
+		short clipflag = 0;
+
+		/* Z: bit 16 below -|w|, bit 32 above |w| (this used to be "v[2] < 0", see clippz()) */
+		if (v[2] < -w_abs) clipflag |= 16;
+		else if (v[2] > w_abs) clipflag |= 32;
+
+		/* Y: bit 4 above |w|, bit 8 below -|w| */
+		if (v[1] > w_abs) clipflag |= 4;
+		else if (v[1] < -w_abs) clipflag |= 8;
+
+		/* X: bit 2 above, bit 1 below the widened range */
+		if (v[0] > w_abs_x) clipflag |= 2;
+		else if (v[0] < -w_abs_x) clipflag |= 1;
+		return clipflag;
+	}
+}
+
+/**
+ * Computes the projected ("hcs", homogeneous clip space) data of every halo in the
+ * render database: screen position, z-buffer depth, screen radius and vertical
+ * extent. Halos rejected by the clip test get miny/maxy set to -10000.
+ *
+ * \param projectfunc: projects a 3D point with \a re->winmat to 4D coordinates.
+ * \param do_pano: rotate halos around Y by \a xoffs and use the panorama clip test.
+ * \note The original notes name envmapping (envmap.c) and shadow buffering (shadbuf.c) as callers.
+ */
+
+void project_renderdata(Render *re,
+                        void (*projectfunc)(const float *, float mat[4][4], float *),
+                        bool do_pano, float xoffs, bool UNUSED(do_buckets))
+{
+	ObjectRen *obr;
+	HaloRen *har = NULL;
+	float zn, vec[3], hoco[4];
+	int a;
+
+	if (do_pano) {
+		float panophi= xoffs;	/* xoffs is the panorama rotation angle, fed to sinf/cosf */
+
+		re->panosi = sinf(panophi);
+		re->panoco = cosf(panophi);
+	}
+
+	for (obr=re->objecttable.first; obr; obr=obr->next) {
+		/* calculate view coordinates (and zbuffer value) for each halo of this object */
+		for (a=0; a<obr->tothalo; a++) {
+			if ((a & 255)==0) har= obr->bloha[a>>8];	/* halos are stored in blocks of 256 */
+			else har++;
+
+			if (do_pano) {	/* rotate the halo position around the Y axis by the panorama angle */
+				vec[0]= re->panoco*har->co[0] + re->panosi*har->co[2];
+				vec[1]= har->co[1];
+				vec[2]= -re->panosi*har->co[0] + re->panoco*har->co[2];
+			}
+			else {
+				copy_v3_v3(vec, har->co);
+			}
+
+			projectfunc(vec, re->winmat, hoco);
+
+			/* halos are clipped less strictly: X and Y are halved for the clip test, Z is not */
+			hoco[0]*= 0.5f;
+			hoco[1]*= 0.5f;
+
+			if ( panotestclip(re, do_pano, hoco) ) {
+				har->miny= har->maxy= -10000;	/* that way render clips it */
+			}
+			else if (hoco[3]<0.0f) {
+				har->miny= har->maxy= -10000;	/* render clips it */
+			}
+			else { /* do the projection...*/
+				/* bring back hocos: undo the halving done for the clip test */
+				hoco[0]*= 2.0f;
+				hoco[1]*= 2.0f;
+
+				zn= hoco[3];
+				har->xs= 0.5f*re->winx*(1.0f+hoco[0]/zn); /* the 0.5 negates the previous 2...*/
+				har->ys= 0.5f*re->winy*(1.0f+hoco[1]/zn);
+
+				/* this should be the zbuffer coordinate (scaled by 0x7FFFFF) */
+				har->zs= 0x7FFFFF*(hoco[2]/zn);
+				/* taking this from the face clip functions? seems ok... (scaled by 0x7FFFFFFF) */
+				har->zBufDist = 0x7FFFFFFF*(hoco[2]/zn);
+
+				vec[0]+= har->hasize;	/* project a point offset by the halo size along X... */
+				projectfunc(vec, re->winmat, hoco);
+				vec[0]-= har->hasize;	/* ...and restore vec for the Z offset below */
+				zn= hoco[3];
+				har->rad= fabsf(har->xs- 0.5f*re->winx*(1.0f+hoco[0]/zn));	/* screen-space radius */
+
+				/* this clip is not really OK, to prevent stars to become too large */
+				if (har->type & HA_ONLYSKY) {
+					if (har->rad>3.0f) har->rad= 3.0f;
+				}
+
+				har->radsq= har->rad*har->rad;
+
+				har->miny= har->ys - har->rad/re->ycor;	/* vertical extent, radius divided by re->ycor */
+				har->maxy= har->ys + har->rad/re->ycor;
+
+				/* the Zd value is still not really correct for pano */
+
+				vec[2] -= har->hasize;  /* z negative, otherwise it's clipped */
+				projectfunc(vec, re->winmat, hoco);
+				zn = hoco[3];
+				zn = fabsf((float)har->zs - 0x7FFFFF * (hoco[2] / zn));	/* depth difference over one halo size */
+				har->zd = CLAMPIS(zn, 0, INT_MAX);
+
+			}
+
+		}
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+
+void RE_updateRenderInstance(Render *re, ObjectInstanceRen *obi, int flag)
+{
+	/* flag is a bitmask that specifies what things have changed; each bit refreshes its derived matrices */
+	if (flag & RE_OBJECT_INSTANCES_UPDATE_OBMAT) {
+		copy_m4_m4(obi->obmat, obi->ob->obmat);	/* take the object matrix from the Object */
+		invert_m4_m4(obi->obinvmat, obi->obmat);	/* and cache its inverse */
+	}
+	if (flag & RE_OBJECT_INSTANCES_UPDATE_VIEW) {
+		mul_m4_m4m4(obi->localtoviewmat, re->viewmat, obi->obmat);	/* viewmat * obmat */
+		mul_m4_m4m4(obi->localtoviewinvmat, obi->obinvmat, re->viewinv);	/* obinvmat * viewinv */
+	}
+}
+
+void RE_updateRenderInstances(Render *re, int flag)
+{
+	int i = 0;  /* every entry of the instance array gets the same update mask */
+	while (i < re->totinstance)
+		RE_updateRenderInstance(re, &re->objectinstance[i++], flag);
+}
+
+ObjectInstanceRen *RE_addRenderInstance(
+        Render *re, ObjectRen *obr, Object *ob, Object *par,
+        int index, int psysindex, float mat[4][4], int lay, const DupliObject *dob)
+{
+	ObjectInstanceRen *obi = MEM_callocN(sizeof(ObjectInstanceRen), "ObjectInstanceRen");
+	const ParticleSystem *psys = (par && dob) ? dob->particle_system : NULL;
+
+	/* identity of the instance; the allocation is zeroed, so all other members start at 0 */
+	obi->obr = obr;
+	obi->ob = ob;
+	obi->par = par;
+	obi->index = index;
+	obi->psysindex = psysindex;
+	obi->lay = lay;
+
+	/* particle info: only when a parent and a dupli object with a particle system are given */
+	if (psys) {
+		/* indices from totpart upwards are looked up in the child array and mapped to
+		 * the child's parent; without a child array no particle is resolved (-1) */
+		int part_index = -1;
+
+		if (obi->index < psys->totpart) {
+			part_index = obi->index;
+		}
+		else if (psys->child) {
+			part_index = psys->child[obi->index - psys->totpart].parent;
+		}
+
+		if (part_index >= 0) {
+			const ParticleData *pa = &psys->particles[part_index];
+
+			obi->part_index = part_index;
+			obi->part_size = pa->size;
+			obi->part_age = RE_GetStats(re)->cfra - pa->time;
+			obi->part_lifetime = pa->lifetime;
+			copy_v3_v3(obi->part_co, pa->state.co);
+			copy_v3_v3(obi->part_vel, pa->state.vel);
+			copy_v3_v3(obi->part_avel, pa->state.ave);
+		}
+	}
+
+	/* random id: from the dupli object, else hashed from the ID name past its first two characters */
+	if (dob) {
+		obi->random_id = dob->random_id;
+	}
+	else {
+		obi->random_id = BLI_hash_int_2d(BLI_hash_string(obi->ob->id.name + 2), 0);
+	}
+
+	RE_updateRenderInstance(re, obi, RE_OBJECT_INSTANCES_UPDATE_OBMAT | RE_OBJECT_INSTANCES_UPDATE_VIEW);
+
+	/* optional dupli transform: store the matrix and the inverse-transpose of its 3x3 part */
+	if (mat) {
+		float rot3[3][3];
+
+		copy_m4_m4(obi->mat, mat);
+		copy_m3_m4(rot3, mat);
+		invert_m3_m3(obi->nmat, rot3);
+		transpose_m3(obi->nmat);
+		obi->flag |= R_DUPLI_TRANSFORMED;
+	}
+
+	BLI_addtail(&re->instancetable, obi);
+
+	return obi;
+}
+
+void RE_instance_get_particle_info(struct ObjectInstanceRen *obi, float *index, float *random, float *age, float *lifetime, float co[3], float *size, float vel[3], float angvel[3])
+{	/* copies the particle data stored on the instance into the caller's output arguments */
+	*index = obi->part_index;	/* particle index, converted to float */
+	*random = BLI_hash_int_01(obi->part_index);	/* derived from the index alone, so stable per particle */
+	*age = obi->part_age;
+	*lifetime = obi->part_lifetime;
+	copy_v3_v3(co, obi->part_co);
+	*size = obi->part_size;
+	copy_v3_v3(vel, obi->part_vel);
+	copy_v3_v3(angvel, obi->part_avel);	/* angular velocity */
+}
+
+
+void RE_makeRenderInstances(Render *re)
+{
+	ObjectInstanceRen *obi, *oldobi;
+	ListBase newlist;
+	int tot;
+
+	/* convert list of object instances to an array for index based lookup */
+	tot= BLI_listbase_count(&re->instancetable);
+	re->objectinstance= MEM_callocN(sizeof(ObjectInstanceRen)*tot, "ObjectInstance");
+	re->totinstance= tot;
+	newlist.first= newlist.last= NULL;
+	/* copy each list node into the next array slot; slots are only kept for instances with an ObjectRen */
+	obi= re->objectinstance;
+	for (oldobi=re->instancetable.first; oldobi; oldobi=oldobi->next) {
+		*obi= *oldobi;
+
+		if (obi->obr) {
+			obi->prev= obi->next= NULL;	/* drop the copied links to the old list */
+			BLI_addtail(&newlist, obi);	/* the array element itself becomes the list node */
+			obi++;
+		}
+		else
+			re->totinstance--;	/* slot is reused by the next copy */
+	}
+
+	BLI_freelistN(&re->instancetable);	/* free the old, separately allocated nodes */
+	re->instancetable= newlist;	/* the list now links the array elements */
+}
+
+/* four functions to facilitate envmap rotation for raytrace: this one saves and transforms the ray start */
+void RE_instance_rotate_ray_start(ObjectInstanceRen *obi, Isect *is)
+{
+	if (obi == NULL || (obi->flag & R_ENV_TRANSFORMED) == 0) return;
+
+	copy_v3_v3(is->origstart, is->start);  /* kept for RE_instance_rotate_ray_restore() */
+	mul_m4_v3(obi->imat, is->start);
+}
+
+void RE_instance_rotate_ray_dir(ObjectInstanceRen *obi, Isect *is)
+{
+	float end[3];
+
+	if (obi == NULL || (obi->flag & R_ENV_TRANSFORMED) == 0) return;
+
+	/* rebuild the direction from the transformed end point (origstart + dir) and the current start */
+	copy_v3_v3(is->origdir, is->dir);
+	add_v3_v3v3(end, is->origstart, is->dir);
+	mul_m4_v3(obi->imat, end);
+	sub_v3_v3v3(is->dir, end, is->start);
+}
+
+void RE_instance_rotate_ray(ObjectInstanceRen *obi, Isect *is)
+{
+	RE_instance_rotate_ray_start(obi, is);	/* must run first: it fills is->origstart, which the dir step reads */
+	RE_instance_rotate_ray_dir(obi, is);
+}
+
+void RE_instance_rotate_ray_restore(ObjectInstanceRen *obi, Isect *is)
+{
+	if (obi == NULL || (obi->flag & R_ENV_TRANSFORMED) == 0) return;
+
+	copy_v3_v3(is->start, is->origstart);  /* undo RE_instance_rotate_ray_start() */
+	copy_v3_v3(is->dir, is->origdir);      /* undo RE_instance_rotate_ray_dir() */
+}
+
+int clip_render_object(float boundbox[2][3], float bounds[4], float winmat[4][4])
+{
+	float projmat[4][4], co[4];
+	int corner, clip, clip_all = -1;
+
+	copy_m4_m4(projmat, winmat);
+
+	/* Project the 8 bounding-box corners; bits 0..2 of 'corner' pick the box side per axis.
+	 * 'clip_all' keeps (AND) the clip bits shared by every corner so far. */
+	for (corner = 0; corner < 8; corner++) {
+		float w, xmin, xmax, ymin, ymax;
+
+		co[0] = boundbox[(corner & 1) ? 0 : 1][0];
+		co[1] = boundbox[(corner & 2) ? 0 : 1][1];
+		co[2] = boundbox[(corner & 4) ? 0 : 1][2];
+		co[3] = 1.0;
+		mul_m4_v4(projmat, co);
+		w = co[3];
+
+		/* X/Y limits: caller supplied bounds (scaled by w) or the default -w..w window */
+		xmin = bounds ? bounds[0] * w : -w;
+		xmax = bounds ? bounds[1] * w : w;
+		ymin = bounds ? bounds[2] * w : -w;
+		ymax = bounds ? bounds[3] * w : w;
+
+		clip = 0;
+		if      (co[0] < xmin) clip |= 1;
+		else if (co[0] > xmax) clip |= 2;
+		if      (co[1] > ymax) clip |= 4;
+		else if (co[1] < ymin) clip |= 8;
+		if      (co[2] < -w) clip |= 16;
+		else if (co[2] > w) clip |= 32;
+
+		/* no single clip plane rejects all corners seen so far: result is 0 */
+		clip_all &= clip;
+		if (clip_all == 0) return 0;
+	}
+
+	return clip_all;
+}
+
diff --git a/source/blender/render/intern/source/shadbuf.c b/source/blender/render/intern/source/shadbuf.c
new file mode 100644
index 00000000000..04e9177241b
--- /dev/null
+++ b/source/blender/render/intern/source/shadbuf.c
@@ -0,0 +1,2647 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * Contributor(s): 2004-2006, Blender Foundation
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/shadbuf.c
+ *  \ingroup render
+ */
+
+
+#include <math.h>
+#include <string.h>
+
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_group_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_material_types.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_jitter_2d.h"
+#include "BLI_memarena.h"
+#include "BLI_rand.h"
+#include "BLI_utildefines.h"
+
+#include "BKE_global.h"
+#include "BKE_scene.h"
+
+#include "PIL_time.h"
+
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "rendercore.h"
+#include "shadbuf.h"
+#include "shading.h"
+#include "zbuf.h"
+
+/* XXX, could be better implemented... this is for endian issues */
+#ifdef __BIG_ENDIAN__
+//#  define RCOMP	3
+#  define GCOMP	2
+#  define BCOMP	1
+#  define ACOMP	0
+#else
+//#  define RCOMP	0
+#  define GCOMP	1
+#  define BCOMP	2
+#  define ACOMP	3
+#endif
+
+#define RCT_SIZE_X(rct)       ((rct)->xmax - (rct)->xmin)
+#define RCT_SIZE_Y(rct)       ((rct)->ymax - (rct)->ymin)
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is a hard copy of the active, dynamically allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* ------------------------------------------------------------------------- */
+
+/* initshadowbuf() in convertBlenderScene.c */
+
+/* ------------------------------------------------------------------------- */
+
+static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
+{
+	int len4, *rz;
+	int x2, y2;
+	/* copies the tile starting at (x1, y1) of the size*size int buffer 'rectz' row by row into 'r1' */
+	x2= x1+tile;
+	y2= y1+tile;
+	if (x2>=size) x2= size-1;	/* NOTE(review): a tile reaching the buffer edge is cut to size-1, */
+	if (y2>=size) y2= size-1;	/* i.e. one column/row short of the buffer - confirm this is intended */
+
+	if (x1>=x2 || y1>=y2) return;	/* nothing to copy, 'r1' is left untouched */
+
+	len4= 4*(x2- x1);	/* bytes per copied row, written for 4-byte ints */
+	rz= rectz + size*y1 + x1;
+	for (; y1<y2; y1++) {
+		memcpy(r1, rz, len4);
+		rz+= size;
+		r1+= len4;	/* rows are packed back to back, so the stride in 'r1' is the clipped width */
+	}
+}
+
+#if 0
+static int sizeoflampbuf(ShadBuf *shb)
+{
+	int num, count=0;
+	char *cp;
+
+	cp= shb->cbuf;
+	num= (shb->size*shb->size)/256;
+
+	while (num--) count+= *(cp++);
+
+	return 256*count;
+}
+#endif
+
+/* not threadsafe: fills and hands out shared static tables, callers have to serialize access */
+static float *give_jitter_tab(int samp)
+{
+	/* One shared pool holds the jitter tables for every sample count (1x1 up to
+	 * 16x16 points, 1496 points in total, some 12k of floats). Each table is
+	 * generated on first request and reused afterwards.
+	 */
+	static int tab_len[17] = {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
+	static float jit_pool[1496][2];
+	static char tab_ready[17] = {0};
+	int start = 0, i;
+
+	/* supported range is 2..16 samples per side */
+	samp = (samp < 2) ? 2 : ((samp > 16) ? 16 : samp);
+
+	/* the table for 'samp' starts after all smaller tables */
+	for (i = 0; i < samp - 1; i++) start += tab_len[i];
+	if (!tab_ready[samp]) {
+		/* the flag is raised before the table is filled */
+		tab_ready[samp] = 1;
+		BLI_jitter_init((float (*)[2])jit_pool[start], samp * samp);
+	}
+
+	return jit_pool[start];
+}
+
+static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype)
+{
+	const int samp = get_render_shadow_samples(&re->r, shb->samp);
+	const int tot = samp * samp;
+	const float *jit = shb->jit;
+	float *weight, wsum = 0.0f, wscale;
+	int i;
+
+	weight = shb->weight = MEM_mallocN(sizeof(float) * tot, "weight tab lamp");
+	/* one weight per jitter point, from its distance to the origin and the lamp's filter type */
+	for (i = 0; i < tot; i++, jit += 2) {
+		const float dist = sqrtf(jit[0] * jit[0] + jit[1] * jit[1]);
+
+		if (filtertype == LA_SHADBUF_TENT) weight[i] = 0.71f - dist;
+		else if (filtertype == LA_SHADBUF_GAUSS) weight[i] = RE_filter_value(R_FILTER_GAUSS, 1.8f * dist);
+		else weight[i] = 1.0f;
+
+		wsum += weight[i];
+	}
+
+	/* normalize: scale every weight by the inverse of the total */
+	wscale = 1.0f / wsum;
+	for (i = 0; i < tot; i++) weight[i] *= wscale;
+}
+
+static int verg_deepsample(const void *poin1, const void *poin2)
+{
+	const DeepSample *a = poin1;
+	const DeepSample *b = poin2;
+
+	/* qsort comparator: orders samples by increasing z */
+	if (a->z > b->z) return 1;
+	return (a->z < b->z) ? -1 : 0;
+}
+
+static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
+{
+	/* replaces the 'tot' samples in place by fewer ones within 'epsilon' of the input, returns the new
+	 * count; uses doubles to avoid overflows and other numerical issues, could be improved */
+	DeepSample *ds, *newds;
+	float v;
+	double slope, slopemin, slopemax, min, max, div, newmin, newmax;
+	int a, first, z, newtot= 0;
+
+#if 0
+	if (print) {
+		for (a=0, ds=dsample; a<tot; a++, ds++)
+			printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
+		printf("\n");
+	}
+#endif
+
+	/* read from and write into same array: 'ds' reads ahead, 'newds' is the last sample written */
+	ds= dsample;
+	newds= dsample;
+	a= 0;
+
+	/* as long as we are not at the end of the array */
+	for (a++, ds++; a<tot; a++, ds++) {
+		slopemin= 0.0f;
+		slopemax= 0.0f;
+		first= 1;
+		/* extend the segment that starts at 'newds' for as long as one slope fits all samples */
+		for (; a<tot; a++, ds++) {
+			//dz= ds->z - newds->z;
+			if (ds->z == newds->z) {
+				/* still in same z position, simply check
+				 * visibility difference against epsilon */
+				if (!(fabsf(newds->v - ds->v) <= epsilon)) {
+					break;
+				}
+			}
+			else {
+				/* compute slopes: range of slopes from 'newds' that pass within epsilon of this sample */
+				div= (double)0x7FFFFFFF / ((double)ds->z - (double)newds->z);
+				min= (double)((ds->v - epsilon) - newds->v) * div;
+				max= (double)((ds->v + epsilon) - newds->v) * div;
+
+				/* adapt existing slopes: intersect with the range allowed so far */
+				if (first) {
+					newmin= min;
+					newmax= max;
+					first= 0;
+				}
+				else {
+					newmin= MAX2(slopemin, min);
+					newmax= MIN2(slopemax, max);
+
+					/* verify if there is still space between the slopes */
+					if (newmin > newmax) {
+						ds--;	/* this sample does not fit: end the segment at the previous one */
+						a--;
+						break;
+					}
+				}
+
+				slopemin= newmin;
+				slopemax= newmax;
+			}
+		}
+
+		if (a == tot) {	/* ran off the end: step back to the last sample */
+			ds--;
+			a--;
+		}
+
+		/* always previous z */
+		z= ds->z;
+
+		if (first || a==tot-1) {
+			/* if slopes were not initialized (or this is the last sample), use last visibility */
+			v= ds->v;
+		}
+		else {
+			/* compute visibility at center between slopes at z */
+			slope = (slopemin + slopemax) * 0.5;
+			v = (double)newds->v + slope * ((double)(z - newds->z) / (double)0x7FFFFFFF);
+		}
+
+		newds++;
+		newtot++;
+
+		newds->z= z;
+		newds->v= v;
+	}
+	/* count the last written sample too, unless its visibility equals that of the one before it */
+	if (newtot == 0 || (newds->v != (newds-1)->v))
+		newtot++;
+
+#if 0
+	if (print) {
+		for (a=0, ds=dsample; a<newtot; a++, ds++)
+			printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
+		printf("\n");
+	}
+#endif
+
+	return newtot;
+}
+
+static float deep_alpha(Render *re, int obinr, int facenr, bool use_strand)
+{
+	ObjectRen *obr = re->objectinstance[obinr].obr;
+	const int index = facenr - 1;  /* lookups use facenr - 1 */
+	Material *ma;
+
+	if (use_strand) {
+		/* strands take the material of their strand buffer */
+		ma = RE_findOrAddStrand(obr, index)->buffer->ma;
+	}
+	else {
+		/* faces: the index is masked with RE_QUAD_MASK before the VlakRen lookup */
+		ma = RE_findOrAddVlak(obr, index & RE_QUAD_MASK)->mat;
+	}
+	return ma->shad_alpha;
+}
+
+static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
+{
+	ShadSampleBuf *shsample;
+	DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
+	APixstr *ap, *apn;
+	APixstrand *aps, *apns;
+	float visibility;
+	/* builds, per pixel, one compressed visibility function out of the samples of all 'totbuf' buffers */
+	const int totbuf= shb->totbuf;
+	const float totbuf_f= (float)shb->totbuf;
+	const float totbuf_f_inv= 1.0f/totbuf_f;
+	const int size= shb->size;
+
+	int a, b, c, tot, minz, found, prevtot, newtot;
+	int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
+
+	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
+	BLI_addtail(&shb->buffers, shsample);
+
+	shsample->totbuf = MEM_callocN(sizeof(int) * size * size, "deeptotbuf");
+	shsample->deepbuf = MEM_callocN(sizeof(DeepSample *) * size * size, "deepbuf");
+
+	ap= apixbuf;
+	aps= apixbufstrand;
+	for (a=0; a<size*size; a++, ap++, aps++) {
+		/* count number of samples per buffer; bit 'c' of a sample's mask selects buffer 'c' */
+		for (c=0; c<totbuf; c++)
+			sampletot[c]= 0;
+
+		tot= 0;
+		for (apn=ap; apn; apn=apn->next)
+			for (b=0; b<4; b++)
+				if (apn->p[b])
+					for (c=0; c<totbuf; c++)
+						if (apn->mask[b] & (1<<c))
+							sampletot[c]++;
+
+		if (apixbufstrand) {
+			for (apns=aps; apns; apns=apns->next)
+				for (b=0; b<4; b++)
+					if (apns->p[b])
+						for (c=0; c<totbuf; c++)
+							if (apns->mask[b] & (1<<c))
+								sampletot[c]++;
+		}
+
+		for (c=0; c<totbuf; c++)
+			tot += sampletot[c];
+
+		if (tot == 0) {	/* no sample in this pixel: leave it without a deep buffer */
+			shsample->deepbuf[a]= NULL;
+			shsample->totbuf[a]= 0;
+			continue;
+		}
+
+		/* fill samples: one allocation, split into a section of 2 entries per sample for each buffer */
+		ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
+		for (c=1; c<totbuf; c++)
+			ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;
+
+		for (apn=ap; apn; apn=apn->next) {
+			for (b=0; b<4; b++) {
+				if (apn->p[b]) {
+					for (c=0; c<totbuf; c++) {
+						if (apn->mask[b] & (1<<c)) {
+							/* two entries to create step profile */
+							ds[c]->z= apn->z[b];
+							ds[c]->v= 1.0f; /* not used */
+							ds[c]++;
+							ds[c]->z= apn->z[b];
+							ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
+							ds[c]++;
+						}
+					}
+				}
+			}
+		}
+
+		if (apixbufstrand) {
+			for (apns=aps; apns; apns=apns->next) {
+				for (b=0; b<4; b++) {
+					if (apns->p[b]) {
+						for (c=0; c<totbuf; c++) {
+							if (apns->mask[b] & (1<<c)) {
+								/* two entries to create step profile */
+								ds[c]->z= apns->z[b];
+								ds[c]->v= 1.0f; /* not used */
+								ds[c]++;
+								ds[c]->z= apns->z[b];
+								ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
+								ds[c]++;
+							}
+						}
+					}
+				}
+			}
+		}
+
+		for (c=0; c<totbuf; c++) {
+			/* sort by increasing z; each element is a pair of entries, compared by the first entry's z */
+			qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);
+
+			/* sum visibility, replacing alpha values */
+			visibility= 1.0f;
+			ds[c]= sampleds[c];
+
+			for (b=0; b<sampletot[c]; b++) {
+				/* two entries creating step profile: visibility before and after this sample */
+				ds[c]->v= visibility;
+				ds[c]++;
+
+				visibility *= 1.0f-ds[c]->v;
+				ds[c]->v= visibility;
+				ds[c]++;
+			}
+
+			/* halfway trick, probably won't work well for volumes? */
+			ds[c]= sampleds[c];
+			for (b=0; b<sampletot[c]; b++) {
+				if (b+1 < sampletot[c]) {	/* move z halfway towards the next sample's z */
+					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
+					ds[c]++;
+					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
+					ds[c]++;
+				}
+				else {	/* last sample: halfway towards the maximum z value 0x7FFFFFFF */
+					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
+					ds[c]++;
+					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
+					ds[c]++;
+				}
+			}
+
+			/* init for merge loop: from here on sampletot counts entries, not samples */
+			ds[c]= sampleds[c];
+			sampletot[c] *= 2;
+		}
+
+		shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
+		shsample->totbuf[a]= 0;
+
+		/* merge buffers: repeatedly emit the smallest remaining z with the averaged visibility */
+		dsb= shsample->deepbuf[a];
+		while (1) {
+			minz= 0;
+			found= 0;
+
+			for (c=0; c<totbuf; c++) {
+				if (sampletot[c] && (!found || ds[c]->z < minz)) {
+					minz= ds[c]->z;
+					found= 1;
+				}
+			}
+
+			if (!found)
+				break;
+
+			dsb->z= minz;
+			dsb->v= 0.0f;
+
+			visibility= 0.0f;
+			for (c=0; c<totbuf; c++) {
+				if (sampletot[c] && ds[c]->z == minz) {
+					ds[c]++;
+					sampletot[c]--;
+				}
+				/* a buffer with no entry passed yet counts as fully visible, else its last passed entry counts */
+				if (sampleds[c] == ds[c])
+					visibility += totbuf_f_inv;
+				else
+					visibility += (ds[c]-1)->v / totbuf_f;
+			}
+
+			dsb->v= visibility;
+			dsb++;
+			shsample->totbuf[a]++;
+		}
+
+		prevtot= shsample->totbuf[a];
+		totsample += prevtot;
+
+		newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
+		shsample->totbuf[a]= newtot;
+		totsamplec += newtot;
+
+		if (newtot < prevtot) {	/* shrink the allocation to the compressed count */
+			newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
+			memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
+			MEM_freeN(shsample->deepbuf[a]);
+			shsample->deepbuf[a]= newbuf;
+		}
+
+		MEM_freeN(sampleds[0]);	/* frees the per-buffer sections, which share this one allocation */
+	}
+
+	//printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
+}
+
+/* create 16x16 Z tiles (for compression) from rectz, stored in a new ShadSampleBuf: this system is 24 bits!!! */
+static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
+{
+	ShadSampleBuf *shsample;
+	float dist;
+	uintptr_t *ztile;
+	int *rz, *rz1, verg, verg1, size= shb->size;
+	int a, x, y, minx, miny, byt1, byt2;
+	char *rc, *rcline, *ctile, *zt;
+
+	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
+	BLI_addtail(&shb->buffers, shsample);
+	/* per tile: zbuf holds either a z value or a pointer to tile data, cbuf holds the storage code 0..3 */
+	shsample->zbuf= MEM_mallocN(sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
+	shsample->cbuf= MEM_callocN((size*size)/256, "initshadbuf3");
+
+	ztile= (uintptr_t *)shsample->zbuf;
+	ctile= shsample->cbuf;
+
+	/* help buffer: room for the 256 ints of one tile */
+	rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
+
+	for (y=0; y<size; y+=16) {
+		if (y< size/2) miny= y+15-size/2;
+		else miny= y-size/2;
+
+		for (x=0; x<size; x+=16) {
+
+			/* is tile within spotbundle? distance is measured from the buffer center */
+			a= size/2;
+			if (x< a) minx= x+15-a;
+			else minx= x-a;
+
+			dist = sqrtf((float)(minx * minx + miny * miny));
+
+			if (square==0 && dist>(float)(a+12)) {	/* 12, tested with an only-shadow lamp */
+				a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
+				rz1= (&verg)+1;	/* so that *(rz1-1) below yields verg (0) */
+			}
+			else {
+				copy_to_ztile(rectz, size, x, y, 16, rcline);
+				rz1= (int *)rcline;
+
+				verg= (*rz1 & 0xFFFFFF00);
+				/* stop at the first value whose upper 24 bits differ from the first value's */
+				for (a=0;a<256;a++, rz1++) {
+					if ( (*rz1 & 0xFFFFFF00) !=verg) break;
+				}
+			}
+			if (a==256) { /* complete empty tile: code 0, the value itself is stored in ztile */
+				*ctile= 0;
+				*ztile= *(rz1-1);
+			}
+			else {
+
+				/* ACOMP etc. are defined to work L/B endian */
+
+				rc= rcline;
+				rz1= (int *)rcline;
+				verg=  rc[ACOMP];
+				verg1= rc[BCOMP];
+				rc+= 4;
+				byt1= 1; byt2= 1;	/* byt1: ACOMP byte constant over the tile, byt2: BCOMP byte constant */
+				for (a=1;a<256;a++, rc+=4) {
+					byt1 &= (verg==rc[ACOMP]);
+					byt2 &= (verg1==rc[BCOMP]);
+
+					if (byt1==0) break;
+				}
+				if (byt1 && byt2) {	/* only store byte: code 1, first int followed by 256 GCOMP bytes */
+					*ctile= 1;
+					*ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
+					rz= (int *)*ztile;
+					*rz= *rz1;
+
+					zt= (char *)(rz+1);
+					rc= rcline;
+					for (a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
+				}
+				else if (byt1) {		/* only store short: code 2, first int followed by 256 BCOMP/GCOMP pairs */
+					*ctile= 2;
+					*ztile= (uintptr_t)MEM_mallocN(2*256+4, "Tile2");
+					rz= (int *)*ztile;
+					*rz= *rz1;
+
+					zt= (char *)(rz+1);
+					rc= rcline;
+					for (a=0; a<256; a++, zt+=2, rc+=4) {
+						zt[0]= rc[BCOMP];
+						zt[1]= rc[GCOMP];
+					}
+				}
+				else {			/* store triple: code 3, ACOMP/BCOMP/GCOMP bytes for each of the 256 values */
+					*ctile= 3;
+					*ztile= (uintptr_t)MEM_mallocN(3*256, "Tile3");
+
+					zt= (char *)*ztile;
+					rc= rcline;
+					for (a=0; a<256; a++, zt+=3, rc+=4) {
+						zt[0]= rc[ACOMP];
+						zt[1]= rc[BCOMP];
+						zt[2]= rc[GCOMP];
+					}
+				}
+			}
+			ztile++;
+			ctile++;
+		}
+	}
+
+	MEM_freeN(rcline);
+}
+
+/* sets start/end clipping of the shadow buffer from the shadow casting geometry. lar->shb should be initialized */
+static void shadowbuf_autoclip(Render *re, LampRen *lar)
+{
+	ObjectInstanceRen *obi;
+	ObjectRen *obr;
+	VlakRen *vlr= NULL;
+	VertRen *ver= NULL;
+	Material *ma= NULL;
+	float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
+	unsigned int lay = -1;	/* all layer bits set, unless the lamp restricts layers below */
+	int i, a, maxtotvert, ok= 1;
+	char *clipflag;
+
+	minz= 1.0e30f; maxz= -1.0e30f;
+	copy_m4_m4(viewmat, lar->shb->viewmat);
+
+	if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
+	/* one flag per vertex, sized for the object with the most vertices */
+	maxtotvert= 0;
+	for (obr=re->objecttable.first; obr; obr=obr->next)
+		maxtotvert = max_ii(obr->totvert, maxtotvert);
+
+	clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
+
+	/* set clip in vertices when face visible */
+	for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
+		obr= obi->obr;
+
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_m4m4(obviewmat, viewmat, obi->mat);
+		else
+			copy_m4_m4(obviewmat, viewmat);
+
+		memset(clipflag, 0, sizeof(char)*obr->totvert);
+
+		/* clear clip, is being set if face is visible (clip is calculated for real later) */
+		for (a=0; a<obr->totvlak; a++) {
+			if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;	/* faces are stored in blocks of 256 */
+			else vlr++;
+
+			/* note; these conditions are copied from zbuffer_shadow() */
+			if (vlr->mat!= ma) {	/* re-evaluate 'ok' only when the material changes */
+				ma= vlr->mat;
+				ok= 1;
+				if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0;
+			}
+
+			if (ok && (obi->lay & lay)) {
+				clipflag[vlr->v1->index]= 1;
+				clipflag[vlr->v2->index]= 1;
+				clipflag[vlr->v3->index]= 1;
+				if (vlr->v4) clipflag[vlr->v4->index]= 1;
+			}
+		}
+
+		/* calculate min and max distance over the flagged vertices */
+		for (a=0; a< obr->totvert;a++) {
+			if ((a & 255)==0) ver= RE_findOrAddVert(obr, a);
+			else ver++;
+
+			if (clipflag[a]) {
+				copy_v3_v3(vec, ver->co);
+				mul_m4_v3(obviewmat, vec);
+				/* Z on visible side of lamp space */
+				if (vec[2] < 0.0f) {
+					float inpr, z= -vec[2];
+
+					/* since vec is rotated in lampspace, this is how to get the cosine of angle */
+					/* precision is set 20% larger */
+					vec[2]*= 1.2f;
+					normalize_v3(vec);
+					inpr= - vec[2];
+
+					if (inpr>=lar->spotsi) {	/* only vertices that pass the spot cone test count */
+						if (z<minz) minz= z;
+						if (z>maxz) maxz= z;
+					}
+				}
+			}
+		}
+	}
+
+	MEM_freeN(clipflag);
+
+	/* set clipping min and max; skipped when fewer than two distinct depths were found */
+	if (minz < maxz) {
+		float delta= (maxz - minz);	/* threshold to prevent precision issues */
+
+		//printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
+		if (lar->bufflag & LA_SHADBUF_AUTO_START)
+			lar->shb->d= minz - delta*0.02f;	/* 0.02 is arbitrary... needs more thinking! */
+		if (lar->bufflag & LA_SHADBUF_AUTO_END)
+			lar->shb->clipend= maxz + delta*0.1f;
+
+		/* bias was calculated as percentage, we scale it to prevent animation issues */
+		delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
+		//printf("bias delta %f\n", delta);
+		lar->shb->bias= (int) (delta*(float)lar->shb->bias);
+	}
+}
+
+static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
+{
+	ShadBuf *shb = lar->shb;
+	const float *jit = jitbuf;
+	int *rectz, buf;
+
+	/* scratch z-buffer, reused for every sample buffer */
+	rectz = MEM_mapallocN(sizeof(int) * shb->size * shb->size, "makeshadbuf");
+
+	for (buf = 0; buf < shb->totbuf; buf++, jit += 2) {
+		/* zbuffer with this buffer's jitter pair, then create Z tiles (for compression): this system is 24 bits */
+		zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jit[0], jit[1]);
+		compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
+
+		/* stop early when the break callback reports true */
+		if (re->test_break(re->tbh)) break;
+	}
+	MEM_freeN(rectz);
+}
+
+static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
+{
+	ShadBuf *shb = lar->shb;
+	const int size = shb->size;
+	ListBase apsmbase = {NULL, NULL};
+	APixstr *apixbuf = MEM_callocN(sizeof(APixstr) * size * size, "APixbuf");
+	APixstrand *apixbufstrand = NULL;
+
+	/* the strand pixel buffer is only allocated when the render has strands */
+	if (re->totstrand) {
+		apixbufstrand = MEM_callocN(sizeof(APixstrand) * size * size, "APixbufstrand");
+	}
+
+	/* zbuffering: fills the per-pixel sample lists via zbuffer_abuf_shadow() */
+	zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, size,
+	                    shb->totbuf, (float(*)[2])jitbuf);
+	/* turn the sample lists into compressed per-pixel deep samples */
+	compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
+
+	MEM_freeN(apixbuf);
+	if (apixbufstrand) MEM_freeN(apixbufstrand);
+	freepsA(&apsmbase);
+}
+
+void makeshadowbuf(Render *re, LampRen *lar)
+{
+	ShadBuf *shb= lar->shb;
+	float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
+	/* sets up clipping and the projection of the lamp's shadow buffer and, for buffered types, fills it */
+	if (lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
+		shadowbuf_autoclip(re, lar);
+
+	/* just to enforce identical behavior of all irregular buffers */
+	if (lar->buftype==LA_SHADBUF_IRREGULAR)
+		shb->size= 1024;
+
+	/* matrices and window: in winmat the transformation is being put,
+	 * transforming from observer view to lamp view, including lamp window matrix */
+
+	angle= saacos(lar->spotsi);
+	temp = 0.5f * shb->size * cosf(angle) / sinf(angle);	/* half the buffer size divided by tan(angle) */
+	shb->pixsize= (shb->d)/temp;
+	wsize= shb->pixsize*(shb->size/2.0f);	/* half the window width at clip start 'd' */
+
+	perspective_m4(shb->winmat, -wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
+	mul_m4_m4m4(shb->persmat, shb->winmat, shb->viewmat);
+
+	if (ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
+		shb->totbuf= lar->buffers;
+
+		/* jitter, weights: give_jitter_tab() fills shared static tables and is not threadsafe,
+		 * so every call to it (also the one for jitbuf) has to happen while holding the lock */
+		BLI_thread_lock(LOCK_CUSTOM1);
+		shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
+		make_jitter_weight_tab(re, shb, lar->filtertype);
+		if (shb->totbuf==4) jitbuf= give_jitter_tab(2);
+		else if (shb->totbuf==9) jitbuf= give_jitter_tab(3);
+		else jitbuf= twozero;
+		BLI_thread_unlock(LOCK_CUSTOM1);
+
+		/* zbuffering */
+		if (lar->buftype == LA_SHADBUF_DEEP) {
+			makedeepshadowbuf(re, lar, jitbuf);
+			shb->totbuf= 1;	/* the deep buffer merges all sample buffers into a single one */
+		}
+		else
+			makeflatshadowbuf(re, lar, jitbuf);
+
+		/* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
+	}
+}
+
+static void *do_shadow_thread(void *re_v)
+{
+	Render *re = re_v;
+
+	for (;;) {
+		LampRen *lar;
+
+		/* claim the first lamp with a shadow buffer that no thread has taken yet */
+		BLI_thread_lock(LOCK_CUSTOM1);
+		lar = re->lampren.first;
+		while (lar && !(lar->shb && !lar->thread_assigned)) lar = lar->next;
+		if (lar) lar->thread_assigned = 1;
+		BLI_thread_unlock(LOCK_CUSTOM1);
+
+		/* nothing left to claim: this worker is done */
+		if (lar == NULL) break;
+
+		/* if type is irregular, this only sets the perspective matrix and autoclips */
+		makeshadowbuf(re, lar);
+		BLI_thread_lock(LOCK_CUSTOM1);
+		lar->thread_ready = 1;
+		BLI_thread_unlock(LOCK_CUSTOM1);
+		if (re->test_break(re->tbh)) break;
+	}
+	return NULL;
+}
+
+/* Break flag read by the shadow worker threads. threaded_makeshadowbufs()
+ * stores the result of the real test_break callback here. */
+static volatile int g_break= 0;
+/* Replacement test_break callback installed while the shadow threads run:
+ * it ignores its argument and only reports the value of g_break. */
+static int thread_break(void *UNUSED(arg))
+{
+	return g_break;
+}
+
+/* Builds the shadow buffers of all lamps of the render.
+ *
+ * While rendering, the number of threads is the number of lamps with a shadow
+ * buffer, limited to re->r.threads; otherwise (preview render) one thread is used.
+ * With at most one thread the lamps are processed in a plain loop. Otherwise a
+ * thread pool runs do_shadow_thread() and this function polls until every lamp
+ * with a shadow buffer is flagged ready, or until a break is requested. */
+void threaded_makeshadowbufs(Render *re)
+{
+	ListBase threads;
+	LampRen *lar;
+	int a, totthread= 0;
+	int (*test_break)(void *);
+
+	/* count number of threads to use */
+	if (G.is_rendering) {
+		for (lar=re->lampren.first; lar; lar= lar->next)
+			if (lar->shb)
+				totthread++;
+
+		totthread = min_ii(totthread, re->r.threads);
+	}
+	else
+		totthread = 1; /* preview render */
+
+	if (totthread <= 1) {
+		/* serial path: build the buffers one by one, checking for a break before each lamp */
+		for (lar=re->lampren.first; lar; lar= lar->next) {
+			if (re->test_break(re->tbh)) break;
+			if (lar->shb) {
+				/* if type is irregular, this only sets the perspective matrix and autoclips */
+				makeshadowbuf(re, lar);
+			}
+		}
+	}
+	else {
+		/* swap test break function: the workers then only read g_break,
+		 * the original callback is called from this thread alone */
+		test_break= re->test_break;
+		re->test_break= thread_break;
+
+		/* reset the per-lamp bookkeeping used by do_shadow_thread() */
+		for (lar=re->lampren.first; lar; lar= lar->next) {
+			lar->thread_assigned= 0;
+			lar->thread_ready= 0;
+		}
+
+		BLI_threadpool_init(&threads, do_shadow_thread, totthread);
+
+		for (a=0; a<totthread; a++)
+			BLI_threadpool_insert(&threads, re);
+
+		/* keep rendering as long as there are shadow buffers not ready */
+		do {
+			/* publish the break state for the workers, stop waiting on break */
+			if ((g_break=test_break(re->tbh)))
+				break;
+
+			PIL_sleep_ms(50);
+
+			/* lar ends up NULL only when no lamp with a buffer is unfinished */
+			BLI_thread_lock(LOCK_CUSTOM1);
+			for (lar=re->lampren.first; lar; lar= lar->next)
+				if (lar->shb && !lar->thread_ready)
+					break;
+			BLI_thread_unlock(LOCK_CUSTOM1);
+		} while (lar);
+
+		BLI_threadpool_end(&threads);
+
+		/* unset threadsafety: restore the original callback and clear the flag */
+		re->test_break= test_break;
+		g_break= 0;
+	}
+}
+
+/* Frees all shadow buffer data owned by a lamp and clears lar->shb.
+ * Does nothing when the lamp has no shadow buffer. */
+void freeshadowbuf(LampRen *lar)
+{
+	ShadBuf *shb= lar->shb;
+	ShadSampleBuf *sbuf;
+	int idx, count;
+
+	if (shb == NULL)
+		return;
+
+	for (sbuf= shb->buffers.first; sbuf; sbuf= sbuf->next) {
+		if (sbuf->deepbuf) {
+			/* deep buffer: an optional sample array per pixel */
+			count= shb->size*shb->size;
+			for (idx=0; idx<count; idx++) {
+				if (sbuf->deepbuf[idx])
+					MEM_freeN(sbuf->deepbuf[idx]);
+			}
+
+			MEM_freeN(sbuf->deepbuf);
+			MEM_freeN(sbuf->totbuf);
+		}
+		else {
+			/* tiled buffer: a tile slot is only freed when its entry in
+			 * cbuf is non-zero */
+			intptr_t *ztiles= sbuf->zbuf;
+			const char *ctiles= sbuf->cbuf;
+
+			count= (shb->size*shb->size)/256;
+			for (idx=0; idx<count; idx++) {
+				if (ctiles[idx])
+					MEM_freeN((void *) ztiles[idx]);
+			}
+
+			MEM_freeN(sbuf->zbuf);
+			MEM_freeN(sbuf->cbuf);
+		}
+	}
+	BLI_freelistN(&shb->buffers);
+
+	if (shb->weight)
+		MEM_freeN(shb->weight);
+	MEM_freeN(lar->shb);
+
+	lar->shb= NULL;
+}
+
+
+/* Quick test used to detect a sample area lying in tiles of constant depth.
+ * Returns 1 when the tile holding pixel (xs, ys) is fully compressed (type 0);
+ * with nr==0 the tile value is stored in *rz, with any other nr the tile value
+ * additionally has to match *rz. Deep buffers always return 0. */
+static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
+{
+	int *tile_z;
+	int ofs;
+
+	if (shsample->deepbuf)
+		return 0;
+
+	/* always test borders of shadowbuffer */
+	xs= (xs<0) ? 0 : ((xs>=shb->size) ? shb->size-1 : xs);
+	ys= (ys<0) ? 0 : ((ys>=shb->size) ? shb->size-1 : ys);
+
+	/* tile index, tiles are 16x16 pixels */
+	ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
+
+	/* only type 0 tiles carry one constant value */
+	if (shsample->cbuf[ofs] != 0)
+		return 0;
+
+	tile_z= *( (int **)(shsample->zbuf+ofs) );
+
+	if (nr==0) {
+		*rz= tile_z;
+		return 1;
+	}
+
+	return (*rz == tile_z) ? 1 : 0;
+}
+
+/* Looks up the visibility at depth z in an array of `tot` deep samples.
+ * The first sample with a depth not in front of (z - bias) is searched and the
+ * visibility is interpolated linearly between it and its predecessor.
+ * When biast is given it receives the bias blending factor, 0.0 when no
+ * blending is needed. The caller has to pass tot > 0. */
+static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
+{
+	DeepSample *hit, *before;
+	float t;
+	int idx= 0;
+
+	/* tricky stuff here; we use ints which can overflow easily with bias values */
+
+	while (idx<tot && (z-bias > dsample[idx].z))
+		idx++;
+
+	if (idx == tot) {
+		/* completely behind all samples */
+		if (biast)
+			*biast= 0.0f;
+		return dsample[tot-1].v;
+	}
+
+	hit= dsample + idx;
+
+	/* check if this read needs bias blending */
+	if (biast)
+		*biast= (z > hit->z) ? (float)(z - hit->z)/(float)bias : 0.0f;
+
+	/* completely in front of all samples */
+	if (idx == 0)
+		return 1.0f;
+
+	/* converting to float early here because hit->z - before->z can overflow */
+	before= hit-1;
+	t= ((float)(z-bias) - (float)before->z)/((float)hit->z - (float)before->z);
+	return t*hit->v + (1.0f-t)*before->v;
+}
+
+/* Reads the visibility of depth zs at pixel (xs, ys) of a deep shadow buffer.
+ * Inside the soft bias area the biased and the unbiased lookups are blended
+ * using the squared blend factor. */
+static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
+{
+	float vis, blend;
+	int pix, nsamp;
+
+	/* extreme close to clipstart */
+	if (zs < - 0x7FFFFE00 + bias)
+		return 1.0;
+
+	/* pixels without samples are fully visible */
+	pix= ys*shb->size + xs;
+	nsamp= shsample->totbuf[pix];
+	if (nsamp == 0)
+		return 1.0f;
+
+	vis= readdeepvisibility(shsample->deepbuf[pix], nsamp, zs, bias, &blend);
+
+	if (blend == 0.0f)
+		return vis;
+
+	/* in soft bias area */
+	{
+		const float unbiased= readdeepvisibility(shsample->deepbuf[pix], nsamp, zs, 0, NULL);
+
+		blend= blend*blend;
+		return (1.0f-blend)*vis + blend*unbiased;
+	}
+}
+
+/* Reads the light factor for depth zs at pixel (xs, ys) of one sample buffer.
+ *
+ * Pixel coordinates outside the buffer are clamped to its border. Deep buffers
+ * are forwarded to readdeepshadowbuf(). Otherwise the stored depth is decoded
+ * from the 16x16 tile holding the pixel, depending on the tile type in cbuf:
+ *   3: three bytes per pixel, no shared part
+ *   2: a shared int followed by two bytes per pixel
+ *   1: a shared int followed by one byte per pixel
+ *   else: the tile slot itself holds the depth of the entire tile
+ *
+ * return 1.0 : fully in light, 0.0 : fully in shadow, values in between inside
+ * the soft area of size `bias` */
+static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
+{
+	float temp;
+	int *rz, ofs;
+	int zsamp=0;
+	char *ct, *cz;
+
+	/* simpleclip */
+	/* if (xs<0 || ys<0) return 1.0; */
+	/* if (xs>=shb->size || ys>=shb->size) return 1.0; */
+
+	/* always test borders of shadowbuffer */
+	if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
+	if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
+
+	if (shsample->deepbuf)
+		return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
+
+	/* calc z */
+	ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
+	ct= shsample->cbuf+ofs;
+	rz= *( (int **)(shsample->zbuf+ofs) );
+
+	if (*ct==3) {
+		ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
+		cz= (char *)&zsamp;
+		cz[ACOMP]= ct[0];
+		cz[BCOMP]= ct[1];
+		cz[GCOMP]= ct[2];
+	}
+	else if (*ct==2) {
+		ct= ((char *)rz);
+		ct+= 4+2*16*(ys & 15)+2*(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[BCOMP]= ct[0];
+		cz[GCOMP]= ct[1];
+	}
+	else if (*ct==1) {
+		ct= ((char *)rz);
+		ct+= 4+16*(ys & 15)+(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[GCOMP]= ct[0];
+	}
+	else {
+		/* got warning on this for 64 bits.... */
+		/* but it's working code! in this case rz is not a pointer but zvalue (ton) */
+		zsamp= GET_INT_FROM_POINTER(rz);
+	}
+
+	/* tricky stuff here; we use ints which can overflow easily with bias values */
+
+	if (zsamp > zs)
+		return 1.0;	/* absolute no shadow */
+	else if (zs < - 0x7FFFFE00 + bias)
+		return 1.0;	/* extreme close to clipstart */
+	else if (zsamp < zs-bias)
+		return 0.0;	/* absolute in shadow */
+	else {	/* soft area */
+
+		/* with a zero bias only zsamp == zs gets here, the division below
+		 * would be 0/0 and return NaN; the difference is zero, so: in light */
+		if (bias == 0)
+			return 1.0f;
+
+		temp=  ( (float)(zs- zsamp) )/(float)bias;
+		return 1.0f - temp*temp;
+	}
+}
+
+/* Projects coordinate co with the persmat of the shadow buffer.
+ * x and y receive the position in buffer pixels, z (optional, may be NULL)
+ * receives the projected depth after the homogeneous divide. */
+static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, const float co[3])
+{
+	const float half= 0.5f*(float)shb->size;
+	float hom[4];
+
+	hom[0]= co[0];
+	hom[1]= co[1];
+	hom[2]= co[2];
+	hom[3]= 1.0f;
+
+	mul_m4_v4(shb->persmat, hom);
+
+	*x= half*(1.0f+hom[0]/hom[3]);
+	*y= half*(1.0f+hom[1]/hom[3]);
+	if (z != NULL)
+		*z= (hom[2]/hom[3]);
+}
+
+/* the externally called shadow testing (reading) function
+ *
+ * re:         render, used to get the number of shadow samples
+ * shb:        shadow buffer to read from
+ * co:         coordinate to test, projected with shb->persmat
+ * dxco, dyco: offsets added to co, their projections define the filter size
+ * inp:        facing value, when <= 0.0 the point is assumed fully in shadow
+ * mat_bias:   when non-zero, multiplied with the bias of the buffer
+ *
+ * return 1.0: no shadow at all, 0.0: fully in shadow */
+float testshadowbuf(Render *re, ShadBuf *shb, const float co[3], const float dxco[3], const float dyco[3], float inp, float mat_bias)
+{
+	ShadSampleBuf *shsample;
+	float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
+	float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
+	int xs, ys, zs, bias, *rz;
+	short a, num;
+
+	/* crash preventer */
+	if (shb->buffers.first==NULL)
+		return 1.0f;
+
+	/* when facing away, assume fully in shadow */
+	if (inp <= 0.0f)
+		return 0.0f;
+
+	/* project coordinate to pixel space */
+	shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
+
+	/* clip z coordinate, z is projected so that (-1.0, 1.0) matches
+	 * (clipstart, clipend), so we can do this simple test */
+	if (zs1>=1.0f)
+		return 0.0f;
+	else if (zs1<= -1.0f)
+		return 1.0f;
+
+	/* scale the (-1.0, 1.0) depth to the int range used in the buffer */
+	zs= ((float)0x7FFFFFFF)*zs1;
+
+	/* take num*num samples, increase area with fac */
+	num= get_render_shadow_samples(&re->r, shb->samp);
+	num= num*num;
+	fac= shb->soft;
+
+	/* compute z bias */
+	if (mat_bias!=0.0f) biasf= shb->bias*mat_bias;
+	else biasf= shb->bias;
+	/* with inp==1.0, bias is half the size. correction value was 1.1, giving errors
+	 * on cube edges, with one side being almost frontal lighted (ton)  */
+	bias= (1.5f-inp*inp)*biasf;
+
+	/* in case of no filtering we can do things simpler */
+	if (num==1) {
+		for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
+			shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
+
+		return shadfac/(float)shb->totbuf;
+	}
+
+	/* calculate filter size */
+	add_v3_v3v3(dco, co, dxco);
+	shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
+	dx[0]= xs1 - dx[0];
+	dx[1]= ys1 - dx[1];
+
+	add_v3_v3v3(dco, co, dyco);
+	shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
+	dy[0]= xs1 - dy[0];
+	dy[1]= ys1 - dy[1];
+
+	/* the filter is at least one pixel in both directions */
+	xres = fac * (fabsf(dx[0]) + fabsf(dy[0]));
+	yres = fac * (fabsf(dx[1]) + fabsf(dy[1]));
+	if (xres<1.0f) xres= 1.0f;
+	if (yres<1.0f) yres= 1.0f;
+
+	/* make xs1/ys1 corner of sample area */
+	xs1 -= xres*0.5f;
+	ys1 -= yres*0.5f;
+
+	/* in case we have a constant value in a tile, we can do quicker lookup;
+	 * the four corners of the sample area are tested in the first buffer only */
+	if (xres<16.0f && yres<16.0f) {
+		shsample= shb->buffers.first;
+		if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
+			if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
+				if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
+					if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
+						return readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
+					}
+				}
+			}
+		}
+	}
+
+	/* full jittered shadow buffer lookup */
+	for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
+		jit= shb->jit;
+		weight= shb->weight;
+
+		for (a=num; a>0; a--, jit+=2, weight++) {
+			/* instead of jit i tried random: ugly! */
+			/* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
+			/* xs1 and ys1 are already corrected to be corner of sample area */
+			xs= xs1 + xres*(jit[0] + 0.5f);
+			ys= ys1 + yres*(jit[1] + 0.5f);
+
+			shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
+		}
+	}
+
+	/* Renormalizes for the sample number: */
+	return shadfac/(float)shb->totbuf;
+}
+
+/* different function... sampling behind clipend can be LIGHT, bias is negative! */
+/* Reads one pixel of a tiled sample buffer for halo rendering. Pixels outside
+ * the buffer return 0.0. The depth is decoded from the 16x16 tile of the pixel
+ * according to the tile type in cbuf. */
+/* return: light */
+static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
+{
+	float temp;
+	int *rz, ofs;
+	int bias, zbias, zsamp;
+	char *ct, *cz;
+
+	/* negative! The other side is more important */
+	bias= -shb->bias;
+
+	/* simpleclip */
+	if (xs<0 || ys<0) return 0.0;
+	if (xs>=shb->size || ys>=shb->size) return 0.0;
+
+	/* calc z */
+	ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
+	ct= shsample->cbuf+ofs;
+	rz= *( (int **)(shsample->zbuf+ofs) );
+
+	if (*ct==3) {
+		/* three bytes per pixel, no shared part */
+		ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
+		cz= (char *)&zsamp;
+		zsamp= 0;
+		cz[ACOMP]= ct[0];
+		cz[BCOMP]= ct[1];
+		cz[GCOMP]= ct[2];
+	}
+	else if (*ct==2) {
+		/* shared int, followed by two bytes per pixel */
+		ct= ((char *)rz);
+		ct+= 4+2*16*(ys & 15)+2*(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[BCOMP]= ct[0];
+		cz[GCOMP]= ct[1];
+	}
+	else if (*ct==1) {
+		/* shared int, followed by one byte per pixel */
+		ct= ((char *)rz);
+		ct+= 4+16*(ys & 15)+(xs & 15);
+		zsamp= *rz;
+
+		cz= (char *)&zsamp;
+		cz[GCOMP]= ct[0];
+
+	}
+	else {
+		/* same as before */
+		/* still working code! (ton) */
+		zsamp= GET_INT_FROM_POINTER(rz);
+	}
+
+	/* NO shadow when sampled at 'eternal' distance */
+
+	if (zsamp >= 0x7FFFFE00) return 1.0;
+
+	if (zsamp > zs) return 1.0; 		/* absolute no shadow */
+	else {
+		/* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
+		zbias= 0x7fffffff - zs;
+		if (zbias > -bias) {
+			if ( zsamp < zs-bias) return 0.0;	/* absolute in shadow */
+		}
+		else return 0.0;	/* absolute shadow */
+	}
+
+	/* soft area */
+	/* NOTE(review): when shb->bias is 0 and zsamp == zs this divides 0 by 0 - confirm bias is never 0 */
+
+	temp=  ( (float)(zs- zsamp) )/(float)bias;
+	return 1.0f - temp*temp;
+}
+
+
+/* Returns the lit fraction of the line p1-p2 for halo rendering.
+ * Both points are projected with the winmat of the shadow buffer, then the line
+ * is walked in buffer pixels, in steps of shb->shadhalostep, and every step is
+ * read in all sample buffers. The result is lightcount/count, or 0.0 when no
+ * step was taken. */
+float shadow_halo(LampRen *lar, const float p1[3], const float p2[3])
+{
+	/* p1 p2 already are rotated in spot-space */
+	ShadBuf *shb= lar->shb;
+	ShadSampleBuf *shsample;
+	float co[4], siz;
+	float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy;
+	float zf, xf1, yf1, zf1, xf2, yf2, zf2;
+	float count, lightcount;
+	int x, y, z, xs1, ys1;
+	int dx = 0, dy = 0;
+
+	siz= 0.5f*(float)shb->size;
+
+	/* project the start point to buffer pixels */
+	co[0]= p1[0];
+	co[1]= p1[1];
+	co[2]= p1[2]/lar->sh_zfac;
+	co[3]= 1.0;
+	mul_m4_v4(shb->winmat, co);	/* rational hom co */
+	xf1= siz*(1.0f+co[0]/co[3]);
+	yf1= siz*(1.0f+co[1]/co[3]);
+	zf1= (co[2]/co[3]);
+
+
+	/* project the end point to buffer pixels */
+	co[0]= p2[0];
+	co[1]= p2[1];
+	co[2]= p2[2]/lar->sh_zfac;
+	co[3]= 1.0;
+	mul_m4_v4(shb->winmat, co);	/* rational hom co */
+	xf2= siz*(1.0f+co[0]/co[3]);
+	yf2= siz*(1.0f+co[1]/co[3]);
+	zf2= (co[2]/co[3]);
+
+	/* the 2dda (a pixel line formula) */
+
+	xs1= (int)xf1;
+	ys1= (int)yf1;
+
+	/* lambda_x: line parameter of the first pixel border crossed in x,
+	 * ldx: parameter increase per step, dx: pixel step with sign */
+	if (xf1 != xf2) {
+		if (xf2-xf1 > 0.0f) {
+			lambda_x= (xf1-xs1-1.0f)/(xf1-xf2);
+			ldx= -shb->shadhalostep/(xf1-xf2);
+			dx= shb->shadhalostep;
+		}
+		else {
+			lambda_x= (xf1-xs1)/(xf1-xf2);
+			ldx= shb->shadhalostep/(xf1-xf2);
+			dx= -shb->shadhalostep;
+		}
+	}
+	else {
+		lambda_x= 1.0;
+		ldx= 0.0;
+	}
+
+	/* same for y */
+	if (yf1 != yf2) {
+		if (yf2-yf1 > 0.0f) {
+			lambda_y= (yf1-ys1-1.0f)/(yf1-yf2);
+			ldy= -shb->shadhalostep/(yf1-yf2);
+			dy= shb->shadhalostep;
+		}
+		else {
+			lambda_y= (yf1-ys1)/(yf1-yf2);
+			ldy= shb->shadhalostep/(yf1-yf2);
+			dy= -shb->shadhalostep;
+		}
+	}
+	else {
+		lambda_y= 1.0;
+		ldy= 0.0;
+	}
+
+	x= xs1;
+	y= ys1;
+	lambda= count= lightcount= 0.0;
+
+/* printf("start %x %x	\n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
+
+	do {
+		lambda_o= lambda;
+
+		/* advance along the axis whose border comes first, or both when equal */
+		if (lambda_x==lambda_y) {
+			lambda_x+= ldx;
+			x+= dx;
+			lambda_y+= ldy;
+			y+= dy;
+		}
+		else {
+			if (lambda_x<lambda_y) {
+				lambda_x+= ldx;
+				x+= dx;
+			}
+			else {
+				lambda_y+= ldy;
+				y+= dy;
+			}
+		}
+
+		lambda = min_ff(lambda_x, lambda_y);
+
+		/* not making any progress? */
+		if (lambda==lambda_o) break;
+
+		/* clip to end of volume */
+		lambda = min_ff(lambda, 1.0f);
+
+		/* depth on the line at this step; every step counts for all buffers */
+		zf= zf1 + lambda*(zf2-zf1);
+		count+= (float)shb->totbuf;
+
+		if (zf<= -1.0f) lightcount += 1.0f;	/* close to the spot */
+		else {
+
+			/* make sure, behind the clipend we extend halolines. */
+			if (zf>=1.0f) z= 0x7FFFF000;
+			else z= (int)(0x7FFFF000*zf);
+
+			for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
+				lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
+
+		}
+	}
+	while (lambda < 1.0f);
+
+	if (count!=0.0f) return (lightcount/count);
+	return 0.0f;
+
+}
+
+
+/* ********************* Irregular Shadow Buffer (ISB) ************* */
+/* ********** storage of all view samples in a raster of lists ***** */
+
+/* based on several articles describing this method, like:
+ * The Irregular Z-Buffer and its Application to Shadow Mapping
+ * Gregory S. Johnson - William R. Mark - Christopher A. Burns
+ * and
+ * Alias-Free Shadow Maps
+ * Timo Aila and Samuli Laine
+ */
+
+/* bsp structure (actually kd tree) */
+
+#define BSPMAX_SAMPLE	128
+#define BSPMAX_DEPTH	32
+
+/* 3d bounding box in lamp projection.
+ * aligned with struct rctf: the x/y part comes first, so the code here casts
+ * a Boxf pointer to (rctf *) for the 2d rect functions */
+typedef struct Boxf {
+	float xmin, xmax;
+	float ymin, ymax;
+	float zmin, zmax;
+} Boxf;
+
+/* Node of the sample tree. A node with children (left != NULL) splits at
+ * divider[index]; a node without children is a leaf storing samples. */
+typedef struct ISBBranch {
+	/* children, samples with coordinate <= divider go left */
+	struct ISBBranch *left, *right;
+	/* split position for x and y, only divider[index] is used for descending */
+	float divider[2];
+	/* bounds of this node */
+	Boxf box;
+	/* totsamp: number of stored samples, index: split axis (0 is x, 1 is y),
+	 * full: number of samples that are completely in shadow */
+	short totsamp, index, full, unused;
+	/* leaf only: array with room for BSPMAX_SAMPLE sample pointers */
+	ISBSample **samples;
+} ISBBranch;
+
+/* Projected face (or strand) that is tested against the samples in the tree. */
+typedef struct BSPFace {
+	/* bounds of the face vertices */
+	Boxf box;
+	/* face vertices, v4 is NULL for triangles */
+	const float *v1, *v2, *v3, *v4;
+	int obi;		/* object for face lookup */
+	int facenr;		/* index to retrieve VlakRen */
+	int type;		/* only for strand now */
+	/* shad_alpha: amount added to the shadow factor of a sample behind the
+	 * face, 4096 is full shadow; is_full: set when the face shadows completely */
+	short shad_alpha, is_full;
+
+	/* strand caching data, optimize for point_behind_strand() */
+	float radline, radline_end, len;
+	float vec1[3], vec2[3], rc[3];
+} BSPFace;
+
+/* boxes are in lamp projection */
+/* Resets a box to an 'empty' state, so bound_boxf() can grow it afterwards. */
+static void init_box(Boxf *box)
+{
+	/* lower bounds start high */
+	box->xmin = box->ymin = 1000000.0f;
+	box->zmin= 0x7FFFFFFF;
+
+	/* upper bounds start low */
+	box->xmax = box->ymax = 0;
+	box->zmax= - 0x7FFFFFFF;
+}
+
+/* extends the box so that it includes the 3d point v1 */
+static void bound_boxf(Boxf *box, const float v1[3])
+{
+	const float x= v1[0], y= v1[1], z= v1[2];
+
+	box->xmin = (x < box->xmin) ? x : box->xmin;
+	box->xmax = (x > box->xmax) ? x : box->xmax;
+
+	box->ymin = (y < box->ymin) ? y : box->ymin;
+	box->ymax = (y > box->ymax) ? y : box->ymax;
+
+	box->zmin = (z < box->zmin) ? z : box->zmin;
+	box->zmax = (z > box->zmax) ? z : box->zmax;
+}
+
+/* extends the rect so that it includes the 2d point v1 */
+static void bound_rectf(rctf *box, const float v1[2])
+{
+	const float x= v1[0], y= v1[1];
+
+	box->xmin = (x < box->xmin) ? x : box->xmin;
+	box->xmax = (x > box->xmax) ? x : box->xmax;
+
+	box->ymin = (y < box->ymin) ? y : box->ymin;
+	box->ymax = (y > box->ymax) ? y : box->ymax;
+}
+
+
+/* halfway splitting, for initializing a more regular tree */
+/* Splits `root` recursively `level` times. Every branch is divided at the
+ * center of its box, across the larger of its x and y size. The nodes reached
+ * with level 0 become leaves and get a sample array. All memory is taken from
+ * the arena `mem`. */
+static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
+{
+	ISBBranch *lo, *hi;
+	int axis;
+
+	/* deepest level reached: a leaf, we add sample array */
+	if (level <= 0) {
+		root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
+		return;
+	}
+
+	/* splitpoint, the box center for both axes */
+	root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
+	root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
+
+	/* split across the larger size */
+	axis= (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box)) ? 0 : 1;
+	root->index= axis;
+
+	lo= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+	root->left= lo;
+	hi= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+	root->right= hi;
+
+	/* both children start with the parent box, cut at the divider */
+	lo->box= root->box;
+	hi->box= root->box;
+	if (axis==0) {
+		lo->box.xmax = root->divider[0];
+		hi->box.xmin = root->divider[0];
+	}
+	else {
+		lo->box.ymax = root->divider[1];
+		hi->box.ymin = root->divider[1];
+	}
+
+	/* go deeper */
+	isb_bsp_split_init(lo, mem, level-1);
+	isb_bsp_split_init(hi, mem, level-1);
+}
+
+/* Splits a leaf holding BSPMAX_SAMPLE samples into two new leaves.
+ * The divider is the average sample position, the split axis is the larger of
+ * the x and y size of the box. The sample array of root is reused for the
+ * right leaf, the left leaf gets a new array from the arena. */
+/* note; if all samples on same location we just spread them over 2 new branches */
+static void isb_bsp_split(ISBBranch *root, MemArena *mem)
+{
+	ISBBranch *left, *right;
+	ISBSample *samples[BSPMAX_SAMPLE];
+	int a, i;
+
+	/* splitpoint */
+	root->divider[0]= root->divider[1]= 0.0f;
+	for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
+		root->divider[0]+= root->samples[a]->zco[0];
+		root->divider[1]+= root->samples[a]->zco[1];
+	}
+	root->divider[0]/= BSPMAX_SAMPLE;
+	root->divider[1]/= BSPMAX_SAMPLE;
+
+	/* find best splitpoint */
+	if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
+		i = root->index = 0;
+	else
+		i = root->index = 1;
+
+	/* new branches */
+	/* NOTE(review): totsamp of both new branches is incremented below without
+	 * being set first, this assumes the arena returns zeroed memory - confirm */
+	left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+	right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
+
+	/* new sample array */
+	left->samples = BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
+	right->samples = samples;  /* tmp */
+
+	/* split samples */
+	for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
+		int comp= 0;
+		/* this prevents adding samples all to 1 branch when divider is equal to samples */
+		if (root->samples[a]->zco[i] == root->divider[i])
+			comp= a & 1;
+		else if (root->samples[a]->zco[i] < root->divider[i])
+			comp= 1;
+
+		if (comp==1) {
+			left->samples[left->totsamp]= root->samples[a];
+			left->totsamp++;
+		}
+		else {
+			right->samples[right->totsamp]= root->samples[a];
+			right->totsamp++;
+		}
+	}
+
+	/* copy samples from tmp; the array of root is handed over to the right
+	 * branch, root itself is no leaf anymore */
+	memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
+	right->samples= root->samples;
+	root->samples= NULL;
+
+	/* box info */
+	left->box= root->box;
+	right->box= root->box;
+	if (i==0) {
+		left->box.xmax = root->divider[0];
+		right->box.xmin = root->divider[0];
+	}
+	else {
+		left->box.ymax = root->divider[1];
+		right->box.ymin = root->divider[1];
+	}
+}
+
+/* inserts sample in main tree, also splits on threshold */
+/* The sample is stored in the leaf found by descending from root along the
+ * dividers. A leaf that becomes full is split, unless it is at depth
+ * BSPMAX_DEPTH already. */
+/* returns 1 if error */
+static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
+{
+	ISBBranch *leaf;
+	const float *zco= sample->zco;
+	int depth= 0;
+
+	/* debug counter, also used to check if something was filled in ever */
+	root->totsamp++;
+
+	/* going over branches until last one found */
+	for (leaf= root; leaf->left; depth++) {
+		const int axis= leaf->index;
+		leaf= (zco[axis] <= leaf->divider[axis]) ? leaf->left : leaf->right;
+	}
+
+	if (leaf->totsamp==BSPMAX_SAMPLE) {
+		printf("error in bsp branch\n");	/* only for debug, cannot happen */
+		return 1;
+	}
+
+	/* insert */
+	leaf->samples[leaf->totsamp]= sample;
+	leaf->totsamp++;
+
+	/* still room left, no split needed */
+	if (leaf->totsamp!=BSPMAX_SAMPLE)
+		return 0;
+
+	/* full, but not allowed to split deeper */
+	if (depth==BSPMAX_DEPTH) {
+		leaf->totsamp--;	/* stop filling in... will give errors */
+		return 1;
+	}
+
+	isb_bsp_split(leaf, memarena);
+	return 0;
+}
+
+/* initialize vars in face, for optimal point-in-face test */
+/* Fills in the strand caching data: half the length of v1-v2 as radius, the
+ * middle points of both strand ends and the vector between them. For a strand
+ * with 2d length, len becomes the inverse of the squared 2d length. */
+static void bspface_init_strand(BSPFace *face)
+{
+	float len_sq;
+	int k;
+
+	face->radline= 0.5f* len_v2v2(face->v1, face->v2);
+
+	/* middle of both ends, a strand without v4 ends in v3 */
+	mid_v3_v3v3(face->vec1, face->v1, face->v2);
+	if (face->v4 == NULL)
+		copy_v3_v3(face->vec2, face->v3);
+	else
+		mid_v3_v3v3(face->vec2, face->v3, face->v4);
+
+	for (k=0; k<3; k++)
+		face->rc[k]= face->vec2[k]-face->vec1[k];
+
+	/* squared length in 2d */
+	len_sq= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
+	face->len= len_sq;
+
+	if (len_sq != 0.0f) {
+		face->radline_end = face->radline / sqrtf(len_sq);
+		face->len = 1.0f / len_sq;
+	}
+}
+
+/* brought back to a simple 2d case */
+/* Returns 1 when point p is covered by the strand. A strand without 2d length
+ * only tests the 2d distance to its start point; otherwise p has to be within
+ * the radius of the center line and behind the depth of the line there. */
+static int point_behind_strand(const float p[3], BSPFace *face)
+{
+	/* v1 - v2 is radius, v1 - v3 length */
+	float delta[2], closest[2], lambda;
+
+	/* using code from dist_to_line_segment_v2(), distance vec to line-piece */
+
+	if (face->len==0.0f) {
+		delta[0]= p[0]-face->vec1[0];
+		delta[1]= p[1]-face->vec1[1];
+
+		return (len_v2(delta) < face->radline) ? 1 : 0;
+	}
+
+	/* position of p along the line, 0.0 - 1.0 is on the line */
+	lambda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
+
+	/* the ends are extended with the radius */
+	if (!(lambda > -face->radline_end && lambda < 1.0f+face->radline_end))
+		return 0;
+
+	closest[0]= lambda*face->rc[0]+face->vec1[0];
+	closest[1]= lambda*face->rc[1]+face->vec1[1];
+
+	delta[0]= closest[0]-p[0];
+	delta[1]= closest[1]-p[1];
+
+	if (len_v2(delta) < face->radline) {
+		const float zval= face->vec1[2] + lambda*face->rc[2];
+		if (p[2] > zval)
+			return 1;
+	}
+
+	return 0;
+}
+
+
+/* return 1 if inside. code derived from src/parametrizer.c */
+/* Tests in 2d whether p is inside triangle v1-v2-v3 (borders included) and, if
+ * so, whether the depth of the triangle at that position is not behind p[2].
+ * Triangles without area in 2d return 0. */
+static int point_behind_tria2d(const float p[3], const float v1[3], const float v2[3], const float v3[3])
+{
+	const float e1x= v2[0] - v1[0];
+	const float e1y= v2[1] - v1[1];
+	const float e2x= v3[0] - v1[0];
+	const float e2y= v3[1] - v1[1];
+	float det, inv, px, py, u, v, z;
+
+	det= e1x*e2y - e1y*e2x;
+	if (det==0.0f)
+		return 0;
+
+	px= p[0] - v1[0];
+	py= p[1] - v1[1];
+
+	inv= 1.0f/det;
+
+	/* barycentric weights of v2 and v3 */
+	u= (px*e2y - py*e2x)*inv;
+	if (!(u >= 0.0f))
+		return 0;
+
+	v= (e1x*py - e1y*px)*inv;
+	if (!(v >= 0.0f))
+		return 0;
+
+	if (!(u + v <= 1.0f))
+		return 0;
+
+	/* inside, now check if point p is behind */
+	z= (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];
+	return (z <= p[2]) ? 1 : 0;
+}
+
+/* Disabled code: rect versus triangle overlap test, kept for reference only. */
+#if 0
+/* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */
+
+/* check if line v1-v2 has all rect points on other side of point v3 */
+static int rect_outside_line(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
+{
+	float a, b, c;
+	int side;
+
+	/* line formula for v1-v2 */
+	a= v2[1]-v1[1];
+	b= v1[0]-v2[0];
+	c= -a*v1[0] - b*v1[1];
+	/* side of the line where v3 is */
+	side= a*v3[0] + b*v3[1] + c < 0.0f;
+
+	/* the four quad points */
+	if ( side==(rect->xmin*a + rect->ymin*b + c >= 0.0f) )
+		if ( side==(rect->xmax*a + rect->ymin*b + c >= 0.0f) )
+			if ( side==(rect->xmax*a + rect->ymax*b + c >= 0.0f) )
+				if ( side==(rect->xmin*a + rect->ymax*b + c >= 0.0f) )
+					return 1;
+	return 0;
+}
+
+/* check if one of the triangle edges separates all rect points on 1 side */
+static int rect_isect_tria(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
+{
+	if (rect_outside_line(rect, v1, v2, v3))
+		return 0;
+	if (rect_outside_line(rect, v2, v3, v1))
+		return 0;
+	if (rect_outside_line(rect, v3, v1, v2))
+		return 0;
+	return 1;
+}
+#endif
+
+/* Descends into all branches overlapped by the boundbox of the face, and adds
+ * the shadow of the face to every sample in the reached leaves that is behind
+ * the face. recursive */
+static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
+{
+
+	/* are we descending? */
+	if (bspn->left) {
+		/* hrmf, the box struct cannot be addressed with index */
+		/* a face crossing the divider goes into both children */
+		if (bspn->index==0) {
+			if (face->box.xmin <= bspn->divider[0])
+				isb_bsp_face_inside(bspn->left, face);
+			if (face->box.xmax > bspn->divider[0])
+				isb_bsp_face_inside(bspn->right, face);
+		}
+		else {
+			if (face->box.ymin <= bspn->divider[1])
+				isb_bsp_face_inside(bspn->left, face);
+			if (face->box.ymax > bspn->divider[1])
+				isb_bsp_face_inside(bspn->right, face);
+		}
+	}
+	else {
+		/* else: end branch reached */
+		int a;
+
+		if (bspn->totsamp==0) return;
+
+		/* check for nodes entirely in shadow, can be skipped */
+		if (bspn->totsamp==bspn->full)
+			return;
+
+		/* if bsp node is entirely in front of face, give up */
+		if (bspn->box.zmax < face->box.zmin)
+			return;
+
+		/* if face boundbox is outside of branch rect, give up */
+		if (0==BLI_rctf_isect((rctf *)&face->box, (rctf *)&bspn->box, NULL))
+			return;
+
+		/* test all points inside branch */
+		for (a=bspn->totsamp-1; a>=0; a--) {
+			ISBSample *samp= bspn->samples[a];
+
+			/* skip samples of the face itself, and samples without shadfac
+			 * pointer (cleared below once fully in shadow) */
+			if ((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
+				if (face->box.zmin < samp->zco[2]) {
+					if (BLI_rctf_isect_pt_v((rctf *)&face->box, samp->zco)) {
+						int inshadow= 0;
+
+						/* strands have their own test, quads are tested as two triangles */
+						if (face->type) {
+							if (point_behind_strand(samp->zco, face))
+								inshadow= 1;
+						}
+						else if ( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
+							inshadow= 1;
+						else if (face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
+							inshadow= 1;
+
+						if (inshadow) {
+							*(samp->shadfac) += face->shad_alpha;
+							/* optimize; is_full means shad_alpha==4096 */
+							if (*(samp->shadfac) >= 4096 || face->is_full) {
+								/* sample is done, count it and stop testing it */
+								bspn->full++;
+								samp->shadfac= NULL;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+/* based on available samples, recalculate the bounding box for bsp nodes, recursive */
+/* Only leaves with samples get a new box, branches and empty leaves keep theirs. */
+static void isb_bsp_recalc_box(ISBBranch *root)
+{
+	int a;
+
+	/* branch: only handle the children */
+	if (root->left) {
+		isb_bsp_recalc_box(root->left);
+		isb_bsp_recalc_box(root->right);
+		return;
+	}
+
+	if (root->totsamp == 0)
+		return;
+
+	/* leaf: box becomes the bounds of its samples, last sample first */
+	init_box(&root->box);
+	a= root->totsamp;
+	while (a-- > 0)
+		bound_boxf(&root->box, root->samples[a]->zco);
+}
+
+/* callback function for zbuf clip */
+/* Wraps a clipped strand in a BSPFace and tests it against the sample tree
+ * that was stored in zspan->rectz. */
+static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr,
+                                const float *v1, const float *v2, const float *v3, const float *v4)
+{
+	const float *corners[3];
+	BSPFace face;
+	int k;
+
+	corners[0]= v1;
+	corners[1]= v2;
+	corners[2]= v3;
+
+	/* setup boundbox, v4 is optional */
+	init_box(&face.box);
+	for (k=0; k<3; k++)
+		bound_boxf(&face.box, corners[k]);
+	if (v4)
+		bound_boxf(&face.box, v4);
+
+	face.v1= v1;
+	face.v2= v2;
+	face.v3= v3;
+	face.v4= v4;
+	face.obi= obi;
+	face.facenr= zvlnr & ~RE_QUAD_OFFS;
+	face.type= R_STRAND;
+
+	/* shadow amount for a sample, 4096 is full; divided over the osa samples */
+	if (R.osa == 0)
+		face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
+	else
+		face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
+
+	face.is_full= (zspan->shad_alpha==1.0f);
+
+	/* optimize values */
+	bspface_init_strand(&face);
+
+	isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
+}
+
+/* callback function for zbuf clip */
+/* Wraps a clipped triangle or quad in a BSPFace and tests it against the
+ * sample tree that was stored in zspan->rectz. */
+static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr,
+                              const float *v1, const float *v2, const float *v3, const float *v4)
+{
+	const float *corners[3];
+	BSPFace face;
+	int k;
+
+	corners[0]= v1;
+	corners[1]= v2;
+	corners[2]= v3;
+
+	/* setup boundbox, v4 is optional */
+	init_box(&face.box);
+	for (k=0; k<3; k++)
+		bound_boxf(&face.box, corners[k]);
+	if (v4)
+		bound_boxf(&face.box, v4);
+
+	face.v1= v1;
+	face.v2= v2;
+	face.v3= v3;
+	face.v4= v4;
+	face.obi= obi;
+	face.facenr= zvlnr & ~RE_QUAD_OFFS;
+	face.type= 0;
+
+	/* shadow amount for a sample, 4096 is full; divided over the osa samples */
+	if (R.osa == 0)
+		face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
+	else
+		face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
+
+	face.is_full= (zspan->shad_alpha==1.0f);
+
+	isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
+}
+
+/* Clip flags of homogeneous coordinate ho for the range in minmax, which is
+ * stored as (xmin, xmax, ymin, ymax) and scaled with ho[3] here.
+ * Returns a combination of: 1 above xmax, 2 below xmin, 4 above ymax,
+ * 8 below ymin; 0 means inside. */
+static int testclip_minmax(const float ho[4], const float minmax[4])
+{
+	const float w= ho[3];
+	const int xbits= (ho[0] > minmax[1]*w) ? 1 : ((ho[0] < minmax[0]*w) ? 2 : 0);
+	const int ybits= (ho[1] > minmax[3]*w) ? 4 : ((ho[1] < minmax[2]*w) ? 8 : 0);
+
+	return xbits | ybits;
+}
+
+/* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
+/* Every shadow casting face of every object instance is projected with the
+ * lamp matrix, clipped, and passed on via the zbuffer clipping code to
+ * isb_bsp_test_face() or isb_bsp_test_strand(), which get the tree root
+ * through zspan.rectz. */
+static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
+{
+	ObjectInstanceRen *obi;
+	ObjectRen *obr;
+	ShadBuf *shb= lar->shb;
+	ZSpan zspan, zspanstrand;
+	VlakRen *vlr= NULL;
+	Material *ma= NULL;
+	float minmaxf[4], winmat[4][4];
+	int size= shb->size;
+	int i, a, ok=1, lay= -1;
+
+	/* further optimize, also sets minz maxz */
+	isb_bsp_recalc_box(root);
+
+	/* extra clipping for minmax: the root box from buffer pixels to the
+	 * -1.0 - 1.0 range, with a margin of two pixels */
+	minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
+	minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
+	minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
+	minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
+
+	/* lay stays -1 (all bits set) unless the lamp is restricted to layers */
+	if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
+
+	/* (ab)use zspan, since we use zbuffer clipping code */
+	zbuf_alloc_span(&zspan, size, size, re->clipcrop);
+
+	zspan.zmulx=  ((float)size)/2.0f;
+	zspan.zmuly=  ((float)size)/2.0f;
+	zspan.zofsx= -0.5f;
+	zspan.zofsy= -0.5f;
+
+	/* pass on bsp root to zspan */
+	zspan.rectz= (int *)root;
+
+	/* filling methods; zspanstrand is a plain copy of zspan with another callback */
+	zspanstrand= zspan;
+	//	zspan.zbuflinefunc= zbufline_onlyZ;
+	zspan.zbuffunc= isb_bsp_test_face;
+	zspanstrand.zbuffunc= isb_bsp_test_strand;
+
+	for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
+		obr= obi->obr;
+
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_m4m4(winmat, shb->persmat, obi->mat);
+		else
+			copy_m4_m4(winmat, shb->persmat);
+
+		for (a=0; a<obr->totvlak; a++) {
+
+			/* faces are stored in nodes of 256 */
+			if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
+			else vlr++;
+
+			/* note, these conditions are copied in shadowbuf_autoclip() */
+			/* only evaluated again when the material changes */
+			if (vlr->mat!= ma) {
+				ma= vlr->mat;
+				ok= 1;
+				if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0;
+				if (ma->material_type == MA_TYPE_WIRE) ok= 0;
+				zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
+			}
+
+			if (ok && (obi->lay & lay)) {
+				float hoco[4][4];
+				int c1, c2, c3, c4=0;
+				int d1, d2, d3, d4=0;
+				int partclip;
+
+				/* create hocos per face, it is while render */
+				projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
+				projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
+				projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
+				if (vlr->v4) {
+					projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
+				}
+
+				/* minmax clipping: a face is skipped when all its vertices
+				 * share a clip flag */
+				if (vlr->v4) partclip= d1 & d2 & d3 & d4;
+				else partclip= d1 & d2 & d3;
+
+				if (partclip==0) {
+
+					/* window clipping */
+					c1= testclip(hoco[0]);
+					c2= testclip(hoco[1]);
+					c3= testclip(hoco[2]);
+					if (vlr->v4)
+						c4= testclip(hoco[3]);
+
+					/* ***** NO WIRE YET */
+					/* NOTE(review): ok is cleared for wire materials above, so
+					 * this wire branch looks unreachable - confirm */
+					if (ma->material_type == MA_TYPE_WIRE) {
+						if (vlr->v4)
+							zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
+						else
+							zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], NULL, c1, c2, c3, 0);
+					}
+					else if (vlr->v4) {
+						if (vlr->flag & R_STRAND)
+							zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
+						else
+							zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
+					}
+					else
+						zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
+
+				}
+			}
+		}
+	}
+
+	zbuf_free_span(&zspan);
+}
+
+/* returns 1 when the viewpixel (x, y) on face vlr is visible in lampbuffer and fills co_r with its lamp buffer coordinate; returns 0 and leaves co_r untouched otherwise */
+static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float co_r[3])
+{
+	float hoco[4], v1[3], nor[3];
+	float dface, fac, siz;
+	/* nor and the first vertex (with the instance matrix applied when R_TRANSFORMED) give the face plane */
+	RE_vlakren_get_normal(&R, obi, vlr, nor);
+	copy_v3_v3(v1, vlr->v1->co);
+	if (obi->flag & R_TRANSFORMED)
+		mul_m4_v3(obi->mat, v1);
+
+	/* from shadepixel() */
+	dface = dot_v3v3(v1, nor);
+	hoco[3]= 1.0f;
+
+	/* ortho viewplane cannot intersect using view vector originating in (0, 0, 0) */
+	if (R.r.mode & R_ORTHO) {
+		/* x and y 3d coordinate can be derived from pixel coord and winmat */
+		float fx= 2.0f/(R.winx*R.winmat[0][0]);
+		float fy= 2.0f/(R.winy*R.winmat[1][1]);
+
+		hoco[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
+		hoco[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
+
+		/* using a*x + b*y + c*z = d equation, (a b c) is normal */
+		if (nor[2]!=0.0f)
+			hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
+		else
+			hoco[2]= 0.0f;
+	}
+	else {
+		float div, view[3];
+
+		calc_view_vector(view, x, y);
+		/* div==0: the view vector lies in the face plane, there is no intersection */
+		div = dot_v3v3(nor, view);
+		if (div==0.0f)
+			return 0;
+		/* scale the view vector so that it ends on the face plane */
+		fac= dface/div;
+
+		hoco[0]= fac*view[0];
+		hoco[1]= fac*view[1];
+		hoco[2]= fac*view[2];
+	}
+
+	/* move 3d vector to lampbuf */
+	mul_m4_v4(shb->persmat, hoco);	/* rational hom co */
+
+	/* clip We can test for -1.0/1.0 because of the properties of the
+	 * coordinate transformations. */
+	fac = fabsf(hoco[3]);
+	if (hoco[0]<-fac || hoco[0]>fac)
+		return 0;
+	if (hoco[1]<-fac || hoco[1]>fac)
+		return 0;
+	if (hoco[2]<-fac || hoco[2]>fac)
+		return 0;
+	/* homogeneous divide: x and y go from -1..1 to 0..shb->size minus half a pixel, z is scaled by 0x7FFFFFFF */
+	siz= 0.5f*(float)shb->size;
+	co_r[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
+	co_r[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
+	co_r[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
+
+	/* XXXX bias, much less than normal shadbuf, or do we need a constant? */
+	co_r[2] -= 0.05f*shb->bias;
+
+	return 1;
+}
+
+/* Storage of shadow results, solid osa and transp case: a new ISBShadfacA is
+ * taken from 'mem', filled in, and pushed on the front of the list that
+ * *isbsapp points to. */
+static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
+{
+	ISBShadfacA *entry= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
+	float factor;
+
+	/* 4096 stands for full shadow. In the osa case the samples were filled in
+	 * with factor 1.0/R.osa, so with fewer samples the sum has to be corrected;
+	 * 'samples' is not read at all when R.osa is zero. */
+	factor= R.osa ? ((float)shadfac*R.osa)/(4096.0f*samples)
+	              : ((float)shadfac)/(4096.0f);
+
+	entry->obi= obi;
+	/* RE_QUAD_OFFS is masked out of the stored face number */
+	entry->facenr= facenr & ~RE_QUAD_OFFS;
+	entry->shadfac= factor;
+
+	/* link in front: the old head (NULL for an empty list) becomes the successor */
+	entry->next= *isbsapp;
+	*isbsapp= entry;
+}
+
+/* adding samples, solid case; returns non-zero as soon as an insert in the BSP fails, 0 when all went in */
+static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
+{
+	int xi, yi, *xcos, *ycos;
+	int sample, bsp_err= 0;
+
+	/* bsp split doesn't like to handle regular sequences, so rows and columns are visited in a shuffled order */
+	xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
+	ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
+	for (xi=0; xi<pa->rectx; xi++)
+		xcos[xi]= xi;
+	for (yi=0; yi<pa->recty; yi++)
+		ycos[yi]= yi;
+	BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
+	BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
+
+	/* all three loops stop at the first failed insert, the caller does not use the BSP after an error */
+	for (sample=0; sample<(R.osa?R.osa:1) && bsp_err==0; sample++) {
+		ISBSample *samp= samplebuf[sample], *samp1;
+
+		for (yi=0; yi<pa->recty && bsp_err==0; yi++) {
+			int y= ycos[yi];
+			for (xi=0; xi<pa->rectx && bsp_err==0; xi++) {
+				int x= xcos[xi];
+				samp1= samp + y*pa->rectx + x;
+				if (samp1->facenr)
+					bsp_err= isb_bsp_insert(root, memarena, samp1);
+			}
+		}
+	}
+
+	MEM_freeN(xcos);
+	MEM_freeN(ycos);
+
+	return bsp_err;
+}
+
+/* solid version: fills shb->isb_result[pa->thread] of lamp lar with the shadow factors for part pa */
+/* lar->shb, pa->rectz and pa->rectp should exist */
+static void isb_make_buffer(RenderPart *pa, LampRen *lar)
+{
+	ShadBuf *shb= lar->shb;
+	ISBData *isbdata;
+	ISBSample *samp, *samplebuf[16];	/* should be RE_MAX_OSA */
+	ISBBranch root;
+	MemArena *memarena;
+	intptr_t *rd;
+	int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
+
+	/* storage for shadow, per thread */
+	isbdata= shb->isb_result[pa->thread];
+
+	/* to map the shi->xs and ys coordinate */
+	isbdata->minx= pa->disprect.xmin;
+	isbdata->miny= pa->disprect.ymin;
+	isbdata->rectx= pa->rectx;
+	isbdata->recty= pa->recty;
+
+	/* branches are added using memarena (32k branches) */
+	memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
+	BLI_memarena_use_calloc(memarena);
+
+	/* samplebuf is in camera view space (pixels) */
+	for (sample=0; sample<(R.osa?R.osa:1); sample++)
+		samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
+
+	/* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
+	if (R.osa==0)
+		isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
+
+	/* setup bsp root, xmin/ymin start at shb->size which doubles as the 'no samples' marker tested below */
+	memset(&root, 0, sizeof(ISBBranch));
+	root.box.xmin = (float)shb->size;
+	root.box.ymin = (float)shb->size;
+
+	/* create the sample buffers */
+	for (sindex=0, y=0; y<pa->recty; y++) {
+		for (x=0; x<pa->rectx; x++, sindex++) {
+
+			/* this makes it a long function, but splitting it out would mean 10+ arguments */
+			/* first check OSA case */
+			if (R.osa) {
+				rd= pa->rectdaps + sindex;
+				if (*rd) {
+					float xs= (float)(x + pa->disprect.xmin);
+					float ys= (float)(y + pa->disprect.ymin);
+
+					for (sample=0; sample<R.osa; sample++) {
+						PixStr *ps= (PixStr *)(*rd);
+						int mask= (1<<sample);
+						/* find the PixStr that holds this subsample */
+						while (ps) {
+							if (ps->mask & mask)
+								break;
+							ps= ps->next;
+						}
+						if (ps && ps->facenr>0) {
+							ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
+							ObjectRen *obr= obi->obr;
+							VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
+							/* NOTE(review): ps->shadfac is only reset below when the pixel maps into this lamp buffer, a value left by another lamp may remain - confirm this is harmless */
+							samp= samplebuf[sample] + sindex;
+							/* convert image plane pixel location to lamp buffer space */
+							if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
+								samp->obi= ps->obi;
+								samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
+								ps->shadfac= 0;
+								samp->shadfac= &ps->shadfac;
+								bound_rectf((rctf *)&root.box, samp->zco);
+							}
+						}
+					}
+				}
+			}
+			else {
+				rectp= pa->rectp + sindex;
+				recto= pa->recto + sindex;
+				if (*rectp>0) {
+					ObjectInstanceRen *obi= &R.objectinstance[*recto];
+					ObjectRen *obr= obi->obr;
+					VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
+					float xs= (float)(x + pa->disprect.xmin);
+					float ys= (float)(y + pa->disprect.ymin);
+
+					samp= samplebuf[0] + sindex;
+					/* convert image plane pixel location to lamp buffer space */
+					if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
+						samp->obi= *recto;
+						samp->facenr= *rectp & ~RE_QUAD_OFFS;
+						samp->shadfac= isbdata->shadfacs + sindex;
+						bound_rectf((rctf *)&root.box, samp->zco);
+					}
+				}
+			}
+		}
+	}
+
+	/* simple method to see if we have samples */
+	if (root.box.xmin != (float)shb->size) {
+		/* now create a regular split, root.box has the initial bounding box of all pixels */
+		/* split bsp 8 levels deep, in regular grid (16 x 16) */
+		isb_bsp_split_init(&root, memarena, 8);
+
+		/* insert all samples in BSP now */
+		bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
+
+		if (bsp_err==0) {
+			/* go over all faces and fill in shadow values */
+
+			isb_bsp_fillfaces(&R, lar, &root);	/* shb->persmat should have been calculated */
+
+			/* copy shadow samples to persistent buffer, reduce memory overhead */
+			if (R.osa) {
+				ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
+
+				isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
+				BLI_memarena_use_calloc(isbdata->memarena);
+
+				for (rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
+					/* only PixStr entries with a non-zero shadfac are stored */
+					if (*rd) {
+						PixStr *ps= (PixStr *)(*rd);
+						while (ps) {
+							if (ps->shadfac)
+								isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
+							ps= ps->next;
+						}
+					}
+				}
+			}
+		}
+	}
+	else {	/* no sample at all: free the non-osa result buffer again */
+		if (isbdata->shadfacs) {
+			MEM_freeN(isbdata->shadfacs);
+			isbdata->shadfacs= NULL;
+		}
+	}
+
+	/* free BSP */
+	BLI_memarena_free(memarena);
+
+	/* free samples */
+	for (x=0; x<(R.osa?R.osa:1); x++)
+		MEM_freeN(samplebuf[x]);
+
+	if (bsp_err) printf("error in filling bsp\n");
+}
+
+/* Add a sample to a buffer: takes a new ISBSampleA from 'mem' and makes it the
+ * root sample in *isbsa, the old root (NULL for an empty buffer) comes after it.
+ * Returns the new sample, the members other than 'next' are left to the caller. */
+static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
+{
+	ISBSampleA *sample;
+
+	sample= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
+	/* link in front of the current root */
+	sample->next= *isbsa;
+	*isbsa= sample;
+
+	return sample;
+}
+
+/* adding samples in BSP, transparent case; returns non-zero as soon as an insert fails, 0 when all went in */
+static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
+{
+	int xi, yi, *xcos, *ycos;
+	int sample, bsp_err= 0;
+
+	/* bsp split doesn't like to handle regular sequences, so rows and columns are visited in a shuffled order */
+	xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
+	ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
+	for (xi=0; xi<pa->rectx; xi++)
+		xcos[xi]= xi;
+	for (yi=0; yi<pa->recty; yi++)
+		ycos[yi]= yi;
+	BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
+	BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
+
+	/* every loop stops at the first failed insert, the caller does not use the BSP after an error */
+	for (sample=0; sample<(R.osa?R.osa:1) && bsp_err==0; sample++) {
+		ISBSampleA **samp= samplebuf[sample], *samp1;
+
+		for (yi=0; yi<pa->recty && bsp_err==0; yi++) {
+			int y= ycos[yi];
+			for (xi=0; xi<pa->rectx && bsp_err==0; xi++) {
+				int x= xcos[xi];
+
+				samp1= *(samp + y*pa->rectx + x);
+				while (samp1 && bsp_err==0) {
+					bsp_err= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
+					samp1= samp1->next;
+				}
+			}
+		}
+	}
+
+	MEM_freeN(xcos);
+	MEM_freeN(ycos);
+
+	return bsp_err;
+}
+
+
+/* Ztransp version: fills shb->isb_result[pa->thread] of lamp lar for the transparent pixels in apixbuf */
+/* lar->shb, pa->rectz and pa->rectp should exist */
+static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
+{
+	ShadBuf *shb= lar->shb;
+	ISBData *isbdata;
+	ISBSampleA *samp, **samplebuf[16];	/* MAX_OSA */
+	ISBBranch root;
+	MemArena *memarena;
+	APixstr *ap;
+	int x, y, sindex, sample, bsp_err=0;
+
+	/* storage for shadow, per thread */
+	isbdata= shb->isb_result[pa->thread];
+
+	/* to map the shi->xs and ys coordinate */
+	isbdata->minx= pa->disprect.xmin;
+	isbdata->miny= pa->disprect.ymin;
+	isbdata->rectx= pa->rectx;
+	isbdata->recty= pa->recty;
+
+	/* branches are added using memarena (32k branches) */
+	memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
+	BLI_memarena_use_calloc(memarena);
+
+	/* samplebuf is in camera view space (pixels) */
+	for (sample=0; sample<(R.osa?R.osa:1); sample++)
+		samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
+
+	/* setup bsp root, xmin/ymin start at shb->size which doubles as the 'no samples' marker tested below */
+	memset(&root, 0, sizeof(ISBBranch));
+	root.box.xmin = (float)shb->size;
+	root.box.ymin = (float)shb->size;
+
+	/* create the sample buffers */
+	for (ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
+		for (x=0; x<pa->rectx; x++, sindex++, ap++) {
+			/* p[0] is the test for 'pixel has transparent faces', each APixstr in the list holds up to 4 of them */
+			if (ap->p[0]) {
+				APixstr *apn;
+				float xs= (float)(x + pa->disprect.xmin);
+				float ys= (float)(y + pa->disprect.ymin);
+
+				for (apn=ap; apn; apn= apn->next) {
+					int a;
+					for (a=0; a<4; a++) {
+						if (apn->p[a]) {
+							ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
+							ObjectRen *obr= obi->obr;
+							VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
+							float zco[3];
+
+							/* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
+							apn->shadfac[a]= 0;
+
+							if (R.osa) {
+								for (sample=0; sample<R.osa; sample++) {
+									int mask= (1<<sample);
+
+									if (apn->mask[a] & mask) {
+
+										/* convert image plane pixel location to lamp buffer space */
+										if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
+											samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
+											samp->obi= apn->obi[a];
+											samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
+											samp->shadfac= &apn->shadfac[a];
+
+											copy_v3_v3(samp->zco, zco);
+											bound_rectf((rctf *)&root.box, samp->zco);
+										}
+									}
+								}
+							}
+							else {
+
+								/* convert image plane pixel location to lamp buffer space */
+								if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
+
+									samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
+									samp->obi= apn->obi[a];
+									samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
+									samp->shadfac= &apn->shadfac[a];
+
+									copy_v3_v3(samp->zco, zco);
+									bound_rectf((rctf *)&root.box, samp->zco);
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/* simple method to see if we have samples */
+	if (root.box.xmin != (float)shb->size) {
+		/* now create a regular split, root.box has the initial bounding box of all pixels */
+		/* split bsp 8 levels deep, in regular grid (16 x 16) */
+		isb_bsp_split_init(&root, memarena, 8);
+
+		/* insert all samples in BSP now */
+		bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
+
+		if (bsp_err==0) {
+			ISBShadfacA **isbsa;
+
+			/* go over all faces and fill in shadow values */
+			isb_bsp_fillfaces(&R, lar, &root);	/* shb->persmat should have been calculated */
+
+			/* copy shadow samples to persistent buffer, reduce memory overhead */
+			isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
+
+			isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
+			/* NOTE(review): this arena is not switched to calloc; isb_add_shadfac() assigns obi, facenr, shadfac and next itself - confirm nothing else is read */
+			for (ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
+
+				if (ap->p[0]) {
+					APixstr *apn;
+					for (apn=ap; apn; apn= apn->next) {
+						int a;
+						for (a=0; a<4; a++) {
+							if (apn->p[a] && apn->shadfac[a]) {
+								if (R.osa)
+									isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
+								else
+									isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);	/* sample count is not read without osa */
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/* free BSP */
+	BLI_memarena_free(memarena);
+
+	/* free samples */
+	for (x=0; x<(R.osa?R.osa:1); x++)
+		MEM_freeN(samplebuf[x]);
+
+	if (bsp_err) printf("error in filling bsp\n");
+}
+
+
+
+/* exported */
+
+/* returns amount of light (1.0 = no shadow) for the pixel in shi, also 1.0 when nothing is stored for it */
+/* note, shadepixel() rounds the coordinate, not the real sample info */
+float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
+{
+	ISBData *isbdata;
+	int x, y;
+
+	/* if raytracing, we can't accept irregular shadow */
+	if (shi->depth!=0)
+		return 1.0f;
+
+	isbdata= shb->isb_result[shi->thread];
+	if (isbdata==NULL || (isbdata->shadfacs==NULL && isbdata->shadfaca==NULL))
+		return 1.0f;
+	/* the pixel has to be inside the rect the result was made for */
+	x= shi->xs - isbdata->minx;
+	if (x < 0 || x >= isbdata->rectx)
+		return 1.0f;
+	y= shi->ys - isbdata->miny;
+	if (y < 0 || y >= isbdata->recty)
+		return 1.0f;
+
+	if (isbdata->shadfacs) {
+		/* one fixed point value per pixel, 4096 and up is full shadow */
+		const short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
+		return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
+	}
+	else {
+		/* a list per pixel, search the entry for the face that gets shaded */
+		ISBShadfacA *isbsa= *(isbdata->shadfaca + y*isbdata->rectx + x);
+		const int obi= shi->obi - R.objectinstance;
+
+		for (; isbsa; isbsa= isbsa->next)
+			if (isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
+				return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
+	}
+	return 1.0f;
+}
+
+/* makes the irregular shadow buffers of all lamps for a part, which is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
+void ISB_create(RenderPart *pa, APixstr *apixbuf)
+{
+	GroupObject *go= R.lights.first;
+
+	while (go) {
+		LampRen *lar= go->lampren;
+		/* only spot lamps that have an irregular shadow buffer take part */
+		const int use_isb= (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR);
+
+		if (use_isb) {
+			/* zeroed storage for the shadow result, per thread */
+			lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
+			if (apixbuf==NULL)
+				isb_make_buffer(pa, lar);
+			else
+				isb_make_buffer_transp(pa, apixbuf, lar);
+		}
+		go= go->next;
+	}
+}
+
+
+/* end of part rendering: frees the shadow data stored for the thread of this part, for all lamps */
+void ISB_free(RenderPart *pa)
+{
+	GroupObject *go;
+
+	for (go=R.lights.first; go; go= go->next) {
+		LampRen *lar= go->lampren;
+		ISBData *isbdata;
+
+		/* only spot lamps with an irregular shadow buffer have results */
+		if (lar->type!=LA_SPOT || lar->shb==NULL || lar->buftype!=LA_SHADBUF_IRREGULAR)
+			continue;
+		isbdata= lar->shb->isb_result[pa->thread];
+		if (isbdata==NULL)
+			continue;
+
+		/* each buffer is optional and only freed when it exists */
+		if (isbdata->shadfacs)
+			MEM_freeN(isbdata->shadfacs);
+		if (isbdata->shadfaca)
+			MEM_freeN(isbdata->shadfaca);
+		if (isbdata->memarena)
+			BLI_memarena_free(isbdata->memarena);
+		MEM_freeN(isbdata);
+		lar->shb->isb_result[pa->thread]= NULL;
+	}
+}
diff --git a/source/blender/render/intern/source/shadeinput.c b/source/blender/render/intern/source/shadeinput.c
new file mode 100644
index 00000000000..d79749871c3
--- /dev/null
+++ b/source/blender/render/intern/source/shadeinput.c
@@ -0,0 +1,1490 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2006 Blender Foundation
+ * All rights reserved.
+ *
+ * Contributors: Hos, Robert Wenzlaff.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/shadeinput.c
+ *  \ingroup render
+ */
+
+
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_lamp_types.h"
+#include "DNA_meshdata_types.h"
+#include "DNA_material_types.h"
+#include "DNA_particle_types.h"
+
+#include "BKE_scene.h"
+
+#include "BKE_node.h"
+
+/* local include */
+#include "raycounter.h"
+#include "render_types.h"
+#include "renderdatabase.h"
+#include "rendercore.h"
+#include "shading.h"
+#include "strand.h"
+#include "texture.h"
+#include "volumetric.h"
+#include "zbuf.h"
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is a hard copy of the active, dynamically allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* Shade Sample order:
+ *
+ * - shade_samples_fill_with_ps()
+ *     - for each sample
+ *         - shade_input_set_triangle()  <- if prev sample-face is same, use shade_input_copy_triangle()
+ *         - if vlr
+ *             - shade_input_set_viewco()    <- not for ray or bake
+ *             - shade_input_set_uv()        <- not for ray or bake
+ *             - shade_input_set_normals()
+ * - shade_samples()
+ *     - if AO
+ *         - shade_samples_do_AO()
+ *     - if shading happens
+ *         - for each sample
+ *             - shade_input_set_shade_texco()
+ *             - shade_samples_do_shade()
+ * - OSA: distribute sample result with filter masking
+ *
+ */
+
+/* initialize material variables in shadeinput: copies the block of 23 floats
+ * that starts at mat->r, plus mat->har (no gamma correction is done in here) */
+void shade_input_init_material(ShadeInput *shi)
+{
+	/* note, keep this synced with render_types.h; presumably the 23 floats have to match the member layout there - verify when members change */
+	memcpy(&shi->r, &shi->mat->r, 23 * sizeof(float));
+	shi->har = shi->mat->har;
+}
+
+/* also used as callback for nodes */
+/* delivers a fully filled in ShadeResult, for all passes */
+void shade_material_loop(ShadeInput *shi, ShadeResult *shr)
+{
+	/* first the regular lamp loop, with the normals as they are */
+	shade_lamp_loop(shi, shr);  /* clears shr */
+
+	if (shi->translucency != 0.0f) {
+		ShadeResult shr_t;
+		float fac = shi->translucency;
+		/* shade once more with flipped normals, the result is added scaled by the translucency */
+		shade_input_init_material(shi);
+		negate_v3_v3(shi->vn, shi->vno);
+		negate_v3(shi->facenor);
+		shi->depth++;   /* hack to get real shadow now */
+		shade_lamp_loop(shi, &shr_t);
+		shi->depth--;
+
+		/* a couple of passes */
+		madd_v3_v3fl(shr->combined, shr_t.combined, fac);
+		if (shi->passflag & SCE_PASS_SPEC)
+			madd_v3_v3fl(shr->spec, shr_t.spec, fac);
+		if (shi->passflag & SCE_PASS_DIFFUSE) {
+			madd_v3_v3fl(shr->diff, shr_t.diff, fac);
+			madd_v3_v3fl(shr->diffshad, shr_t.diffshad, fac);
+		}
+		if (shi->passflag & SCE_PASS_SHADOW)
+			madd_v3_v3fl(shr->shad, shr_t.shad, fac);
+		/* flip back, note that vn ends up equal to vno here */
+		negate_v3(shi->vn);
+		negate_v3(shi->facenor);
+	}
+
+	/* depth >= 1 when ray-shading */
+	if (shi->depth == 0 || shi->volume_depth > 0) {
+		if (R.r.mode & R_RAYTRACE) {
+			if (shi->ray_mirror != 0.0f || ((shi->mode & MA_TRANSP) && (shi->mode & MA_RAYTRANSP) && shr->alpha != 1.0f)) {
+				/* ray trace works on combined, but gives pass info */
+				ray_trace(shi, shr);
+			}
+		}
+		/* disable adding of sky for raytransp */
+		if ((shi->mode & MA_TRANSP) && (shi->mode & MA_RAYTRANSP))
+			if ((shi->layflag & SCE_LAY_SKY) && (R.r.alphamode == R_ADDSKY))
+				shr->alpha = 1.0f;
+	}
+
+	if (R.r.mode & R_RAYTRACE) {
+		if (R.render_volumes_inside.first)
+			shade_volume_inside(shi, shr);
+	}
+}
+
+
+/* do a shade, finish up some passes, apply mist: shades through the node tree or the material code, then fills the nor/winspeed, mist, combined alpha (with premul) and z results in shr */
+void shade_input_do_shade(ShadeInput *shi, ShadeResult *shr)
+{
+	bool compat = false;
+	float alpha;
+
+	/* ------  main shading loop -------- */
+#ifdef RE_RAYCOUNTER
+	memset(&shi->raycounter, 0, sizeof(shi->raycounter));
+#endif
+	/* node materials first; compat stays false without nodes, otherwise it is what the node tree execution returns */
+	if (shi->mat->nodetree && shi->mat->use_nodes) {
+		compat = ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
+	}
+
+	/* also run this when node shaders fail, due to incompatible shader nodes */
+	if (compat == false) {
+		/* copy all relevant material vars, note, keep this synced with render_types.h */
+		shade_input_init_material(shi);
+
+		if (shi->mat->material_type == MA_TYPE_VOLUME) {
+			if (R.r.mode & R_RAYTRACE) {
+				shade_volume_outside(shi, shr);
+			}
+		}
+		else { /* MA_TYPE_SURFACE, MA_TYPE_WIRE */
+			shade_material_loop(shi, shr);
+		}
+	}
+
+	/* copy additional passes */
+	if (shi->passflag & (SCE_PASS_VECTOR | SCE_PASS_NORMAL)) {
+		copy_v4_v4(shr->winspeed, shi->winspeed);
+		copy_v3_v3(shr->nor, shi->vn);
+	}
+
+	/* MIST */
+	if ((shi->passflag & SCE_PASS_MIST) || ((R.wrld.mode & WO_MIST) && (shi->mat->mode & MA_NOMIST) == 0)) {
+		if (R.r.mode & R_ORTHO)
+			shr->mist = mistfactor(-shi->co[2], shi->co);
+		else
+			shr->mist = mistfactor(len_v3(shi->co), shi->co);
+	}
+	else shr->mist = 0.0f;
+	/* mist only acts as alpha factor when world mist is on and the material does not exclude it */
+	if ((R.wrld.mode & WO_MIST) && (shi->mat->mode & MA_NOMIST) == 0) {
+		alpha = shr->mist;
+	}
+	else alpha = 1.0f;
+
+	/* add mist and premul color */
+	if (shr->alpha != 1.0f || alpha != 1.0f) {
+		float fac = alpha * (shr->alpha);
+		shr->combined[3] = fac;
+
+		if (shi->mat->material_type != MA_TYPE_VOLUME)
+			mul_v3_fl(shr->combined, fac);
+	}
+	else
+		shr->combined[3] = 1.0f;
+
+	/* add z */
+	shr->z = -shi->co[2];
+
+	/* RAYHITS */
+#if 0
+	if (1 || shi->passflag & SCE_PASS_RAYHITS) {
+		shr->rayhits[0] = (float)shi->raycounter.faces.test;
+		shr->rayhits[1] = (float)shi->raycounter.bb.hit;
+		shr->rayhits[2] = 0.0;
+		shr->rayhits[3] = 1.0;
+	}
+#endif
+
+	RE_RC_MERGE(&re_rc_counter[shi->thread], &shi->raycounter);
+}
+
+/* **************************************************************************** */
+/*                    ShadeInput                                                */
+/* **************************************************************************** */
+
+
+void vlr_set_uv_indices(VlakRen *vlr, int *i1, int *i2, int *i3)
+{
+	/* To prevent storing new tfaces or vcols, the split is checked at runtime
+	 * and the vert nums are moved to the correct verts of the original face:
+	 *
+	 *		4---3		4---3
+	 *		|\ 1|	or  |1 /|
+	 *		|0\ |		|/ 0|
+	 *		1---2		1---2 	0 = orig face, 1 = new face
+	 */
+	const int face_split = (vlr->flag & R_FACE_SPLIT) != 0;
+	const int divide_24 = (vlr->flag & R_DIVIDE_24) != 0;
+
+	/* i1 moves with both flags set, i2 with R_FACE_SPLIT, i3 with either flag */
+	if (face_split && divide_24)
+		(*i1)++;
+	if (face_split)
+		(*i2)++;
+	if (face_split || divide_24)
+		(*i3)++;
+}
+
+/* copy data from face to ShadeInput, general case: sets face, instance, the three verts, material, modes and normals */
+/* indices 0 1 2 3 only */
+void shade_input_set_triangle_i(ShadeInput *shi, ObjectInstanceRen *obi, VlakRen *vlr, short i1, short i2, short i3)
+{
+	VertRen **vpp = &vlr->v1;
+	/* vpp[i] is read as vertex i of the face; presumably v1..v4 are consecutive members of VlakRen - verify in the struct */
+	shi->vlr = vlr;
+	shi->obi = obi;
+	shi->obr = obi->obr;
+
+	shi->v1 = vpp[i1];
+	shi->v2 = vpp[i2];
+	shi->v3 = vpp[i3];
+
+	shi->i1 = i1;
+	shi->i2 = i2;
+	shi->i3 = i3;
+
+	/* note, shi->mat is set in node shaders */
+	shi->mat = shi->mat_override ? shi->mat_override : vlr->mat;
+
+	shi->osatex = (shi->mat->texco & TEXCO_OSA);
+	shi->mode = shi->mat->mode_l;        /* or-ed result for all nodes */
+	shi->mode2 = shi->mat->mode2_l;
+
+	/* facenormal copy, can get flipped */
+	shi->flippednor = 0;
+	RE_vlakren_get_normal(&R, obi, vlr, shi->facenor);
+
+	/* calculate vertexnormals, only for smooth faces; n1..n3 are not written otherwise */
+	if (vlr->flag & R_SMOOTH) {
+		copy_v3_v3(shi->n1, shi->v1->n);
+		copy_v3_v3(shi->n2, shi->v2->n);
+		copy_v3_v3(shi->n3, shi->v3->n);
+
+		if (obi->flag & R_TRANSFORMED) {
+			mul_m3_v3(obi->nmat, shi->n1); normalize_v3(shi->n1);
+			mul_m3_v3(obi->nmat, shi->n2); normalize_v3(shi->n2);
+			mul_m3_v3(obi->nmat, shi->n3); normalize_v3(shi->n3);
+		}
+	}
+}
+
+/* Copy data from face to ShadeInput, scanline case: 'facenr' counts from 1 and may
+ * carry RE_QUAD_OFFS, 'obi' indexes R.objectinstance; shi->vlr becomes NULL for sky. */
+void shade_input_set_triangle(ShadeInput *shi, int obi, int facenr, int UNUSED(normal_flip))
+{
+	if (facenr > 0) {
+		ObjectInstanceRen *instance = &R.objectinstance[obi];
+
+		shi->obi = instance;
+		shi->obr = instance->obr;
+		shi->facenr = (facenr - 1) & RE_QUAD_MASK;
+
+		if (shi->facenr < shi->obr->totvlak) {
+			VlakRen *vlr = RE_findOrAddVlak(shi->obr, shi->facenr);
+			/* with RE_QUAD_OFFS set the corners are 0, 2, 3 instead of 0, 1, 2 */
+			const short shift = (facenr & RE_QUAD_OFFS) ? 1 : 0;
+			shade_input_set_triangle_i(shi, shi->obi, vlr, 0, 1 + shift, 2 + shift);
+			return;
+		}
+	}
+	shi->vlr = NULL;  /* general signal we got sky */
+}
+
+/* full osa case: copy static info, that is the first sizeof(struct ShadeInputCopy) bytes of 'from' */
+void shade_input_copy_triangle(ShadeInput *shi, ShadeInput *from)
+{
+	/* not so nice, but works... warning is in RE_shader_ext.h; presumably ShadeInputCopy mirrors the leading members of ShadeInput - verify there */
+	memcpy(shi, from, sizeof(struct ShadeInputCopy));
+}
+
+/* copy data from strand to shadeinput: material and modes, view coordinate, pixel position, face normal and shading normal for the point spoint */
+void shade_input_set_strand(ShadeInput *shi, StrandRen *strand, StrandPoint *spoint)
+{
+	/* note, shi->mat is set in node shaders */
+	shi->mat = shi->mat_override ? shi->mat_override : strand->buffer->ma;
+
+	shi->osatex = (shi->mat->texco & TEXCO_OSA);
+	shi->mode = shi->mat->mode_l;        /* or-ed result for all nodes */
+	/* NOTE(review): shi->mode2 is not set here, shade_input_set_triangle_i() does set it - confirm that this is intended */
+	/* shade_input_set_viewco equivalent */
+	copy_v3_v3(shi->co, spoint->co);
+	copy_v3_v3(shi->view, shi->co);
+	normalize_v3(shi->view);
+
+	shi->xs = (int)spoint->x;
+	shi->ys = (int)spoint->y;
+
+	if (shi->osatex || (R.r.mode & R_SHADOW)) {
+		copy_v3_v3(shi->dxco, spoint->dtco);
+		copy_v3_v3(shi->dyco, spoint->dsco);
+	}
+
+	/* dxview, dyview, not supported */
+
+	/* facenormal, copied from spoint->nor (NOTE(review): the old comment said 'simply viewco flipped' - confirm how spoint->nor is made) */
+	copy_v3_v3(shi->facenor, spoint->nor);
+
+	/* shade_input_set_normals equivalent */
+	if (shi->mat->mode & MA_TANGENT_STR) {
+		copy_v3_v3(shi->vn, spoint->tan);
+	}
+	else {
+		float cross[3];
+		/* (co x tan) x tan, normalized, then flipped when its dot product with the view vector is negative */
+		cross_v3_v3v3(cross, spoint->co, spoint->tan);
+		cross_v3_v3v3(shi->vn, cross, spoint->tan);
+		normalize_v3(shi->vn);
+
+		if (dot_v3v3(shi->vn, shi->view) < 0.0f)
+			negate_v3(shi->vn);
+	}
+
+	copy_v3_v3(shi->vno, shi->vn);
+}
+
+void shade_input_set_strand_texco(ShadeInput *shi, StrandRen *strand, StrandVert *svert, StrandPoint *spoint)
+{
+	StrandBuffer *strandbuf = strand->buffer;
+	ObjectRen *obr = strandbuf->obr;
+	StrandVert *sv;
+	int mode = shi->mode;        /* or-ed result for all nodes */
+	short texco = shi->mat->texco;
+
+	if ((shi->mat->texco & TEXCO_REFL)) {
+		/* shi->dxview, shi->dyview, not supported */
+	}
+
+	if (shi->osatex && (texco & (TEXCO_NORM | TEXCO_REFL))) {
+		/* not supported */
+	}
+
+	if (mode & (MA_TANGENT_V | MA_NORMAP_TANG)) {
+		copy_v3_v3(shi->tang, spoint->tan);
+		copy_v3_v3(shi->nmaptang, spoint->tan);
+	}
+
+	if (mode & MA_STR_SURFDIFF) {
+		const float *surfnor = RE_strandren_get_surfnor(obr, strand, 0);
+
+		if (surfnor)
+			copy_v3_v3(shi->surfnor, surfnor);
+		else
+			copy_v3_v3(shi->surfnor, shi->vn);
+
+		if (shi->mat->strand_surfnor > 0.0f) {
+			shi->surfdist = 0.0f;
+			for (sv = strand->vert; sv != svert; sv++)
+				shi->surfdist += len_v3v3(sv->co, (sv + 1)->co);
+			shi->surfdist += spoint->t * len_v3v3(sv->co, (sv + 1)->co);
+		}
+	}
+
+	if (R.r.mode & R_SPEED) {
+		const float *speed;
+
+		speed = RE_strandren_get_winspeed(shi->obi, strand, 0);
+		if (speed)
+			copy_v4_v4(shi->winspeed, speed);
+		else
+			shi->winspeed[0] = shi->winspeed[1] = shi->winspeed[2] = shi->winspeed[3] = 0.0f;
+	}
+
+	/* shade_input_set_shade_texco equivalent */
+	if (texco & NEED_UV) {
+		if (texco & TEXCO_ORCO) {
+			copy_v3_v3(shi->lo, strand->orco);
+			/* no shi->osatex, orco derivatives are zero */
+		}
+
+		if (texco & TEXCO_GLOB) {
+			mul_v3_m4v3(shi->gl, R.viewinv, shi->co);
+
+			if (shi->osatex) {
+				mul_v3_mat3_m4v3(shi->dxgl, R.viewinv, shi->dxco);
+				mul_v3_mat3_m4v3(shi->dygl, R.viewinv, shi->dyco);
+			}
+		}
+
+		if (texco & TEXCO_STRAND) {
+			shi->strandco = spoint->strandco;
+
+			if (shi->osatex) {
+				shi->dxstrand = spoint->dtstrandco;
+				shi->dystrand = 0.0f;
+			}
+		}
+
+		if ((texco & TEXCO_UV) || (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE))) {
+			MCol *mcol;
+			const float *uv;
+			char *name;
+			int i;
+
+			shi->totuv = 0;
+			shi->totcol = 0;
+			shi->actuv = obr->actmtface;
+			shi->actcol = obr->actmcol;
+
+			if (mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) {
+				for (i = 0; (mcol = RE_strandren_get_mcol(obr, strand, i, &name, 0)); i++) {
+					ShadeInputCol *scol = &shi->col[i];
+					const char *cp = (char *)mcol;
+
+					shi->totcol++;
+					scol->name = name;
+
+					scol->col[0] = cp[3] / 255.0f;
+					scol->col[1] = cp[2] / 255.0f;
+					scol->col[2] = cp[1] / 255.0f;
+					scol->col[3] = cp[0] / 255.0f;
+				}
+
+				if (shi->totcol) {
+					shi->vcol[0] = shi->col[shi->actcol].col[0];
+					shi->vcol[1] = shi->col[shi->actcol].col[1];
+					shi->vcol[2] = shi->col[shi->actcol].col[2];
+					shi->vcol[3] = shi->col[shi->actcol].col[3];
+				}
+				else {
+					shi->vcol[0] = 0.0f;
+					shi->vcol[1] = 0.0f;
+					shi->vcol[2] = 0.0f;
+					shi->vcol[3] = 0.0f;
+				}
+			}
+
+			for (i = 0; (uv = RE_strandren_get_uv(obr, strand, i, &name, 0)); i++) {
+				ShadeInputUV *suv = &shi->uv[i];
+
+				shi->totuv++;
+				suv->name = name;
+
+				if (strandbuf->overrideuv == i) {
+					suv->uv[0] = -1.0f;
+					suv->uv[1] = spoint->strandco;
+					suv->uv[2] = 0.0f;
+				}
+				else {
+					suv->uv[0] = -1.0f + 2.0f * uv[0];
+					suv->uv[1] = -1.0f + 2.0f * uv[1];
+					suv->uv[2] = 0.0f;   /* texture.c assumes there are 3 coords */
+				}
+
+				if (shi->osatex) {
+					suv->dxuv[0] = 0.0f;
+					suv->dxuv[1] = 0.0f;
+					suv->dyuv[0] = 0.0f;
+					suv->dyuv[1] = 0.0f;
+				}
+
+				if ((mode & MA_FACETEXTURE) && i == obr->actmtface) {
+					if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) == 0) {
+						shi->vcol[0] = 1.0f;
+						shi->vcol[1] = 1.0f;
+						shi->vcol[2] = 1.0f;
+						shi->vcol[3] = 1.0f;
+					}
+				}
+			}
+
+			if (shi->totuv == 0) {
+				ShadeInputUV *suv = &shi->uv[0];
+
+				suv->uv[0] = 0.0f;
+				suv->uv[1] = spoint->strandco;
+				suv->uv[2] = 0.0f;   /* texture.c assumes there are 3 coords */
+
+				if (mode & MA_FACETEXTURE) {
+					/* no tface? set at 1.0f */
+					shi->vcol[0] = 1.0f;
+					shi->vcol[1] = 1.0f;
+					shi->vcol[2] = 1.0f;
+					shi->vcol[3] = 1.0f;
+				}
+			}
+
+		}
+
+		if (texco & TEXCO_NORM) {
+			shi->orn[0] = -shi->vn[0];
+			shi->orn[1] = -shi->vn[1];
+			shi->orn[2] = -shi->vn[2];
+		}
+
+		if (texco & TEXCO_STRESS) {
+			/* not supported */
+		}
+
+		if (texco & TEXCO_TANGENT) {
+			if ((mode & MA_TANGENT_V) == 0) {
+				/* just prevent surprises */
+				shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f;
+				shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f;
+			}
+		}
+	}
+
+	/* this is only available for scanline renders */
+	if (shi->depth == 0) {
+		if (texco & TEXCO_WINDOW) {
+			shi->winco[0] = -1.0f + 2.0f * spoint->x / (float)R.winx;
+			shi->winco[1] = -1.0f + 2.0f * spoint->y / (float)R.winy;
+			shi->winco[2] = 0.0f;
+
+			/* not supported */
+			if (shi->osatex) {
+				shi->dxwin[0] = 0.0f;
+				shi->dywin[1] = 0.0f;
+				shi->dxwin[0] = 0.0f;
+				shi->dywin[1] = 0.0f;
+			}
+		}
+	}
+
+	if (shi->do_manage) {
+		if (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) {
+			srgb_to_linearrgb_v3_v3(shi->vcol, shi->vcol);
+		}
+	}
+
+}
+
+/* from scanline pixel coordinates to 3d coordinates: fills 'co' and 'view' (normalized on return), requires set_triangle */
+void shade_input_calc_viewco(ShadeInput *shi, float x, float y, float z, float view[3], float dxyview[2], float co[3], float dxco[3], float dyco[3])
+{
+	/* returns not normalized, so is in viewplane coords */
+	calc_view_vector(view, x, y);
+
+	if (shi->mat->material_type == MA_TYPE_WIRE) {
+		/* wire cannot use normal for calculating shi->co, so we reconstruct the
+		 * coordinate less accurately from z (dxco, dyco and dxyview are not written in this branch) */
+		if (R.r.mode & R_ORTHO)
+			calc_renderco_ortho(co, x, y, z);
+		else
+			calc_renderco_zbuf(co, view, z);
+	}
+	else {
+		/* for non-wire, intersect with the triangle to get the exact coord (z is not used here) */
+		float fac, dface, v1[3];
+
+		copy_v3_v3(v1, shi->v1->co);
+		if (shi->obi->flag & R_TRANSFORMED)
+			mul_m4_v3(shi->obi->mat, v1);
+
+		dface = dot_v3v3(v1, shi->facenor);  /* the 'd' of the face plane equation used below */
+
+		/* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
+		if (R.r.mode & R_ORTHO) {
+			/* x and y 3d coordinate can be derived from pixel coord and winmat */
+			float fx = 2.0f / (R.winx * R.winmat[0][0]);
+			float fy = 2.0f / (R.winy * R.winmat[1][1]);
+
+			co[0] = (x - 0.5f * R.winx) * fx - R.winmat[3][0] / R.winmat[0][0];
+			co[1] = (y - 0.5f * R.winy) * fy - R.winmat[3][1] / R.winmat[1][1];
+
+			/* using a*x + b*y + c*z = d equation, (a b c) is normal; z falls back to 0 when c is 0 */
+			if (shi->facenor[2] != 0.0f)
+				co[2] = (dface - shi->facenor[0] * co[0] - shi->facenor[1] * co[1]) / shi->facenor[2];
+			else
+				co[2] = 0.0f;
+
+			if (dxco && dyco) {
+				dxco[0] = fx;
+				dxco[1] = 0.0f;
+				if (shi->facenor[2] != 0.0f)
+					dxco[2] = -(shi->facenor[0] * fx) / shi->facenor[2];
+				else
+					dxco[2] = 0.0f;
+
+				dyco[0] = 0.0f;
+				dyco[1] = fy;
+				if (shi->facenor[2] != 0.0f)
+					dyco[2] = -(shi->facenor[1] * fy) / shi->facenor[2];
+				else
+					dyco[2] = 0.0f;
+
+				if (dxyview) {  /* only reached when dxco and dyco were passed as well */
+					fac = (co[2] != 0.0f) ? (1.0f / co[2]) : 0.0f;
+					dxyview[0] = -R.viewdx * fac;
+					dxyview[1] = -R.viewdy * fac;
+				}
+			}
+		}
+		else {
+			float div;
+
+			div = dot_v3v3(shi->facenor, view);
+			if (div != 0.0f) fac = dface / div;
+			else fac = 0.0f;  /* view vector perpendicular to the face normal: co becomes (0,0,0) */
+
+			co[0] = fac * view[0];
+			co[1] = fac * view[1];
+			co[2] = fac * view[2];
+
+			/* pixel dx/dy for render coord; NOTE(review): the u/v denominators below are not zero-guarded like 'div' is */
+			if (dxco && dyco) {
+				float u = dface / (div - R.viewdx * shi->facenor[0]);  /* plane factor for the view vector shifted by viewdx in x */
+				float v = dface / (div - R.viewdy * shi->facenor[1]);  /* plane factor for the view vector shifted by viewdy in y */
+
+				dxco[0] = co[0] - (view[0] - R.viewdx) * u;
+				dxco[1] = co[1] - (view[1]) * u;
+				dxco[2] = co[2] - (view[2]) * u;
+
+				dyco[0] = co[0] - (view[0]) * v;
+				dyco[1] = co[1] - (view[1] - R.viewdy) * v;
+				dyco[2] = co[2] - (view[2]) * v;
+
+				if (dxyview) {  /* only reached when dxco and dyco were passed as well */
+					if (fac != 0.0f) fac = 1.0f / fac;
+					dxyview[0] = -R.viewdx * fac;
+					dxyview[1] = -R.viewdy * fac;
+				}
+			}
+		}
+	}
+
+	/* set camera coords - for scanline, it's always 0.0,0.0,0.0 (render is in camera space)
+	 * however for raytrace it can be different - the position of the last intersection */
+	shi->camera_co[0] = shi->camera_co[1] = shi->camera_co[2] = 0.0f;
+
+	/* cannot normalize earlier, code above needs it at viewplane level */
+	normalize_v3(view);
+}
+
+/* Store the pixel/scanline coordinates on 'shi' and compute shi->co and shi->view, requires set_triangle */
+void shade_input_set_viewco(ShadeInput *shi, float x, float y, float xs, float ys, float z)
+{
+	/* derivatives are only requested with texture filtering (osatex) or when shadows are rendered */
+	const bool want_derivs = (shi->osatex || (R.r.mode & R_SHADOW));
+	/* view vector derivatives are additionally limited to materials using reflection coordinates */
+	const bool want_view_derivs = (want_derivs && (shi->mat->texco & TEXCO_REFL));
+
+	/* integer sample position, in use for dithering (soft shadow), node preview, irregular shadow */
+	shi->xs = (int)xs;
+	shi->ys = (int)ys;
+
+	/* original scanline coordinate without jitter */
+	shi->scanco[0] = x;
+	shi->scanco[1] = y;
+	shi->scanco[2] = z;
+
+	/* outputs that are not wanted are passed as NULL */
+	shade_input_calc_viewco(
+	        shi, xs, ys, z, shi->view,
+	        want_view_derivs ? &shi->dxview : NULL,
+	        shi->co,
+	        want_derivs ? shi->dxco : NULL,
+	        want_derivs ? shi->dyco : NULL);
+}
+
+void barycentric_differentials_from_position(
+	const float co[3], const float v1[3], const float v2[3], const float v3[3],
+	const float dxco[3], const float dyco[3], const float facenor[3], const bool differentials,
+	float *u, float *v, float *dx_u, float *dx_v, float *dy_u, float *dy_v)
+{
+	/* Solve u,v of 'co' in the projection on the two axes axis_dominant_v3() picks for 'facenor'
+	 * (most stable axes to project on); dx_u, dx_v, dy_u, dy_v are only written with 'differentials' set. */
+	int ax_a, ax_b;
+	axis_dominant_v3(&ax_a, &ax_b, facenor);
+
+	/* projected edges from v1 and from v2 towards v3, scaled by the inverse determinant further down */
+	float e1_a = v3[ax_a] - v1[ax_a];
+	float e1_b = v3[ax_b] - v1[ax_b];
+	float e2_a = v3[ax_a] - v2[ax_a];
+	float e2_b = v3[ax_b] - v2[ax_b];
+	/* zero determinant: scale by 0 instead of dividing by zero, which zeroes the edge terms */
+	const float det = (e1_a * e2_b - e2_a * e1_b);
+	const float inv_det = (det != 0.0f) ? 1.0f / det : 0.0f;
+	e1_a *= inv_det; e1_b *= inv_det; e2_a *= inv_det; e2_b *= inv_det;
+
+	*u = (v3[ax_a] - co[ax_a]) * e2_b - (v3[ax_b] - co[ax_b]) * e2_a;
+	*v = (v3[ax_b] - co[ax_b]) * e1_a - (v3[ax_a] - co[ax_a]) * e1_b;
+	if (differentials) {
+		*dx_u = dxco[ax_a] * e2_b - dxco[ax_b] * e2_a;
+		*dx_v = dxco[ax_b] * e1_a - dxco[ax_a] * e1_b;
+		*dy_u = dyco[ax_a] * e2_b - dyco[ax_b] * e2_a;
+		*dy_v = dyco[ax_b] * e1_a - dyco[ax_a] * e1_b;
+	}
+}
+/* calculate U and V for scanline from shi->co (silly render face u and v are in range -1 to 0) */
+void shade_input_set_uv(ShadeInput *shi)
+{
+	VlakRen *vlr = shi->vlr;
+	float co1[3], co2[3], co3[3];
+
+	/* u and v are only computed for smooth faces, materials with NEED_UV texture coords or the UV pass,
+	 * otherwise shi is left untouched */
+	if (!((vlr->flag & R_SMOOTH) || (shi->mat->texco & NEED_UV) || (shi->passflag & SCE_PASS_UV))) {
+		return;
+	}
+
+	/* triangle corners, multiplied with the instance matrix for transformed instances */
+	copy_v3_v3(co1, shi->v1->co);
+	copy_v3_v3(co2, shi->v2->co);
+	copy_v3_v3(co3, shi->v3->co);
+
+	if (shi->obi->flag & R_TRANSFORMED) {
+		mul_m4_v3(shi->obi->mat, co1);
+		mul_m4_v3(shi->obi->mat, co2);
+		mul_m4_v3(shi->obi->mat, co3);
+	}
+
+	if (vlr->v2 != vlr->v3) {
+		/* real face: barycentric solve, u and v from the helper are negated afterwards */
+		barycentric_differentials_from_position(
+			shi->co, co1, co2, co3, shi->dxco, shi->dyco, shi->facenor, shi->osatex,
+			&shi->u, &shi->v, &shi->dx_u, &shi->dx_v, &shi->dy_u, &shi->dy_v);
+
+		shi->u = -shi->u;
+		shi->v = -shi->v;
+
+		/* u and v are in range -1 to 0, we allow a little bit extra but not too much, screws up speedvectors */
+		CLAMP(shi->u, -2.0f, 1.0f);
+		CLAMP(shi->v, -2.0f, 1.0f);
+	}
+	else {
+		/* exception case for wire render of edge:
+		 * u comes from the distance between shi->co and v1 relative to the edge length */
+		const float edge_len = len_v3v3(co2, co1);
+		const float co_len = len_v3v3(shi->co, co1);
+
+		shi->u = (edge_len == 0.0f) ? 0.0f : -(1.0f - co_len / edge_len);
+		shi->v = 0.0f;
+
+		/* derivatives are zeroed for an edge */
+		if (shi->osatex) {
+			shi->dx_u = 0.0f;
+			shi->dx_v = 0.0f;
+			shi->dy_u = 0.0f;
+			shi->dy_v = 0.0f;
+		}
+	}
+}
+
+void shade_input_set_normals(ShadeInput *shi)
+{
+	float u = shi->u, v = shi->v;
+	float l = 1.0f + u + v;
+	/* sets shi->vn, shi->nmapnorm, shi->vno and shi->flippednor; can negate shi->facenor and the vectors shi->n1/n2/n3 point to */
+	shi->flippednor = 0;
+
+	/* test flip normals to viewing direction (skipped for faces flagged R_TANGENT) */
+	if (!(shi->vlr->flag & R_TANGENT)) {
+		if (dot_v3v3(shi->facenor, shi->view) < 0.0f) {
+			negate_v3(shi->facenor);
+			shi->flippednor = 1;
+		}
+	}
+
+	/* calculate vertexnormals: n1 and n2 are weighted with -u and -v, n3 with l */
+	if (shi->vlr->flag & R_SMOOTH) {
+		float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3;
+
+		if (shi->flippednor) {  /* negated through the pointers and not restored afterwards */
+			negate_v3(n1);
+			negate_v3(n2);
+			negate_v3(n3);
+		}
+
+		shi->vn[0] = l * n3[0] - u * n1[0] - v * n2[0];
+		shi->vn[1] = l * n3[1] - u * n1[1] - v * n2[1];
+		shi->vn[2] = l * n3[2] - u * n1[2] - v * n2[2];
+
+		/* use unnormalized normal (closer to games) */
+		copy_v3_v3(shi->nmapnorm, shi->vn);
+
+		normalize_v3(shi->vn);
+	}
+	else {
+		copy_v3_v3(shi->vn, shi->facenor);  /* flat face: the (possibly flipped) face normal is used as is */
+		copy_v3_v3(shi->nmapnorm, shi->vn);
+	}
+
+	/* used in nodes */
+	copy_v3_v3(shi->vno, shi->vn);
+
+	/* flip normals to viewing direction; NOTE(review): facenor already passed the same test above, so this looks unreachable - confirm */
+	if (!(shi->vlr->flag & R_TANGENT))
+		if (dot_v3v3(shi->facenor, shi->view) < 0.0f)
+			shade_input_flip_normals(shi);
+}
+
+/* Sets shi->vn, nmapnorm and vno without touching shi->flippednor or shi->facenor (XXX shi->flippednor messes up otherwise) */
+void shade_input_set_vertex_normals(ShadeInput *shi)
+{
+	if (!(shi->vlr->flag & R_SMOOTH)) {
+		/* flat face: the face normal is used as is */
+		copy_v3_v3(shi->vn, shi->facenor);
+		copy_v3_v3(shi->nmapnorm, shi->vn);
+	}
+	else {
+		/* smooth face: blend the vertex normals, n1 and n2 are weighted with -u and -v */
+		const float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3;
+		const float w1 = shi->u, w2 = shi->v;
+		const float w3 = 1.0f + w1 + w2;
+
+		for (int k = 0; k < 3; k++) {
+			shi->vn[k] = w3 * n3[k] - w1 * n1[k] - w2 * n2[k];
+		}
+
+		/* use unnormalized normal (closer to games) */
+		copy_v3_v3(shi->nmapnorm, shi->vn);
+
+		normalize_v3(shi->vn);
+	}
+
+	/* used in nodes */
+	copy_v3_v3(shi->vno, shi->vn);
+}
+
+
+/* used by raytrace, sss, bake to flip into the right direction: negates facenor, vn, vno, nmapnorm and toggles flippednor */
+void shade_input_flip_normals(ShadeInput *shi)
+{
+	float *const flip[] = {shi->facenor, shi->vn, shi->vno, shi->nmapnorm};
+
+	for (int k = 0; k < 4; k++)
+		negate_v3(flip[k]);
+	shi->flippednor = (shi->flippednor == 0);
+}
+
+void shade_input_set_shade_texco(ShadeInput *shi)
+{
+	ObjectInstanceRen *obi = shi->obi;
+	ObjectRen *obr = shi->obr;
+	VertRen *v1 = shi->v1, *v2 = shi->v2, *v3 = shi->v3;
+	float u = shi->u, v = shi->v;
+	float l = 1.0f + u + v, dl;
+	int mode = shi->mode;        /* or-ed result for all nodes */
+	int mode2 = shi->mode2;
+	short texco = shi->mat->texco;
+	const bool need_mikk_tangent = (mode & MA_NORMAP_TANG || R.flag & R_NEED_TANGENT);
+	const bool need_mikk_tangent_concrete = (mode2 & MA_TANGENT_CONCRETE) != 0;
+	/* Interpolates per-vertex data with the weights (-u, -v, l): tangents, speed vectors, texture coords, vertex colors; reads shi->u/v, co, vn */
+	/* calculate dxno */
+	if (shi->vlr->flag & R_SMOOTH) {
+
+		if (shi->osatex && (texco & (TEXCO_NORM | TEXCO_REFL)) ) {
+			const float *n1 = shi->n1, *n2 = shi->n2, *n3 = shi->n3;
+
+			dl = shi->dx_u + shi->dx_v;
+			shi->dxno[0] = dl * n3[0] - shi->dx_u * n1[0] - shi->dx_v * n2[0];
+			shi->dxno[1] = dl * n3[1] - shi->dx_u * n1[1] - shi->dx_v * n2[1];
+			shi->dxno[2] = dl * n3[2] - shi->dx_u * n1[2] - shi->dx_v * n2[2];
+			dl = shi->dy_u + shi->dy_v;
+			shi->dyno[0] = dl * n3[0] - shi->dy_u * n1[0] - shi->dy_v * n2[0];
+			shi->dyno[1] = dl * n3[1] - shi->dy_u * n1[1] - shi->dy_v * n2[1];
+			shi->dyno[2] = dl * n3[2] - shi->dy_u * n1[2] - shi->dy_v * n2[2];
+
+		}
+	}
+
+	/* calc tangents */
+	if (mode & (MA_TANGENT_V | MA_NORMAP_TANG) || mode2 & MA_TANGENT_CONCRETE || R.flag & R_NEED_TANGENT) {
+		const float *s1, *s2, *s3;
+		float tl, tu, tv;
+
+		if (shi->vlr->flag & R_SMOOTH) {
+			tl = l;
+			tu = u;
+			tv = v;
+		}
+		else {
+			/* qdn: flat faces have tangents too,
+			 * could pick either one, using average here */
+			tl = 1.0f / 3.0f;
+			tu = -1.0f / 3.0f;
+			tv = -1.0f / 3.0f;
+		}
+
+		shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f;
+		shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f;
+
+		if (mode & MA_TANGENT_V) {
+			s1 = RE_vertren_get_tangent(obr, v1, 0);
+			s2 = RE_vertren_get_tangent(obr, v2, 0);
+			s3 = RE_vertren_get_tangent(obr, v3, 0);
+
+			if (s1 && s2 && s3) {
+				shi->tang[0] = (tl * s3[0] - tu * s1[0] - tv * s2[0]);
+				shi->tang[1] = (tl * s3[1] - tu * s1[1] - tv * s2[1]);
+				shi->tang[2] = (tl * s3[2] - tu * s1[2] - tv * s2[2]);
+
+				if (obi->flag & R_TRANSFORMED)
+					mul_m3_v3(obi->nmat, shi->tang);
+
+				normalize_v3(shi->tang);
+				copy_v3_v3(shi->nmaptang, shi->tang);
+			}
+		}
+
+		if (need_mikk_tangent || need_mikk_tangent_concrete) {
+			int j1 = shi->i1, j2 = shi->i2, j3 = shi->i3;
+			float c0[3], c1[3], c2[3];
+			int acttang = obr->actmtface;
+
+			vlr_set_uv_indices(shi->vlr, &j1, &j2, &j3);
+
+			/* cycle through all tangent in vlakren */
+			for (int i = 0; i < MAX_MTFACE; i++) {
+				const float *tangent = RE_vlakren_get_nmap_tangent(obr, shi->vlr, i, false);
+				if (!tangent)
+					continue;
+
+				copy_v3_v3(c0, &tangent[j1 * 4]);
+				copy_v3_v3(c1, &tangent[j2 * 4]);
+				copy_v3_v3(c2, &tangent[j3 * 4]);
+
+				/* keeping tangents normalized at vertex level
+				 * corresponds better to how it's done in game engines */
+				if (obi->flag & R_TRANSFORMED) {
+					mul_mat3_m4_v3(obi->mat, c0); normalize_v3(c0);
+					mul_mat3_m4_v3(obi->mat, c1); normalize_v3(c1);
+					mul_mat3_m4_v3(obi->mat, c2); normalize_v3(c2);
+				}
+
+				/* we don't normalize the interpolated TBN tangent
+				 * corresponds better to how it's done in game engines */
+				shi->tangents[i][0] = (tl * c2[0] - tu * c0[0] - tv * c1[0]);
+				shi->tangents[i][1] = (tl * c2[1] - tu * c0[1] - tv * c1[1]);
+				shi->tangents[i][2] = (tl * c2[2] - tu * c0[2] - tv * c1[2]);
+
+				/* the sign is the same for all 3 vertices of any
+				 * non degenerate triangle. */
+				shi->tangents[i][3] = tangent[j1 * 4 + 3];
+
+				if (acttang == i && need_mikk_tangent) {
+					for (int m = 0; m < 4; m++) {
+						shi->nmaptang[m] = shi->tangents[i][m];
+					}
+				}
+			}
+		}
+	}
+
+	if (mode & MA_STR_SURFDIFF) {
+		const float *surfnor = RE_vlakren_get_surfnor(obr, shi->vlr, 0);
+
+		if (surfnor) {
+			copy_v3_v3(shi->surfnor, surfnor);
+			if (obi->flag & R_TRANSFORMED)
+				mul_m3_v3(obi->nmat, shi->surfnor);
+		}
+		else
+			copy_v3_v3(shi->surfnor, shi->vn);
+
+		shi->surfdist = 0.0f;
+	}
+
+	if (R.r.mode & R_SPEED) {
+		const float *s1, *s2, *s3;
+
+		s1 = RE_vertren_get_winspeed(obi, v1, 0);
+		s2 = RE_vertren_get_winspeed(obi, v2, 0);
+		s3 = RE_vertren_get_winspeed(obi, v3, 0);
+		if (s1 && s2 && s3) {
+			shi->winspeed[0] = (l * s3[0] - u * s1[0] - v * s2[0]);
+			shi->winspeed[1] = (l * s3[1] - u * s1[1] - v * s2[1]);
+			shi->winspeed[2] = (l * s3[2] - u * s1[2] - v * s2[2]);
+			shi->winspeed[3] = (l * s3[3] - u * s1[3] - v * s2[3]);
+		}
+		else {
+			shi->winspeed[0] = shi->winspeed[1] = shi->winspeed[2] = shi->winspeed[3] = 0.0f;
+		}
+	}
+
+	/* pass option forces UV calc */
+	if ((shi->passflag & SCE_PASS_UV) || (R.flag & R_NEED_VCOL))
+		texco |= (NEED_UV | TEXCO_UV);
+
+	/* texture coordinates. shi->dxuv shi->dyuv have been set */
+	if (texco & NEED_UV) {
+
+		if (texco & TEXCO_ORCO) {
+			if (v1->orco) {
+				const float *o1, *o2, *o3;
+
+				o1 = v1->orco;
+				o2 = v2->orco;
+				o3 = v3->orco;
+
+				shi->lo[0] = l * o3[0] - u * o1[0] - v * o2[0];
+				shi->lo[1] = l * o3[1] - u * o1[1] - v * o2[1];
+				shi->lo[2] = l * o3[2] - u * o1[2] - v * o2[2];
+
+				if (shi->osatex) {
+					dl = shi->dx_u + shi->dx_v;
+					shi->dxlo[0] = dl * o3[0] - shi->dx_u * o1[0] - shi->dx_v * o2[0];
+					shi->dxlo[1] = dl * o3[1] - shi->dx_u * o1[1] - shi->dx_v * o2[1];
+					shi->dxlo[2] = dl * o3[2] - shi->dx_u * o1[2] - shi->dx_v * o2[2];
+					dl = shi->dy_u + shi->dy_v;
+					shi->dylo[0] = dl * o3[0] - shi->dy_u * o1[0] - shi->dy_v * o2[0];
+					shi->dylo[1] = dl * o3[1] - shi->dy_u * o1[1] - shi->dy_v * o2[1];
+					shi->dylo[2] = dl * o3[2] - shi->dy_u * o1[2] - shi->dy_v * o2[2];
+				}
+			}
+
+			copy_v3_v3(shi->duplilo, obi->dupliorco);
+		}
+
+		if (texco & TEXCO_GLOB) {
+			copy_v3_v3(shi->gl, shi->co);
+			mul_m4_v3(R.viewinv, shi->gl);
+			if (shi->osatex) {
+				copy_v3_v3(shi->dxgl, shi->dxco);
+				mul_mat3_m4_v3(R.viewinv, shi->dxgl);
+				copy_v3_v3(shi->dygl, shi->dyco);
+				mul_mat3_m4_v3(R.viewinv, shi->dygl);
+			}
+		}
+
+		if (texco & TEXCO_STRAND) {
+			shi->strandco = (l * v3->accum - u * v1->accum - v * v2->accum);
+			if (shi->osatex) {
+				dl = shi->dx_u + shi->dx_v;
+				shi->dxstrand = dl * v3->accum - shi->dx_u * v1->accum - shi->dx_v * v2->accum;
+				dl = shi->dy_u + shi->dy_v;
+				shi->dystrand = dl * v3->accum - shi->dy_u * v1->accum - shi->dy_v * v2->accum;
+			}
+		}
+
+		if ((texco & TEXCO_UV) || (mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) || (R.flag & R_NEED_VCOL)) {
+			VlakRen *vlr = shi->vlr;
+			MTFace *tface;
+			MCol *mcol;
+			char *name;
+			int i, j1 = shi->i1, j2 = shi->i2, j3 = shi->i3;
+
+			/* uv and vcols are not copied on split, so set them according vlr divide flag */
+			vlr_set_uv_indices(vlr, &j1, &j2, &j3);
+
+			shi->totuv = 0;
+			shi->totcol = 0;
+			shi->actuv = obr->actmtface;
+			shi->actcol = obr->actmcol;
+
+			if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) || (R.flag & R_NEED_VCOL)) {
+				for (i = 0; (mcol = RE_vlakren_get_mcol(obr, vlr, i, &name, 0)); i++) {
+					ShadeInputCol *scol = &shi->col[i];
+					const unsigned char *cp1, *cp2, *cp3;
+					float a[3];
+
+					shi->totcol++;
+					scol->name = name;
+
+					cp1 = (const unsigned char *)(mcol + j1);
+					cp2 = (const unsigned char *)(mcol + j2);
+					cp3 = (const unsigned char *)(mcol + j3);
+
+					/* alpha values; the color bytes are read as unsigned so values above 127 never turn negative, whatever plain char is */
+					a[0] = ((float)cp1[0]) / 255.f;
+					a[1] = ((float)cp2[0]) / 255.f;
+					a[2] = ((float)cp3[0]) / 255.f;
+					scol->col[3] = l * a[2] - u * a[0] - v * a[1];
+
+					/* sample premultiplied color value */
+					scol->col[0] = (l * ((float)cp3[3]) * a[2] - u * ((float)cp1[3]) * a[0] - v * ((float)cp2[3]) * a[1]) / 255.f;
+					scol->col[1] = (l * ((float)cp3[2]) * a[2] - u * ((float)cp1[2]) * a[0] - v * ((float)cp2[2]) * a[1]) / 255.f;
+					scol->col[2] = (l * ((float)cp3[1]) * a[2] - u * ((float)cp1[1]) * a[0] - v * ((float)cp2[1]) * a[1]) / 255.f;
+
+					/* if not zero alpha, restore non-multiplied color */
+					if (scol->col[3]) {
+						mul_v3_fl(scol->col, 1.0f / scol->col[3]);
+					}
+				}
+
+				if (shi->totcol) {
+					shi->vcol[0] = shi->col[shi->actcol].col[0];
+					shi->vcol[1] = shi->col[shi->actcol].col[1];
+					shi->vcol[2] = shi->col[shi->actcol].col[2];
+					shi->vcol[3] = shi->col[shi->actcol].col[3];
+				}
+				else {
+					shi->vcol[0] = 0.0f;
+					shi->vcol[1] = 0.0f;
+					shi->vcol[2] = 0.0f;
+					shi->vcol[3] = 1.0f;
+				}
+			}
+
+			for (i = 0; (tface = RE_vlakren_get_tface(obr, vlr, i, &name, 0)); i++) {
+				ShadeInputUV *suv = &shi->uv[i];
+				const float *uv1 = tface->uv[j1];
+				const float *uv2 = tface->uv[j2];
+				const float *uv3 = tface->uv[j3];
+
+				shi->totuv++;
+				suv->name = name;
+
+				if ((shi->mat->mapflag & MA_MAPFLAG_UVPROJECT) && (shi->depth == 0)) {
+					float x = shi->xs;
+					float y = shi->ys;
+
+					float s1[2] = {-1.0f + 2.0f * uv1[0], -1.0f + 2.0f * uv1[1]};
+					float s2[2] = {-1.0f + 2.0f * uv2[0], -1.0f + 2.0f * uv2[1]};
+					float s3[2] = {-1.0f + 2.0f * uv3[0], -1.0f + 2.0f * uv3[1]};
+
+
+					float obwinmat[4][4], winmat[4][4], ho1[4], ho2[4], ho3[4];
+					float Zmulx, Zmuly;
+					float hox, hoy, l_proj, dl_proj, u_proj, v_proj;
+					float s00, s01, s10, s11, detsh;
+
+					/* old globals, localized now */
+					Zmulx =  ((float)R.winx) / 2.0f;
+					Zmuly =  ((float)R.winy) / 2.0f;
+
+					zbuf_make_winmat(&R, winmat);
+					if (shi->obi->flag & R_TRANSFORMED)
+						mul_m4_m4m4(obwinmat, winmat, obi->mat);
+					else
+						copy_m4_m4(obwinmat, winmat);
+
+					zbuf_render_project(obwinmat, v1->co, ho1);
+					zbuf_render_project(obwinmat, v2->co, ho2);
+					zbuf_render_project(obwinmat, v3->co, ho3);
+
+					s00 = ho3[0] / ho3[3] - ho1[0] / ho1[3];
+					s01 = ho3[1] / ho3[3] - ho1[1] / ho1[3];
+					s10 = ho3[0] / ho3[3] - ho2[0] / ho2[3];
+					s11 = ho3[1] / ho3[3] - ho2[1] / ho2[3];
+
+					detsh = s00 * s11 - s10 * s01;
+					detsh = (detsh != 0.0f) ? 1.0f / detsh : 0.0f;
+					s00 *= detsh; s01 *= detsh;
+					s10 *= detsh; s11 *= detsh;
+
+					/* recalc u and v again */
+					hox = x / Zmulx - 1.0f;
+					hoy = y / Zmuly - 1.0f;
+					u_proj = (hox - ho3[0] / ho3[3]) * s11 - (hoy - ho3[1] / ho3[3]) * s10;
+					v_proj = (hoy - ho3[1] / ho3[3]) * s00 - (hox - ho3[0] / ho3[3]) * s01;
+					l_proj = 1.0f + u_proj + v_proj;
+
+					suv->uv[0] = l_proj * s3[0] - u_proj * s1[0] - v_proj * s2[0];
+					suv->uv[1] = l_proj * s3[1] - u_proj * s1[1] - v_proj * s2[1];
+					suv->uv[2] = 0.0f;
+
+					if (shi->osatex) {
+						float dxuv[2], dyuv[2];
+						dxuv[0] =  s11 / Zmulx;
+						dxuv[1] =  -s01 / Zmulx;
+						dyuv[0] =  -s10 / Zmuly;
+						dyuv[1] =  s00 / Zmuly;
+
+						dl_proj = dxuv[0] + dxuv[1];
+						suv->dxuv[0] = dl_proj * s3[0] - dxuv[0] * s1[0] - dxuv[1] * s2[0];
+						suv->dxuv[1] = dl_proj * s3[1] - dxuv[0] * s1[1] - dxuv[1] * s2[1];
+						dl_proj = dyuv[0] + dyuv[1];
+						suv->dyuv[0] = dl_proj * s3[0] - dyuv[0] * s1[0] - dyuv[1] * s2[0];
+						suv->dyuv[1] = dl_proj * s3[1] - dyuv[0] * s1[1] - dyuv[1] * s2[1];
+					}
+				}
+				else {
+
+					suv->uv[0] = -1.0f + 2.0f * (l * uv3[0] - u * uv1[0] - v * uv2[0]);
+					suv->uv[1] = -1.0f + 2.0f * (l * uv3[1] - u * uv1[1] - v * uv2[1]);
+					suv->uv[2] = 0.0f;   /* texture.c assumes there are 3 coords */
+
+					if (shi->osatex) {
+						float duv[2];
+
+						dl = shi->dx_u + shi->dx_v;
+						duv[0] = shi->dx_u;
+						duv[1] = shi->dx_v;
+
+						suv->dxuv[0] = 2.0f * (dl * uv3[0] - duv[0] * uv1[0] - duv[1] * uv2[0]);
+						suv->dxuv[1] = 2.0f * (dl * uv3[1] - duv[0] * uv1[1] - duv[1] * uv2[1]);
+
+						dl = shi->dy_u + shi->dy_v;
+						duv[0] = shi->dy_u;
+						duv[1] = shi->dy_v;
+
+						suv->dyuv[0] = 2.0f * (dl * uv3[0] - duv[0] * uv1[0] - duv[1] * uv2[0]);
+						suv->dyuv[1] = 2.0f * (dl * uv3[1] - duv[0] * uv1[1] - duv[1] * uv2[1]);
+					}
+
+					if ((mode & MA_FACETEXTURE) && i == obr->actmtface) {
+						if (((mode & (MA_VERTEXCOL | MA_VERTEXCOLP)) == 0) && ((R.flag & R_NEED_VCOL) == 0)) {
+							shi->vcol[0] = 1.0f;
+							shi->vcol[1] = 1.0f;
+							shi->vcol[2] = 1.0f;
+							shi->vcol[3] = 1.0f;
+						}
+						if (tface->tpage) {
+							render_realtime_texture(shi, tface->tpage);
+						}
+					}
+				}
+			}
+
+			shi->dupliuv[0] = -1.0f + 2.0f * obi->dupliuv[0];
+			shi->dupliuv[1] = -1.0f + 2.0f * obi->dupliuv[1];
+			shi->dupliuv[2] = 0.0f;
+
+			if (shi->totuv == 0) {
+				ShadeInputUV *suv = &shi->uv[0];
+
+				suv->uv[0] = 2.0f * (u + .5f);
+				suv->uv[1] = 2.0f * (v + .5f);
+				suv->uv[2] = 0.0f;   /* texture.c assumes there are 3 coords */
+
+				if (mode & MA_FACETEXTURE) {
+					/* no tface? set at 1.0f */
+					shi->vcol[0] = 1.0f;
+					shi->vcol[1] = 1.0f;
+					shi->vcol[2] = 1.0f;
+					shi->vcol[3] = 1.0f;
+				}
+			}
+		}
+
+		if (texco & TEXCO_NORM) {
+			shi->orn[0] = -shi->vn[0];
+			shi->orn[1] = -shi->vn[1];
+			shi->orn[2] = -shi->vn[2];
+		}
+
+		if (texco & TEXCO_STRESS) {
+			const float *s1, *s2, *s3;
+
+			s1 = RE_vertren_get_stress(obr, v1, 0);
+			s2 = RE_vertren_get_stress(obr, v2, 0);
+			s3 = RE_vertren_get_stress(obr, v3, 0);
+			if (s1 && s2 && s3) {
+				shi->stress = l * s3[0] - u * s1[0] - v * s2[0];
+				if (shi->stress < 1.0f) shi->stress -= 1.0f;
+				else shi->stress = (shi->stress - 1.0f) / shi->stress;
+			}
+			else shi->stress = 0.0f;
+		}
+
+		if (texco & TEXCO_TANGENT) {
+			if ((mode & MA_TANGENT_V) == 0) {
+				/* just prevent surprises */
+				shi->tang[0] = shi->tang[1] = shi->tang[2] = 0.0f;
+				shi->nmaptang[0] = shi->nmaptang[1] = shi->nmaptang[2] = 0.0f;
+			}
+		}
+	}
+
+	/* this is only available for scanline renders */
+	if (shi->depth == 0) {
+		float x = shi->xs;
+		float y = shi->ys;
+
+		if (texco & TEXCO_WINDOW) {
+			shi->winco[0] = -1.0f + 2.0f * x / (float)R.winx;
+			shi->winco[1] = -1.0f + 2.0f * y / (float)R.winy;
+			shi->winco[2] = 0.0f;
+			if (shi->osatex) {
+				shi->dxwin[0] = 2.0f / (float)R.winx;
+				shi->dywin[1] = 2.0f / (float)R.winy;
+				shi->dxwin[1] = shi->dxwin[2] = 0.0f;
+				shi->dywin[0] = shi->dywin[2] = 0.0f;
+			}
+		}
+	}
+	/* else {
+	 * Note! For raytracing winco is not set,
+	 * important because this means all shader inputs need to have their variables set to zero
+	 * else un-initialized values are used
+	 */
+	if (shi->do_manage) {
+		if ((mode & (MA_VERTEXCOL | MA_VERTEXCOLP | MA_FACETEXTURE)) || (R.flag & R_NEED_VCOL)) {
+			srgb_to_linearrgb_v3_v3(shi->vcol, shi->vcol);
+		}
+	}
+
+}
+
+/* ****************** ShadeSample ************************************** */
+
+/* Reset 'shi' and fill in what stays constant for a part and render layer (initialize per part, not per pixel!) */
+void shade_input_initialize(ShadeInput *shi, RenderPart *pa, RenderLayer *rl, int sample)
+{
+	/* everything not assigned below starts out as zero, note shi->depth == 0 means first hit, not raytracing */
+	memset(shi, 0, sizeof(*shi));
+
+	/* thread of the part and the sample index passed in */
+	shi->thread = pa->thread;
+	shi->sample = sample;
+
+	/* switches read from the render settings and the scene */
+	shi->do_preview = ((R.r.scemode & R_MATNODE_PREVIEW) != 0);
+	shi->do_manage = BKE_scene_check_color_management_enabled(R.scene);
+	shi->use_world_space_shading = BKE_scene_use_world_space_shading(R.scene);
+
+	/* copied from the render layer, combinedflag is the inverted pass_xor */
+	shi->lay = rl->lay;
+	shi->layflag = rl->layflag;
+	shi->passflag = rl->passflag;
+	shi->mat_override = rl->mat_override;
+	shi->light_override = rl->light_override;
+	shi->combinedflag = ~rl->pass_xor;
+}
+
+/* Prepare every ShadeInput/ShadeResult pair of 'ssamp' (initialize per part, not per pixel!) */
+void shade_sample_initialize(ShadeSample *ssamp, RenderPart *pa, RenderLayer *rl)
+{
+	/* one slot per OSA sample, a single slot when R.osa is zero */
+	const int num_slots = (R.osa != 0) ? R.osa : 1;
+	ShadeInput *shi = ssamp->shi;
+	ShadeResult *shr = ssamp->shr;
+
+	for (int slot = 0; slot < num_slots; slot++, shi++, shr++) {
+		shade_input_initialize(shi, pa, rl, slot);
+		memset(shr, 0, sizeof(*shr));
+	}
+	get_sample_layers(pa, rl, ssamp->rlpp);
+}
+
+/* Do AO or (future) GI: calls ambient_occlusion() for every filled sample that is not shadeless */
+void shade_samples_do_AO(ShadeSample *ssamp)
+{
+	ShadeInput *first = &ssamp->shi[0];
+
+	/* needs shadow rendering, plus either raytracing or the approximate gather method */
+	if ((R.r.mode & R_SHADOW) == 0)
+		return;
+	if ((R.r.mode & R_RAYTRACE) == 0 && R.wrld.ao_gather_method != WO_AOGATHER_APPROX)
+		return;
+	/* the world has to enable AO, environment or indirect lighting */
+	if ((R.wrld.mode & (WO_AMB_OCC | WO_ENV_LIGHT | WO_INDIRECT_LIGHT)) == 0)
+		return;
+	/* and one of those passes has to be wanted, on its own or as part of the combined pass */
+	if (!(first->passflag & (SCE_PASS_AO | SCE_PASS_ENVIRONMENT | SCE_PASS_INDIRECT)) &&
+	    !((first->passflag & SCE_PASS_COMBINED) && (first->combinedflag & (SCE_PASS_AO | SCE_PASS_ENVIRONMENT | SCE_PASS_INDIRECT))))
+		return;
+
+	for (int a = 0; a < ssamp->tot; a++)
+		if (!(first[a].mode & MA_SHLESS)) ambient_occlusion(&first[a]);     /* stores in shi->ao[] */
+}
+
+
+void shade_samples_fill_with_ps(ShadeSample *ssamp, PixStr *ps, int x, int y)
+{
+	ShadeInput *shi;
+	float xs, ys;
+	/* walks the 'ps' list of pixel (x, y) and fills ssamp->shi[] (one entry per set mask bit for R_FULL_OSA faces), sets ssamp->tot */
+	ssamp->tot = 0;
+
+	for (shi = ssamp->shi; ps; ps = ps->next) {
+		shade_input_set_triangle(shi, ps->obi, ps->facenr, 1);
+
+		if (shi->vlr) { /* NULL happens for env material or for 'all z', shi is not advanced then */
+			unsigned short curmask = ps->mask;
+
+			/* full osa is only set for OSA renders */
+			if (shi->vlr->flag & R_FULL_OSA) {
+				short shi_cp = 0, samp;  /* shi_cp: set once the first sample of this face is filled */
+
+				for (samp = 0; samp < R.osa; samp++) {
+					if (curmask & (1 << samp)) {
+						/* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
+						xs = (float)x + R.jit[samp][0] + 0.5f;
+						ys = (float)y + R.jit[samp][1] + 0.5f;
+
+						if (shi_cp)  /* same face as the previous entry, presumably reuses its triangle data */
+							shade_input_copy_triangle(shi, shi - 1);
+
+						shi->mask = (1 << samp);
+//						shi->rl= ssamp->rlpp[samp];
+						shi->samplenr = R.shadowsamplenr[shi->thread]++; /* this counter is not being reset per pixel */
+						shade_input_set_viewco(shi, x, y, xs, ys, (float)ps->z);
+						shade_input_set_uv(shi);
+						if (shi_cp == 0)
+							shade_input_set_normals(shi);
+						else  /* XXX shi->flippednor messes up otherwise */
+							shade_input_set_vertex_normals(shi);
+
+						shi_cp = 1;
+						shi++;
+					}
+				}
+			}
+			else {
+				if (R.osa) {
+					short b = R.samples->centmask[curmask];  /* packed: low 4 bits index the x offset, the remaining bits the y offset */
+					xs = (float)x + R.samples->centLut[b & 15] + 0.5f;
+					ys = (float)y + R.samples->centLut[b >> 4] + 0.5f;
+				}
+				else if (R.i.curblur) {
+					xs= (float)x + R.mblur_jit[R.i.curblur-1][0] + 0.5f;
+					ys= (float)y + R.mblur_jit[R.i.curblur-1][1] + 0.5f;
+				}
+				else {
+					xs = (float)x + 0.5f;  /* no OSA, no blur pass: sample the pixel center */
+					ys = (float)y + 0.5f;
+				}
+
+				shi->mask = curmask;
+				shi->samplenr = R.shadowsamplenr[shi->thread]++;
+				shade_input_set_viewco(shi, x, y, xs, ys, (float)ps->z);
+				shade_input_set_uv(shi);
+				shade_input_set_normals(shi);
+				shi++;
+			}
+
+			/* total sample amount: sample index of the last filled entry + 1 (shi->sample is static, set in initialize) */
+			if (shi != ssamp->shi)
+				ssamp->tot = (shi - 1)->sample + 1;
+		}
+	}
+}
+
+/* Fills the samples of pixel (x, y) from 'ps' and shades them, returns true if anything happened */
+int shade_samples(ShadeSample *ssamp, PixStr *ps, int x, int y)
+{
+	shade_samples_fill_with_ps(ssamp, ps, x, y);
+
+	/* no sample got filled for this pixel */
+	if (ssamp->tot == 0) {
+		return 0;
+	}
+
+	/* if shadow or AO? */
+	shade_samples_do_AO(ssamp);
+
+	/* if shade (all shade inputs have same passflag, so the first one is tested):
+	 * full shading when any pass besides Z and the object/material index is set */
+	if (ssamp->shi[0].passflag & ~(SCE_PASS_Z | SCE_PASS_INDEXOB | SCE_PASS_INDEXMA)) {
+		for (int a = 0; a < ssamp->tot; a++) {
+			shade_input_set_shade_texco(&ssamp->shi[a]);
+			shade_input_do_shade(&ssamp->shi[a], &ssamp->shr[a]);
+		}
+	}
+	else if (ssamp->shi[0].passflag & SCE_PASS_Z) {
+		/* no shading, only store the negated z of the coordinate */
+		for (int a = 0; a < ssamp->tot; a++) {
+			ssamp->shr[a].z = -ssamp->shi[a].co[2];
+		}
+	}
+
+	return 1;
+}
+
diff --git a/source/blender/render/intern/source/shadeoutput.c b/source/blender/render/intern/source/shadeoutput.c
new file mode 100644
index 00000000000..090c249defb
--- /dev/null
+++ b/source/blender/render/intern/source/shadeoutput.c
@@ -0,0 +1,2182 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2006 Blender Foundation
+ * All rights reserved.
+ *
+ * Contributors: Hos, Robert Wenzlaff.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/shadeoutput.c
+ *  \ingroup render
+ */
+
+#include <stdio.h>
+#include <float.h>
+#include <math.h>
+#include <string.h>
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "BKE_colorband.h"
+#include "BKE_colortools.h"
+#include "BKE_material.h"
+
+#include "DNA_group_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_material_types.h"
+
+/* local include */
+#include "occlusion.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "shadbuf.h"
+#include "sss.h"
+#include "texture.h"
+
+#include "shading.h" /* own include */
+
+#include "IMB_colormanagement.h"
+
+/* could enable at some point but for now there are far too many conversions */
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wdouble-promotion"
+#endif
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+ListBase *get_lights(ShadeInput *shi)
+{
+	/* Lamp list for 'shi': objects of the override group, else of the material group, else R.lights. */
+	ListBase *lights = &R.lights;
+	if (!(R.r.scemode & R_BUTS_PREVIEW)) {  /* R_BUTS_PREVIEW renders always use R.lights */
+		if (shi->light_override)
+			lights = &shi->light_override->gobject;
+		else if (shi->mat && shi->mat->group)
+			lights = &shi->mat->group->gobject;
+	}
+	return lights;
+}
+
+#if 0  /* not compiled */
+static void fogcolor(const float colf[3], float *rco, float *view)
+{
+	float alpha, stepsize, startdist, dist, hor[4], zen[3], vec[3], dview[3];
+	float div=0.0f, distfac;
+
+	hor[0]= R.wrld.horr; hor[1]= R.wrld.horg; hor[2]= R.wrld.horb;
+	zen[0]= R.wrld.zenr; zen[1]= R.wrld.zeng; zen[2]= R.wrld.zenb;
+
+	copy_v3_v3(vec, rco);
+
+	/* we loop from the current coordinate to the mist start in steps */
+	stepsize= 1.0f;
+
+	div= ABS(view[2]);
+	dview[0]= view[0]/(stepsize*div);
+	dview[1]= view[1]/(stepsize*div);
+	dview[2]= -stepsize;
+
+	startdist= -rco[2] + BLI_frand();  /* random offset per call */
+	for (dist= startdist; dist>R.wrld.miststa; dist-= stepsize) {
+
+		hor[0]= R.wrld.horr; hor[1]= R.wrld.horg; hor[2]= R.wrld.horb;
+		alpha= 1.0f;
+		do_sky_tex(vec, vec, NULL, hor, zen, &alpha);
+
+		distfac= (dist-R.wrld.miststa)/R.wrld.mistdist;
+
+		hor[3]= hor[0]*distfac*distfac;
+
+		/* premul! */
+		alpha= hor[3];
+		hor[0]= hor[0]*alpha;
+		hor[1]= hor[1]*alpha;
+		hor[2]= hor[2]*alpha;
+		addAlphaOverFloat(colf, hor);  /* accumulate the step color on colf */
+
+		sub_v3_v3(vec, dview);
+	}
+}
+#endif
+
+/* Mist alpha for a point.
+ * zcor is distance, co the 3d coordinate in eye space (only read for height mist).
+ * Returns alpha: (1 - mist) * (1 - R.wrld.misi). */
+float mistfactor(float zcor, float const co[3])
+{
+	/* distance past the mist start; zcor is calculated per pixel */
+	float mist = zcor - R.wrld.miststa;
+
+	if (!(mist > 0.0f)) {
+		/* not beyond the mist start */
+		mist = 0.0f;
+	}
+	else if (!(mist < R.wrld.mistdist)) {
+		mist = 1.0f;  /* beyond the mist depth */
+	}
+	else {
+		mist = mist / R.wrld.mistdist;
+
+		/* falloff selected by mistype */
+		switch (R.wrld.mistype) {
+			case 0:
+				mist *= mist;
+				break;
+			case 1:
+				/* pass */
+				break;
+			default:
+				mist = sqrtf(mist);
+				break;
+		}
+	}
+
+	/* height switched off mist */
+	if (R.wrld.misthi != 0.0f && mist != 0.0f) {
+		/* z of co transformed by R.viewinv; at height misthi the mist is completely gone */
+		const float hi = R.viewinv[0][2] * co[0] +
+		                 R.viewinv[1][2] * co[1] +
+		                 R.viewinv[2][2] * co[2] +
+		                 R.viewinv[3][2];
+
+		if (hi > R.wrld.misthi) {
+			mist = 0.0f;
+		}
+		else if (hi > 0.0f) {
+			/* fade out with the squared remaining height fraction */
+			const float left = (R.wrld.misthi - hi) / R.wrld.misthi;
+			mist *= left * left;
+		}
+	}
+
+	return (1.0f - mist) * (1.0f - R.wrld.misi);
+}
+
+static void spothalo(struct LampRen *lar, ShadeInput *shi, float *intens)
+{
+	/* Writes to *intens the halo intensity of spot lamp 'lar' along the view ray of 'shi'; stays 0 on every early return. */
+	double a, b, c, disc, nray[3], npos[3];
+	double t0, t1 = 0.0f, t2= 0.0f, t3;
+	float p1[3], p2[3], ladist, maxz = 0.0f, maxy = 0.0f, haint;
+	int cuts;
+	bool do_clip = true, use_yco = false;
+
+	*intens= 0.0f;
+	haint= lar->haint;
+
+	if (R.r.mode & R_ORTHO) {
+		/* camera pos (view vector) cannot be used... */
+		/* camera position (cox,coy,0) rotate around lamp */
+		p1[0]= shi->co[0]-lar->co[0];
+		p1[1]= shi->co[1]-lar->co[1];
+		p1[2]= -lar->co[2];
+		mul_m3_v3(lar->imat, p1);
+		copy_v3db_v3fl(npos, p1);  /* npos is double! */
+
+		/* pre-scale */
+		npos[2] *= (double)lar->sh_zfac;
+	}
+	else {
+		copy_v3db_v3fl(npos, lar->sh_invcampos);	/* in initlamp calculated */
+	}
+
+	/* rotate view */
+	copy_v3db_v3fl(nray, shi->view);
+	mul_m3_v3_double(lar->imat, nray);
+
+	if (R.wrld.mode & WO_MIST) {
+		/* patchy... the mist factor is taken at the lamp position only */
+		haint *= mistfactor(-lar->co[2], lar->co);
+		if (haint==0.0f) {
+			return;
+		}
+	}
+
+	/* rotate maxz */
+	if (shi->co[2]==0.0f) {
+		do_clip = false;  /* for when halo at sky */
+	}
+	else {
+		p1[0]= shi->co[0]-lar->co[0];
+		p1[1]= shi->co[1]-lar->co[1];
+		p1[2]= shi->co[2]-lar->co[2];
+
+		maxz= lar->imat[0][2]*p1[0]+lar->imat[1][2]*p1[1]+lar->imat[2][2]*p1[2];
+		maxz*= lar->sh_zfac;
+		maxy= lar->imat[0][1]*p1[0]+lar->imat[1][1]*p1[1]+lar->imat[2][1]*p1[2];
+
+		/* ray (nearly) without z component: clip with the y coordinate instead */
+		if (fabs(nray[2]) < FLT_EPSILON) {
+			use_yco = true;
+		}
+	}
+
+	/* scale z to make sure volume is normalized */
+	nray[2] *= (double)lar->sh_zfac;
+	/* nray does not need normalization */
+
+	ladist= lar->sh_zfac*lar->dist;
+
+	/* solve a*t^2 + 2*b*t + c = 0: the ray npos + t*nray against the cone x^2 + y^2 = z^2 */
+	a = nray[0] * nray[0] + nray[1] * nray[1] - nray[2]*nray[2];
+	b = nray[0] * npos[0] + nray[1] * npos[1] - nray[2]*npos[2];
+	c = npos[0] * npos[0] + npos[1] * npos[1] - npos[2]*npos[2];
+
+	cuts= 0;
+	if (fabs(a) < DBL_EPSILON) {
+		/*
+		 * Only one intersection point...
+		 */
+		return;
+	}
+	else {
+		disc = b*b - a*c;
+
+		if (disc==0.0) {
+			t1=t2= (-b)/ a;
+			cuts= 2;
+		}
+		else if (disc > 0.0) {
+			disc = sqrt(disc);
+			t1 = (-b + disc) / a;
+			t2 = (-b - disc) / a;
+			cuts= 2;
+		}
+	}
+	if (cuts==2) {
+		int ok1=0, ok2=0;
+
+		/* sort */
+		if (t1>t2) {
+			a= t1; t1= t2; t2= a;
+		}
+
+		/* z of intersection points with diabolo */
+		p1[2]= npos[2] + t1*nray[2];
+		p2[2]= npos[2] + t2*nray[2];
+
+		/* evaluate both points */
+		if (p1[2]<=0.0f) ok1= 1;
+		if (p2[2]<=0.0f && t1!=t2) ok2= 1;
+
+		/* at least 1 point with negative z */
+		if (ok1==0 && ok2==0) return;
+
+		/* intersection point with -ladist, the bottom of the cone */
+		if (use_yco == false) {
+			t3= ((double)(-ladist)-npos[2])/nray[2];
+
+			/* do we have to replace one of the intersection points? */
+			if (ok1) {
+				if (p1[2]<-ladist) t1= t3;
+			}
+			else {
+				t1= t3;
+			}
+			if (ok2) {
+				if (p2[2]<-ladist) t2= t3;
+			}
+			else {
+				t2= t3;
+			}
+		}
+		else if (ok1==0 || ok2==0) return;
+
+		/* at least 1 visible intersection point */
+		if (t1<0.0 && t2<0.0) return;
+
+		if (t1<0.0) t1= 0.0;
+		if (t2<0.0) t2= 0.0;
+
+		if (t1==t2) return;
+
+		/* sort again to be sure */
+		if (t1>t2) {
+			a= t1; t1= t2; t2= a;
+		}
+
+		/* calculate t0: is the maximum visible z (when halo is intersected by face) */
+		if (do_clip) {
+			if (use_yco == false) t0 = ((double)maxz - npos[2]) / nray[2];
+			else                  t0 = ((double)maxy - npos[1]) / nray[1];
+
+			if (t0 < t1) return;
+			if (t0 < t2) t2= t0;
+		}
+
+		/* calc points */
+		p1[0]= npos[0] + t1*nray[0];
+		p1[1]= npos[1] + t1*nray[1];
+		p1[2]= npos[2] + t1*nray[2];
+		p2[0]= npos[0] + t2*nray[0];
+		p2[1]= npos[1] + t2*nray[1];
+		p2[2]= npos[2] + t2*nray[2];
+
+
+		/* now we have 2 points, make three lengths with it */
+
+		a = len_v3(p1);
+		b = len_v3(p2);
+		c = len_v3v3(p1, p2);
+
+		a/= ladist;
+		a= sqrt(a);
+		b/= ladist;
+		b= sqrt(b);
+		c/= ladist;
+
+		*intens= c*( (1.0-a)+(1.0-b) );
+
+		/* WATCH IT: do not clip a, b and c at 1.0, this gives nasty little overflows
+		 * at the edges (especially with narrow halos) */
+		if (*intens<=0.0f) return;
+
+		/* soft area */
+		/* not needed because t0 has been used for p1/p2 as well */
+		/* if (doclip && t0<t2) { */
+		/* 	*intens *= (t0-t1)/(t2-t1); */
+		/* } */
+
+		*intens *= haint;
+
+		if (lar->shb && lar->shb->shadhalostep) {
+			*intens *= shadow_halo(lar, p1, p2);
+		}
+
+	}
+}
+
+void renderspothalo(ShadeInput *shi, float col[4], float alpha)
+{
+	/* Adds the halo of each matching spot lamp to 'col' (premultiplied), scaled by 'alpha'. */
+	GroupObject *go;
+	float halo;
+
+	if (alpha == 0.0f)
+		return;
+
+	for (go = get_lights(shi)->first; go; go = go->next) {
+		LampRen *lar = go->lampren;
+
+		if (lar == NULL)
+			continue;
+		/* only spot lamps with a halo and halo intensity, no deep shadow buffer */
+		if (lar->type != LA_SPOT || !(lar->mode & LA_HALO) || lar->buftype == LA_SHADBUF_DEEP || !(lar->haint > 0))
+			continue;
+		if ((lar->mode & LA_LAYER) && shi->vlr && (lar->lay & shi->obi->lay) == 0)
+			continue;
+		if ((lar->lay & shi->lay) == 0)
+			continue;
+
+		spothalo(lar, shi, &halo);
+		if (halo > 0.0f) {
+			const float weight = halo * alpha;
+			col[0] += weight * lar->r;
+			col[1] += weight * lar->g;
+			col[2] += weight * lar->b;
+			col[3] += weight;  /* all premul */
+		}
+	}
+	/* clip alpha, is needed for unified 'alpha threshold' (vanillaRenderPipe.c) */
+	if (col[3] > 1.0f)
+		col[3] = 1.0f;
+}
+
+
+
+/* ---------------- shaders ----------------------- */
+
+static double Normalize_d(double *n)
+{
+	/* Normalizes the 3D vector 'n' in place and returns the length it had before. */
+	const double len_sq = n[0] * n[0] + n[1] * n[1] + n[2] * n[2];
+	double len = 0.0;
+
+	if (len_sq > 0.00000000000000001) {
+		len = sqrt(len_sq);
+
+		n[0] /= len;
+		n[1] /= len;
+		n[2] /= len;
+	}
+	else {
+		/* (nearly) zero length: the vector is cleared and 0 is returned */
+		n[0] = n[1] = n[2] = 0.0;
+	}
+	return len;
+}
+
+/* Mix of 'real' fresnel and allowing control, result in [0, 1]; grad defines blending gradient, fac is the power. */
+float fresnel_fac(const float view[3], const float vn[3], float grad, float fac)
+{
+	float facing, result;
+
+	if (fac == 0.0f)  /* a zero power switches the effect off */
+		return 1.0f;
+
+	facing = dot_v3v3(view, vn);
+	facing = (facing > 0.0f) ? (1.0f + facing) : (1.0f - facing);
+
+	result = grad + (1.0f - grad) * powf(facing, fac);
+
+	if (result < 0.0f) return 0.0f;
+	if (result > 1.0f) return 1.0f;
+	return result;
+}
+
+static double saacos_d(double fac)
+{
+	/* acos() that returns the end values for arguments outside [-1, 1] */
+	if (fac >= 1.0) return 0.0;
+	return (fac <= -1.0) ? M_PI : acos(fac);
+}
+
+/* Stokes' form factor of the quad 'area' seen from 'co' with normal 'vn'. Need doubles here for extreme small area sizes */
+static float area_lamp_energy(float (*area)[3], const float co[3], const float vn[3])
+{
+	double fac;
+	double vec[4][3];	/* vectors of rendered co to vertices lamp */
+	double cross[4][3];	/* cross products of this */
+	double rad[4];		/* angles between vecs */
+
+	VECSUB(vec[0], co, area[0]);
+	VECSUB(vec[1], co, area[1]);
+	VECSUB(vec[2], co, area[2]);
+	VECSUB(vec[3], co, area[3]);
+
+	Normalize_d(vec[0]);
+	Normalize_d(vec[1]);
+	Normalize_d(vec[2]);
+	Normalize_d(vec[3]);
+
+	/* cross product of each pair of neighbouring corner vectors */
+#define CROSS(dest, a, b) \
+	{ \
+		dest[0]= a[1] * b[2] - a[2] * b[1]; \
+		dest[1]= a[2] * b[0] - a[0] * b[2]; \
+		dest[2]= a[0] * b[1] - a[1] * b[0]; \
+	} (void)0
+
+	CROSS(cross[0], vec[0], vec[1]);
+	CROSS(cross[1], vec[1], vec[2]);
+	CROSS(cross[2], vec[2], vec[3]);
+	CROSS(cross[3], vec[3], vec[0]);
+
+#undef CROSS
+
+	Normalize_d(cross[0]);
+	Normalize_d(cross[1]);
+	Normalize_d(cross[2]);
+	Normalize_d(cross[3]);
+
+	/* angles: first the dot products of neighbouring corner vectors */
+	rad[0]= vec[0][0]*vec[1][0]+ vec[0][1]*vec[1][1]+ vec[0][2]*vec[1][2];
+	rad[1]= vec[1][0]*vec[2][0]+ vec[1][1]*vec[2][1]+ vec[1][2]*vec[2][2];
+	rad[2]= vec[2][0]*vec[3][0]+ vec[2][1]*vec[3][1]+ vec[2][2]*vec[3][2];
+	rad[3]= vec[3][0]*vec[0][0]+ vec[3][1]*vec[0][1]+ vec[3][2]*vec[0][2];
+
+	rad[0]= saacos_d(rad[0]);
+	rad[1]= saacos_d(rad[1]);
+	rad[2]= saacos_d(rad[2]);
+	rad[3]= saacos_d(rad[3]);
+
+	/* Stokes formula: per edge, the angle times the dot product of 'vn' and the edge's cross product */
+	fac=  rad[0]*(vn[0]*cross[0][0]+ vn[1]*cross[0][1]+ vn[2]*cross[0][2]);
+	fac+= rad[1]*(vn[0]*cross[1][0]+ vn[1]*cross[1][1]+ vn[2]*cross[1][2]);
+	fac+= rad[2]*(vn[0]*cross[2][0]+ vn[1]*cross[2][1]+ vn[2]*cross[2][2]);
+	fac+= rad[3]*(vn[0]*cross[3][0]+ vn[1]*cross[3][1]+ vn[2]*cross[3][2]);
+
+	if (fac<=0.0) return 0.0;  /* never negative */
+	return fac;
+}
+
+static float area_lamp_energy_multisample(LampRen *lar, const float co[3], float *vn)
+{
+	/* Averages area_lamp_energy() over lar->ray_totsamp copies of the lamp quad, moved by the lamp jitter. */
+	const float *jit = lar->jitter;
+	float offset[3], quad[4][3], sum = 0.0f;
+	int left, corner;
+
+	/* test if co is behind lamp */
+	sub_v3_v3v3(offset, co, lar->co);
+	if (dot_v3v3(offset, lar->vec) < 0.0f)
+		return 0.0f;
+
+	for (left = lar->ray_totsamp; left--; jit += 2) {
+		/* 2D jitter value, transformed by lar->mat */
+		offset[0] = jit[0];
+		offset[1] = jit[1];
+		offset[2] = 0.0f;
+		mul_m3_v3(lar->mat, offset);
+
+		/* all four corners are moved by the same offset */
+		for (corner = 0; corner < 4; corner++)
+			add_v3_v3v3(quad[corner], lar->area[corner], offset);
+
+		sum += area_lamp_energy(quad, co, vn);
+	}
+
+	/* mean over the samples */
+	sum /= (float)lar->ray_totsamp;
+
+	return pow(sum * lar->areasize, lar->k);	/* corrected for buttons size and lar->dist^2 */
+}
+
+static float spec(float inp, int hard)
+{
+	/* Approximates inp^hard by repeated squaring, using bits 0..8 of 'hard';
+	 * inp >= 1 gives 1, inp <= 0 gives 0, small intermediate powers are clamped. */
+	float pw, val;
+
+	if (inp >= 1.0f) return 1.0f;
+	if (inp <= 0.0f) return 0.0f;
+
+	/* pw runs through inp^2, inp^4, ... while the bits are walked; avoid FPE */
+	pw = inp * inp;
+	if (pw < 0.01f) pw = 0.01f;
+
+	val = (hard & 1) ? inp : 1.0f;
+	if (hard & 2) val *= pw;
+	pw *= pw;
+	if (hard & 4) val *= pw;
+	pw *= pw;
+	if (hard & 8) val *= pw;
+	pw *= pw;
+	if (hard & 16) val *= pw;
+	pw *= pw;
+
+	if (pw < 0.001f) pw = 0.0f;  /* avoid FPE */
+
+	if (hard & 32) val *= pw;
+	pw *= pw;
+	if (hard & 64) val *= pw;
+	pw *= pw;
+	if (hard & 128) val *= pw;
+
+	if (pw < 0.001f) pw = 0.0f;
+
+	if (hard & 256) {
+		pw *= pw;
+		val *= pw;
+	}
+	return val;
+}
+
+static float Phong_Spec(const float n[3], const float l[3], const float v[3], int hard, int tangent )
+{
+	/* Phong specular: spec() of the dot product between the normal and the half-way vector. */
+	float half[3], nh;
+
+	/* half-way vector of light and view */
+	add_v3_v3v3(half, l, v);
+	normalize_v3(half);
+
+	nh = dot_v3v3(half, n);
+	if (tangent)
+		nh = sasqrt(1.0f - nh * nh);  /* tangent shading uses sqrt(1 - nh^2) */
+
+	/* no highlight unless the value is positive */
+	if (nh > 0.0f)
+		return spec(nh, hard);
+	return 0.0f;
+}
+
+
+/* Reduced Cook-Torrance specular (for off-specular peak): spec(n.h, hard) / (0.1 + n.v). */
+static float CookTorr_Spec(const float n[3], const float l[3], const float v[3], int hard, int tangent)
+{
+	float half[3], nh, nv;
+
+	/* half-way vector of view and light */
+	add_v3_v3v3(half, v, l);
+	normalize_v3(half);
+
+	nh = dot_v3v3(n, half);
+	nv = dot_v3v3(n, v);
+	if (tangent) {
+		nh = sasqrt(1.0f - nh * nh);
+		nv = sasqrt(1.0f - nv * nv);
+	}
+	else {
+		if (nh < 0.0f) return 0.0f;
+		if (nv < 0.0f) nv = 0.0f;
+	}
+
+	/* the division makes the highlight stronger for small nv */
+	return spec(nh, hard) / (0.1f + nv);
+}
+
+/* Blinn spec: fresnel term * geometric attenuation * gaussian of the angle between normal and half-way vector */
+static float Blinn_Spec(const float n[3], const float l[3], const float v[3], float refrac, float spec_power, int tangent)
+{
+	float i, nh, nv, nl, vh, h[3];
+	float a, b, c, g, p, f, ang;
+
+	if (refrac < 1.0f) return 0.0f;
+	if (spec_power == 0.0f) return 0.0f;
+
+	/* conversion from 'hardness' (1-255) to 'spec_power' (50 maps at 0.1) */
+	if (spec_power<100.0f)
+		spec_power = sqrtf(1.0f / spec_power);
+	else spec_power= 10.0f/spec_power;
+
+	h[0]= v[0]+l[0];
+	h[1]= v[1]+l[1];
+	h[2]= v[2]+l[2];
+	normalize_v3(h);
+
+	nh= n[0]*h[0]+n[1]*h[1]+n[2]*h[2]; /* Dot product between surface normal and half-way vector */
+	if (tangent) nh= sasqrt(1.0f - nh*nh);
+	else if (nh<0.0f) return 0.0f;
+
+	nv= n[0]*v[0]+n[1]*v[1]+n[2]*v[2]; /* Dot product between surface normal and view vector */
+	if (tangent) nv= sasqrt(1.0f - nv*nv);
+	if (nv<=0.01f) nv= 0.01f;				/* hrms... */
+
+	nl= n[0]*l[0]+n[1]*l[1]+n[2]*l[2]; /* Dot product between surface normal and light vector */
+	if (tangent) nl= sasqrt(1.0f - nl*nl);
+	if (nl<=0.01f) {
+		return 0.0f;
+	}
+
+	vh= v[0]*h[0]+v[1]*h[1]+v[2]*h[2]; /* Dot product between view vector and half-way vector */
+	if (vh<=0.0f) vh= 0.01f;
+
+	a = 1.0f;
+	b = (2.0f*nh*nv)/vh;
+	c = (2.0f*nh*nl)/vh;
+
+	/* geometric attenuation: the smallest of a, b and c, also when terms tie (b == c if view and light coincide) */
+	g = (b < c) ? b : c;
+	if (a < g) g = a;
+
+	p = sqrt((double)((refrac * refrac)+(vh * vh) - 1.0f));  /* refrac >= 1 here, so the argument is not negative */
+	f = (((p-vh)*(p-vh))/((p+vh)*(p+vh)))*(1+((((vh*(p+vh))-1.0f)*((vh*(p+vh))-1.0f))/(((vh*(p-vh))+1.0f)*((vh*(p-vh))+1.0f))));
+	ang = saacos(nh);
+
+	i= f * g * exp((double)(-(ang*ang) / (2.0f*spec_power*spec_power)));
+	if (i<0.0f) i= 0.0f;
+
+	return i;
+}
+
+/* Cartoon render spec: 1 for angles below 'size', 0 from size + smooth on, a linear ramp in between. */
+static float Toon_Spec(const float n[3], const float l[3], const float v[3], float size, float smooth, int tangent)
+{
+	float half[3], nh, angle;
+
+	/* half-way vector of light and view */
+	add_v3_v3v3(half, l, v);
+	normalize_v3(half);
+
+	nh = dot_v3v3(half, n);
+	if (tangent)
+		nh = sasqrt(1.0f - nh * nh);
+
+	angle = saacos(nh);
+
+	if (angle < size)
+		return 1.0f;
+	if (angle >= (size + smooth) || smooth == 0.0f)
+		return 0.0f;
+
+	/* inside the smoothing band: linear fade */
+	return 1.0f - ((angle - size) / smooth);
+}
+
+/* Ward isotropic gaussian spec; 'rms' is used as alpha, with a minimum of 0.001. */
+static float WardIso_Spec(const float n[3], const float l[3], const float v[3], float rms, int tangent)
+{
+	float half[3], nh, nv, nl;
+	float tan_nh, alpha, norm, gauss;
+
+	/* half-way vector */
+	add_v3_v3v3(half, l, v);
+	normalize_v3(half);
+
+	/* dot products of the surface normal with the half-way, view and light vectors */
+	nh = dot_v3v3(n, half);
+	nv = dot_v3v3(n, v);
+	nl = dot_v3v3(n, l);
+	if (tangent) {
+		nh = sasqrt(1.0f - nh * nh);
+		nv = sasqrt(1.0f - nv * nv);
+		nl = sasqrt(1.0f - nl * nl);
+	}
+
+	/* non-positive values are replaced by a small positive one */
+	if (nh <= 0.0f) nh = 0.001f;
+	if (nv <= 0.0f) nv = 0.001f;
+	if (nl <= 0.0f) nl = 0.001f;
+
+	tan_nh = tanf(saacos(nh));
+	alpha = MAX2(rms, 0.001f);
+	norm = 1.0f / (4.0f * (float)M_PI * alpha * alpha);
+	gauss = expf(-(tan_nh * tan_nh) / (alpha * alpha)) / (sqrtf(nv * nl));
+	return nl * norm * gauss;
+}
+
+/* Cartoon render diffuse: 1 for angles below 'size', 0 from size + smooth on,
+ * a linear ramp in between. */
+static float Toon_Diff(const float n[3], const float l[3], const float UNUSED(v[3]), float size, float smooth)
+{
+	/* saacos() of the dot product between surface normal and light vector */
+	const float angle = saacos(dot_v3v3(n, l));
+
+	if (angle < size)
+		return 1.0f;
+	if (angle >= (size + smooth) || smooth == 0.0f)
+		return 0.0f;
+
+	/* inside the smoothing band: linear fade */
+	return 1.0f - ((angle - size) / smooth);
+}
+
+/* Oren Nayar diffuse.
+ *
+ * 'nl' is either the normal/light dot product, or the return value of the area light;
+ * in the latter case only the last multiplication uses 'nl'.
+ * With rough == 0 the result reduces to 'nl'.
+ * Returns 0 when the real normal/light dot product is not positive, or when 'nl' is negative.
+ */
+static float OrenNayar_Diff(float nl, const float n[3], const float l[3], const float v[3], float rough )
+{
+	float i, nv, realnl;
+	float a, b, t, A, B;
+	float Lit_A, View_A, Lit_B[3], View_B[3];
+
+	/* Dot product between surface normal and view vector */
+	nv= n[0]*v[0]+n[1]*v[1]+n[2]*v[2];
+	if (nv<=0.0f) nv= 0.0f;
+
+	/* Dot product between surface normal and light vector */
+	realnl= n[0]*l[0]+n[1]*l[1]+n[2]*l[2];
+	if (realnl<=0.0f) return 0.0f;
+	if (nl<0.0f) return 0.0f;		/* value from area light */
+
+	/* angles of the light and the view vector to the normal */
+	Lit_A = saacos(realnl);
+	View_A = saacos( nv );
+
+	/* light vector with its part along the normal removed */
+	Lit_B[0] = l[0] - (realnl * n[0]);
+	Lit_B[1] = l[1] - (realnl * n[1]);
+	Lit_B[2] = l[2] - (realnl * n[2]);
+	normalize_v3(Lit_B);
+
+	/* view vector with its part along the normal removed */
+	View_B[0] = v[0] - (nv * n[0]);
+	View_B[1] = v[1] - (nv * n[1]);
+	View_B[2] = v[2] - (nv * n[2]);
+	normalize_v3(View_B);
+
+	/* dot product of both, negative values are clamped to zero */
+	t = Lit_B[0]*View_B[0] + Lit_B[1]*View_B[1] + Lit_B[2]*View_B[2];
+	if ( t < 0 ) t = 0;
+
+	/* a is the larger, b the smaller of the two angles */
+	if ( Lit_A > View_A ) {
+		a = Lit_A;
+		b = View_A;
+	}
+	else {
+		a = View_A;
+		b = Lit_A;
+	}
+
+	/* the two terms that depend on roughness only */
+	A = 1.0f - (0.5f * ((rough * rough) / ((rough * rough) + 0.33f)));
+	B = 0.45f * ((rough * rough) / ((rough * rough) + 0.09f));
+
+	b*= 0.95f;	/* prevent tangens from shooting to inf, 'nl' can be not a dot product here. */
+				/* overflow only happens with extreme size area light, and higher roughness */
+	i = nl * ( A + ( B * t * sinf(a) * tanf(b) ) );
+
+	return i;
+}
+
+/* Minnaert diffuse; 'nl' is the dot product between surface normal and light vector. */
+static float Minnaert_Diff(float nl, const float n[3], const float v[3], float darkness)
+{
+	const float exponent = darkness - 1.0f;
+	float nv, base;
+
+	/* no light from behind */
+	if (nl <= 0.0f)
+		return 0.0f;
+
+	/* dot product between surface normal and view vector, negative values clamped to zero */
+	nv = dot_v3v3(n, v);
+	if (nv < 0.0f)
+		nv = 0.0f;
+
+	/* darkness <= 1: the real model, above that: the Nvidia model */
+	if (darkness <= 1.0f) base = max_ff(nv * nl, 0.1f);
+	else                  base = 1.001f - nv;
+
+	return nl * pow(base, exponent);
+}
+
+static float Fresnel_Diff(float *vn, float *lv, float *UNUSED(view), float fac_i, float fac)
+{
+	return fresnel_fac(lv, vn, fac_i, fac);  /* fresnel_fac() on the light vector instead of the view; fac_i is passed as gradient */
+}
+
+/* --------------------------------------------- */
+/* Sets shi->ref to the view vector reflected on the normal: ref = view - 2 (vn . view) vn.
+ * With shi->osatex also sets dxref / dyref. Also called from texture.c */
+void calc_R_ref(ShadeInput *shi)
+{
+	float i;
+
+	/* shi->vn dot shi->view */
+	i= -2*(shi->vn[0]*shi->view[0]+shi->vn[1]*shi->view[1]+shi->vn[2]*shi->view[2]);
+
+	shi->ref[0]= (shi->view[0]+i*shi->vn[0]);
+	shi->ref[1]= (shi->view[1]+i*shi->vn[1]);
+	shi->ref[2]= (shi->view[2]+i*shi->vn[2]);
+	/* dxref / dyref: ref minus the reflection of the view offset by dxview in x / dyview in y */
+	if (shi->osatex) {
+		if (shi->vlr->flag & R_SMOOTH) {
+			/* with R_SMOOTH the normal is offset too, by dxno / dyno */
+			i= -2*( (shi->vn[0]+shi->dxno[0])*(shi->view[0]+shi->dxview) +
+				(shi->vn[1]+shi->dxno[1])*shi->view[1]+ (shi->vn[2]+shi->dxno[2])*shi->view[2] );
+
+			shi->dxref[0]= shi->ref[0]- ( shi->view[0]+shi->dxview+i*(shi->vn[0]+shi->dxno[0]));
+			shi->dxref[1]= shi->ref[1]- (shi->view[1]+ i*(shi->vn[1]+shi->dxno[1]));
+			shi->dxref[2]= shi->ref[2]- (shi->view[2]+ i*(shi->vn[2]+shi->dxno[2]));
+
+			i= -2*( (shi->vn[0]+shi->dyno[0])*shi->view[0]+
+				(shi->vn[1]+shi->dyno[1])*(shi->view[1]+shi->dyview)+ (shi->vn[2]+shi->dyno[2])*shi->view[2] );
+
+			shi->dyref[0]= shi->ref[0]- (shi->view[0]+ i*(shi->vn[0]+shi->dyno[0]));
+			shi->dyref[1]= shi->ref[1]- (shi->view[1]+shi->dyview+i*(shi->vn[1]+shi->dyno[1]));
+			shi->dyref[2]= shi->ref[2]- (shi->view[2]+ i*(shi->vn[2]+shi->dyno[2]));
+		}
+		else {
+			/* without R_SMOOTH only the view vector is offset */
+			i= -2*( shi->vn[0]*(shi->view[0]+shi->dxview) +
+				shi->vn[1]*shi->view[1]+ shi->vn[2]*shi->view[2] );
+
+			shi->dxref[0]= shi->ref[0]- (shi->view[0]+shi->dxview+i*shi->vn[0]);
+			shi->dxref[1]= shi->ref[1]- (shi->view[1]+ i*shi->vn[1]);
+			shi->dxref[2]= shi->ref[2]- (shi->view[2]+ i*shi->vn[2]);
+
+			i= -2*( shi->vn[0]*shi->view[0]+
+				shi->vn[1]*(shi->view[1]+shi->dyview)+ shi->vn[2]*shi->view[2] );
+
+			shi->dyref[0]= shi->ref[0]- (shi->view[0]+ i*shi->vn[0]);
+			shi->dyref[1]= shi->ref[1]- (shi->view[1]+shi->dyview+i*shi->vn[1]);
+			shi->dyref[2]= shi->ref[2]- (shi->view[2]+ i*shi->vn[2]);
+		}
+	}
+}
+
+/* Writes the unlit material color and alpha to 'shr' (diff, alpha, diffshad); called from rayshade.c */
+void shade_color(ShadeInput *shi, ShadeResult *shr)
+{
+	Material *mat = shi->mat;
+	const float *vcol = shi->vcol;
+
+	if (mat->mode & MA_FACETEXTURE) {
+		/* the vertex color replaces the material color */
+		copy_v3_v3(&shi->r, vcol);
+		if (mat->mode & MA_FACETEXTURE_ALPHA)
+			shi->alpha = vcol[3];
+	}
+	else if (mat->mode & MA_VERTEXCOLP) {
+		/* the vertex color is mixed in by its own alpha */
+		const float keep = 1.0f - vcol[3];
+		shi->r = shi->r * keep + vcol[0] * vcol[3];
+		shi->g = shi->g * keep + vcol[1] * vcol[3];
+		shi->b = shi->b * keep + vcol[2] * vcol[3];
+	}
+
+	if (mat->texco)
+		do_material_tex(shi, &R);
+
+	if (mat->fresnel_tra != 0.0f)
+		shi->alpha *= fresnel_fac(shi->view, shi->vn, mat->fresnel_tra_i, mat->fresnel_tra);
+
+	/* without MA_TRANSP the result is always opaque */
+	if (!(shi->mode & MA_TRANSP))
+		shi->alpha = 1.0f;
+
+	copy_v3_v3(shr->diff, &shi->r);
+	shr->alpha = shi->alpha;
+
+	/* modulate by the object color */
+	if ((mat->shade_flag & MA_OBCOLOR) && shi->obr->ob) {
+		const float *obcol = shi->obr->ob->col;
+		float ob_alpha = obcol[3];
+		CLAMP(ob_alpha, 0.0f, 1.0f);
+		shr->diff[0] *= obcol[0];
+		shr->diff[1] *= obcol[1];
+		shr->diff[2] *= obcol[2];
+		if (shi->mode & MA_TRANSP)
+			shr->alpha *= ob_alpha;
+	}
+
+	copy_v3_v3(shr->diffshad, shr->diff);
+}
+
+/* Ramp for at end of shade: blends the diffuse ramp into 'diff', for the MA_RAMP_IN_RESULT input only. */
+static void ramp_diffuse_result(float *diff, ShadeInput *shi)
+{
+	Material *mat = shi->mat;
+	float ramp[4], fac;
+
+	/* no ramp, or a ramp input that is not handled here */
+	if (mat->ramp_col == NULL || mat->rampin_col != MA_RAMP_IN_RESULT)
+		return;
+
+	/* the ramp is evaluated at the luminance of the result */
+	fac = IMB_colormanagement_get_luminance(diff);
+	BKE_colorband_evaluate(mat->ramp_col, fac, ramp);
+
+	/* blending method */
+	fac = ramp[3] * mat->rampfac_col;
+	ramp_blend(mat->rampblend_col, diff, fac, ramp);
+}
+
+/* Adds one light contribution to the diffuse result.
+ *
+ * diff: accumulated diffuse color, added to in place.
+ * shi:  provides the material and its color (shi->r, g, b).
+ * is:   shader value, the ramp input for MA_RAMP_IN_SHADER.
+ * rgb:  energy; multiplied with the material color, which the ramp may change first.
+ */
+static void add_to_diffuse(float diff[3], const ShadeInput *shi, const float is, const float rgb[3])
+{
+	Material *mat = shi->mat;
+	float base[3];
+	float ramp[4];
+	float fac;
+
+	/* material color, the ramp below may alter this copy */
+	copy_v3_v3(base, &shi->r);
+
+	/* MA_RAMP_IN_RESULT is exceptional, it gets a normal add */
+	if (mat->ramp_col &&
+	    (mat->mode & MA_RAMP_COL) &&
+	    mat->rampin_col != MA_RAMP_IN_RESULT)
+	{
+		/* input */
+		switch (mat->rampin_col) {
+			case MA_RAMP_IN_ENERGY:
+				/* luminance of the energy */
+				fac = IMB_colormanagement_get_luminance(rgb);
+				break;
+			case MA_RAMP_IN_SHADER:
+				/* value handed in by the caller */
+				fac = is;
+				break;
+			case MA_RAMP_IN_NOR:
+				/* dot product of view vector and normal */
+				fac = dot_v3v3(shi->view, shi->vn);
+				break;
+			default:
+				fac = 0.0f;
+				break;
+		}
+
+		BKE_colorband_evaluate(mat->ramp_col, fac, ramp);
+
+		/* blending method */
+		fac = ramp[3] * mat->rampfac_col;
+
+		ramp_blend(mat->rampblend_col, base, fac, ramp);
+	}
+
+	/* output to */
+	diff[0] += rgb[0] * base[0];
+	diff[1] += rgb[1] * base[1];
+	diff[2] += rgb[2] * base[2];
+}
+
+static void ramp_spec_result(float spec_col[3], ShadeInput *shi)
+{
+	/* Blends the specular ramp into 'spec_col', for the MA_RAMP_IN_RESULT input only. */
+	Material *mat = shi->mat;
+	float ramp[4], fac;
+
+	if (mat->ramp_spec == NULL || mat->rampin_spec != MA_RAMP_IN_RESULT)
+		return;
+
+	/* the ramp is evaluated at the luminance of the result */
+	fac = IMB_colormanagement_get_luminance(spec_col);
+	BKE_colorband_evaluate(mat->ramp_spec, fac, ramp);
+
+	/* blending method */
+	fac = ramp[3] * mat->rampfac_spec;
+	ramp_blend(mat->rampblend_spec, spec_col, fac, ramp);
+}
+
+/* is = dot product shade, t = spec energy; the (ramped) specular color is written to 'spec' */
+static void do_specular_ramp(ShadeInput *shi, float is, float t, float spec[3])
+{
+	Material *mat = shi->mat;
+	float ramp[4], fac;
+
+	/* start from the specular color of the shade input */
+	spec[0] = shi->specr;
+	spec[1] = shi->specg;
+	spec[2] = shi->specb;
+
+	/* MA_RAMP_IN_RESULT is exception */
+	if (mat->ramp_spec == NULL || mat->rampin_spec == MA_RAMP_IN_RESULT)
+		return;
+
+	/* input */
+	switch (mat->rampin_spec) {
+		case MA_RAMP_IN_ENERGY:
+			fac = t;
+			break;
+		case MA_RAMP_IN_SHADER:
+			fac = is;
+			break;
+		case MA_RAMP_IN_NOR:
+			fac = dot_v3v3(shi->view, shi->vn);
+			break;
+		default:
+			fac = 0.0f;
+			break;
+	}
+
+	BKE_colorband_evaluate(mat->ramp_spec, fac, ramp);
+
+	/* blending method */
+	fac = ramp[3] * mat->rampfac_spec;
+
+	ramp_blend(mat->rampblend_spec, spec, fac, ramp);
+}
+
+/* pure AO, check for raytrace and world should have been done */
+/* preprocess, textures were not done, so the material's amb is read instead of shi->amb */
+void ambient_occlusion(ShadeInput *shi)
+{
+	const bool approx = (R.wrld.ao_gather_method == WO_AOGATHER_APPROX);
+	if ((approx || (R.r.mode & R_RAYTRACE)) && shi->mat->amb != 0.0f) {
+		if (approx) sample_occ(&R, shi);
+		else        ray_ao(shi, shi->ao, shi->env);
+	}
+	else {
+		/* nothing to gather: ao is 1, env and indirect are zero */
+		shi->ao[0] = shi->ao[1] = shi->ao[2] = 1.0f;
+		zero_v3(shi->env);
+		zero_v3(shi->indirect);
+	}
+}
+
+
+/* Applies shi->ao to the combined result (add), or to combined and spec (multiply); wrld mode was checked for */
+static void ambient_occlusion_apply(ShadeInput *shi, ShadeResult *shr)
+{
+	const float energy = R.wrld.aoenergy;
+	float occ_combined[3], occ_spec[3];
+
+	/* only with raytracing or the approximate gather method */
+	if (!(R.r.mode & R_RAYTRACE) && R.wrld.ao_gather_method != WO_AOGATHER_APPROX)
+		return;
+	if (energy == 0.0f)
+		return;
+
+	if (R.wrld.aomix == WO_AOADD) {
+		shr->combined[0] += shi->ao[0] * shi->r * shi->refl * energy;
+		shr->combined[1] += shi->ao[1] * shi->g * shi->refl * energy;
+		shr->combined[2] += shi->ao[2] * shi->b * shi->refl * energy;
+	}
+	else if (R.wrld.aomix == WO_AOMUL) {
+		mul_v3_v3v3(occ_combined, shr->combined, shi->ao);
+		mul_v3_v3v3(occ_spec, shr->spec, shi->ao);
+		if (energy == 1.0f) {
+			copy_v3_v3(shr->combined, occ_combined);
+			copy_v3_v3(shr->spec, occ_spec);
+		}
+		else {
+			interp_v3_v3v3(shr->combined, shr->combined, occ_combined, energy);
+			interp_v3_v3v3(shr->spec, shr->spec, occ_spec, energy);
+		}
+	}
+}
+
+void environment_lighting_apply(ShadeInput *shi, ShadeResult *shr)
+{
+	/* Adds shi->env, multiplied by the material color, refl and energy, to the combined result. */
+	const float scale = R.wrld.ao_env_energy * shi->amb;
+	const bool can_gather = (R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX;
+
+	if (!can_gather || scale == 0.0f)
+		return;
+
+	shr->combined[0] += shi->env[0] * shi->r * shi->refl * scale;
+	shr->combined[1] += shi->env[1] * shi->g * shi->refl * scale;
+	shr->combined[2] += shi->env[2] * shi->b * shi->refl * scale;
+}
+
+static void indirect_lighting_apply(ShadeInput *shi, ShadeResult *shr)
+{
+	/* Adds shi->indirect, multiplied by the material color, refl and energy, to the combined result. */
+	const float scale = R.wrld.ao_indirect_energy;
+	const bool can_gather = (R.r.mode & R_RAYTRACE) || R.wrld.ao_gather_method == WO_AOGATHER_APPROX;
+
+	if (!can_gather || scale == 0.0f)
+		return;
+
+	shr->combined[0] += shi->indirect[0] * shi->r * shi->refl * scale;
+	shr->combined[1] += shi->indirect[1] * shi->g * shi->refl * scale;
+	shr->combined[2] += shi->indirect[2] * shi->b * shi->refl * scale;
+}
+
+/* Shadow factor of 'lar' for 'shi', result written in shadfac; stored per sample and reused unless do_real is set. */
+void lamp_get_shadow(LampRen *lar, ShadeInput *shi, float inp, float shadfac[4], int do_real)
+{
+	LampShadowSubSample *cache = &(lar->shadsamp[shi->thread].s[shi->sample]);
+
+	/* the value stored for this same sample number can be reused */
+	if (!do_real && cache->samplenr == shi->samplenr) {
+		copy_v4_v4(shadfac, cache->shadfac);
+		return;
+	}
+
+	shadfac[0] = shadfac[1] = shadfac[2] = shadfac[3] = 1.0f;
+
+	if (lar->shb == NULL) {
+		if (lar->mode & LA_SHAD_RAY)
+			ray_shadow(shi, lar, shadfac);
+	}
+	else if (lar->buftype == LA_SHADBUF_IRREGULAR)
+		shadfac[3] = ISB_getshadow(shi, lar->shb);
+	else
+		shadfac[3] = testshadowbuf(&R, lar->shb, shi->co, shi->dxco, shi->dyco, inp, shi->mat->lbias);
+
+	/* only results at depth 0 are stored */
+	if (shi->depth == 0) {
+		copy_v4_v4(cache->shadfac, shadfac);
+		cache->samplenr = shi->samplenr;
+	}
+}
+
+/* Lamp distance and spot angle: writes the direction in lv and the distance in dist,
+ * returns the visibility factor (0 when it is 0.001 or less). */
+float lamp_get_visibility(LampRen *lar, const float co[3], float lv[3], float *dist)
+{
+	if (lar->type==LA_SUN || lar->type==LA_HEMI) {
+		*dist= 1.0f;
+		copy_v3_v3(lv, lar->vec);
+		return 1.0f;
+	}
+	else {
+		float visifac= 1.0f, visifac_r;
+
+		sub_v3_v3v3(lv, co, lar->co);
+		mul_v3_fl(lv, 1.0f / (*dist = len_v3(lv)));  /* NOTE(review): no guard for co == lar->co (zero length) */
+
+		/* area type has no quad or sphere option */
+		if (lar->type==LA_AREA) {
+			/* area is single sided */
+			//if (dot_v3v3(lv, lar->vec) > 0.0f)
+			//	visifac= 1.0f;
+			//else
+			//	visifac= 0.0f;
+		}
+		else {
+			switch (lar->falloff_type) {  /* other values leave visifac at 1.0 */
+				case LA_FALLOFF_CONSTANT:
+					visifac = 1.0f;
+					break;
+				case LA_FALLOFF_INVLINEAR:
+					visifac = lar->dist/(lar->dist + dist[0]);
+					break;
+				case LA_FALLOFF_INVSQUARE:
+					/* NOTE: This seems to be a hack since commit r12045 says this
+					 * option is similar to old Quad, but with slight changes.
+					 * Correct inv square would be (which would be old Quad):
+					 * visifac = lar->distkw / (lar->distkw + dist[0]*dist[0]);
+					 */
+					visifac = lar->dist / (lar->dist + dist[0]*dist[0]);
+					break;
+				case LA_FALLOFF_SLIDERS:
+					if (lar->ld1>0.0f)
+						visifac= lar->dist/(lar->dist+lar->ld1*dist[0]);
+					if (lar->ld2>0.0f)
+						visifac*= lar->distkw/(lar->distkw+lar->ld2*dist[0]*dist[0]);
+					break;
+				case LA_FALLOFF_INVCOEFFICIENTS:
+					visifac_r = lar->coeff_const +
+								lar->coeff_lin * dist[0] +
+								lar->coeff_quad * dist[0] * dist[0];
+					if (visifac_r > 0.0)
+						visifac = 1.0 / visifac_r;
+					else
+						visifac = 0.0;
+					break;
+				case LA_FALLOFF_CURVE:
+					/* curvemapping_initialize is called from #add_render_lamp */
+					visifac = curvemapping_evaluateF(lar->curfalloff, 0, dist[0]/lar->dist);
+					break;
+			}
+
+			if (lar->mode & LA_SPHERE) {  /* extra linear fade, zero from lar->dist on */
+				float t= lar->dist - dist[0];
+				if (t<=0.0f)
+					visifac= 0.0f;
+				else
+					visifac*= t/lar->dist;
+			}
+
+			if (visifac > 0.0f) {
+				if (lar->type==LA_SPOT) {
+					float inpr, t;
+
+					if (lar->mode & LA_SQUARE) {
+						if (dot_v3v3(lv, lar->vec) > 0.0f) {
+							float lvrot[3], x;
+
+							/* rotate view to lampspace */
+							copy_v3_v3(lvrot, lv);
+							mul_m3_v3(lar->imat, lvrot);
+
+							x = max_ff(fabsf(lvrot[0]/lvrot[2]), fabsf(lvrot[1]/lvrot[2]));
+							/* 1.0f/(sqrt(1+x*x)) is equivalent to cos(atan(x)) */
+							inpr = 1.0f / (sqrtf(1.0f + x * x));
+						}
+						else inpr= 0.0f;
+					}
+					else {
+						inpr= lv[0]*lar->vec[0]+lv[1]*lar->vec[1]+lv[2]*lar->vec[2];
+					}
+
+					t= lar->spotsi;
+					if (inpr<=t)
+						visifac= 0.0f;  /* at or below the lar->spotsi threshold */
+					else {
+						t= inpr-t;
+						if (t<lar->spotbl && lar->spotbl!=0.0f) {
+							/* soft area: factor 3*i^2 - 2*i^3 with i = t / spotbl */
+							float i= t/lar->spotbl;
+							t= i*i;
+							inpr*= (3.0f*t-2.0f*t*i);
+						}
+						visifac*= inpr;
+					}
+				}
+			}
+		}
+		if (visifac <= 0.001f) visifac = 0.0f;
+		return visifac;
+	}
+}
+
+/* Shades 'shi' for one lamp and accumulates into 'shr': unshadowed diffuse in shr->diff, specular in shr->spec and shadowed diffuse in shr->shad (the 'shad' pass); 'passflag' tells which passes are wanted. */
+static void shade_one_light(LampRen *lar, ShadeInput *shi, ShadeResult *shr, int passflag)
+{
+	Material *ma= shi->mat;
+	VlakRen *vlr= shi->vlr;
+	float lv[3], lampdist, lacol[3], shadfac[4], lashdw[3];
+	float i, is, i_noshad, inp, *vn, *view, vnor[3], phongcorr=1.0f;
+	float visifac;
+
+	vn= shi->vn;
+	view= shi->view;
+
+	/* early outs: lamps that cannot contribute anything */
+	if (lar->energy == 0.0f) return;
+	/* only shadow lamps shouldn't affect shadow-less materials at all */
+	if ((lar->mode & LA_ONLYSHADOW) && (!(ma->mode & MA_SHADOW) || !(R.r.mode & R_SHADOW)))
+		return;
+	/* optimization, don't render fully black lamps */
+	if (!(lar->mode & LA_TEXTURE) && (lar->r + lar->g + lar->b == 0.0f))
+		return;
+
+	/* lampdist, spot angle, area side, ... */
+	visifac= lamp_get_visibility(lar, shi->co, lv, &lampdist);
+	if (visifac==0.0f)
+		return;
+
+	if (lar->type==LA_SPOT) {
+		if (lar->mode & LA_OSATEX) {
+			shi->osatex= 1;	/* signal for multitex() */
+
+			/* dxlv/dylv: change of the lamp vector when the shading point moves by dxco/dyco */
+			shi->dxlv[0]= lv[0] - (shi->co[0]-lar->co[0]+shi->dxco[0])/lampdist;
+			shi->dxlv[1]= lv[1] - (shi->co[1]-lar->co[1]+shi->dxco[1])/lampdist;
+			shi->dxlv[2]= lv[2] - (shi->co[2]-lar->co[2]+shi->dxco[2])/lampdist;
+
+			shi->dylv[0]= lv[0] - (shi->co[0]-lar->co[0]+shi->dyco[0])/lampdist;
+			shi->dylv[1]= lv[1] - (shi->co[1]-lar->co[1]+shi->dyco[1])/lampdist;
+			shi->dylv[2]= lv[2] - (shi->co[2]-lar->co[2]+shi->dyco[2])/lampdist;
+		}
+	}
+
+	/* lamp color and shadow color, both optionally modified by lamp textures below */
+	lacol[0]= lar->r;
+	lacol[1]= lar->g;
+	lacol[2]= lar->b;
+
+	lashdw[0]= lar->shdwr;
+	lashdw[1]= lar->shdwg;
+	lashdw[2]= lar->shdwb;
+
+	if (lar->mode & LA_TEXTURE)	do_lamp_tex(lar, lv, shi, lacol, LA_TEXTURE);
+	if (lar->mode & LA_SHAD_TEX)	do_lamp_tex(lar, lv, shi, lashdw, LA_SHAD_TEX);
+
+		/* tangent case; calculate fake face normal, aligned with lampvector */
+		/* note, vnor==vn is used as tangent trigger for buffer shadow */
+	if (vlr->flag & R_TANGENT) {
+		float cross[3], nstrand[3], blend;
+
+		if (ma->mode & MA_STR_SURFDIFF) {
+			cross_v3_v3v3(cross, shi->surfnor, vn);
+			cross_v3_v3v3(nstrand, vn, cross);
+
+			blend= dot_v3v3(nstrand, shi->surfnor);
+			blend= 1.0f - blend;
+			CLAMP(blend, 0.0f, 1.0f);
+
+			interp_v3_v3v3(vnor, nstrand, shi->surfnor, blend);
+			normalize_v3(vnor);
+		}
+		else {
+			cross_v3_v3v3(cross, lv, vn);
+			cross_v3_v3v3(vnor, cross, vn);
+			normalize_v3(vnor);
+		}
+
+		if (ma->strand_surfnor > 0.0f) {
+			if (ma->strand_surfnor > shi->surfdist) {
+				blend= (ma->strand_surfnor - shi->surfdist)/ma->strand_surfnor;
+				interp_v3_v3v3(vnor, vnor, shi->surfnor, blend);
+				normalize_v3(vnor);
+			}
+		}
+
+		vnor[0]= -vnor[0];vnor[1]= -vnor[1];vnor[2]= -vnor[2];
+		vn= vnor;
+	}
+	else if (ma->mode & MA_TANGENT_V) {
+		float cross[3];
+		cross_v3_v3v3(cross, lv, shi->tang);
+		cross_v3_v3v3(vnor, cross, shi->tang);
+		normalize_v3(vnor);
+		vnor[0]= -vnor[0];vnor[1]= -vnor[1];vnor[2]= -vnor[2];
+		vn= vnor;
+	}
+
+	/* dot product and reflectivity */
+	/* inp = dotproduct, is = shader result, i = lamp energy (with shadow), i_noshad = i without shadow */
+	inp= dot_v3v3(vn, lv);
+
+	/* phong threshold to prevent backfacing faces having artifacts on ray shadow (terminator problem) */
+	/* this complex construction screams for a nicer implementation! (ton) */
+	if (R.r.mode & R_SHADOW) {
+		if (ma->mode & MA_SHADOW) {
+			if (lar->type == LA_HEMI || lar->type == LA_AREA) {
+				/* pass */
+			}
+			else if ((ma->mode & MA_RAYBIAS) && (lar->mode & LA_SHAD_RAY) && (vlr->flag & R_SMOOTH)) {
+				float thresh= shi->obr->ob->smoothresh;
+				if (inp>thresh)
+					phongcorr= (inp-thresh)/(inp*(1.0f-thresh));
+				else
+					phongcorr= 0.0f;
+			}
+			else if (ma->sbias!=0.0f && ((lar->mode & LA_SHAD_RAY) || lar->shb)) {
+				if (inp>ma->sbias)
+					phongcorr= (inp-ma->sbias)/(inp*(1.0f-ma->sbias));
+				else
+					phongcorr= 0.0f;
+			}
+		}
+	}
+
+	/* diffuse shaders */
+	if (lar->mode & LA_NO_DIFF) {
+		is = 0.0f;  /* skip shaders */
+	}
+	else if (lar->type==LA_HEMI) {
+		is = 0.5f * inp + 0.5f;  /* hemi: remaps inp from [-1, 1] to [0, 1], no material diffuse shader */
+	}
+	else {
+
+		if (lar->type==LA_AREA)
+			inp= area_lamp_energy_multisample(lar, shi->co, vn);
+
+		/* diffuse shaders (oren nayer gets inp from area light) */
+		if (ma->diff_shader==MA_DIFF_ORENNAYAR) is= OrenNayar_Diff(inp, vn, lv, view, ma->roughness);
+		else if (ma->diff_shader==MA_DIFF_TOON) is= Toon_Diff(vn, lv, view, ma->param[0], ma->param[1]);
+		else if (ma->diff_shader==MA_DIFF_MINNAERT) is= Minnaert_Diff(inp, vn, view, ma->darkness);
+		else if (ma->diff_shader==MA_DIFF_FRESNEL) is= Fresnel_Diff(vn, lv, view, ma->param[0], ma->param[1]);
+		else is= inp;  /* Lambert */
+	}
+
+	/* 'is' is diffuse */
+	if ((ma->shade_flag & MA_CUBIC) && is > 0.0f && is < 1.0f) {
+		is= 3.0f * is * is - 2.0f * is * is * is;  /* nicer termination of shades */
+	}
+
+	i= is*phongcorr;
+
+	if (i>0.0f) {
+		i*= visifac*shi->refl;
+	}
+	i_noshad= i;  /* kept unshadowed; 'i' itself is multiplied by the shadow factor further down */
+
+	vn = shi->vn;  /* bring back original vector, we use special specular shaders for tangent */
+	if (ma->mode & MA_TANGENT_V)
+		vn= shi->tang;
+
+	/* init transp shadow */
+	shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 1.0f;
+
+	/* shadow and spec, (visifac==0 outside spot) */
+	if (visifac> 0.0f) {
+
+		if ((R.r.mode & R_SHADOW)) {
+			if (ma->mode & MA_SHADOW) {
+				if (lar->shb || (lar->mode & LA_SHAD_RAY)) {
+
+					if (vn==vnor)	/* tangent trigger; NOTE(review): vn was just reassigned to shi->vn or shi->tang above, so this test looks like it can never be true - confirm */
+						lamp_get_shadow(lar, shi, dot_v3v3(shi->vn, lv), shadfac, shi->depth);
+					else
+						lamp_get_shadow(lar, shi, inp, shadfac, shi->depth);
+
+					/* warning, here it skips the loop: an only-shadow lamp subtracts light from shad/spec and returns before the diffuse and specular code below */
+					if ((lar->mode & LA_ONLYSHADOW) && i>0.0f) {
+
+						shadfac[3]= i*lar->energy*(1.0f-shadfac[3]);
+						shr->shad[0] -= shadfac[3]*shi->r*(1.0f-lashdw[0]);
+						shr->shad[1] -= shadfac[3]*shi->g*(1.0f-lashdw[1]);
+						shr->shad[2] -= shadfac[3]*shi->b*(1.0f-lashdw[2]);
+
+						if (!(lar->mode & LA_NO_SPEC)) {
+							shr->spec[0] -= shadfac[3]*shi->specr*(1.0f-lashdw[0]);
+							shr->spec[1] -= shadfac[3]*shi->specg*(1.0f-lashdw[1]);
+							shr->spec[2] -= shadfac[3]*shi->specb*(1.0f-lashdw[2]);
+						}
+
+						return;
+					}
+
+					i*= shadfac[3];
+					shr->shad[3] = shadfac[3]; /* store this for possible check in troublesome cases */
+				}
+				else {
+					shr->shad[3] = 1.0f;  /* No shadow at all! */
+				}
+			}
+		}
+
+		/* in case 'no diffuse' we still do most calculus, spec can be in shadow.*/
+		if (!(lar->mode & LA_NO_DIFF)) {
+			if (i>0.0f) {
+				if (ma->mode & MA_SHADOW_TRA) {
+					const float tcol[3] = {
+					    i * shadfac[0] * lacol[0],
+					    i * shadfac[1] * lacol[1],
+					    i * shadfac[2] * lacol[2],
+					};
+					add_to_diffuse(shr->shad, shi, is, tcol);
+				}
+				else {
+					const float tcol[3] = {
+					    i * lacol[0],
+					    i * lacol[1],
+					    i * lacol[2],
+					};
+					add_to_diffuse(shr->shad, shi, is, tcol);
+				}
+			}
+			/* add light for colored shadow */
+			if (i_noshad>i && !(lashdw[0]==0 && lashdw[1]==0 && lashdw[2]==0)) {
+				const float tcol[3] = {
+				    lashdw[0] * (i_noshad - i) * lacol[0],
+				    lashdw[1] * (i_noshad - i) * lacol[1],
+				    lashdw[2] * (i_noshad - i) * lacol[2],
+				};
+				add_to_diffuse(shr->shad, shi, is, tcol);
+			}
+			if (i_noshad>0.0f) {
+				if (passflag & (SCE_PASS_DIFFUSE|SCE_PASS_SHADOW) ||
+				    ((passflag & SCE_PASS_COMBINED) && !(shi->combinedflag & SCE_PASS_SHADOW)))
+				{
+					const float tcol[3] = {
+					    i_noshad * lacol[0],
+					    i_noshad * lacol[1],
+					    i_noshad * lacol[2]
+					};
+					add_to_diffuse(shr->diff, shi, is, tcol);
+				}
+				else {
+					copy_v3_v3(shr->diff, shr->shad);  /* unshadowed diffuse not needed separately: diff simply mirrors the running shad total */
+				}
+			}
+		}
+
+		/* specularity */
+		shadfac[3]*= phongcorr;	/* note, shadfac not allowed to be stored nonlocal */
+
+		if (shadfac[3]>0.0f && shi->spec!=0.0f && !(lar->mode & LA_NO_SPEC) && !(lar->mode & LA_ONLYSHADOW)) {
+
+			if (!(passflag & (SCE_PASS_COMBINED | SCE_PASS_SPEC))) {
+				/* pass */
+			}
+			else if (lar->type == LA_HEMI) {
+				float t;
+				/* hemi uses no spec shaders (yet) */
+
+				lv[0]+= view[0];
+				lv[1]+= view[1];
+				lv[2]+= view[2];
+
+				normalize_v3(lv);
+
+				t= vn[0]*lv[0]+vn[1]*lv[1]+vn[2]*lv[2];
+
+				if (lar->type==LA_HEMI) {  /* NOTE(review): always true inside this LA_HEMI branch, the test is redundant */
+					t= 0.5f*t+0.5f;
+				}
+
+				t= shadfac[3]*shi->spec*spec(t, shi->har);
+
+				shr->spec[0]+= t*(lacol[0] * shi->specr);
+				shr->spec[1]+= t*(lacol[1] * shi->specg);
+				shr->spec[2]+= t*(lacol[2] * shi->specb);
+			}
+			else {
+				/* specular shaders */
+				float specfac, t;
+
+				if (ma->spec_shader==MA_SPEC_PHONG)
+					specfac= Phong_Spec(vn, lv, view, shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V));
+				else if (ma->spec_shader==MA_SPEC_COOKTORR)
+					specfac= CookTorr_Spec(vn, lv, view, shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V));
+				else if (ma->spec_shader==MA_SPEC_BLINN)
+					specfac= Blinn_Spec(vn, lv, view, ma->refrac, (float)shi->har, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V));
+				else if (ma->spec_shader==MA_SPEC_WARDISO)
+					specfac= WardIso_Spec( vn, lv, view, ma->rms, (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V));
+				else
+					specfac= Toon_Spec(vn, lv, view, ma->param[2], ma->param[3], (vlr->flag & R_TANGENT) || (ma->mode & MA_TANGENT_V));
+
+				/* area lamp correction */
+				if (lar->type==LA_AREA) specfac*= inp;
+
+				t= shadfac[3]*shi->spec*visifac*specfac;
+
+				if (ma->mode & MA_RAMP_SPEC) {
+					float spec[3];
+					do_specular_ramp(shi, specfac, t, spec);
+					shr->spec[0]+= t*(lacol[0] * spec[0]);
+					shr->spec[1]+= t*(lacol[1] * spec[1]);
+					shr->spec[2]+= t*(lacol[2] * spec[2]);
+				}
+				else {
+					shr->spec[0]+= t*(lacol[0] * shi->specr);
+					shr->spec[1]+= t*(lacol[1] * shi->specg);
+					shr->spec[2]+= t*(lacol[2] * shi->specb);
+				}
+			}
+		}
+	}
+}
+
+static void shade_lamp_loop_only_shadow(ShadeInput *shi, ShadeResult *shr)
+{
+	/* Shadow-only shading: turns the shadow received from all shadow casting lamps, plus optional AO / environment light, into shr->alpha; no color is written here. */
+	if (R.r.mode & R_SHADOW) {
+		ListBase *lights;
+		LampRen *lar;
+		GroupObject *go;
+		float inpr, lv[3];
+		float /* *view, */ shadfac[4];
+		float ir, accum, visifac, lampdist;  /* ir counts the shadow lamps visited, accum sums their results */
+		float shaded = 0.0f, lightness = 0.0f;
+
+
+		/* view= shi->view; */ /* UNUSED */
+		accum= ir= 0.0f;
+
+		lights= get_lights(shi);
+		for (go=lights->first; go; go= go->next) {
+			lar= go->lampren;
+			if (lar==NULL) continue;
+
+			if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay)==0) continue;
+			if ((lar->lay & shi->lay)==0) continue;
+
+			if (lar->shb || (lar->mode & LA_SHAD_RAY)) {  /* only lamps with a shadow buffer or ray shadow take part */
+				visifac= lamp_get_visibility(lar, shi->co, lv, &lampdist);
+				ir+= 1.0f;
+
+				if (visifac <= 0.0f) {  /* lamp does not reach this point: in the old mode it adds 1.0 to accum */
+					if (shi->mat->shadowonly_flag == MA_SO_OLD)
+						accum+= 1.0f;
+
+					continue;
+				}
+				inpr= dot_v3v3(shi->vn, lv);
+				if (inpr <= 0.0f) {  /* inpr <= 0: handled like a lamp that does not reach the point */
+					if (shi->mat->shadowonly_flag == MA_SO_OLD)
+						accum+= 1.0f;
+
+					continue;
+				}
+
+				lamp_get_shadow(lar, shi, inpr, shadfac, shi->depth);
+
+				if (shi->mat->shadowonly_flag == MA_SO_OLD) {
+					/* Old "Shadows Only" */
+					accum+= (1.0f-visifac) + (visifac)*IMB_colormanagement_get_luminance(shadfac)*shadfac[3];
+				}
+				else {
+					shaded += IMB_colormanagement_get_luminance(shadfac)*shadfac[3] * visifac * lar->energy;
+
+					if (shi->mat->shadowonly_flag == MA_SO_SHADOW) {
+						lightness += visifac * lar->energy;  /* total light that would arrive without any shadow */
+					}
+				}
+			}
+		}
+
+		/* Apply shadows as alpha */
+		if (ir>0.0f) {
+			if (shi->mat->shadowonly_flag == MA_SO_OLD) {
+				accum = 1.0f - accum/ir;  /* average over all visited shadow lamps, inverted */
+			}
+			else {
+				if (shi->mat->shadowonly_flag == MA_SO_SHADOW) {
+					if (lightness > 0.0f) {
+						/* Get shadow value from between 0.0f and non-shadowed lightness */
+						accum = (lightness - shaded) / (lightness);
+					}
+					else {
+						accum = 0.0f;
+					}
+				}
+				else { /* shadowonly_flag == MA_SO_SHADED */
+					/* Use shaded value */
+					accum = 1.0f - shaded;
+				}
+			}
+
+			shr->alpha= (shi->alpha)*(accum);
+			if (shr->alpha<0.0f) shr->alpha=0.0f;
+		}
+		else {
+			/* If "fully shaded", use full alpha even on areas that have no lights */
+			if (shi->mat->shadowonly_flag == MA_SO_SHADED) shr->alpha=shi->alpha;
+			else shr->alpha= 0.f;
+		}
+	}
+
+	/* quite disputable this...  also note it doesn't mirror-raytrace */
+	if ((R.wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT)) && shi->amb!=0.0f) {
+		float f;
+
+		if (R.wrld.mode & WO_AMB_OCC) {
+			f= R.wrld.aoenergy*shi->amb;
+
+			if (R.wrld.aomix==WO_AOADD) {
+				if (shi->mat->shadowonly_flag == MA_SO_OLD) {
+					f= f*(1.0f - IMB_colormanagement_get_luminance(shi->ao));
+					shr->alpha= (shr->alpha + f)*f;
+				}
+				else {
+					shr->alpha -= f*IMB_colormanagement_get_luminance(shi->ao);
+					if (shr->alpha<0.0f) shr->alpha=0.0f;
+				}
+			}
+			else /* AO Multiply */
+				shr->alpha= (1.0f - f)*shr->alpha + f*(1.0f - (1.0f - shr->alpha)*IMB_colormanagement_get_luminance(shi->ao));
+		}
+
+		if (R.wrld.mode & WO_ENV_LIGHT) {  /* environment light: same pattern as the AO add case above, using shi->env */
+			if (shi->mat->shadowonly_flag == MA_SO_OLD) {
+				f= R.wrld.ao_env_energy*shi->amb*(1.0f - IMB_colormanagement_get_luminance(shi->env));
+				shr->alpha= (shr->alpha + f)*f;
+			}
+			else {
+				f= R.wrld.ao_env_energy*shi->amb;
+				shr->alpha -= f*IMB_colormanagement_get_luminance(shi->env);
+				if (shr->alpha<0.0f) shr->alpha=0.0f;
+			}
+		}
+	}
+}
+
+/* World exposure correction, applied in place to an RGB triplet: c = linfac * (1 - exp(c * logfac)), with both factors read from R.wrld. */
+static void wrld_exposure_correct(float diff[3])
+{
+	int chan;
+	for (chan = 0; chan < 3; chan++) {
+		diff[chan] = R.wrld.linfac * (1.0f - expf(R.wrld.logfac * diff[chan]));
+	}
+}
+
+void shade_lamp_loop(ShadeInput *shi, ShadeResult *shr)
+{	/* Main material shading entry: clears 'shr', then fills material color, emit, AO, per-lamp diffuse/spec/shadow and the final combined RGBA for 'shi'. */
+	/* Passes which might need to know material color.
+	 *
+	 * It seems to be faster to just calculate material color
+	 * even if the pass doesn't really need it than trying to
+	 * figure out whether color is really needed or not.
+	 */
+	const int color_passes =
+		SCE_PASS_COMBINED | SCE_PASS_RGBA | SCE_PASS_DIFFUSE | SCE_PASS_SPEC |
+		SCE_PASS_REFLECT | SCE_PASS_NORMAL | SCE_PASS_REFRACT | SCE_PASS_EMIT | SCE_PASS_SHADOW;
+
+	Material *ma= shi->mat;
+	int passflag= shi->passflag;
+
+	memset(shr, 0, sizeof(ShadeResult));
+
+	if (!(shi->mode & MA_TRANSP)) shi->alpha = 1.0f;
+
+	/* separate loop: shadow-only materials only produce an alpha value */
+	if (ma->mode & MA_ONLYSHADOW) {
+		shade_lamp_loop_only_shadow(shi, shr);
+		return;
+	}
+
+	/* envmap hack, always reset */
+	shi->refcol[0]= shi->refcol[1]= shi->refcol[2]= shi->refcol[3]= 0.0f;
+
+	/* material color itself */
+	if (passflag & color_passes) {
+		if (ma->mode & (MA_FACETEXTURE)) {
+			shi->r= shi->vcol[0];
+			shi->g= shi->vcol[1];
+			shi->b= shi->vcol[2];
+			if (ma->mode & (MA_FACETEXTURE_ALPHA))
+				shi->alpha= shi->vcol[3];
+		}
+#ifdef WITH_FREESTYLE
+		else if (ma->vcol_alpha) {
+			shi->r= shi->vcol[0];
+			shi->g= shi->vcol[1];
+			shi->b= shi->vcol[2];
+			shi->alpha= shi->vcol[3];
+		}
+#endif
+		else if (ma->mode & (MA_VERTEXCOLP)) {
+			float neg_alpha = 1.0f - shi->vcol[3];  /* blend material color towards vertex color by the vertex alpha */
+			shi->r= shi->r*neg_alpha + shi->vcol[0]*shi->vcol[3];
+			shi->g= shi->g*neg_alpha + shi->vcol[1]*shi->vcol[3];
+			shi->b= shi->b*neg_alpha + shi->vcol[2]*shi->vcol[3];
+		}
+		if (ma->texco) {
+			do_material_tex(shi, &R);
+			if (!(shi->mode & MA_TRANSP)) shi->alpha = 1.0f;
+		}
+
+		shr->col[0]= shi->r*shi->alpha;
+		shr->col[1]= shi->g*shi->alpha;
+		shr->col[2]= shi->b*shi->alpha;
+		shr->col[3]= shi->alpha;
+
+		if ((ma->sss_flag & MA_DIFF_SSS) && !sss_pass_done(&R, ma)) {
+			if (ma->sss_texfac == 0.0f) {
+				shi->r= shi->g= shi->b= shi->alpha= 1.0f;
+				shr->col[0]= shr->col[1]= shr->col[2]= shr->col[3]= 1.0f;
+			}
+			else {
+				shi->r= pow(max_ff(shi->r, 0.0f), ma->sss_texfac);
+				shi->g= pow(max_ff(shi->g, 0.0f), ma->sss_texfac);
+				shi->b= pow(max_ff(shi->b, 0.0f), ma->sss_texfac);
+				shi->alpha= pow(max_ff(shi->alpha, 0.0f), ma->sss_texfac);
+
+				shr->col[0]= pow(max_ff(shr->col[0], 0.0f), ma->sss_texfac);
+				shr->col[1]= pow(max_ff(shr->col[1], 0.0f), ma->sss_texfac);
+				shr->col[2]= pow(max_ff(shr->col[2], 0.0f), ma->sss_texfac);
+				shr->col[3]= pow(max_ff(shr->col[3], 0.0f), ma->sss_texfac);
+			}
+		}
+	}
+
+	if (ma->mode & MA_SHLESS) {  /* shadeless: the material color is the result, all lighting below is skipped */
+		shr->combined[0]= shi->r;
+		shr->combined[1]= shi->g;
+		shr->combined[2]= shi->b;
+		shr->alpha= shi->alpha;
+		goto finally_shadeless;
+	}
+
+	if ( (ma->mode & (MA_VERTEXCOL|MA_VERTEXCOLP))== MA_VERTEXCOL ) {	/* vertexcolor light: only when MA_VERTEXCOL is set without MA_VERTEXCOLP */
+		shr->emit[0]= shi->r*(shi->emit+shi->vcol[0]*shi->vcol[3]);
+		shr->emit[1]= shi->g*(shi->emit+shi->vcol[1]*shi->vcol[3]);
+		shr->emit[2]= shi->b*(shi->emit+shi->vcol[2]*shi->vcol[3]);
+	}
+	else {
+		shr->emit[0]= shi->r*shi->emit;
+		shr->emit[1]= shi->g*shi->emit;
+		shr->emit[2]= shi->b*shi->emit;
+	}
+
+	/* AO pass */
+	if (((passflag & SCE_PASS_COMBINED) && (shi->combinedflag & (SCE_PASS_AO|SCE_PASS_ENVIRONMENT|SCE_PASS_INDIRECT))) ||
+	    (passflag & (SCE_PASS_AO|SCE_PASS_ENVIRONMENT|SCE_PASS_INDIRECT))) {
+		if ((R.wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT)) && (R.r.mode & R_SHADOW)) {
+			/* AO was calculated for scanline already */
+			if (shi->depth || shi->volume_depth)
+				ambient_occlusion(shi);
+			copy_v3_v3(shr->ao, shi->ao);
+			copy_v3_v3(shr->env, shi->env); /* XXX multiply */
+			copy_v3_v3(shr->indirect, shi->indirect); /* XXX multiply */
+		}
+		else {
+			shr->ao[0]= shr->ao[1]= shr->ao[2]= 1.0f;
+			zero_v3(shr->env);
+			zero_v3(shr->indirect);
+		}
+	}
+
+	/* lighting pass */
+	if (passflag & (SCE_PASS_COMBINED|SCE_PASS_DIFFUSE|SCE_PASS_SPEC|SCE_PASS_SHADOW)) {
+		GroupObject *go;
+		ListBase *lights;
+		LampRen *lar;
+
+		lights= get_lights(shi);
+		for (go=lights->first; go; go= go->next) {
+			lar= go->lampren;
+			if (lar==NULL) continue;
+
+			/* test for lamp layer */
+			if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay)==0) continue;
+			if ((lar->lay & shi->lay)==0) continue;
+
+			/* accumulates in shr->diff and shr->spec and shr->shad (diffuse with shadow!) */
+			shade_one_light(lar, shi, shr, passflag);
+		}
+
+		/* this check is to prevent only shadow lamps from producing negative
+		 * colors.*/
+		if (shr->spec[0] < 0) shr->spec[0] = 0;
+		if (shr->spec[1] < 0) shr->spec[1] = 0;
+		if (shr->spec[2] < 0) shr->spec[2] = 0;
+
+		if (shr->shad[0] < 0) shr->shad[0] = 0;
+		if (shr->shad[1] < 0) shr->shad[1] = 0;
+		if (shr->shad[2] < 0) shr->shad[2] = 0;
+
+		if (ma->sss_flag & MA_DIFF_SSS) {
+			float sss[3], col[3], invalpha, texfac= ma->sss_texfac;
+
+			/* this will return false in the preprocess stage */
+			if (sample_sss(&R, ma, shi->co, sss)) {
+				invalpha= (shr->col[3] > FLT_EPSILON)? 1.0f/shr->col[3]: 1.0f;  /* shr->col was multiplied by alpha above, undo that */
+
+				if (texfac==0.0f) {
+					copy_v3_v3(col, shr->col);
+					mul_v3_fl(col, invalpha);
+				}
+				else if (texfac==1.0f) {
+					col[0]= col[1]= col[2]= 1.0f;
+					mul_v3_fl(col, invalpha);
+				}
+				else {
+					copy_v3_v3(col, shr->col);
+					mul_v3_fl(col, invalpha);
+					col[0]= pow(max_ff(col[0], 0.0f), 1.0f-texfac);
+					col[1]= pow(max_ff(col[1], 0.0f), 1.0f-texfac);
+					col[2]= pow(max_ff(col[2], 0.0f), 1.0f-texfac);
+				}
+
+				shr->diff[0]= sss[0]*col[0];  /* the sampled SSS result replaces the per-lamp diffuse gathered above */
+				shr->diff[1]= sss[1]*col[1];
+				shr->diff[2]= sss[2]*col[2];
+
+				if (shi->combinedflag & SCE_PASS_SHADOW) {
+					shr->shad[0]= shr->diff[0];
+					shr->shad[1]= shr->diff[1];
+					shr->shad[2]= shr->diff[2];
+				}
+			}
+		}
+
+		if (shi->combinedflag & SCE_PASS_SHADOW)
+			copy_v3_v3(shr->diffshad, shr->shad);
+		else
+			copy_v3_v3(shr->diffshad, shr->diff);
+
+		copy_v3_v3(shr->combined, shr->diffshad);
+
+		/* calculate shadow pass, we use a multiplication mask */
+		/* Even if diff = 0,0,0, it does matter what the shadow pass is, since we may want it 'for itself'! */
+		if (passflag & SCE_PASS_SHADOW) {
+			if (shr->diff[0]!=0.0f) shr->shad[0]= shr->shad[0]/shr->diff[0];
+			/* can't determine proper shadow from shad/diff (0/0), so use shadow intensity */
+			else if (shr->shad[0]==0.0f) shr->shad[0]= shr->shad[3];
+
+			if (shr->diff[1]!=0.0f) shr->shad[1]= shr->shad[1]/shr->diff[1];
+			else if (shr->shad[1]==0.0f) shr->shad[1]= shr->shad[3];
+
+			if (shr->diff[2]!=0.0f) shr->shad[2]= shr->shad[2]/shr->diff[2];
+			else if (shr->shad[2]==0.0f) shr->shad[2]= shr->shad[3];
+		}
+
+		/* exposure correction */
+		if ((R.wrld.exp!=0.0f || R.wrld.range!=1.0f) && !R.sss_points) {
+			wrld_exposure_correct(shr->combined);	/* has no spec! */
+			wrld_exposure_correct(shr->spec);
+		}
+	}
+
+	/* alpha in end, spec can influence it */
+	if (passflag & (SCE_PASS_COMBINED)) {
+		if ((ma->fresnel_tra!=0.0f) && (shi->mode & MA_TRANSP))
+			shi->alpha*= fresnel_fac(shi->view, shi->vn, ma->fresnel_tra_i, ma->fresnel_tra);
+
+		/* note: shi->mode! */
+		if (shi->mode & MA_TRANSP && (shi->mode & (MA_ZTRANSP|MA_RAYTRANSP))) {
+			if (shi->spectra!=0.0f) {
+				float t = max_fff(shr->spec[0], shr->spec[1], shr->spec[2]);
+				t *= shi->spectra;
+				if (t>1.0f) t= 1.0f;
+				shi->alpha= (1.0f-t)*shi->alpha+t;  /* strong specular pushes alpha towards opaque */
+			}
+		}
+	}
+	shr->alpha= shi->alpha;
+
+	/* from now stuff everything in shr->combined: ambient, AO, ramps, exposure */
+	if (!(ma->sss_flag & MA_DIFF_SSS) || !sss_pass_done(&R, ma)) {
+		if (R.r.mode & R_SHADOW) {
+			/* add AO in combined? */
+			if (R.wrld.mode & WO_AMB_OCC)
+				if (shi->combinedflag & SCE_PASS_AO)
+					ambient_occlusion_apply(shi, shr);
+
+			if (R.wrld.mode & WO_ENV_LIGHT)
+				if (shi->combinedflag & SCE_PASS_ENVIRONMENT)
+					environment_lighting_apply(shi, shr);
+
+			if (R.wrld.mode & WO_INDIRECT_LIGHT)
+				if (shi->combinedflag & SCE_PASS_INDIRECT)
+					indirect_lighting_apply(shi, shr);
+		}
+
+		shr->combined[0]+= shi->ambr;
+		shr->combined[1]+= shi->ambg;
+		shr->combined[2]+= shi->ambb;
+
+		if (ma->mode & MA_RAMP_COL) ramp_diffuse_result(shr->combined, shi);
+	}
+
+	if (ma->mode & MA_RAMP_SPEC) ramp_spec_result(shr->spec, shi);
+
+	/* refcol is for envmap only; refcol[0] is used as the blend weight, refcol[1..3] as the reflected color */
+	if (shi->refcol[0]!=0.0f) {
+		float result[3];
+
+		result[0]= shi->mirr*shi->refcol[1] + (1.0f - shi->mirr*shi->refcol[0])*shr->combined[0];
+		result[1]= shi->mirg*shi->refcol[2] + (1.0f - shi->mirg*shi->refcol[0])*shr->combined[1];
+		result[2]= shi->mirb*shi->refcol[3] + (1.0f - shi->mirb*shi->refcol[0])*shr->combined[2];
+
+		if (passflag & SCE_PASS_REFLECT)
+			sub_v3_v3v3(shr->refl, result, shr->combined);
+
+		if (shi->combinedflag & SCE_PASS_REFLECT)
+			copy_v3_v3(shr->combined, result);
+
+	}
+
+	/* and add emit and spec */
+	if (shi->combinedflag & SCE_PASS_EMIT)
+		add_v3_v3(shr->combined, shr->emit);
+	if (shi->combinedflag & SCE_PASS_SPEC)
+		add_v3_v3(shr->combined, shr->spec);
+
+
+	/* Last section of this function applies to shadeless colors too */
+finally_shadeless:
+
+	/* modulate by the object color */
+	if ((ma->shade_flag & MA_OBCOLOR) && shi->obr->ob) {
+		if (!(ma->sss_flag & MA_DIFF_SSS) || !sss_pass_done(&R, ma)) {
+			float obcol[4];
+
+			copy_v4_v4(obcol, shi->obr->ob->col);
+			CLAMP(obcol[3], 0.0f, 1.0f);
+
+			shr->combined[0] *= obcol[0];
+			shr->combined[1] *= obcol[1];
+			shr->combined[2] *= obcol[2];
+			if (shi->mode & MA_TRANSP) shr->alpha *= obcol[3];
+		}
+	}
+
+	shr->combined[3]= shr->alpha;
+}
+
+/* used for "Lamp Data" shader node: evaluates the lamp of 'go' at the shading point; returns its visibility factor (0.0f when the lamp is missing or excluded by layers) and updates col, lv, *dist and shadow[0..2] */
+static float lamp_get_data_internal(ShadeInput *shi, GroupObject *go, float col[4], float lv[3], float *dist, float shadow[4])
+{
+	LampRen *lar = go->lampren;
+	float visifac, inp;
+
+	if (!lar
+	    || ((lar->mode & LA_LAYER) && (lar->lay & shi->obi->lay) == 0)
+	    || (lar->lay & shi->lay) == 0)
+		return 0.0f;
+
+	if (lar->mode & LA_TEXTURE)  /* NOTE(review): lv is passed here before lamp_get_visibility() below fills it in, so it still holds the caller's value - confirm this order is intended */
+		do_lamp_tex(lar, lv, shi, col, LA_TEXTURE);
+
+	visifac = lamp_get_visibility(lar, shi->co, lv, dist);
+
+	if (visifac == 0.0f  /* no shadow evaluation for: invisible lamps, hemi lamps, non-spot lamps without ray shadow, or when R_BUTS_PREVIEW is set */
+	    || lar->type == LA_HEMI
+	    || (lar->type != LA_SPOT && !(lar->mode & LA_SHAD_RAY))
+	    || (R.r.scemode & R_BUTS_PREVIEW))
+		return visifac;
+
+	inp = dot_v3v3(shi->vn, lv);
+
+	if (inp > 0.0f) {  /* for inp <= 0 the shadow output is left as the caller initialized it */
+		float shadfac[4];
+
+		shadow[0] = lar->shdwr;
+		shadow[1] = lar->shdwg;
+		shadow[2] = lar->shdwb;
+
+		if (lar->mode & LA_SHAD_TEX)
+			do_lamp_tex(lar, lv, shi, shadow, LA_SHAD_TEX);
+
+		if (R.r.mode & R_SHADOW) {
+			lamp_get_shadow(lar, shi, inp, shadfac, shi->depth);
+
+			shadow[0] = 1.0f - ((1.0f - shadfac[0] * shadfac[3]) * (1.0f - shadow[0]));  /* equals the shadow color where shadfac*alpha is 0 and 1.0 where it is 1; shadow[3] is never written here */
+			shadow[1] = 1.0f - ((1.0f - shadfac[1] * shadfac[3]) * (1.0f - shadow[1]));
+			shadow[2] = 1.0f - ((1.0f - shadfac[2] * shadfac[3]) * (1.0f - shadow[2]));
+		}
+	}
+
+	return visifac;
+}
+
+float RE_lamp_get_data(ShadeInput *shi, Object *lamp_obj, float col[4], float lv[3], float *dist, float shadow[4])
+{
+	/* Looks up the render lamp that belongs to 'lamp_obj' and evaluates it at 'shi'; returns the visibility factor, 0.0f when lamp_obj is no lamp or no render lamp is found. */
+	const ListBase *candidates[3] = {NULL, NULL, &R.lights};
+	GroupObject *go;
+	Lamp *lamp;
+	int pass;
+
+	/* fallback outputs: black opaque color, surface normal as direction, unit distance, no shadow */
+	zero_v3(col);
+	col[3] = 1.0f;
+	copy_v3_v3(lv, shi->vn);
+	*dist = 1.0f;
+	shadow[0] = shadow[1] = shadow[2] = shadow[3] = 1.0f;
+
+	if (lamp_obj->type != OB_LAMP)
+		return 0.0f;
+
+	/* lamp color scaled by its energy */
+	lamp = (Lamp *)lamp_obj->data;
+	col[0] = lamp->energy * lamp->r;
+	col[1] = lamp->energy * lamp->g;
+	col[2] = lamp->energy * lamp->b;
+
+	if (R.r.scemode & R_BUTS_PREVIEW) {
+		/* material preview always uses its main key light, "Lamp.002" */
+		for (go = R.lights.first; go; go = go->next) {
+			if (STREQ(go->ob->id.name + 2, "Lamp.002"))
+				return lamp_get_data_internal(shi, go, col, lv, dist, shadow);
+		}
+		return 0.0f;
+	}
+
+	/* search order: shi->light_override group, then the material's group, then all render lights */
+	if (shi->light_override)
+		candidates[0] = &shi->light_override->gobject;
+	if (shi->mat && shi->mat->group)
+		candidates[1] = &shi->mat->group->gobject;
+	for (pass = 0; pass < 3; pass++) {
+		if (candidates[pass] == NULL) continue;
+		for (go = candidates[pass]->first; go; go = go->next) {
+			if (go->ob == lamp_obj)
+				return lamp_get_data_internal(shi, go, col, lv, dist, shadow);
+		}
+	}
+	return 0.0f;
+}
+
+const float (*RE_object_instance_get_matrix(struct ObjectInstanceRen *obi, int matrix_id))[4]
+{
+	/* Returns the instance matrix selected by matrix_id as read-only rows of 4 floats; NULL without an instance or for an unknown id. */
+	if (obi == NULL)
+		return NULL;
+
+	if (matrix_id == RE_OBJECT_INSTANCE_MATRIX_OB)
+		return (const float(*)[4])obi->obmat;
+	if (matrix_id == RE_OBJECT_INSTANCE_MATRIX_OBINV)
+		return (const float(*)[4])obi->obinvmat;
+	if (matrix_id == RE_OBJECT_INSTANCE_MATRIX_LOCALTOVIEW)
+		return (const float(*)[4])obi->localtoviewmat;
+	if (matrix_id == RE_OBJECT_INSTANCE_MATRIX_LOCALTOVIEWINV)
+		return (const float(*)[4])obi->localtoviewinvmat;
+	return NULL;
+}
+
+float RE_object_instance_get_object_pass_index(struct ObjectInstanceRen *obi)
+{	/* Pass index of the instance's object, as float; 0.0f when obi or obi->ob is NULL (NULL is tolerated like in RE_object_instance_get_matrix). */
+	return (obi && obi->ob) ? (float)obi->ob->index : 0.0f;
+}
+
+float RE_object_instance_get_random_id(struct ObjectInstanceRen *obi)
+{	/* Random id stored on the instance, converted to float; 0.0f when obi is NULL (NULL is tolerated like in RE_object_instance_get_matrix). */
+	return obi ? obi->random_id : 0.0f;
+}
+
+const float (*RE_render_current_get_matrix(int matrix_id))[4]
+{
+	/* Returns R.viewmat or R.viewinv as read-only rows of 4 floats; NULL for any other matrix_id. */
+	if (matrix_id == RE_VIEW_MATRIX)
+		return (const float(*)[4])R.viewmat;
+
+	if (matrix_id == RE_VIEWINV_MATRIX)
+		return (const float(*)[4])R.viewinv;
+	return NULL;
+}
+
+float RE_fresnel_dielectric(float incoming[3], float normal[3], float eta)
+{
+	/* Fresnel reflectance for index 'eta', computed without explicitly
+	 * computing the refracted direction. */
+	const float cos_i = fabsf(dot_v3v3(incoming, normal));
+	float g = eta * eta - 1.0 + cos_i * cos_i;
+	float a, b;
+
+	if (!(g > 0.0)) {
+		/* TIR (no refracted component) */
+		return 1.0;
+	}
+
+	/* g > 0: a refracted component exists, evaluate the reflectance */
+	g = sqrtf(g);
+	a = (g - cos_i) / (g + cos_i);
+	b = (cos_i * (g + cos_i) - 1.0) / (cos_i * (g - cos_i) + 1.0);
+
+	return 0.5 * a * a * (1.0 + b * b);
+}
diff --git a/source/blender/render/intern/source/sss.c b/source/blender/render/intern/source/sss.c
new file mode 100644
index 00000000000..5919b8130d7
--- /dev/null
+++ b/source/blender/render/intern/source/sss.c
@@ -0,0 +1,1074 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2007 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): none yet.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/sss.c
+ *  \ingroup render
+ */
+
+/* Possible Improvements:
+ * - add fresnel terms
+ * - adapt Rd table to scale, now with small scale there are a lot of misses?
+ * - possible interesting method: perform sss on all samples in the tree,
+ *   and then use those values interpolated somehow later. can also do this
+ *   filtering on demand for speed. since we are doing things in screen
+ *   space now there is an exact correspondence
+ * - avoid duplicate shading (filtering points in advance, irradiance cache
+ *   like lookup?)
+ * - lower resolution samples
+ */
+
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include <string.h>
+
+/* external modules: */
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+#include "BLI_ghash.h"
+#include "BLI_memarena.h"
+
+#include "BLT_translation.h"
+
+
+#include "DNA_material_types.h"
+
+#include "BKE_global.h"
+#include "BKE_main.h"
+#include "BKE_scene.h"
+
+
+/* this module */
+#include "render_types.h"
+#include "sss.h"
+
+/* Generic Multiple Scattering API */
+
+/* Relevant papers:
+ * [1] A Practical Model for Subsurface Light Transport
+ * [2] A Rapid Hierarchical Rendering Technique for Translucent Materials
+ * [3] Efficient Rendering of Local Subsurface Scattering
+ * [4] Implementing a skin BSSRDF (or several...)
+ */
+
+/* Defines */
+
+#define RD_TABLE_RANGE 		100.0f
+#define RD_TABLE_RANGE_2	10000.0f
+#define RD_TABLE_SIZE		10000
+
+#define MAX_OCTREE_NODE_POINTS	8
+#define MAX_OCTREE_DEPTH		15
+
+/* Struct Definitions */
+
+struct ScatterSettings {
+	float eta;		/* index of refraction */
+	float sigma_a;	/* absorption coefficient */
+	float sigma_s_; /* reduced scattering coefficient */
+	float sigma_t_; /* reduced extinction coefficient */
+	float sigma;	/* effective extinction coefficient */
+	float Fdr;		/* diffuse fresnel reflectance */
+	float D;		/* diffusion constant */
+	float A;		/* (1 + Fdr)/(1 - Fdr), see scatter_settings_new */
+	float alpha_;	/* reduced albedo */
+	float zr;		/* distance of virtual lightsource above surface */
+	float zv;		/* distance of virtual lightsource below surface */
+	float ld;		/* mean free path */
+	float ro;		/* diffuse reflectance */
+	float color;	/* ro*reflfac + (1 - reflfac), multiplied into the normalized radiance */
+	float invsigma_t_;	/* 1/sigma_t_ */
+	float frontweight;	/* factor applied to the gathered front radiance */
+	float backweight;	/* factor applied to the gathered back radiance */
+
+	float *tableRd;  /* lookup table to avoid computing Rd */
+	float *tableRd2; /* lookup table to avoid computing Rd for bigger values */
+};
+
+typedef struct ScatterPoint {
+	float co[3];	/* position, already divided by the tree scale */
+	float rad[3];	/* radiance of the sample, copied from the input color */
+	float area;		/* absolute input area divided by the squared tree scale */
+	int back;		/* set when the input area was negative */
+} ScatterPoint;
+
+typedef struct ScatterNode {
+	float co[3];		/* radiance weighted average position of the contents */
+	float rad[3];		/* area weighted average radiance of the front samples */
+	float backrad[3];	/* area weighted average radiance of the back samples */
+	float area, backarea;	/* summed front and back area */
+
+	int totpoint;		/* number of points in a leaf, zero for a branch */
+	ScatterPoint *points;	/* points of a leaf, NULL for a branch */
+
+	float split[3];		/* position the points were partitioned around */
+	struct ScatterNode *child[8];	/* NULL for octants without points */
+} ScatterNode;
+
+struct ScatterTree {
+	MemArena *arena;	/* all nodes are allocated from this arena */
+
+	ScatterSettings *ss[3];	/* one per color channel, not freed with the tree */
+	float error, scale;	/* traversal error bound, and divisor for all coordinates */
+
+	ScatterNode *root;
+	ScatterPoint *points;	/* input order before the build, octree order after it */
+	ScatterPoint **refpoints;	/* only valid until scatter_tree_build is done */
+	ScatterPoint **tmppoints;	/* scratch buffer, only valid during scatter_tree_build */
+	int totpoint;
+	float min[3], max[3];	/* bounds of the scaled point positions */
+};
+
+typedef struct ScatterResult {
+	float rad[3];		/* sum of front radiance times Rd times area */
+	float backrad[3];	/* sum of back radiance times Rd times back area */
+	float rdsum[3];		/* sum of the front weights (Rd times area) */
+	float backrdsum[3];	/* sum of the back weights (Rd times back area) */
+} ScatterResult;
+
+/* Functions for BSSRDF reparametrization into more intuitive parameters,
+ * see [2] section 4 for more info. */
+
+static float f_Rd(float alpha_, float A, float ro)
+{
+	/* reflectance for the reduced albedo alpha_ minus the target ro,
+	 * compute_reduced_albedo searches the root of this in alpha_ */
+	const float root = sqrtf(3.0f * (1.0f - alpha_));
+	return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*root))*expf(-root) - ro;
+}
+
+static float compute_reduced_albedo(ScatterSettings *ss)
+{
+	const float tolerance= 1e-8;
+	const int max_iteration_count= 20;
+	float prev= 0.0f, cur= 1.0f, f_prev, f_cur, df, step;
+	int iter;
+
+	/* invert the Rd function for the requested reflectance with the
+	 * secant method, the two start values are albedo 1 and albedo 0 */
+	f_cur= f_Rd(cur, ss->A, ss->ro);
+	f_prev= f_Rd(prev, ss->A, ss->ro);
+
+	for (iter= 0; iter < max_iteration_count; iter++) {
+		/* stop on a flat secant or on a negligible update */
+		df= (f_cur - f_prev);
+		if (fabsf(df) < tolerance)
+			break;
+		step= ((cur - prev)/df)*f_cur;
+		if (fabsf(step) < tolerance)
+			break;
+
+		prev= cur;
+		f_prev= f_cur;
+		cur= cur - step;
+
+		/* neither estimate is allowed above one */
+		if (cur > 1.0f) cur= 1.0f;
+		if (prev > 1.0f) prev= 1.0f;
+		f_cur= f_Rd(cur, ss->A, ss->ro);
+	}
+
+	/* avoid division by zero later */
+	if (cur <= 0.0f)
+		cur= 0.00001f;
+	return cur;
+}
+
+/* Exponential falloff functions */
+
+static float Rd_rsquare(ScatterSettings *ss, float rr)
+{
+	/* falloff at squared distance rr: the sum of one term for the
+	 * virtual light source at zr and one for the source at zv */
+	const float dr = sqrtf(rr + ss->zr * ss->zr);
+	const float dv = sqrtf(rr + ss->zv * ss->zv);
+	const float term_r= ss->zr*(1.0f + ss->sigma*dr)*expf(-ss->sigma*dr)/(dr*dr*dr);
+	const float term_v= ss->zv*(1.0f + ss->sigma*dv)*expf(-ss->sigma*dv)/(dv*dv*dv);
+
+	/* note: there is no multiplication with ss->alpha_ here */
+	return (1.0f/(4.0f*(float)M_PI))*(term_r + term_v);
+}
+
+static float Rd(ScatterSettings *ss, float r)
+{
+	return Rd_rsquare(ss, r*r);  /* same falloff, taking the plain instead of the squared distance */
+}
+
+/* table lookups for Rd. this avoids expensive exp calls. we use two
+ * separate tables for lower and higher distances to improve precision,
+ * since the values are poorly distributed: for smaller distances the
+ * lookup is done with the squared distance, which saves another
+ * sqrt. */
+
+static void approximate_Rd_rgb(ScatterSettings **ss, float rr, float *rd)
+{
+	/* rr is the squared distance, rd receives Rd for the three channels */
+	float indexf, t;
+	int index;
+
+	if (rr > (RD_TABLE_RANGE_2 * RD_TABLE_RANGE_2)) {
+		/* beyond both tables, handled by the fallback below */
+	}
+	else if (rr > RD_TABLE_RANGE) {
+		/* far table, indexed by distance. rr stays squared for the fallback */
+		indexf= sqrtf(rr)*(RD_TABLE_SIZE/RD_TABLE_RANGE_2);
+		index= (int)indexf;
+		t= indexf - (float)index;
+
+		if (index >= 0 && index < RD_TABLE_SIZE) {
+			rd[0]= (ss[0]->tableRd2[index]*(1-t) + ss[0]->tableRd2[index+1]*t);
+			rd[1]= (ss[1]->tableRd2[index]*(1-t) + ss[1]->tableRd2[index+1]*t);
+			rd[2]= (ss[2]->tableRd2[index]*(1-t) + ss[2]->tableRd2[index+1]*t);
+			return;
+		}
+	}
+	else {
+		/* near table, indexed by squared distance */
+		indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE);
+		index= (int)indexf;
+		t= indexf - (float)index;
+
+		if (index >= 0 && index < RD_TABLE_SIZE) {
+			rd[0]= (ss[0]->tableRd[index]*(1-t) + ss[0]->tableRd[index+1]*t);
+			rd[1]= (ss[1]->tableRd[index]*(1-t) + ss[1]->tableRd[index+1]*t);
+			rd[2]= (ss[2]->tableRd[index]*(1-t) + ss[2]->tableRd[index+1]*t);
+			return;
+		}
+	}
+
+	/* fallback to slow Rd computation, this takes the squared distance */
+	rd[0]= Rd_rsquare(ss[0], rr);
+	rd[1]= Rd_rsquare(ss[1], rr);
+	rd[2]= Rd_rsquare(ss[2], rr);
+}
+
+static void build_Rd_table(ScatterSettings *ss)
+{
+	float r;
+	int i, size = RD_TABLE_SIZE+1;  /* one extra entry, lookups interpolate with index+1 */
+
+	ss->tableRd= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
+	ss->tableRd2= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
+
+	for (i= 0; i < size; i++) {
+		r= i*(RD_TABLE_RANGE/RD_TABLE_SIZE);  /* squared distance of entry i */
+#if 0
+		if (r < ss->invsigma_t_*ss->invsigma_t_) {
+			r= ss->invsigma_t_*ss->invsigma_t_;
+		}
+#endif
+		ss->tableRd[i]= Rd(ss, sqrtf(r));
+
+		r= i*(RD_TABLE_RANGE_2/RD_TABLE_SIZE);  /* plain distance of entry i */
+#if 0
+		if (r < ss->invsigma_t_) {
+			r= ss->invsigma_t_;
+		}
+#endif
+		ss->tableRd2[i]= Rd(ss, r);
+	}
+}
+
+ScatterSettings *scatter_settings_new(float refl, float radius, float ior, float reflfac, float frontweight, float backweight)
+{
+	ScatterSettings *ss;
+	/* settings for one color channel, to be released with scatter_settings_free */
+	ss= MEM_callocN(sizeof(ScatterSettings), "ScatterSettings");
+
+	/* see [1] and [3] for these formulas, the first Fdr term is -1.440/ior^2 */
+	ss->eta= ior;
+	ss->Fdr= -1.440f/(ior*ior) + 0.710f/ior + 0.668f + 0.0636f*ior;
+	ss->A= (1.0f + ss->Fdr)/(1.0f - ss->Fdr);
+	ss->ld= radius;
+	ss->ro= min_ff(refl, 0.99f);
+	ss->color= ss->ro*reflfac + (1.0f-reflfac);
+
+	ss->alpha_= compute_reduced_albedo(ss);
+
+	ss->sigma= 1.0f/ss->ld;
+	ss->sigma_t_= ss->sigma/sqrtf(3.0f*(1.0f - ss->alpha_));
+	ss->sigma_s_= ss->alpha_*ss->sigma_t_;
+	ss->sigma_a= ss->sigma_t_ - ss->sigma_s_;
+
+	ss->D= 1.0f/(3.0f*ss->sigma_t_);
+
+	ss->zr= 1.0f/ss->sigma_t_;
+	ss->zv= ss->zr + 4.0f*ss->A*ss->D;
+
+	ss->invsigma_t_= 1.0f/ss->sigma_t_;
+
+	ss->frontweight= frontweight;
+	ss->backweight= backweight;
+
+	/* precompute a table of Rd values for quick lookup */
+	build_Rd_table(ss);
+
+	return ss;
+}
+
+void scatter_settings_free(ScatterSettings *ss)
+{
+	MEM_freeN(ss->tableRd);  /* both tables are allocated in build_Rd_table */
+	MEM_freeN(ss->tableRd2);
+	MEM_freeN(ss);  /* ss is dereferenced above, so it must not be NULL */
+}
+
+/* Hierarchical method as in [2]. */
+
+/* traversal */
+
+#define SUBNODE_INDEX(co, split) \
+	(((co)[0]>=(split)[0]) + ((co)[1]>=(split)[1])*2 + ((co)[2]>=(split)[2])*4)  /* octant of co around split: bits 0, 1, 2 are set for x, y, z >= split */
+
+static void add_radiance(ScatterTree *tree, float *frontrad, float *backrad, float area, float backarea, float rr, ScatterResult *result)
+{
+	/* adds the contribution of one sample, or of an aggregated node, at
+	 * squared distance rr to result. radiance is weighted with Rd times
+	 * area and summed in rad/backrad, the weights themselves are summed
+	 * in rdsum/backrdsum */
+	float falloff[3], weight;
+	int c;
+
+	approximate_Rd_rgb(tree->ss, rr, falloff);
+
+	/* front facing part, skipped without a radiance pointer or with
+	 * zero area */
+	if (frontrad && area) {
+		for (c=0; c<3; c++) {
+			weight= falloff[c]*area;
+
+			result->rad[c] += frontrad[c]*weight;
+			result->rdsum[c] += weight;
+		}
+	}
+
+	/* back facing part, the same scheme with the back radiance and
+	 * the back area */
+	if (backrad && backarea) {
+		for (c=0; c<3; c++) {
+			weight= falloff[c]*backarea;
+
+			result->backrad[c] += backrad[c]*weight;
+			result->backrdsum[c] += weight;
+		}
+	}
+}
+
+static void traverse_octree(ScatterTree *tree, ScatterNode *node, const float co[3], int self, ScatterResult *result)
+{
+	/* gathers radiance for the point co from node and everything below it,
+	 * self is set while node lies on the path to the octant containing co */
+	float delta[3], distsq;
+	int c, own = 0;
+
+	if (node->totpoint > 0) {
+		/* leaf - every sample contributes individually */
+		for (c=0; c<node->totpoint; c++) {
+			ScatterPoint *pt= &node->points[c];
+
+			sub_v3_v3v3(delta, co, pt->co);
+			distsq= dot_v3v3(delta, delta);
+
+			if (pt->back)
+				add_radiance(tree, NULL, pt->rad, 0.0f, pt->area, distsq, result);
+			else
+				add_radiance(tree, pt->rad, NULL, pt->area, 0.0f, distsq, result);
+		}
+		return;
+	}
+
+	/* branch - octant of this node that holds co, only used with self */
+	if (self)
+		own = SUBNODE_INDEX(co, node->split);
+
+	for (c=0; c<8; c++) {
+		ScatterNode *subnode= node->child[c];
+
+		if (subnode == NULL)
+			continue;
+
+		if (self && own == c) {
+			/* the child containing the point is always descended into */
+			traverse_octree(tree, subnode, co, 1, result);
+			continue;
+		}
+
+		/* otherwise descend only when area/dist exceeds the error bound
+		 * (tested as a product to avoid the division), else use the node */
+		sub_v3_v3v3(delta, co, subnode->co);
+		distsq= dot_v3v3(delta, delta);
+
+		if (subnode->area+subnode->backarea>tree->error*distsq)
+			traverse_octree(tree, subnode, co, 0, result);
+		else
+			add_radiance(tree, subnode->rad, subnode->backrad, subnode->area, subnode->backarea, distsq, result);
+	}
+}
+
+static void compute_radiance(ScatterTree *tree, const float co[3], float *rad)
+{
+	ScatterResult result;
+	float total[3], totalsum[3];
+	int c;
+
+	memset(&result, 0, sizeof(result));
+	traverse_octree(tree, tree->root, co, 1, &result);
+
+	/* unlike the original paper, normalize over the sampled area and
+	 * multiply with the reflectance: the point samples are incomplete,
+	 * parts of the mesh not visible from the camera have none. without
+	 * this the result gets darker and can show ugly color shifts */
+
+	/* the front and back weights of the first channel's settings are
+	 * applied to all three channels */
+	mul_v3_fl(result.rad, tree->ss[0]->frontweight);
+	mul_v3_fl(result.backrad, tree->ss[0]->backweight);
+
+	/* two estimates: front only in rad, front plus back in total */
+	copy_v3_v3(rad, result.rad);
+	add_v3_v3v3(total, result.rad, result.backrad);
+	add_v3_v3v3(totalsum, result.rdsum, result.backrdsum);
+
+	for (c=0; c<3; c++) {
+		const float color= tree->ss[c]->color;
+
+		if (result.rdsum[c] > 1e-16f)
+			rad[c]= color*rad[c]/result.rdsum[c];
+		if (totalsum[c] > 1e-16f)
+			total[c]= color*total[c]/totalsum[c];
+
+		/* keep whichever of the two estimates is larger */
+		rad[c]= MAX2(rad[c], total[c]);
+	}
+}
+
+/* building */
+
+static void sum_leaf_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
+{
+	ScatterPoint *p;
+	float rad, totrad= 0.0f, inv;
+	int i;
+
+	node->co[0]= node->co[1]= node->co[2]= 0.0;  /* area and backarea are not reset here, only added to below */
+	node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
+	node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
+
+	/* compute total rad, rad weighted average position,
+	 * and total area */
+	for (i=0; i<node->totpoint; i++) {
+		p= &node->points[i];
+
+		rad= p->area*fabsf(p->rad[0] + p->rad[1] + p->rad[2]);  /* position weight, for front and back points alike */
+		totrad += rad;
+
+		node->co[0] += rad*p->co[0];
+		node->co[1] += rad*p->co[1];
+		node->co[2] += rad*p->co[2];
+
+		if (p->back) {
+			node->backrad[0] += p->rad[0]*p->area;
+			node->backrad[1] += p->rad[1]*p->area;
+			node->backrad[2] += p->rad[2]*p->area;
+
+			node->backarea += p->area;
+		}
+		else {
+			node->rad[0] += p->rad[0]*p->area;
+			node->rad[1] += p->rad[1]*p->area;
+			node->rad[2] += p->rad[2]*p->area;
+
+			node->area += p->area;
+		}
+	}
+
+	if (node->area > 1e-16f) {  /* turn the area weighted sum into an average */
+		inv= 1.0f/node->area;
+		node->rad[0] *= inv;
+		node->rad[1] *= inv;
+		node->rad[2] *= inv;
+	}
+	if (node->backarea > 1e-16f) {  /* same for the back samples */
+		inv= 1.0f/node->backarea;
+		node->backrad[0] *= inv;
+		node->backrad[1] *= inv;
+		node->backrad[2] *= inv;
+	}
+
+	if (totrad > 1e-16f) {  /* finish the radiance weighted average position */
+		inv= 1.0f/totrad;
+		node->co[0] *= inv;
+		node->co[1] *= inv;
+		node->co[2] *= inv;
+	}
+	else {
+		/* make sure that if radiance is 0.0f, we still have these points in
+		 * the tree at a good position, they count for rdsum too */
+		for (i=0; i<node->totpoint; i++) {
+			p= &node->points[i];
+
+			node->co[0] += p->co[0];  /* added on top of the (near zero) weighted sum from above */
+			node->co[1] += p->co[1];
+			node->co[2] += p->co[2];
+		}
+
+		node->co[0] /= node->totpoint;  /* totpoint > 0 when called through sum_radiance */
+		node->co[1] /= node->totpoint;
+		node->co[2] /= node->totpoint;
+	}
+}
+
+static void sum_branch_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
+{
+	ScatterNode *subnode;
+	float rad, totrad= 0.0f, inv;
+	int i, totnode;
+
+	node->co[0]= node->co[1]= node->co[2]= 0.0;  /* area and backarea are not reset here, only added to below */
+	node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
+	node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
+
+	/* compute total rad, rad weighted average position,
+	 * and total area */
+	for (i=0; i<8; i++) {
+		if (node->child[i] == NULL)
+			continue;
+
+		subnode= node->child[i];
+
+		rad= subnode->area*fabsf(subnode->rad[0] + subnode->rad[1] + subnode->rad[2]);  /* position weight: front part... */
+		rad += subnode->backarea*fabsf(subnode->backrad[0] + subnode->backrad[1] + subnode->backrad[2]);  /* ...plus back part */
+		totrad += rad;
+
+		node->co[0] += rad*subnode->co[0];
+		node->co[1] += rad*subnode->co[1];
+		node->co[2] += rad*subnode->co[2];
+
+		node->rad[0] += subnode->rad[0]*subnode->area;  /* child averages are weighted by the child area again */
+		node->rad[1] += subnode->rad[1]*subnode->area;
+		node->rad[2] += subnode->rad[2]*subnode->area;
+
+		node->backrad[0] += subnode->backrad[0]*subnode->backarea;
+		node->backrad[1] += subnode->backrad[1]*subnode->backarea;
+		node->backrad[2] += subnode->backrad[2]*subnode->backarea;
+
+		node->area += subnode->area;
+		node->backarea += subnode->backarea;
+	}
+
+	if (node->area > 1e-16f) {  /* turn the area weighted sum into an average */
+		inv= 1.0f/node->area;
+		node->rad[0] *= inv;
+		node->rad[1] *= inv;
+		node->rad[2] *= inv;
+	}
+	if (node->backarea > 1e-16f) {  /* same for the back samples */
+		inv= 1.0f/node->backarea;
+		node->backrad[0] *= inv;
+		node->backrad[1] *= inv;
+		node->backrad[2] *= inv;
+	}
+
+	if (totrad > 1e-16f) {  /* finish the radiance weighted average position */
+		inv= 1.0f/totrad;
+		node->co[0] *= inv;
+		node->co[1] *= inv;
+		node->co[2] *= inv;
+	}
+	else {
+		/* make sure that if radiance is 0.0f, we still have these points in
+		 * the tree at a good position, they count for rdsum too */
+		totnode= 0;
+
+		for (i=0; i<8; i++) {
+			if (node->child[i]) {
+				subnode= node->child[i];
+
+				node->co[0] += subnode->co[0];
+				node->co[1] += subnode->co[1];
+				node->co[2] += subnode->co[2];
+
+				totnode++;
+			}
+		}
+
+		node->co[0] /= totnode;  /* NOTE(review): totnode is 0 for a node without children (e.g. an empty root) - confirm callers never build such a tree */
+		node->co[1] /= totnode;
+		node->co[2] /= totnode;
+	}
+}
+
+static void sum_radiance(ScatterTree *tree, ScatterNode *node)
+{
+	/* bottom-up pass: children are summed before the branch holding them */
+	int c;
+
+	if (node->totpoint > 0) {
+		sum_leaf_radiance(tree, node);
+		return;
+	}
+
+	for (c=0; c<8; c++)
+		if (node->child[c] != NULL)
+			sum_radiance(tree, node->child[c]);
+	sum_branch_radiance(tree, node);
+}
+
+static void subnode_middle(int i, float *mid, float *subsize, float *submid)
+{
+	/* bits 0, 1 and 2 of the child index i select the upper half along
+	 * x, y and z, the child middle is offset by subsize accordingly */
+	submid[0]= mid[0] + ((i & 1)? subsize[0]: -subsize[0]);
+	submid[1]= mid[1] + ((i & 2)? subsize[1]: -subsize[1]);
+	submid[2]= mid[2] + ((i & 4)? subsize[2]: -subsize[2]);
+}
+
+static void create_octree_node(ScatterTree *tree, ScatterNode *node, float *mid, float *size, ScatterPoint **refpoints, int depth)
+{
+	ScatterNode *subnode;
+	ScatterPoint **subrefpoints, **tmppoints= tree->tmppoints;  /* tmppoints: scratch copy of the references for reordering */
+	int index, nsize[8], noffset[8], i, subco, used_nodes, usedi;
+	float submid[3], subsize[3];
+
+	/* stopping condition */
+	if (node->totpoint <= MAX_OCTREE_NODE_POINTS || depth == MAX_OCTREE_DEPTH) {
+		for (i=0; i<node->totpoint; i++)
+			node->points[i]= *(refpoints[i]);  /* leaf: copy the referenced points into the node's own slice */
+
+		return;
+	}
+
+	subsize[0]= size[0]*0.5f;
+	subsize[1]= size[1]*0.5f;
+	subsize[2]= size[2]*0.5f;
+
+	node->split[0]= mid[0];
+	node->split[1]= mid[1];
+	node->split[2]= mid[2];
+
+	memset(nsize, 0, sizeof(nsize));
+	memset(noffset, 0, sizeof(noffset));
+
+	/* count points in subnodes */
+	for (i=0; i<node->totpoint; i++) {
+		index= SUBNODE_INDEX(refpoints[i]->co, node->split);
+		tmppoints[i]= refpoints[i];
+		nsize[index]++;
+	}
+
+	/* here we check if only one subnode is used. if this is the case, we don't
+	 * create a new node, but rather call this function again, with different
+	 * size and middle position for the same node. */
+	for (usedi=0, used_nodes=0, i=0; i<8; i++) {
+		if (nsize[i]) {
+			used_nodes++;
+			usedi = i;
+		}
+		if (i != 0)
+			noffset[i]= noffset[i-1]+nsize[i-1];  /* start of subnode i in the reordered array */
+	}
+
+	if (used_nodes <= 1) {
+		subnode_middle(usedi, mid, subsize, submid);
+		create_octree_node(tree, node, submid, subsize, refpoints, depth+1);  /* depth still increases, so this ends at MAX_OCTREE_DEPTH */
+		return;
+	}
+
+	/* reorder refpoints by subnode */
+	for (i=0; i<node->totpoint; i++) {
+		index= SUBNODE_INDEX(tmppoints[i]->co, node->split);
+		refpoints[noffset[index]]= tmppoints[i];
+		noffset[index]++;  /* noffset serves as the write cursor of each subnode here */
+	}
+
+	/* create subnodes */
+	for (subco=0, i=0; i<8; subco+=nsize[i], i++) {
+		if (nsize[i] > 0) {
+			subnode= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
+			node->child[i]= subnode;
+			subnode->points= node->points + subco;  /* children get sub-ranges of the parent's point slice */
+			subnode->totpoint= nsize[i];
+			subrefpoints= refpoints + subco;
+
+			subnode_middle(i, mid, subsize, submid);
+
+			create_octree_node(tree, subnode, submid, subsize, subrefpoints,
+				depth+1);
+		}
+		else
+			node->child[i]= NULL;
+	}
+
+	node->points= NULL;  /* a branch holds no points itself, totpoint 0 is what marks it as branch */
+	node->totpoint= 0;
+}
+
+/* public functions */
+
+ScatterTree *scatter_tree_new(ScatterSettings *ss[3], float scale, float error,
+	float (*co)[3], float (*color)[3], float *area, int totpoint)
+{
+	ScatterTree *tree= MEM_callocN(sizeof(ScatterTree), "ScatterTree");
+	const float invscale= 1.0f / scale;
+	int a;
+
+	/* parameters of the tree, the three ScatterSettings are only
+	 * referenced here and are not freed together with the tree */
+	tree->scale= scale;
+	tree->error= error;
+	tree->totpoint= totpoint;
+	tree->ss[0]= ss[0];
+	tree->ss[1]= ss[1];
+	tree->ss[2]= ss[2];
+
+	/* flat point array plus one reference per point, the references
+	 * are what gets reordered while the octree is built */
+	tree->points= MEM_callocN(sizeof(ScatterPoint) * totpoint, "ScatterPoints");
+	tree->refpoints= MEM_callocN(sizeof(ScatterPoint *) * totpoint, "ScatterRefPoints");
+
+	/* copy the input with positions divided by scale, tracking bounds */
+	INIT_MINMAX(tree->min, tree->max);
+
+	for (a=0; a<totpoint; a++) {
+		ScatterPoint *pt= &tree->points[a];
+
+		copy_v3_v3(pt->co, co[a]);
+		copy_v3_v3(pt->rad, color[a]);
+		mul_v3_fl(pt->co, invscale);
+		minmax_v3v3_v3(tree->min, tree->max, pt->co);
+
+		/* a negative input area marks the sample as a back sample */
+		pt->area= fabsf(area[a])/(scale*scale);
+		pt->back= (area[a] < 0.0f);
+		tree->refpoints[a]= pt;
+	}
+
+	return tree;
+}
+
+void scatter_tree_build(ScatterTree *tree)
+{
+	ScatterPoint *sorted;
+	float mid[3], size[3];
+	int totpoint= tree->totpoint, axis;
+
+	/* sorted receives the points in octree order, tmppoints is scratch
+	 * space for create_octree_node */
+	sorted = MEM_callocN(sizeof(ScatterPoint) * totpoint, "ScatterPoints");
+	tree->tmppoints = MEM_callocN(sizeof(ScatterPoint *) * totpoint, "ScatterTmpPoints");
+
+	/* all nodes are allocated from an arena set to hand out calloc memory */
+	tree->arena= BLI_memarena_new(0x8000 * sizeof(ScatterNode), "sss tree arena");
+	BLI_memarena_use_calloc(tree->arena);
+
+	tree->root= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
+	tree->root->points= sorted;
+	tree->root->totpoint= totpoint;
+
+	/* the root spans the bounding box of all points */
+	for (axis=0; axis<3; axis++) {
+		mid[axis]= (tree->min[axis]+tree->max[axis])*0.5f;
+		size[axis]= (tree->max[axis]-tree->min[axis])*0.5f;
+	}
+
+	create_octree_node(tree, tree->root, mid, size, tree->refpoints, 0);
+
+	/* the unsorted input and the build helpers are no longer needed */
+	MEM_freeN(tree->points);
+	MEM_freeN(tree->refpoints);
+	MEM_freeN(tree->tmppoints);
+	tree->refpoints= NULL;
+	tree->tmppoints= NULL;
+	tree->points= sorted;
+
+	/* sum radiance at nodes */
+	sum_radiance(tree, tree->root);
+}
+
+void scatter_tree_sample(ScatterTree *tree, const float co[3], float color[3])
+{
+	/* tree positions are stored divided by scale, so divide co as well */
+	float scaled[3];
+
+	copy_v3_v3(scaled, co);
+	mul_v3_fl(scaled, 1.0f / tree->scale);
+	compute_radiance(tree, scaled, color);
+}
+
+void scatter_tree_free(ScatterTree *tree)
+{
+	if (tree->arena) BLI_memarena_free(tree->arena);  /* only set once scatter_tree_build has run */
+	if (tree->points) MEM_freeN(tree->points);
+	if (tree->refpoints) MEM_freeN(tree->refpoints);  /* NULL again after scatter_tree_build */
+
+	MEM_freeN(tree);  /* the ScatterSettings in tree->ss are not freed here */
+}
+
+/* Internal Renderer API */
+
+/* sss tree building */
+
+typedef struct SSSData {
+	ScatterTree *tree;  /* stored per material in re->sss_hash */
+	ScatterSettings *ss[3];  /* one per color channel, freed in sss_free_tree */
+} SSSData;
+
+typedef struct SSSPoints {
+	struct SSSPoints *next, *prev;  /* list links, batches are appended with BLI_addtail */
+
+	float (*co)[3];  /* the three arrays hold totpoint entries each */
+	float (*color)[3];
+	float *area;
+	int totpoint;
+} SSSPoints;
+
+static void sss_create_tree_mat(Render *re, Material *mat)
+{
+	SSSPoints *p;
+	RenderResult *rr;
+	ListBase points;  /* batches collected by sss_add_points during the preprocessing render */
+	float (*co)[3] = NULL, (*color)[3] = NULL, *area = NULL;
+	int totpoint = 0, osa, osaflag, frsflag, partsdone;
+
+	if (re->test_break(re->tbh))
+		return;
+
+	points.first= points.last= NULL;
+
+	/* TODO: this is getting a bit ugly, copying all those variables and
+	 * setting them back, maybe we need to create our own Render? */
+
+	/* do SSS preprocessing render */
+	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
+	rr= re->result;  /* saved, put back after the preprocessing render */
+	osa= re->osa;
+	osaflag= re->r.mode & R_OSA;
+	frsflag= re->r.mode & R_EDGE_FRS;
+	partsdone= re->i.partsdone;
+
+	re->osa= 0;
+	re->r.mode &= ~(R_OSA | R_EDGE_FRS);  /* both flags are restored below from osaflag/frsflag */
+	re->sss_points= &points;
+	re->sss_mat= mat;
+	re->i.partsdone = 0;
+
+	if (!(re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW)))
+		re->result= NULL;  /* for preview renders the existing result stays in place */
+	BLI_rw_mutex_unlock(&re->resultmutex);
+
+	RE_TileProcessor(re);
+
+	BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
+	if (!(re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW))) {
+		RE_FreeRenderResult(re->result);  /* drop whatever result the preprocessing render left behind */
+		re->result= rr;
+	}
+	BLI_rw_mutex_unlock(&re->resultmutex);
+
+	re->i.partsdone= partsdone;
+	re->sss_mat= NULL;
+	re->sss_points= NULL;
+	re->osa= osa;
+	if (osaflag) re->r.mode |= R_OSA;
+	if (frsflag) re->r.mode |= R_EDGE_FRS;
+
+	/* no points? no tree */
+	if (!points.first)
+		return;
+
+	/* merge points together into a single buffer */
+	if (!re->test_break(re->tbh)) {
+		for (totpoint=0, p=points.first; p; p=p->next)
+			totpoint += p->totpoint;
+
+		co= MEM_mallocN(sizeof(*co)*totpoint, "SSSCo");
+		color= MEM_mallocN(sizeof(*color)*totpoint, "SSSColor");
+		area= MEM_mallocN(sizeof(*area)*totpoint, "SSSArea");
+
+		for (totpoint=0, p=points.first; p; p=p->next) {  /* totpoint restarts at 0 and doubles as the write offset */
+			memcpy(co+totpoint, p->co, sizeof(*co)*p->totpoint);
+			memcpy(color+totpoint, p->color, sizeof(*color)*p->totpoint);
+			memcpy(area+totpoint, p->area, sizeof(*area)*p->totpoint);
+			totpoint += p->totpoint;
+		}
+	}
+
+	/* free points */
+	for (p=points.first; p; p=p->next) {
+		MEM_freeN(p->co);  /* the arrays handed to sss_add_points are released here */
+		MEM_freeN(p->color);
+		MEM_freeN(p->area);
+	}
+	BLI_freelistN(&points);
+
+	/* build tree */
+	if (!re->test_break(re->tbh)) {  /* NOTE(review): if the merge above was skipped by a break that is no longer reported here, co/color/area are NULL and totpoint is 0 - confirm this can not happen */
+		SSSData *sss= MEM_callocN(sizeof(*sss), "SSSData");
+		float ior= mat->sss_ior, cfac= mat->sss_colfac;
+		const float *radius = mat->sss_radius;
+		float fw= mat->sss_front, bw= mat->sss_back;
+		float error = mat->sss_error;
+
+		error= get_render_aosss_error(&re->r, error);
+		if ((re->r.scemode & (R_BUTS_PREVIEW|R_VIEWPORT_PREVIEW)) && error < 0.5f)
+			error= 0.5f;  /* preview renders never use an error bound below 0.5 */
+
+		sss->ss[0]= scatter_settings_new(mat->sss_col[0], radius[0], ior, cfac, fw, bw);
+		sss->ss[1]= scatter_settings_new(mat->sss_col[1], radius[1], ior, cfac, fw, bw);
+		sss->ss[2]= scatter_settings_new(mat->sss_col[2], radius[2], ior, cfac, fw, bw);
+		sss->tree= scatter_tree_new(sss->ss, mat->sss_scale, error,
+			co, color, area, totpoint);
+
+		MEM_freeN(co);  /* scatter_tree_new has copied the data into its own points */
+		MEM_freeN(color);
+		MEM_freeN(area);
+
+		scatter_tree_build(sss->tree);
+
+		BLI_ghash_insert(re->sss_hash, mat, sss);  /* the hash entry is released by free_sss */
+	}
+	else {
+		if (co) MEM_freeN(co);
+		if (color) MEM_freeN(color);
+		if (area) MEM_freeN(area);
+	}
+}
+
+void sss_add_points(Render *re, float (*co)[3], float (*color)[3], float *area, int totpoint)
+{
+	SSSPoints *batch;
+
+	if (totpoint <= 0)
+		return;
+
+	/* the arrays are referenced, not copied, sss_create_tree_mat frees them */
+	batch= MEM_callocN(sizeof(SSSPoints), "SSSPoints");
+	batch->co= co;
+	batch->color= color;
+	batch->area= area;
+	batch->totpoint= totpoint;
+	BLI_thread_lock(LOCK_CUSTOM1);
+	BLI_addtail(re->sss_points, batch);
+	BLI_thread_unlock(LOCK_CUSTOM1);
+}
+
+static void sss_free_tree(SSSData *sss)
+{
+	int c;
+	/* scatter_tree_free leaves the settings alone, they are freed here */
+	scatter_tree_free(sss->tree);
+	for (c=0; c<3; c++) scatter_settings_free(sss->ss[c]);
+	MEM_freeN(sss);
+}
+
+/* public functions */
+
+void make_sss_tree(Render *re)
+{
+	ListBase *materials;
+	Material *mat;
+	const char *prevstr = NULL;
+	bool infostr_set = false;
+	int pass;
+
+	/* start from scratch, trees of a previous call are dropped */
+	free_sss(re);
+	re->sss_hash= BLI_ghash_ptr_new("make_sss_tree gh");
+
+	re->stats_draw(re->sdh, &re->i);
+
+	/* first pass: materials of the render's own main database,
+	 * second pass: materials of the global one when it is another */
+	for (pass=0; pass<2; pass++) {
+		/* XXX preview exception */
+		/* localizing preview render data is not fun for node trees :( */
+		if (pass == 1 && re->main == G.main)
+			break;
+
+		materials= (pass == 0)? &re->main->mat: &G.main->mat;
+
+		for (mat= materials->first; mat; mat= mat->id.next) {
+			if (!(mat->id.us && (mat->flag & MA_IS_USED) && (mat->sss_flag & MA_DIFF_SSS)))
+				continue;
+
+			/* swap in the info string on the first sss material only */
+			if (!infostr_set) {
+				prevstr = re->i.infostr;
+				re->i.infostr = IFACE_("SSS preprocessing");
+				infostr_set = true;
+			}
+
+			sss_create_tree_mat(re, mat);
+		}
+	}
+
+	/* put back the info string that was shown before */
+	if (infostr_set)
+		re->i.infostr = prevstr;
+}
+
+void free_sss(Render *re)
+{
+	GHashIterator gh_iter;
+
+	if (re->sss_hash == NULL)
+		return;
+
+	/* the hash is freed without callbacks, so release the values by hand */
+	GHASH_ITER (gh_iter, re->sss_hash)
+		sss_free_tree(BLI_ghashIterator_getValue(&gh_iter));
+	BLI_ghash_free(re->sss_hash, NULL, NULL);
+	re->sss_hash= NULL;
+}
+
+int sample_sss(Render *re, Material *mat, const float co[3], float color[3])
+{
+	SSSData *sss;
+
+	/* without any trees the color is left untouched */
+	if (re->sss_hash == NULL)
+		return 0;
+
+	sss= BLI_ghash_lookup(re->sss_hash, mat);
+	if (sss == NULL) {
+		/* no tree for this material, the result is black */
+		color[0]= color[1]= color[2]= 0.0f;
+		return 0;
+	}
+
+	scatter_tree_sample(sss->tree, co, color);
+	return 1;
+}
+
+int sss_pass_done(struct Render *re, struct Material *mat)
+{
+	return ((re->flag & R_BAKING) || !(re->r.mode & R_SSS) || (re->sss_hash && BLI_ghash_lookup(re->sss_hash, mat)));  /* non-zero when baking, when R_SSS is off, or when mat has an entry in sss_hash */
+}
+
diff --git a/source/blender/render/intern/source/strand.c b/source/blender/render/intern/source/strand.c
new file mode 100644
index 00000000000..5fde688481a
--- /dev/null
+++ b/source/blender/render/intern/source/strand.c
@@ -0,0 +1,1069 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Contributors: Brecht Van Lommel.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/strand.c
+ *  \ingroup render
+ */
+
+
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_key_types.h"
+#include "DNA_material_types.h"
+#include "DNA_meshdata_types.h"
+
+#include "BLI_math.h"
+#include "BLI_blenlib.h"
+#include "BLI_utildefines.h"
+#include "BLI_ghash.h"
+#include "BLI_memarena.h"
+#include "BLI_rand.h"
+
+#include "BKE_DerivedMesh.h"
+#include "BKE_key.h"
+
+
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "shading.h"
+#include "strand.h"
+#include "zbuf.h"
+
+/* *************** */
+
+/**
+ * Strand width at \a strandco: a blend between ma->strand_sta and ma->strand_end,
+ * with the blend factor optionally shaped by ma->strand_ease.
+ */
+static float strand_eval_width(Material *ma, float strandco)
+{
+	float fac;
+
+	/* remap; presumably strandco arrives in [-1, 1] and becomes [0, 1] here -- confirm with callers */
+	strandco = 0.5f * (strandco + 1.0f);
+
+	if (ma->strand_ease == 0.0f) {
+		/* no easing: linear blend */
+		fac = strandco;
+	}
+	else if (ma->strand_ease < 0.0f) {
+		fac = pow(strandco, 1.0f + ma->strand_ease);
+	}
+	else {
+		fac = pow(strandco, 1.0f / (1.0f - ma->strand_ease));
+	}
+
+	return ((1.0f - fac) * ma->strand_sta + (fac) * ma->strand_end);
+}
+
+/**
+ * Evaluate the strand curve of segment \a sseg at parameter spoint->t and store the result in \a spoint.
+ *
+ * Reads spoint->t; writes co, strandco, dtco, tan, nor, width, alpha, co1, co2 and dsco.
+ * dtstrandco is only written when t is exactly 0 or 1.
+ */
+void strand_eval_point(StrandSegment *sseg, StrandPoint *spoint)
+{
+	Material *ma;
+	StrandBuffer *strandbuf;
+	const float *simplify;
+	float p[4][3], data[4], cross[3], w, dx, dy, t;
+	int type;
+
+	strandbuf= sseg->buffer;
+	ma= sseg->buffer->ma;
+	t= spoint->t;
+	/* curve basis: B-spline when the buffer has R_STRAND_BSPLINE set, cardinal otherwise */
+	type= (strandbuf->flag & R_STRAND_BSPLINE)? KEY_BSPLINE: KEY_CARDINAL;
+
+	/* local copies of the four control points, moved by the instance matrix when R_TRANSFORMED is set */
+	copy_v3_v3(p[0], sseg->v[0]->co);
+	copy_v3_v3(p[1], sseg->v[1]->co);
+	copy_v3_v3(p[2], sseg->v[2]->co);
+	copy_v3_v3(p[3], sseg->v[3]->co);
+
+	if (sseg->obi->flag & R_TRANSFORMED) {
+		mul_m4_v3(sseg->obi->mat, p[0]);
+		mul_m4_v3(sseg->obi->mat, p[1]);
+		mul_m4_v3(sseg->obi->mat, p[2]);
+		mul_m4_v3(sseg->obi->mat, p[3]);
+	}
+
+	if (t == 0.0f) {
+		/* exactly on control point 1: take position and strandco directly */
+		copy_v3_v3(spoint->co, p[1]);
+		spoint->strandco= sseg->v[1]->strandco;
+
+		/* strandco difference across the neighbours, halved unless v[0] and v[1] are the same vertex */
+		spoint->dtstrandco= (sseg->v[2]->strandco - sseg->v[0]->strandco);
+		if (sseg->v[0] != sseg->v[1])
+			spoint->dtstrandco *= 0.5f;
+	}
+	else if (t == 1.0f) {
+		/* exactly on control point 2: mirror of the case above */
+		copy_v3_v3(spoint->co, p[2]);
+		spoint->strandco= sseg->v[2]->strandco;
+
+		spoint->dtstrandco= (sseg->v[3]->strandco - sseg->v[1]->strandco);
+		if (sseg->v[3] != sseg->v[2])
+			spoint->dtstrandco *= 0.5f;
+	}
+	else {
+		/* interior point: weighted sum of the four control points, strandco interpolated linearly.
+		 * NOTE(review): dtstrandco is not written on this path */
+		key_curve_position_weights(t, data, type);
+		spoint->co[0]= data[0]*p[0][0] + data[1]*p[1][0] + data[2]*p[2][0] + data[3]*p[3][0];
+		spoint->co[1]= data[0]*p[0][1] + data[1]*p[1][1] + data[2]*p[2][1] + data[3]*p[3][1];
+		spoint->co[2]= data[0]*p[0][2] + data[1]*p[1][2] + data[2]*p[2][2] + data[3]*p[3][2];
+		spoint->strandco= (1.0f-t)*sseg->v[1]->strandco + t*sseg->v[2]->strandco;
+	}
+
+	/* tangent: same weighted sum, using the tangent weights for t */
+	key_curve_tangent_weights(t, data, type);
+	spoint->dtco[0]= data[0]*p[0][0] + data[1]*p[1][0] + data[2]*p[2][0] + data[3]*p[3][0];
+	spoint->dtco[1]= data[0]*p[0][1] + data[1]*p[1][1] + data[2]*p[2][1] + data[3]*p[3][1];
+	spoint->dtco[2]= data[0]*p[0][2] + data[1]*p[1][2] + data[2]*p[2][2] + data[3]*p[3][2];
+
+	normalize_v3_v3(spoint->tan, spoint->dtco);
+	/* normal is the negated, normalized position; presumably this faces the camera
+	 * if co is in view space -- TODO confirm */
+	normalize_v3_v3(spoint->nor, spoint->co);
+	negate_v3(spoint->nor);
+
+	spoint->width= strand_eval_width(ma, spoint->strandco);
+
+	/* simplification: simplify[1] is used as alpha here, simplify[0] scales the width further down */
+	simplify= RE_strandren_get_simplify(strandbuf->obr, sseg->strand, 0);
+	spoint->alpha= (simplify)? simplify[1]: 1.0f;
+
+	/* outer points: cross is perpendicular to both the position vector and the tangent,
+	 * after scaling it becomes the offset from the centre to each side of the strand */
+	cross_v3_v3v3(cross, spoint->co, spoint->tan);
+
+	/* w is first a divisor built from the window matrix, then reused as the 2D length of (dx, dy).
+	 * NOTE(review): the first w is not checked against zero before dividing */
+	w= spoint->co[2]*strandbuf->winmat[2][3] + strandbuf->winmat[3][3];
+	dx= strandbuf->winx*cross[0]*strandbuf->winmat[0][0]/w;
+	dy= strandbuf->winy*cross[1]*strandbuf->winmat[1][1]/w;
+	w = sqrtf(dx * dx + dy * dy);
+
+	/* when w is zero, cross is left unscaled */
+	if (w > 0.0f) {
+		if (strandbuf->flag & R_STRAND_B_UNITS) {
+			const float crosslen= len_v3(cross);
+			/* w now becomes the width threshold derived from strandbuf->minwidth */
+			w= 2.0f*crosslen*strandbuf->minwidth/w;
+
+			/* thinner than the threshold: widen to it and fade instead
+			 * (this replaces the alpha taken from simplify above) */
+			if (spoint->width < w) {
+				spoint->alpha= spoint->width/w;
+				spoint->width= w;
+			}
+
+			if (simplify)
+				/* squared because we only change width, not length */
+				spoint->width *= simplify[0]*simplify[0];
+
+			/* rescale cross to half the width */
+			mul_v3_fl(cross, spoint->width*0.5f/crosslen);
+		}
+		else
+			mul_v3_fl(cross, spoint->width/w);
+	}
+
+	/* the two sides of the strand at this point */
+	sub_v3_v3v3(spoint->co1, spoint->co, cross);
+	add_v3_v3v3(spoint->co2, spoint->co, cross);
+
+	copy_v3_v3(spoint->dsco, cross);
+}
+
+/* *************** */
+
+/* Blend a single float: *v = negt * (*v1) + t * (*v2). The two weights are passed separately. */
+static void interpolate_vec1(float *v1, float *v2, float t, float negt, float *v)
+{
+	*v = negt * (*v1) + t * (*v2);
+}
+
+/* Component-wise blend of three floats: v[i] = negt * v1[i] + t * v2[i]. */
+static void interpolate_vec3(float *v1, float *v2, float t, float negt, float *v)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		v[i] = negt * v1[i] + t * v2[i];
+	}
+}
+
+/* Component-wise blend of four floats: v[i] = negt * v1[i] + t * v2[i]. */
+static void interpolate_vec4(float *v1, float *v2, float t, float negt, float *v)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		v[i] = negt * v1[i] + t * v2[i];
+	}
+}
+
+/**
+ * Blend two shade results into \a shr with weight (1 - t) for \a shr1 and t for \a shr2.
+ *
+ * The combined colour is always blended; every other pass is blended only when its
+ * SCE_PASS_* bit is set in \a addpassflag. The blended normal is re-normalized.
+ */
+static void interpolate_shade_result(ShadeResult *shr1, ShadeResult *shr2, float t, ShadeResult *shr, int addpassflag)
+{
+	const float negt = 1.0f - t;
+
+	interpolate_vec4(shr1->combined, shr2->combined, t, negt, shr->combined);
+
+	if (addpassflag & SCE_PASS_VECTOR) {
+		interpolate_vec4(shr1->winspeed, shr2->winspeed, t, negt, shr->winspeed);
+	}
+
+	/* shortcut: nothing besides (possibly) the vector pass was requested */
+	if (!(addpassflag & ~(SCE_PASS_VECTOR))) {
+		return;
+	}
+
+	if (addpassflag & SCE_PASS_Z) {
+		interpolate_vec1(&shr1->z, &shr2->z, t, negt, &shr->z);
+	}
+	if (addpassflag & SCE_PASS_RGBA) {
+		interpolate_vec4(shr1->col, shr2->col, t, negt, shr->col);
+	}
+	if (addpassflag & SCE_PASS_NORMAL) {
+		interpolate_vec3(shr1->nor, shr2->nor, t, negt, shr->nor);
+		normalize_v3(shr->nor);
+	}
+	if (addpassflag & SCE_PASS_EMIT) {
+		interpolate_vec3(shr1->emit, shr2->emit, t, negt, shr->emit);
+	}
+	if (addpassflag & SCE_PASS_DIFFUSE) {
+		interpolate_vec3(shr1->diff, shr2->diff, t, negt, shr->diff);
+		interpolate_vec3(shr1->diffshad, shr2->diffshad, t, negt, shr->diffshad);
+	}
+	if (addpassflag & SCE_PASS_SPEC) {
+		interpolate_vec3(shr1->spec, shr2->spec, t, negt, shr->spec);
+	}
+	if (addpassflag & SCE_PASS_SHADOW) {
+		interpolate_vec3(shr1->shad, shr2->shad, t, negt, shr->shad);
+	}
+	if (addpassflag & SCE_PASS_AO) {
+		interpolate_vec3(shr1->ao, shr2->ao, t, negt, shr->ao);
+	}
+	if (addpassflag & SCE_PASS_ENVIRONMENT) {
+		interpolate_vec3(shr1->env, shr2->env, t, negt, shr->env);
+	}
+	if (addpassflag & SCE_PASS_INDIRECT) {
+		interpolate_vec3(shr1->indirect, shr2->indirect, t, negt, shr->indirect);
+	}
+	if (addpassflag & SCE_PASS_REFLECT) {
+		interpolate_vec3(shr1->refl, shr2->refl, t, negt, shr->refl);
+	}
+	if (addpassflag & SCE_PASS_REFRACT) {
+		interpolate_vec3(shr1->refr, shr2->refr, t, negt, shr->refr);
+	}
+	if (addpassflag & SCE_PASS_MIST) {
+		interpolate_vec1(&shr1->mist, &shr2->mist, t, negt, &shr->mist);
+	}
+}
+
+/**
+ * Scale the combined colour, the colour pass and the alpha of \a shr by \a alpha.
+ * Only values below 1.0 have any effect.
+ */
+static void strand_apply_shaderesult_alpha(ShadeResult *shr, float alpha)
+{
+	int i;
+
+	if (!(alpha < 1.0f)) {
+		return;
+	}
+
+	for (i = 0; i < 4; i++) {
+		shr->combined[i] *= alpha;
+	}
+	for (i = 0; i < 4; i++) {
+		shr->col[i] *= alpha;
+	}
+
+	shr->alpha *= alpha;
+}
+
+/**
+ * Shade one strand point and leave the result in ssamp->shr.
+ *
+ * A zeroed stack VlakRen stands in for the face while shading; \a svert is only used
+ * to derive the random seed. shi->strand is reset to NULL before returning.
+ */
+static void strand_shade_point(Render *re, ShadeSample *ssamp, StrandSegment *sseg, StrandVert *svert, StrandPoint *spoint)
+{
+	ShadeInput *shi = ssamp->shi;
+	ShadeResult *shr = ssamp->shr;
+	VlakRen vlr;
+	int seed;
+
+	/* stand-in face: smooth, plus tangent shading when the material asks for it */
+	memset(&vlr, 0, sizeof(vlr));
+	vlr.flag = (sseg->buffer->ma->mode & MA_TANGENT_STR) ? (R_SMOOTH | R_TANGENT) : R_SMOOTH;
+
+	shi->vlr = &vlr;
+	shi->v1 = NULL;
+	shi->v2 = NULL;
+	shi->v3 = NULL;
+	shi->strand = sseg->strand;
+	shi->obi = sseg->obi;
+	shi->obr = sseg->obi->obr;
+
+	/* cache for shadow */
+	shi->samplenr = re->shadowsamplenr[shi->thread]++;
+
+	/* all samples */
+	shi->mask = 0xFFFF;
+
+	/* seed RNG from strand index + vertex offset, for consistent results across tiles */
+	seed = shi->strand->index + (svert - shi->strand->vert);
+	BLI_thread_srandom(shi->thread, seed);
+
+	shade_input_set_strand(shi, sseg->strand, spoint);
+	shade_input_set_strand_texco(shi, sseg->strand, sseg->v[1], spoint);
+
+	/* init material vars */
+	shade_input_init_material(shi);
+
+	/* shade */
+	shade_samples_do_AO(ssamp);
+	shade_input_do_shade(shi, shr);
+
+	/* apply simplification */
+	strand_apply_shaderesult_alpha(shr, spoint->alpha);
+
+	/* include lamphalos for strand, since halo layer was added already */
+	if ((re->flag & R_LAMPHALO) && (shi->layflag & SCE_LAY_HALO)) {
+		renderspothalo(shi, shr->combined, shr->combined[3]);
+	}
+
+	shi->strand = NULL;
+}
+
+/* *************** */
+
+/* Cache of shading results per (object instance, strand vertex) pair, with reference counting. */
+struct StrandShadeCache {
+	/* pair -> StrandCacheEntry holding the shaded result */
+	GHash *resulthash;
+	/* pair -> int counter of outstanding users of that result */
+	GHash *refcounthash;
+	/* storage for the keys and counters inserted into refcounthash */
+	MemArena *memarena;
+};
+
+/* One cached shading result. The entry is inserted into the result hash as both key and value. */
+typedef struct StrandCacheEntry {
+	/* NOTE(review): lookups pass a bare GHashPair while the stored key is the entry itself,
+	 * so this presumably has to remain the first member -- confirm against the pair hash functions */
+	GHashPair pair;
+	/* copy of the shading result for this pair */
+	ShadeResult shr;
+} StrandCacheEntry;
+
+/**
+ * Allocate an empty strand shade cache: two pair-keyed hashes plus a memory arena.
+ * Release it with strand_shade_cache_free().
+ */
+StrandShadeCache *strand_shade_cache_create(void)
+{
+	StrandShadeCache *cache = MEM_callocN(sizeof(StrandShadeCache), "StrandShadeCache");
+
+	cache->resulthash = BLI_ghash_pair_new("strand_shade_cache_create1 gh");
+	cache->refcounthash = BLI_ghash_pair_new("strand_shade_cache_create2 gh");
+	cache->memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "strand shade cache arena");
+
+	return cache;
+}
+
+/**
+ * Free a cache made by strand_shade_cache_create(), including any results still stored in it.
+ */
+void strand_shade_cache_free(StrandShadeCache *cache)
+{
+	/* refcount keys and values are arena memory, so the hash frees nothing itself */
+	BLI_ghash_free(cache->refcounthash, NULL, NULL);
+
+	/* result entries act as key and value at once: free them once, through the key callback */
+	BLI_ghash_free(cache->resulthash, MEM_freeN, NULL);
+
+	BLI_memarena_free(cache->memarena);
+	MEM_freeN(cache);
+}
+
+/* Build the cache key identifying one strand vertex of one object instance. */
+static GHashPair strand_shade_hash_pair(ObjectInstanceRen *obi, StrandVert *svert)
+{
+	const GHashPair key = {obi, svert};
+
+	return key;
+}
+
+/**
+ * Put the shading result for \a svert of \a sseg into ssamp->shr[0], shading the point
+ * only when the cache has no result yet, then drop one reference on the cached result.
+ *
+ * A reference for this pair must already exist (see strand_shade_refcount()).
+ */
+static void strand_shade_get(Render *re, StrandShadeCache *cache, ShadeSample *ssamp, StrandSegment *sseg, StrandVert *svert)
+{
+	GHashPair pair = strand_shade_hash_pair(sseg->obi, svert);
+	StrandCacheEntry *entry = BLI_ghash_lookup(cache->resulthash, &pair);
+	int *refcount = BLI_ghash_lookup(cache->refcounthash, &pair);
+
+	if (entry) {
+		/* already shaded, just copy previous result from hash */
+		ssamp->shr[0] = entry->shr;
+	}
+	else {
+		/* not shaded yet: evaluate at the matching segment end, shade, and store in the hash */
+		StrandPoint p;
+
+		p.t = (svert == sseg->v[1]) ? 0.0f : 1.0f;
+		strand_eval_point(sseg, &p);
+		strand_shade_point(re, ssamp, sseg, svert, &p);
+
+		entry = MEM_callocN(sizeof(StrandCacheEntry), "StrandCacheEntry");
+		entry->pair = pair;
+		entry->shr = ssamp->shr[0];
+		BLI_ghash_insert(cache->resulthash, entry, entry);
+	}
+
+	/* lower reference count and remove if not needed anymore by any samples */
+	if (--(*refcount) == 0) {
+		BLI_ghash_remove(cache->resulthash, &pair, MEM_freeN, NULL);
+		BLI_ghash_remove(cache->refcounthash, &pair, NULL, NULL);
+	}
+}
+
+/**
+ * Shade a position on a strand segment: fetch the shading of both segment end points
+ * (v[1] and v[2]), blend them with factor \a t, and leave the result in ssamp->shr.
+ *
+ * Unless the buffer's widthfade is -1, the result is additionally faded across the
+ * strand width by 1 - |s|^widthfade.
+ */
+void strand_shade_segment(Render *re, StrandShadeCache *cache, StrandSegment *sseg, ShadeSample *ssamp, float t, float s, int addpassflag)
+{
+	ShadeResult shr_start, shr_end;
+
+	/* each call leaves its result in ssamp->shr[0], so copy it out before the next call */
+	strand_shade_get(re, cache, ssamp, sseg, sseg->v[1]);
+	shr_start = ssamp->shr[0];
+	strand_shade_get(re, cache, ssamp, sseg, sseg->v[2]);
+	shr_end = ssamp->shr[0];
+
+	interpolate_shade_result(&shr_start, &shr_end, t, ssamp->shr, addpassflag);
+
+	/* -1 disables the fade along the width */
+	if (sseg->buffer->widthfade == -1.0f) {
+		return;
+	}
+
+	s = 1.0f - powf(fabsf(s), sseg->buffer->widthfade);
+	strand_apply_shaderesult_alpha(ssamp->shr, s);
+}
+
+/**
+ * Drop one reference on the cached shading of (\a obi, \a svert) without reading it.
+ * When the count reaches zero, both the result and the counter entry are removed.
+ *
+ * A reference for this pair must already exist.
+ */
+void strand_shade_unref(StrandShadeCache *cache, ObjectInstanceRen *obi, StrandVert *svert)
+{
+	GHashPair pair = strand_shade_hash_pair(obi, svert);
+	int *refcount = BLI_ghash_lookup(cache->refcounthash, &pair);
+
+	/* lower reference count and remove if not needed anymore by any samples */
+	if (--(*refcount) == 0) {
+		BLI_ghash_remove(cache->resulthash, &pair, MEM_freeN, NULL);
+		BLI_ghash_remove(cache->refcounthash, &pair, NULL, NULL);
+	}
+}
+
+/**
+ * Add one reference for the shading of \a svert on the instance of \a sseg.
+ * The first reference creates the counter; key and counter are allocated from the cache arena.
+ */
+static void strand_shade_refcount(StrandShadeCache *cache, StrandSegment *sseg, StrandVert *svert)
+{
+	GHashPair pair = strand_shade_hash_pair(sseg->obi, svert);
+	int *refcount = BLI_ghash_lookup(cache->refcounthash, &pair);
+	GHashPair *key;
+
+	if (refcount) {
+		(*refcount)++;
+		return;
+	}
+
+	/* the hash keeps the key pointer, so the key cannot be the stack copy */
+	key = BLI_memarena_alloc(cache->memarena, sizeof(GHashPair));
+	*key = pair;
+
+	refcount = BLI_memarena_alloc(cache->memarena, sizeof(int));
+	*refcount = 1;
+
+	BLI_ghash_insert(cache->refcounthash, key, refcount);
+}
+
+/* *************** */
+
+/* State handed to the scan-conversion callback while rasterizing strands into one render part. */
+typedef struct StrandPart {
+	Render *re;
+	ZSpan *zspan;
+
+	/* per-pixel strand sample storage, indexed by y*rectx + x */
+	APixstrand *apixbuf;
+	/* per-pixel count of accepted samples, capped by MAX_ZROW */
+	int *totapixbuf;
+	/* z sources to test against: rectdaps (per-sample lists) is used when set, rectz otherwise;
+	 * rectmask optionally adds a z below which strands are rejected */
+	int *rectz;
+	int *rectmask;
+	intptr_t *rectdaps;
+	int rectx, recty;
+	/* index of the sample currently being rasterized; also selects the mask bit */
+	int sample;
+	int shadow;
+	/* optional per-sample jitter offsets, and the number of samples to rasterize */
+	float (*jit)[2];
+	int samples;
+
+	/* segment currently being rasterized */
+	StrandSegment *segment;
+	/* values at the three corners of the triangle being filled: t along the segment, s across it */
+	float t[3], s[3];
+
+	/* optional shade cache; when set, each accepted sample adds references for both segment ends */
+	StrandShadeCache *cache;
+} StrandPart;
+
+/* One strand segment queued for rendering; collected as a linked list, then sorted by z as an array. */
+typedef struct StrandSortSegment {
+	/* link used only while collecting */
+	struct StrandSortSegment *next;
+	/* indices: object instance, strand within its object, segment within the strand */
+	int obi, strand, segment;
+	/* sort key: mean of the projected z of the two segment end points */
+	float z;
+} StrandSortSegment;
+
+/* qsort() callback: orders StrandSortSegment elements by ascending z. */
+static int compare_strand_segment(const void *poin1, const void *poin2)
+{
+	const float z1 = ((const StrandSortSegment *)poin1)->z;
+	const float z2 = ((const StrandSortSegment *)poin2)->z;
+
+	if (z1 < z2) {
+		return -1;
+	}
+	if (z1 == z2) {
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * Project \a co with \a winmat into \a hoco, then convert that into z-buffer
+ * coordinates \a zco for \a zspan.
+ */
+static void do_strand_point_project(float winmat[4][4], ZSpan *zspan, float *co, float *hoco, float *zco)
+{
+	/* step 1: homogeneous coordinates */
+	projectvert(co, winmat, hoco);
+
+	/* step 2: z-buffer coordinates */
+	hoco_to_zco(zspan, zco, hoco);
+}
+
+/**
+ * Project spoint->co into spoint->hoco and derive the 2D position (spoint->x, spoint->y):
+ * the perspective-divided coordinates scaled by half the window size.
+ */
+static void strand_project_point(float winmat[4][4], float winx, float winy, StrandPoint *spoint)
+{
+	float inv_w;
+
+	projectvert(spoint->co, winmat, spoint->hoco);
+
+	/* NOTE(review): hoco[3] is not checked against zero */
+	inv_w = 1.0f / spoint->hoco[3];
+
+	spoint->x = spoint->hoco[0] * inv_w * winx * 0.5f;
+	spoint->y = spoint->hoco[1] * inv_w * winy * 0.5f;
+}
+
+/**
+ * Append a new block of 4096 zeroed APixstrand structs to the list \a lb.
+ * \return the first element of the new block.
+ */
+static APixstrand *addpsmainAstrand(ListBase *lb)
+{
+	APixstrMain *psm = MEM_mallocN(sizeof(APixstrMain), "addpsmainA");
+	APixstrand *block;
+
+	BLI_addtail(lb, psm);
+
+	block = MEM_callocN(4096 * sizeof(APixstrand), "pixstr");
+	psm->ps = block;
+
+	return block;
+}
+
+/**
+ * Hand out the next unused APixstrand of \a zspan, starting a fresh block of 4096
+ * whenever the current one is used up.
+ */
+static APixstrand *addpsAstrand(ZSpan *zspan)
+{
+	if (zspan->apstrandmcounter != 0) {
+		/* still room in the current block: step to its next element */
+		zspan->apstrandmcounter--;
+		zspan->curpstrand++;
+	}
+	else {
+		/* make new PS block; its first element is handed out right away, 4095 remain */
+		zspan->curpstrand = addpsmainAstrand(zspan->apsmbase);
+		zspan->apstrandmcounter = 4095;
+	}
+
+	return zspan->curpstrand;
+}
+
+/* Upper limit on the number of strand samples accepted per pixel (see do_strand_fillac). */
+#define MAX_ZROW	2000
+
+/**
+ * Per-pixel callback handed to zspan_scanconvert_strand(): records that the current
+ * strand segment covers sample spart->sample of pixel (\a x, \a y).
+ *
+ * \a handle is the StrandPart. \a u and \a v weight spart->t[] / spart->s[] (third weight
+ * is 1 - u - v). \a z is truncated to int and tested against the solid z-buffer.
+ */
+static void do_strand_fillac(void *handle, int x, int y, float u, float v, float z)
+{
+	StrandPart *spart= (StrandPart *)handle;
+	StrandShadeCache *cache= spart->cache;
+	StrandSegment *sseg= spart->segment;
+	APixstrand *apn, *apnew;
+	float t, s;
+	int offset, mask, obi, strnr, seg, zverg, bufferz, maskz=0;
+
+	offset = y*spart->rectx + x;
+	/* identify the segment by instance index, strand number and first-vertex index;
+	 * strnr is index + 1 because 0 marks an empty slot (see CHECK_ASSIGN) */
+	obi= sseg->obi - spart->re->objectinstance;
+	strnr= sseg->strand->index + 1;
+	seg= sseg->v[1] - sseg->strand->vert;
+	mask= (1<<spart->sample);
+
+	/* check against solid z-buffer */
+	zverg= (int)z;
+
+	if (spart->rectdaps) {
+		/* find the z of the sample */
+		PixStr *ps;
+		intptr_t *rd= spart->rectdaps + offset;
+
+		/* defaults when no list entry covers this sample: farthest possible z */
+		bufferz= 0x7FFFFFFF;
+		if (spart->rectmask) maskz= 0x7FFFFFFF;
+
+		if (*rd) {
+			/* first list entry whose mask contains this sample wins */
+			for (ps= (PixStr *)(*rd); ps; ps= ps->next) {
+				if (mask & ps->mask) {
+					bufferz= ps->z;
+					if (spart->rectmask)
+						maskz= ps->maskz;
+					break;
+				}
+			}
+		}
+	}
+	else {
+		/* plain per-pixel buffers */
+		bufferz= (spart->rectz)? spart->rectz[offset]: 0x7FFFFFFF;
+		if (spart->rectmask)
+			maskz= spart->rectmask[offset];
+	}
+
+/* Both macros rely on `break`, so they are only usable inside the while loop below.
+ * CHECK_ADD: slot n already holds this strand/instance/segment -> add this sample's bit and
+ * accumulate t and s (only once per sample bit), then stop.
+ * CHECK_ASSIGN: slot n is empty -> claim it for this segment, then stop. */
+#define CHECK_ADD(n) \
+	if (apn->p[n]==strnr && apn->obi[n]==obi && apn->seg[n]==seg) \
+	{ if (!(apn->mask[n] & mask)) { apn->mask[n] |= mask; apn->v[n] += t; apn->u[n] += s; } break; } (void)0
+#define CHECK_ASSIGN(n) \
+	if (apn->p[n]==0) \
+	{apn->obi[n]= obi; apn->p[n]= strnr; apn->z[n]= zverg; apn->mask[n]= mask; apn->v[n]= t; apn->u[n]= s; apn->seg[n]= seg; break; } (void)0
+
+	/* add to pixel list */
+	/* accepted only when in front of the solid z and the per-pixel cap is not reached */
+	if (zverg < bufferz && (spart->totapixbuf[offset] < MAX_ZROW)) {
+		/* with a mask buffer, the strand must also lie behind the mask z */
+		if (!spart->rectmask || zverg > maskz) {
+			/* interpolate the triangle corner values; s is stored as an absolute value */
+			t = u * spart->t[0] + v * spart->t[1] + (1.0f - u - v) * spart->t[2];
+			s = fabsf(u * spart->s[0] + v * spart->s[1] + (1.0f - u - v) * spart->s[2]);
+
+			/* NOTE(review): the loop never advances to apn->next, so only the four slots of
+			 * the head element are searched for an existing entry */
+			apn= spart->apixbuf + offset;
+			while (apn) {
+				CHECK_ADD(0);
+				CHECK_ADD(1);
+				CHECK_ADD(2);
+				CHECK_ADD(3);
+				CHECK_ASSIGN(0);
+				CHECK_ASSIGN(1);
+				CHECK_ASSIGN(2);
+				CHECK_ASSIGN(3);
+
+				/* head is full: swap its contents into a fresh element chained behind it,
+				 * then use slot 0 of the head (presumably empty, since new elements come
+				 * from zeroed blocks) */
+				apnew= addpsAstrand(spart->zspan);
+				SWAP(APixstrand, *apnew, *apn);
+				apn->next= apnew;
+				CHECK_ASSIGN(0);
+			}
+
+			/* every accepted sample holds one reference on the shading of both segment ends */
+			if (cache) {
+				strand_shade_refcount(cache, sseg, sseg->v[1]);
+				strand_shade_refcount(cache, sseg, sseg->v[2]);
+			}
+			spart->totapixbuf[offset]++;
+		}
+	}
+}
+
+/* width is calculated in hoco space, to ensure strands are visible */
+/**
+ * Clip test for one strand control point, widened by (\a widthx, \a widthy).
+ *
+ * \param bounds  xmin, xmax, ymin, ymax; compared after scaling by the projected w.
+ * \param zcomp   receives the projected z without perspective division.
+ * \return clip flags: 1 left, 2 right, 4 above ymax, 8 below ymin, OR-ed with testclip().
+ */
+static int strand_test_clip(float winmat[4][4], ZSpan *UNUSED(zspan), float *bounds, float *co, float *zcomp, float widthx, float widthy)
+{
+	float hoco[4];
+	int flag = 0;
+
+	projectvert(co, winmat, hoco);
+
+	/* we compare z without perspective division for segment sorting */
+	*zcomp = hoco[2];
+
+	/* horizontal */
+	if (hoco[0] + widthx < bounds[0] * hoco[3]) {
+		flag |= 1;
+	}
+	else if (hoco[0] - widthx > bounds[1] * hoco[3]) {
+		flag |= 2;
+	}
+
+	/* vertical */
+	if (hoco[1] - widthy > bounds[3] * hoco[3]) {
+		flag |= 4;
+	}
+	else if (hoco[1] + widthy < bounds[2] * hoco[3]) {
+		flag |= 8;
+	}
+
+	return flag | testclip(hoco);
+}
+
+/**
+ * Rasterize the quad (co1, co2, co3, co4) for one sample as two triangles,
+ * (co1, co2, co3) and (co1, co3, co4), with do_strand_fillac() as pixel callback.
+ *
+ * When spart->jit is set, the corners are shifted by the negated jitter of \a sample.
+ * spart->t[] / spart->s[] are set per triangle corner: co1 and co2 carry t - dt,
+ * co3 and co4 carry t; co1 and co4 carry s = -1, co2 and co3 carry s = +1.
+ */
+static void do_scanconvert_strand(Render *UNUSED(re), StrandPart *spart, ZSpan *zspan, float t, float dt, float *co1, float *co2, float *co3, float *co4, int sample)
+{
+	float jco[4][3];
+	int i;
+
+	copy_v3_v3(jco[0], co1);
+	copy_v3_v3(jco[1], co2);
+	copy_v3_v3(jco[2], co3);
+	copy_v3_v3(jco[3], co4);
+
+	if (spart->jit) {
+		const float jx = -spart->jit[sample][0];
+		const float jy = -spart->jit[sample][1];
+
+		for (i = 0; i < 4; i++) {
+			jco[i][0] += jx;
+			jco[i][1] += jy;
+		}
+
+		/* XXX mblur? */
+	}
+
+	spart->sample = sample;
+
+	/* first triangle: co1, co2, co3 */
+	spart->t[0] = t - dt;
+	spart->t[1] = t - dt;
+	spart->t[2] = t;
+	spart->s[0] = -1.0f;
+	spart->s[1] = 1.0f;
+	spart->s[2] = 1.0f;
+	zspan_scanconvert_strand(zspan, spart, jco[0], jco[1], jco[2], do_strand_fillac);
+
+	/* second triangle: co1, co3, co4 */
+	spart->t[0] = t - dt;
+	spart->t[1] = t;
+	spart->t[2] = t;
+	spart->s[0] = -1.0f;
+	spart->s[1] = 1.0f;
+	spart->s[2] = -1.0f;
+	zspan_scanconvert_strand(zspan, spart, jco[0], jco[2], jco[3], do_strand_fillac);
+}
+
+/**
+ * Draw the piece of a strand between points \a p1 and \a p2.
+ *
+ * With \a spart: scan-convert the quad between the two points once per sample.
+ * Without \a spart: draw a single-pixel line into each of the \a totzspan spans.
+ */
+static void strand_render(Render *re, StrandSegment *sseg, float winmat[4][4], StrandPart *spart, ZSpan *zspan, int totzspan, StrandPoint *p1, StrandPoint *p2)
+{
+	float hoco1[4], hoco2[4];
+	int a, obi, index;
+
+	if (spart) {
+		const float t = p2->t;
+		const float dt = p2->t - p1->t;
+
+		for (a = 0; a < spart->samples; a++) {
+			do_scanconvert_strand(re, spart, zspan, t, dt, p1->zco2, p1->zco1, p2->zco1, p2->zco2, a);
+		}
+		return;
+	}
+
+	obi = sseg->obi - re->objectinstance;
+	index = sseg->strand->index;
+
+	projectvert(p1->co, winmat, hoco1);
+	projectvert(p2->co, winmat, hoco2);
+
+	for (a = 0; a < totzspan; a++) {
+#if 0
+		/* render both strand and single pixel wire to counter aliasing */
+		zbufclip4(re, &zspan[a], obi, index, p1->hoco2, p1->hoco1, p2->hoco1, p2->hoco2, p1->clip2, p1->clip1, p2->clip1, p2->clip2);
+#endif
+		/* only render a line for now, which makes the shadow map more
+		 * similar across frames, and so reduces flicker */
+		zbufsinglewire(&zspan[a], obi, index, hoco1, hoco2);
+	}
+}
+
+/**
+ * Adaptive subdivision of the strand piece between \a p1 and \a p2.
+ *
+ * The midpoint is evaluated and projected; when the 2D directions p1->mid and mid->p2
+ * deviate enough (judged against sseg->sqadaptcos), the piece is split and each half is
+ * handled recursively, rendering any half that does not split further.
+ *
+ * \return 1 when the piece was split (and so fully rendered here), 0 when the caller
+ * has to render it: depth limit reached, a zero-length half, or straight enough.
+ */
+static int strand_segment_recursive(Render *re, float winmat[4][4], StrandPart *spart, ZSpan *zspan, int totzspan, StrandSegment *sseg, StrandPoint *p1, StrandPoint *p2, int depth)
+{
+	StrandBuffer *buffer = sseg->buffer;
+	StrandPoint mid;
+	float dir1[2], dir2[2], lensq1, lensq2, dot;
+
+	if (depth == buffer->maxdepth) {
+		return 0;
+	}
+
+	mid.t = (p1->t + p2->t) * 0.5f;
+	strand_eval_point(sseg, &mid);
+	strand_project_point(buffer->winmat, buffer->winx, buffer->winy, &mid);
+
+	/* 2D directions of both halves and their squared lengths */
+	dir1[0] = (mid.x - p1->x);
+	dir1[1] = (mid.y - p1->y);
+	lensq1 = dir1[0] * dir1[0] + dir1[1] * dir1[1];
+
+	dir2[0] = (p2->x - mid.x);
+	dir2[1] = (p2->y - mid.y);
+	lensq2 = dir2[0] * dir2[0] + dir2[1] * dir2[1];
+
+	if (lensq1 == 0.0f || lensq2 == 0.0f) {
+		return 0;
+	}
+
+	/* squared comparison, so no square roots are needed */
+	dot = dir1[0] * dir2[0] + dir1[1] * dir2[1];
+	if (dot * dot > sseg->sqadaptcos * lensq1 * lensq2) {
+		return 0;
+	}
+
+	if (spart) {
+		do_strand_point_project(winmat, zspan, mid.co1, mid.hoco1, mid.zco1);
+		do_strand_point_project(winmat, zspan, mid.co2, mid.hoco2, mid.zco2);
+	}
+#if 0
+	else {
+		projectvert(mid.co1, winmat, mid.hoco1);
+		projectvert(mid.co2, winmat, mid.hoco2);
+		mid.clip1 = testclip(mid.hoco1);
+		mid.clip2 = testclip(mid.hoco2);
+	}
+#endif
+
+	/* first half, then second half; render whichever is not subdivided further */
+	if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, p1, &mid, depth + 1)) {
+		strand_render(re, sseg, winmat, spart, zspan, totzspan, p1, &mid);
+	}
+	if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, &mid, p2, depth + 1)) {
+		strand_render(re, sseg, winmat, spart, zspan, totzspan, &mid, p2);
+	}
+
+	return 1;
+}
+
+/**
+ * Render one strand segment: evaluate and project both end points (t = 0 and t = 1,
+ * stored in sseg->point1 / sseg->point2), then subdivide adaptively, or render the
+ * segment as a whole when no subdivision takes place.
+ */
+void render_strand_segment(Render *re, float winmat[4][4], StrandPart *spart, ZSpan *zspan, int totzspan, StrandSegment *sseg)
+{
+	StrandBuffer *buffer = sseg->buffer;
+	StrandPoint *ends[2];
+	int i;
+
+	ends[0] = &sseg->point1;
+	ends[1] = &sseg->point2;
+
+	ends[0]->t = 0.0f;
+	ends[1]->t = 1.0f;
+
+	for (i = 0; i < 2; i++) {
+		strand_eval_point(sseg, ends[i]);
+		strand_project_point(buffer->winmat, buffer->winx, buffer->winy, ends[i]);
+	}
+
+	if (spart) {
+		/* z-buffer coordinates of both sides of the strand at each end */
+		for (i = 0; i < 2; i++) {
+			do_strand_point_project(winmat, zspan, ends[i]->co1, ends[i]->hoco1, ends[i]->zco1);
+			do_strand_point_project(winmat, zspan, ends[i]->co2, ends[i]->hoco2, ends[i]->zco2);
+		}
+	}
+#if 0
+	else {
+		projectvert(ends[0]->co1, winmat, ends[0]->hoco1);
+		projectvert(ends[0]->co2, winmat, ends[0]->hoco2);
+		projectvert(ends[1]->co1, winmat, ends[1]->hoco1);
+		projectvert(ends[1]->co2, winmat, ends[1]->hoco2);
+		ends[0]->clip1 = testclip(ends[0]->hoco1);
+		ends[0]->clip2 = testclip(ends[0]->hoco2);
+		ends[1]->clip1 = testclip(ends[1]->hoco1);
+		ends[1]->clip2 = testclip(ends[1]->hoco2);
+	}
+#endif
+
+	if (!strand_segment_recursive(re, winmat, spart, zspan, totzspan, sseg, ends[0], ends[1], 0)) {
+		strand_render(re, sseg, winmat, spart, zspan, totzspan, ends[0], ends[1]);
+	}
+}
+
+/* render call to fill in strands */
+/* render call to fill in strands */
+/**
+ * Rasterize all visible strand segments of the render part \a pa into \a apixbuf.
+ *
+ * Works in two passes: first every segment that is not clipped away is collected with a
+ * depth value, then the segments are sorted by ascending depth and rendered one by one
+ * through render_strand_segment().
+ *
+ * \param apsmbase  list that receives the extra APixstrand blocks allocated on demand.
+ * \param lay       layer bits; strand buffers with no matching bit are skipped.
+ * \param shadow    selects material filtering for shadow casting and disables the half-pixel offset.
+ * \param cache     optional shade cache; references are added for every accepted sample.
+ * \return 0 when the render was cancelled on entry or there are no strands, otherwise the
+ *         number of segments collected (also when rendering was interrupted later).
+ */
+int zbuffer_strands_abuf(Render *re, RenderPart *pa, APixstrand *apixbuf, ListBase *apsmbase, unsigned int lay, int UNUSED(negzmask), float winmat[4][4], int winx, int winy, int samples, float (*jit)[2], float clipcrop, int shadow, StrandShadeCache *cache)
+{
+	ObjectRen *obr;
+	ObjectInstanceRen *obi;
+	ZSpan zspan;
+	StrandRen *strand = NULL;
+	StrandVert *svert;
+	StrandBound *sbound;
+	StrandPart spart;
+	StrandSegment sseg;
+	StrandSortSegment *sortsegments = NULL, *sortseg, *firstseg;
+	MemArena *memarena;
+	float z[4], bounds[4], obwinmat[4][4];
+	int a, b, c, i, totsegment, clip[4];
+
+	if (re->test_break(re->tbh))
+		return 0;
+	if (re->totstrand == 0)
+		return 0;
+
+	/* setup StrandPart */
+	memset(&spart, 0, sizeof(spart));
+
+	spart.re= re;
+	spart.rectx= pa->rectx;
+	spart.recty= pa->recty;
+	spart.apixbuf= apixbuf;
+	spart.zspan= &zspan;
+	spart.rectdaps= pa->rectdaps;
+	spart.rectz= pa->rectz;
+	spart.rectmask= pa->rectmask;
+	spart.cache= cache;
+	spart.shadow= shadow;
+	spart.jit= jit;
+	spart.samples= samples;
+
+	zbuf_alloc_span(&zspan, pa->rectx, pa->recty, clipcrop);
+
+	/* needed for transform from hoco to zbuffer co */
+	zspan.zmulx= ((float)winx)/2.0f;
+	zspan.zmuly= ((float)winy)/2.0f;
+
+	zspan.zofsx= -pa->disprect.xmin;
+	zspan.zofsy= -pa->disprect.ymin;
+
+	/* to center the sample position */
+	if (!shadow) {
+		zspan.zofsx -= 0.5f;
+		zspan.zofsy -= 0.5f;
+	}
+
+	zspan.apsmbase= apsmbase;
+
+	/* clipping setup */
+	/* part rectangle grown by one pixel on each side, expressed relative to the window size */
+	bounds[0]= (2*pa->disprect.xmin - winx-1)/(float)winx;
+	bounds[1]= (2*pa->disprect.xmax - winx+1)/(float)winx;
+	bounds[2]= (2*pa->disprect.ymin - winy-1)/(float)winy;
+	bounds[3]= (2*pa->disprect.ymax - winy+1)/(float)winy;
+
+	/* pass 1: collect segments as a linked list (newest first) in a temporary arena */
+	memarena= BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, "strand sort arena");
+	firstseg= NULL;
+	totsegment= 0;
+
+	/* for all object instances */
+	/* NOTE(review): i counts list nodes and is later used to index re->objectinstance[],
+	 * which assumes list order equals array order -- TODO confirm */
+	for (obi=re->instancetable.first, i=0; obi; obi=obi->next, i++) {
+		Material *ma;
+		float widthx, widthy;
+
+		obr= obi->obr;
+
+		if (!obr->strandbuf || !(obr->strandbuf->lay & lay))
+			continue;
+
+		/* compute matrix and try clipping whole object */
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_m4m4(obwinmat, winmat, obi->mat);
+		else
+			copy_m4_m4(obwinmat, winmat);
+
+		/* test if we should skip it */
+		ma = obr->strandbuf->ma;
+
+		/* shadow pass needs both MA_CASTSHADOW and MA_SHADBUF; the normal pass skips cast-only materials */
+		if (shadow && (!(ma->mode2 & MA_CASTSHADOW) || !(ma->mode & MA_SHADBUF)))
+			continue;
+		else if (!shadow && (ma->mode & MA_ONLYCAST))
+			continue;
+
+		if (clip_render_object(obi->obr->boundbox, bounds, obwinmat))
+			continue;
+
+		/* margin for the clip test, from the largest strand width and the matrix scale */
+		widthx= obr->strandbuf->maxwidth*obwinmat[0][0];
+		widthy= obr->strandbuf->maxwidth*obwinmat[1][1];
+
+		/* for each bounding box containing a number of strands */
+		sbound= obr->strandbuf->bound;
+		for (c=0; c<obr->strandbuf->totbound; c++, sbound++) {
+			if (clip_render_object(sbound->boundbox, bounds, obwinmat))
+				continue;
+
+			/* for each strand in this bounding box */
+			for (a=sbound->start; a<sbound->end; a++) {
+				strand= RE_findOrAddStrand(obr, a);
+				svert= strand->vert;
+
+				/* keep clipping and z depth for 4 control points */
+				/* the first segment has no previous vertex, so slot 0 repeats slot 1 */
+				clip[1]= strand_test_clip(obwinmat, &zspan, bounds, svert->co, &z[1], widthx, widthy);
+				clip[2]= strand_test_clip(obwinmat, &zspan, bounds, (svert+1)->co, &z[2], widthx, widthy);
+				clip[0]= clip[1]; z[0]= z[1];
+
+				for (b=0; b<strand->totvert-1; b++, svert++) {
+					/* compute 4th point clipping and z depth */
+					if (b < strand->totvert-2) {
+						clip[3]= strand_test_clip(obwinmat, &zspan, bounds, (svert+2)->co, &z[3], widthx, widthy);
+					}
+					else {
+						/* last segment: no next vertex, repeat slot 2 */
+						clip[3]= clip[2]; z[3]= z[2];
+					}
+
+					/* check clipping and add to sortsegments buffer */
+					/* dropped only when all four points share at least one clip flag */
+					if (!(clip[0] & clip[1] & clip[2] & clip[3])) {
+						sortseg= BLI_memarena_alloc(memarena, sizeof(StrandSortSegment));
+						sortseg->obi= i;
+						sortseg->strand= strand->index;
+						sortseg->segment= b;
+
+						/* depth key: mean z of the two segment end points */
+						sortseg->z= 0.5f*(z[1] + z[2]);
+
+						sortseg->next= firstseg;
+						firstseg= sortseg;
+						totsegment++;
+					}
+
+					/* shift clipping and z depth */
+					clip[0]= clip[1]; z[0]= z[1];
+					clip[1]= clip[2]; z[1]= z[2];
+					clip[2]= clip[3]; z[2]= z[3];
+				}
+			}
+		}
+	}
+
+	if (!re->test_break(re->tbh)) {
+		/* convert list to array and sort */
+		sortsegments= MEM_mallocN(sizeof(StrandSortSegment)*totsegment, "StrandSortSegment");
+		for (a=0, sortseg=firstseg; a<totsegment; a++, sortseg=sortseg->next)
+			sortsegments[a]= *sortseg;
+		qsort(sortsegments, totsegment, sizeof(StrandSortSegment), compare_strand_segment);
+	}
+
+	/* the list nodes are no longer needed once copied (or abandoned) */
+	BLI_memarena_free(memarena);
+
+	/* per-pixel sample counters, zero-initialized */
+	spart.totapixbuf= MEM_callocN(sizeof(int)*pa->rectx*pa->recty, "totapixbuf");
+
+	/* pass 2 */
+	/* NOTE(review): if test_break() returned true above but false here, sortsegments would
+	 * still be NULL while totsegment may be non-zero -- presumably the break state never
+	 * resets during a render; confirm */
+	if (!re->test_break(re->tbh)) {
+		/* render segments in sorted order */
+		sortseg= sortsegments;
+		for (a=0; a<totsegment; a++, sortseg++) {
+			if (re->test_break(re->tbh))
+				break;
+
+			obi= &re->objectinstance[sortseg->obi];
+			obr= obi->obr;
+
+			/* rebuild the segment description from the stored indices */
+			sseg.obi= obi;
+			sseg.strand= RE_findOrAddStrand(obr, sortseg->strand);
+			sseg.buffer= sseg.strand->buffer;
+			sseg.sqadaptcos= sseg.buffer->adaptcos;
+			sseg.sqadaptcos *= sseg.sqadaptcos;
+
+			/* four control points; the outer ones are clamped to the strand ends */
+			svert= sseg.strand->vert + sortseg->segment;
+			sseg.v[0]= (sortseg->segment > 0)? (svert-1): svert;
+			sseg.v[1]= svert;
+			sseg.v[2]= svert+1;
+			sseg.v[3]= (sortseg->segment < sseg.strand->totvert-2)? svert+2: svert+1;
+			sseg.shaded= 0;
+
+			spart.segment= &sseg;
+
+			render_strand_segment(re, winmat, &spart, &zspan, 1, &sseg);
+		}
+	}
+
+	if (sortsegments)
+		MEM_freeN(sortsegments);
+	MEM_freeN(spart.totapixbuf);
+
+	zbuf_free_span(&zspan);
+
+	return totsegment;
+}
+
+/* *************** */
+
+/**
+ * Find or create the cached surface for \a obr in re->strandsurface and, when the
+ * coordinate array for \a timeoffset is still missing, fill it from \a dm.
+ *
+ * \param mat         matrix applied to every vertex coordinate copied from \a dm.
+ * \param timeoffset  -1 fills prevco, 0 fills co, 1 fills nextco; any other value, or an
+ *                    array that already exists, leaves the cache untouched.
+ * \return the cached surface (never NULL).
+ */
+StrandSurface *cache_strand_surface(Render *re, ObjectRen *obr, DerivedMesh *dm, float mat[4][4], int timeoffset)
+{
+	const int totvert = dm->getNumVerts(dm);
+	const int totface = dm->getNumTessFaces(dm);
+	StrandSurface *mesh;
+	MVert *mvert;
+	MFace *mface;
+	float (*co)[3];
+	int a;
+
+	/* an existing cache must match object, parent, index and both element counts */
+	mesh = re->strandsurface.first;
+	while (mesh) {
+		if ((mesh->obr.ob    == obr->ob) &&
+		    (mesh->obr.par   == obr->par) &&
+		    (mesh->obr.index == obr->index) &&
+		    (mesh->totvert   == totvert) &&
+		    (mesh->totface   == totface))
+		{
+			break;
+		}
+		mesh = mesh->next;
+	}
+
+	if (mesh == NULL) {
+		mesh = MEM_callocN(sizeof(StrandSurface), "StrandSurface");
+		mesh->obr = *obr;
+		mesh->totvert = totvert;
+		mesh->totface = totface;
+		mesh->face = MEM_callocN(sizeof(int)*4*mesh->totface, "StrandSurfFaces");
+		mesh->ao = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfAO");
+		mesh->env = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfEnv");
+		mesh->indirect = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfIndirect");
+		BLI_addtail(&re->strandsurface, mesh);
+	}
+
+	/* allocate the coordinate array for this time offset, unless it is already there */
+	switch (timeoffset) {
+		case -1:
+			if (mesh->prevco) {
+				return mesh;
+			}
+			co = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo");
+			mesh->prevco = co;
+			break;
+		case 0:
+			if (mesh->co) {
+				return mesh;
+			}
+			co = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo");
+			mesh->co = co;
+			break;
+		case 1:
+			if (mesh->nextco) {
+				return mesh;
+			}
+			co = MEM_callocN(sizeof(float)*3*mesh->totvert, "StrandSurfCo");
+			mesh->nextco = co;
+			break;
+		default:
+			return mesh;
+	}
+
+	/* transformed vertex coordinates */
+	mvert = dm->getVertArray(dm);
+	for (a = 0; a < mesh->totvert; a++) {
+		copy_v3_v3(co[a], mvert[a].co);
+		mul_m4_v3(mat, co[a]);
+	}
+
+	/* face vertex indices (rewritten each time a coordinate array is filled) */
+	mface = dm->getTessFaceArray(dm);
+	for (a = 0; a < mesh->totface; a++) {
+		mesh->face[a][0] = mface[a].v1;
+		mesh->face[a][1] = mface[a].v2;
+		mesh->face[a][2] = mface[a].v3;
+		mesh->face[a][3] = mface[a].v4;
+	}
+
+	return mesh;
+}
+
+/**
+ * Free every cached strand surface in re->strandsurface, including all of its arrays,
+ * and empty the list.
+ */
+void free_strand_surface(Render *re)
+{
+	StrandSurface *mesh = re->strandsurface.first;
+
+	while (mesh) {
+		/* each array is optional, so only free the ones that exist */
+		if (mesh->co) {
+			MEM_freeN(mesh->co);
+		}
+		if (mesh->prevco) {
+			MEM_freeN(mesh->prevco);
+		}
+		if (mesh->nextco) {
+			MEM_freeN(mesh->nextco);
+		}
+		if (mesh->ao) {
+			MEM_freeN(mesh->ao);
+		}
+		if (mesh->env) {
+			MEM_freeN(mesh->env);
+		}
+		if (mesh->indirect) {
+			MEM_freeN(mesh->indirect);
+		}
+		if (mesh->face) {
+			MEM_freeN(mesh->face);
+		}
+
+		mesh = mesh->next;
+	}
+
+	/* frees the list nodes themselves */
+	BLI_freelistN(&re->strandsurface);
+}
+
+/**
+ * Extend the bounds \a min / \a max with every vertex of \a strand.
+ * With a non-zero \a width, each vertex is also included shifted by +width and by -width
+ * on all three axes at once.
+ */
+void strand_minmax(StrandRen *strand, float min[3], float max[3], const float width)
+{
+	const float width2 = width * 2.0f;
+	StrandVert *svert = strand->vert;
+	float vec[3];
+	int a;
+
+	for (a = 0; a < strand->totvert; a++) {
+		copy_v3_v3(vec, svert[a].co);
+		minmax_v3v3_v3(min, max, vec);
+
+		if (width == 0.0f) {
+			continue;
+		}
+
+		/* co + width */
+		add_v3_fl(vec, width);
+		minmax_v3v3_v3(min, max, vec);
+
+		/* step back by twice the width to get co - width */
+		add_v3_fl(vec, -width2);
+		minmax_v3v3_v3(min, max, vec);
+	}
+}
+
diff --git a/source/blender/render/intern/source/sunsky.c b/source/blender/render/intern/source/sunsky.c
new file mode 100644
index 00000000000..80dd52c220c
--- /dev/null
+++ b/source/blender/render/intern/source/sunsky.c
@@ -0,0 +1,506 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/sunsky.c
+ *  \ingroup render
+ *
+ * This feature comes from Preetham paper on "A Practical Analytic Model for Daylight"
+ * and example code from Brian Smits, another author of that paper in
+ * http://www.cs.utah.edu/vissim/papers/sunsky/code/
+ */
+
+#include "sunsky.h"
+#include "BLI_math.h"
+
+/**
+ * Helper macros for component-wise operations on vectors of 3 floats.
+ * */
+
+/**
+ * compute v1 = v2 op v3, component by component
+ * v1, v2 and v3 are vectors of 3 floats; each argument is expanded once per component
+ * */
+#define VEC3OPV(v1, v2, op, v3)                                               \
+	{                                                                         \
+		v1[0] = (v2[0] op v3[0]);                                             \
+		v1[1] = (v2[1] op v3[1]);                                             \
+		v1[2] = (v2[2] op v3[2]);                                             \
+	} (void)0
+
+/**
+ * compute v1 = v2 op f1, component by component
+ * v1 and v2 are vectors of 3 floats
+ * and f1 is a float (its expression is expanded once per component)
+ * */
+#define VEC3OPF(v1, v2, op, f1)                                               \
+	{                                                                         \
+		v1[0] = (v2[0] op(f1));                                              \
+		v1[1] = (v2[1] op(f1));                                              \
+		v1[2] = (v2[2] op(f1));                                              \
+	} (void)0
+
+/**
+ * compute v1 = f1 op v2, component by component
+ * v1 and v2 are vectors of 3 floats
+ * and f1 is a float (its expression is expanded once per component)
+ * */
+#define FOPVEC3(v1, f1, op, v2)                                               \
+	{                                                                         \
+		v1[0] = ((f1) op v2[0]);                                              \
+		v1[1] = ((f1) op v2[1]);                                              \
+		v1[2] = ((f1) op v2[2]);                                              \
+	} (void)0
+
+/**
+ * ClipColor:
+ * clamp each of the three channels of `c' to the range [0, 1], in place
+ * */
+void ClipColor(float c[3])
+{
+	int ch;
+
+	for (ch = 0; ch < 3; ch++) {
+		if (c[ch] > 1.0f) c[ch] = 1.0f;
+		else if (c[ch] < 0.0f) c[ch] = 0.0f;
+	}
+}
+
+/**
+ * AngleBetween:
+ * compute the angle between two directions, each given by its theta and phi
+ * all angles are in radians
+ * */
+static float AngleBetween(float thetav, float phiv, float theta, float phi)
+{
+	const float cos_angle = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta);
+
+	/* a cosine outside [-1, 1] is mapped to the nearest valid angle instead of going through acosf() */
+	if (cos_angle > 1.0f) {
+		return 0;
+	}
+
+	return (cos_angle < -1.0f) ? M_PI : acosf(cos_angle);
+}
+
+/**
+ * DirectionToThetaPhi:
+ * this function converts a direction to its theta and phi values
+ * toSun: direction to convert, expected to be normalized (z is clamped to [-1, 1] so acosf() can't return NaN)
+ * theta, phi, are return values from this conversion; phi is set to 0 when theta is (almost) 0
+ * */
+static void DirectionToThetaPhi(const float *toSun, float *theta, float *phi)
+{
+	const float z = (toSun[2] > 1.0f) ? 1.0f : ((toSun[2] < -1.0f) ? -1.0f : toSun[2]);
+	*theta = acosf(z);
+	if (fabsf(*theta) < 1e-5f)
+		*phi = 0;
+	else
+		*phi = atan2f(toSun[1], toSun[0]);
+}
+
+/**
+ * PerezFunction:
+ * evaluate lvz * F(theta, gamma) / F(0, sun theta), F being built from the five coefficients in `lam'
+ */
+static float PerezFunction(struct SunSky *sunsky, const float *lam, float theta, float gamma, float lvz)
+{
+	const float sun_theta = sunsky->theta;
+	/* term for the requested direction */
+	const float view_term = ((1 + lam[0] * expf(lam[1] / cosf(theta))) *
+	                         (1 + lam[2] * expf(lam[3] * gamma) + lam[4] * cosf(gamma) * cosf(gamma)));
+	/* same term with theta = 0 and gamma = sun theta, used as the divisor */
+	const float zenith_term = ((1 + lam[0] * expf(lam[1])) *
+	                           (1 + lam[2] * expf(lam[3] * sun_theta) + lam[4] * cosf(sun_theta) * cosf(sun_theta)));
+
+	return lvz * view_term / zenith_term;
+}
+
+/**
+ * InitSunSky:
+ * this function stores the input parameters in `sunsky' and computes the zenith values and Perez coefficients from them
+ * parameters:
+ * sunsky, is a structure that contains information about sun, sky and atmosphere; most of its values are initialized here
+ * turb, is atmosphere turbidity
+ * toSun, contains sun direction
+ * horizon_brightness, controls the brightness of the horizon colors
+ * spread, controls the color spread at the horizon
+ * sun_brightness, controls sun's brightness; sun_size, controls sun's size
+ * back_scatter, controls back scatter light
+ * skyblendfac, skyblendtype and sky_colorspace are stored unchanged; sky_exposure is stored negated
+ * */
+void InitSunSky(struct SunSky *sunsky, float turb, const float toSun[3], float horizon_brightness,
+                float spread, float sun_brightness, float sun_size, float back_scatter,
+                float skyblendfac, short skyblendtype, float sky_exposure, float sky_colorspace)
+{
+	float theta2;
+	float theta3;
+	float T;
+	float T2;
+	float chi;
+
+	sunsky->turbidity = turb;
+
+	sunsky->horizon_brightness = horizon_brightness;
+	sunsky->spread = spread;
+	sunsky->sun_brightness = sun_brightness;
+	sunsky->sun_size = sun_size;
+	sunsky->backscattered_light = back_scatter;
+	sunsky->skyblendfac = skyblendfac;
+	sunsky->skyblendtype = skyblendtype;
+	sunsky->sky_exposure = -sky_exposure;
+	sunsky->sky_colorspace = sky_colorspace;
+
+	sunsky->toSun[0] = toSun[0];
+	sunsky->toSun[1] = toSun[1];
+	sunsky->toSun[2] = toSun[2];
+
+	DirectionToThetaPhi(sunsky->toSun, &sunsky->theta, &sunsky->phi);
+
+	sunsky->sunSolidAngle = 0.25 * M_PI * 1.39 * 1.39 / (150 * 150);   /* = 6.7443e-05 */
+
+	theta2 = sunsky->theta * sunsky->theta;
+	theta3 = theta2 * sunsky->theta;
+	T = turb;
+	T2 = turb * turb;
+
+	chi = (4.0f / 9.0f - T / 120.0f) * ((float)M_PI - 2.0f * sunsky->theta);
+	sunsky->zenith_Y = (4.0453f * T - 4.9710f) * tanf(chi) - 0.2155f * T + 2.4192f;
+	sunsky->zenith_Y *= 1000;   /* conversion from kcd/m^2 to cd/m^2 */
+
+	if (sunsky->zenith_Y <= 0)
+		sunsky->zenith_Y = 1e-6;
+
+	sunsky->zenith_x =
+	    (+0.00165f * theta3 - 0.00374f * theta2 + 0.00208f * sunsky->theta + 0.0f) * T2 +
+	    (-0.02902f * theta3 + 0.06377f * theta2 - 0.03202f * sunsky->theta + 0.00394f) * T +
+	    (+0.11693f * theta3 - 0.21196f * theta2 + 0.06052f * sunsky->theta + 0.25885f);
+
+	sunsky->zenith_y =
+	    (+0.00275f * theta3 - 0.00610f * theta2 + 0.00316f * sunsky->theta + 0.0f) * T2 +
+	    (-0.04214f * theta3 + 0.08970f * theta2 - 0.04153f * sunsky->theta + 0.00515f) * T +
+	    (+0.15346f * theta3 - 0.26756f * theta2 + 0.06669f * sunsky->theta + 0.26688f);
+
+
+	sunsky->perez_Y[0] = 0.17872f * T - 1.46303f;
+	sunsky->perez_Y[1] = -0.35540f * T + 0.42749f;
+	sunsky->perez_Y[2] = -0.02266f * T + 5.32505f;
+	sunsky->perez_Y[3] = 0.12064f * T - 2.57705f;
+	sunsky->perez_Y[4] = -0.06696f * T + 0.37027f;
+
+	sunsky->perez_x[0] = -0.01925f * T - 0.25922f;
+	sunsky->perez_x[1] = -0.06651f * T + 0.00081f;
+	sunsky->perez_x[2] = -0.00041f * T + 0.21247f;
+	sunsky->perez_x[3] = -0.06409f * T - 0.89887f;
+	sunsky->perez_x[4] = -0.00325f * T + 0.04517f;
+
+	sunsky->perez_y[0] = -0.01669f * T - 0.26078f;
+	sunsky->perez_y[1] = -0.09495f * T + 0.00921f;
+	sunsky->perez_y[2] = -0.00792f * T + 0.21023f;
+	sunsky->perez_y[3] = -0.04405f * T - 1.65369f;
+	sunsky->perez_y[4] = -0.01092f * T + 0.05291f;
+
+	/* suggested by glome in patch [#8063] */
+	sunsky->perez_Y[0] *= sunsky->horizon_brightness;
+	sunsky->perez_x[0] *= sunsky->horizon_brightness;
+	sunsky->perez_y[0] *= sunsky->horizon_brightness;
+
+	sunsky->perez_Y[1] *= sunsky->spread;
+	sunsky->perez_x[1] *= sunsky->spread;
+	sunsky->perez_y[1] *= sunsky->spread;
+
+	sunsky->perez_Y[2] *= sunsky->sun_brightness;
+	sunsky->perez_x[2] *= sunsky->sun_brightness;
+	sunsky->perez_y[2] *= sunsky->sun_brightness;
+
+	sunsky->perez_Y[3] *= sunsky->sun_size;
+	sunsky->perez_x[3] *= sunsky->sun_size;
+	sunsky->perez_y[3] *= sunsky->sun_size;
+
+	sunsky->perez_Y[4] *= sunsky->backscattered_light;
+	sunsky->perez_x[4] *= sunsky->backscattered_light;
+	sunsky->perez_y[4] *= sunsky->backscattered_light;
+}
+
+/**
+ * GetSkyXYZRadiance:
+ * this function computes sky radiance for the view direction given by `theta' and `phi', using the sunSky values
+ * parameters:
+ * sunsky, contains sun and sky parameters (the sun's own angles are read from it)
+ * theta, is the view direction's theta; above pi/2 it is clamped to pi/2 and the luminance is faded
+ * phi, is the view direction's phi
+ * color_out, is computed color that shows sky radiance in XYZ color format
+ * */
+void GetSkyXYZRadiance(struct SunSky *sunsky, float theta, float phi, float color_out[3])
+{
+	float gamma;
+	float x, y, Y, X, Z;
+	float hfade = 1, nfade = 1;
+
+
+	if (theta > (float)M_PI_2) {
+		hfade = 1.0f - (theta * (float)M_1_PI - 0.5f) * 2.0f;
+		hfade = hfade * hfade * (3.0f - 2.0f * hfade);
+		theta = M_PI_2;
+	}
+
+	if (sunsky->theta > (float)M_PI_2) {
+		if (theta <= (float)M_PI_2) {
+			nfade = 1.0f - (0.5f - theta * (float)M_1_PI) * 2.0f;
+			nfade *= 1.0f - (sunsky->theta * (float)M_1_PI - 0.5f) * 2.0f;
+			nfade = nfade * nfade * (3.0f - 2.0f * nfade);
+		}
+	}
+
+	gamma = AngleBetween(theta, phi, sunsky->theta, sunsky->phi);
+
+	/* Compute xyY values */
+	x = PerezFunction(sunsky, sunsky->perez_x, theta, gamma, sunsky->zenith_x);
+	y = PerezFunction(sunsky, sunsky->perez_y, theta, gamma, sunsky->zenith_y);
+	Y = 6.666666667e-5f * nfade * hfade * PerezFunction(sunsky, sunsky->perez_Y, theta, gamma, sunsky->zenith_Y);
+
+	if (sunsky->sky_exposure != 0.0f)
+		Y = 1.0 - exp(Y * sunsky->sky_exposure);
+
+	X = (x / y) * Y;
+	Z = ((1 - x - y) / y) * Y;
+
+	color_out[0] = X;
+	color_out[1] = Y;
+	color_out[2] = Z;
+}
+
+/**
+ * GetSkyXYZRadiancef:
+ * this function computes sky radiance for the view direction `varg', using the sunSky values
+ * parameters:
+ * sunsky, contains sun and sky parameters
+ * varg, is the view direction; it is normalized into a local copy first
+ * color_out, is computed color that shows sky radiance in XYZ color format
+ * */
+void GetSkyXYZRadiancef(struct SunSky *sunsky, const float varg[3], float color_out[3])
+{
+	float dir[3];
+	float view_theta, view_phi;
+
+	normalize_v3_v3(dir, varg);
+	/* raise z to at least 0.001 and renormalize before converting to angles */
+	if (dir[2] < 0.001f) {
+		dir[2] = 0.001f;
+		normalize_v3(dir);
+	}
+
+	DirectionToThetaPhi(dir, &view_theta, &view_phi);
+	GetSkyXYZRadiance(sunsky, view_theta, view_phi, color_out);
+}
+
+/**
+ * ComputeAttenuatedSunlight:
+ * this function computes attenuated sun light based on sun's theta and atmosphere turbidity
+ * parameters:
+ * theta, is sun's theta
+ * turbidity: is atmosphere turbidity (taken as an integer)
+ * fTau: receives the computed attenuation, one value per color channel
+ * */
+static void ComputeAttenuatedSunlight(float theta, int turbidity, float fTau[3])
+{
+	/* value each of the three channels is evaluated at */
+	const float wavelength[3] = {0.65f, 0.57f, 0.475f};
+	const float alpha = 1.3f;
+	float beta;
+	float mass;
+	int ch;
+
+	/* linear in the turbidity */
+	beta = 0.04608365822050f * turbidity - 0.04586025928522f;
+
+	/* depends on the sun angle only, so it is shared by all channels */
+	mass = 1.0f / (cosf(theta) + 0.15f * powf(93.885f - theta / (float)M_PI * 180.0f, -1.253f));
+
+	for (ch = 0; ch < 3; ch++) {
+		float tau_rayleigh;
+		float tau_aerosol;
+
+		/* Rayleigh Scattering */
+		tau_rayleigh = expf(-mass * 0.008735f * powf(wavelength[ch], (float)(-4.08f)));
+
+		/* Aerosol (water + dust) attenuation */
+		tau_aerosol = exp(-mass * beta * powf(wavelength[ch], -alpha));
+
+		fTau[ch] = tau_rayleigh * tau_aerosol;
+	}
+}
+
+/**
+ * InitAtmosphere:
+ * this function initializes the atmosphere members of the sunSky structure from user input parameters.
+ * parameters:
+ * sunSky, contains information about sun; the atmosphere (atm_*) members are initialized in this function
+ * sun_intens, shows sun intensity value
+ * mief, Mie scattering factor, this factor is currently called with 1.0
+ * rayf, Rayleigh scattering factor, this factor is currently called with 1.0
+ * inscattf, inscatter light factor that ranges from 0.0 to 1.0, 0.0 means no inscatter light and 1.0 means full inscatter light
+ * extincf, extinction light factor that ranges from 0.0 to 1.0, 0.0 means no extinction and 1.0 means full extinction
+ * disf, is distance factor, multiplied with a pixel's z value to compute each pixel's distance to camera
+ * */
+void InitAtmosphere(struct SunSky *sunSky, float sun_intens, float mief, float rayf,
+                    float inscattf, float extincf, float disf)
+{
+	const float pi = M_PI;
+	const float n = 1.003f;  /* refractive index */
+	const float N = 2.545e25;
+	const float pn = 0.035f;
+	const float T = 2.0f;
+	float fTemp, fTemp2, fTemp3, fBeta, fBetaDash;
+	float c = (6.544f * T - 6.51f) * 1e-17f;
+	float K[3] = {0.685f, 0.679f, 0.670f};
+	float vBetaMieTemp[3];
+
+	float fLambda[3], fLambda2[3], fLambda4[3];
+	float vLambda2[3];
+	float vLambda4[3];
+
+	int i;
+
+	sunSky->atm_SunIntensity = sun_intens;
+	sunSky->atm_BetaMieMultiplier  = mief;
+	sunSky->atm_BetaRayMultiplier = rayf;
+	sunSky->atm_InscatteringMultiplier = inscattf;
+	sunSky->atm_ExtinctionMultiplier = extincf;
+	sunSky->atm_DistanceMultiplier = disf;
+
+	sunSky->atm_HGg = 0.8;
+
+	fLambda[0]  = 1 / 650e-9f;
+	fLambda[1]  = 1 / 570e-9f;
+	fLambda[2]  = 1 / 475e-9f;
+	for (i = 0; i < 3; i++) {
+		fLambda2[i] = fLambda[i] * fLambda[i];
+		fLambda4[i] = fLambda2[i] * fLambda2[i];
+	}
+
+	vLambda2[0] = fLambda2[0];
+	vLambda2[1] = fLambda2[1];
+	vLambda2[2] = fLambda2[2];
+
+	vLambda4[0] = fLambda4[0];
+	vLambda4[1] = fLambda4[1];
+	vLambda4[2] = fLambda4[2];
+
+	/* Rayleigh scattering constants. */
+	fTemp = pi * pi * (n * n - 1) * (n * n - 1) * (6 + 3 * pn) / (6 - 7 * pn) / N;
+	fBeta = 8 * fTemp * pi / 3;
+
+	VEC3OPF(sunSky->atm_BetaRay, vLambda4, *, fBeta);
+	fBetaDash = fTemp / 2;
+	VEC3OPF(sunSky->atm_BetaDashRay, vLambda4, *, fBetaDash);
+
+
+	/* Mie scattering constants. */
+	fTemp2 = 0.434f * c * (2 * pi) * (2 * pi) * 0.5f;
+	VEC3OPF(sunSky->atm_BetaDashMie, vLambda2, *, fTemp2);
+
+	fTemp3 = 0.434f * c * pi * (2 * pi) * (2 * pi);
+
+	VEC3OPV(vBetaMieTemp, K, *, fLambda);
+	VEC3OPF(sunSky->atm_BetaMie, vBetaMieTemp, *, fTemp3);
+
+}
+
+/**
+ * AtmospherePixleShader:
+ * this function applies the atmosphere effect to a pixel color `rgb' at distance `s'
+ * parameters:
+ * sunSky, contains information about sun parameters and user values; only atm_BetaRM is written
+ * view, is camera view vector
+ * s, is distance (multiplied by atm_DistanceMultiplier here)
+ * rgb, contains rendered color value for a pixel; it is modified in place
+ * */
+void AtmospherePixleShader(struct SunSky *sunSky, float view[3], float s, float rgb[3])
+{
+	float costheta;
+	float Phase_1;
+	float Phase_2;
+	float sunColor[3];
+
+	float E[3];
+	float E1[3];
+	/* scaled copies: writing them back would apply the multipliers again on every shaded pixel */
+	float betaRay[3], betaMie[3];
+	float I[3];
+	float fTemp;
+	float vTemp1[3], vTemp2[3];
+
+	float sunDirection[3];
+
+	s *= sunSky->atm_DistanceMultiplier;
+
+	sunDirection[0] = sunSky->toSun[0];
+	sunDirection[1] = sunSky->toSun[1];
+	sunDirection[2] = sunSky->toSun[2];
+
+	costheta = dot_v3v3(view, sunDirection); /* cos(theta) */
+	Phase_1 = 1 + (costheta * costheta); /* Phase_1 */
+
+	VEC3OPF(betaRay, sunSky->atm_BetaRay, *, sunSky->atm_BetaRayMultiplier);
+	VEC3OPF(betaMie, sunSky->atm_BetaMie, *, sunSky->atm_BetaMieMultiplier);
+	VEC3OPV(sunSky->atm_BetaRM, betaRay, +, betaMie);
+
+	/* e^(-(beta_1 + beta_2) * s) = E1 */
+	VEC3OPF(E1, sunSky->atm_BetaRM, *, -s / (float)M_LN2);
+	E1[0] = exp(E1[0]);
+	E1[1] = exp(E1[1]);
+	E1[2] = exp(E1[2]);
+
+	copy_v3_v3(E, E1);
+
+	/* Phase2(theta) = (1-g^2)/(1+g-2g*cos(theta))^(3/2) */
+	fTemp = 1 + sunSky->atm_HGg - 2 * sunSky->atm_HGg * costheta;
+	fTemp = fTemp * sqrtf(fTemp);
+	Phase_2 = (1 - sunSky->atm_HGg * sunSky->atm_HGg) / fTemp;
+
+	VEC3OPF(vTemp1, sunSky->atm_BetaDashRay, *, Phase_1);
+	VEC3OPF(vTemp2, sunSky->atm_BetaDashMie, *, Phase_2);
+
+	VEC3OPV(vTemp1, vTemp1, +, vTemp2);
+	FOPVEC3(vTemp2, 1.0f, -, E1);
+	VEC3OPV(vTemp1, vTemp1, *, vTemp2);
+
+	FOPVEC3(vTemp2, 1.0f, /, sunSky->atm_BetaRM);
+
+	VEC3OPV(I, vTemp1, *, vTemp2);
+
+	VEC3OPF(I, I, *, sunSky->atm_InscatteringMultiplier);
+	VEC3OPF(E, E, *, sunSky->atm_ExtinctionMultiplier);
+
+	/* scale to color sun */
+	ComputeAttenuatedSunlight(sunSky->theta, sunSky->turbidity, sunColor);
+	VEC3OPV(E, E, *, sunColor);
+
+	VEC3OPF(I, I, *, sunSky->atm_SunIntensity);
+
+	VEC3OPV(rgb, rgb, *, E);
+	VEC3OPV(rgb, rgb, +, I);
+}
+
+#undef VEC3OPV
+#undef VEC3OPF
+#undef FOPVEC3
+
+/* EOF */
diff --git a/source/blender/render/intern/source/volume_precache.c b/source/blender/render/intern/source/volume_precache.c
new file mode 100644
index 00000000000..8e79f309814
--- /dev/null
+++ b/source/blender/render/intern/source/volume_precache.c
@@ -0,0 +1,855 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Matt Ebb, Raúl Fernández Hernández (Farsthary).
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/volume_precache.c
+ *  \ingroup render
+ */
+
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_blenlib.h"
+#include "BLI_math.h"
+#include "BLI_task.h"
+#include "BLI_threads.h"
+#include "BLI_voxel.h"
+#include "BLI_utildefines.h"
+
+#include "BLT_translation.h"
+
+#include "PIL_time.h"
+
+#include "RE_shader_ext.h"
+
+#include "DNA_material_types.h"
+
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "render_types.h"
+#include "rendercore.h"
+#include "renderdatabase.h"
+#include "volumetric.h"
+#include "volume_precache.h"
+
+#include "atomic_ops.h"
+
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* *** utility code to set up an individual raytree for objectinstance, for checking inside/outside *** */
+
+/* Counts intersections along a ray: after every hit the ray is restarted from the hit point, until it
+ * hits nothing or `limit' hits were used. The resulting count (depth) tells inside from outside the mesh */
+static int intersect_outside_volume(RayObject *tree, Isect *isect, float *offset, int limit, int depth)
+{
+	int hits = depth;
+	(void)offset;
+
+	while (limit != 0 && RE_rayobject_raycast(tree, isect)) {
+		/* restart the ray from the hit point */
+		isect->start[0] = isect->start[0] + isect->dist*isect->dir[0];
+		isect->start[1] = isect->start[1] + isect->dist*isect->dir[1];
+		isect->start[2] = isect->start[2] + isect->dist*isect->dir[2];
+
+		/* the face and object just hit become the origin of the next cast */
+		isect->dist = FLT_MAX;
+		isect->skip = RE_SKIP_VLR_NEIGHBOUR;
+		isect->orig.face= isect->hit.face;
+		isect->orig.ob= isect->hit.ob;
+		limit--;
+		hits++;
+	}
+	return hits;
+}
+
+/* Casts a ray from `co' along (0, 0, 1) and counts the intersections to tell if the point is inside an ObjectInstanceRen */
+static int point_inside_obi(RayObject *tree, ObjectInstanceRen *obi, const float co[3])
+{
+	const int max_hits = 20;
+	float up[3] = {0.0f, 0.0f, 1.0f};
+	Isect ray = {{0}};
+	int crossings;
+
+	/* ray origin, direction and length */
+	copy_v3_v3(ray.start, co);
+	copy_v3_v3(ray.dir, up);
+	ray.dist = FLT_MAX;
+
+	/* trace settings: no previous hit, no originating face/object */
+	ray.mode = RE_RAY_MIRROR;
+	ray.lay = -1;
+	ray.last_hit = NULL;
+	ray.orig.face = NULL;
+	ray.orig.ob = NULL;
+
+	RE_instance_rotate_ray(obi, &ray);
+	crossings = intersect_outside_volume(tree, &ray, up, max_hits, 0);
+	RE_instance_rotate_ray_restore(obi, &ray);
+
+	/* an odd number of intersections means the point is inside, an even number outside */
+	return (crossings % 2 != 0);
+}
+
+/* find the bounding box of an objectinstance in global space; the result is cached in its volume precache */
+void global_bounds_obi(Render *re, ObjectInstanceRen *obi, float bbmin[3], float bbmax[3])
+{
+	ObjectRen *obr = obi->obr;
+	VolumePrecache *vp = obi->volume_precache;
+	VertRen *ver= NULL;
+	float co[3];
+	int a;
+
+	if (vp->bbmin != NULL && vp->bbmax != NULL) {
+		copy_v3_v3(bbmin, vp->bbmin);
+		copy_v3_v3(bbmax, vp->bbmax);
+		return;
+	}
+
+	vp->bbmin = MEM_callocN(sizeof(float)*3, "volume precache min boundbox corner");
+	vp->bbmax = MEM_callocN(sizeof(float)*3, "volume precache max boundbox corner");
+
+	/* the loop accumulates into the cached arrays, so those need the min/max start values: left
+	 * zeroed they always stretch the box to the origin. Without vertices the box stays all zero */
+	if (obr->totvert > 0) { INIT_MINMAX(vp->bbmin, vp->bbmax); }
+
+	for (a=0; a<obr->totvert; a++) {
+		if ((a & 255)==0) ver= obr->vertnodes[a>>8].vert;
+		else ver++;
+		copy_v3_v3(co, ver->co);
+
+		/* transformed object instance in camera space */
+		if (obi->flag & R_TRANSFORMED)
+			mul_m4_v3(obi->mat, co);
+
+		/* convert to global space */
+		mul_m4_v3(re->viewinv, co);
+
+		minmax_v3v3_v3(vp->bbmin, vp->bbmax, co);
+	}
+
+	copy_v3_v3(bbmin, vp->bbmin);
+	copy_v3_v3(bbmax, vp->bbmax);
+}
+
+/* *** light cache filtering *** */
+
+static float get_avg_surrounds(float *cache, int *res, int xx, int yy, int zz)
+{
+	/* Average of the positive values among the voxel (xx, yy, zz) and its direct neighbours.
+	 * Returns 0 when none of them is positive. */
+	/* the 3x3x3 neighbourhood, clipped to the grid */
+	const int x_min = (xx - 1 >= 0) ? xx - 1 : 0;
+	const int y_min = (yy - 1 >= 0) ? yy - 1 : 0;
+	const int z_min = (zz - 1 >= 0) ? zz - 1 : 0;
+	const int x_max = (xx + 1 <= res[0] - 1) ? xx + 1 : res[0] - 1;
+	const int y_max = (yy + 1 <= res[1] - 1) ? yy + 1 : res[1] - 1;
+	const int z_max = (zz + 1 <= res[2] - 1) ? zz + 1 : res[2] - 1;
+	float sum = 0.0f;
+	int count = 0;
+	int x, y, z;
+
+	for (z = z_min; z <= z_max; z++) {
+		for (y = y_min; y <= y_max; y++) {
+			for (x = x_min; x <= x_max; x++) {
+				const int64_t i = BLI_VOXEL_INDEX(x, y, z, res);
+
+				/* only positive voxels take part in the average */
+				if (cache[i] > 0.0f) {
+					sum += cache[i];
+					count++;
+				}
+			}
+		}
+	}
+
+	if (count > 0) {
+		sum /= count;
+	}
+
+	return sum;
+}
+
+/* Filters the edges of the light cache, where there was no volume originally.
+ * Every voxel holding a negative value (written for points outside of the mesh) is replaced,
+ * per channel, by the average of the positive voxels surrounding it.
+ * The pass works in place, a bit like a 'dilate' filter */
+static void lightcache_filter(VolumePrecache *vp)
+{
+	float *channel[3];
+	int x, y, z, c;
+	channel[0] = vp->data_r;
+	channel[1] = vp->data_g;
+	channel[2] = vp->data_b;
+
+	for (z = 0; z < vp->res[2]; z++) {
+		for (y = 0; y < vp->res[1]; y++) {
+			for (x = 0; x < vp->res[0]; x++) {
+				const int64_t i = BLI_VOXEL_INDEX(x, y, z, vp->res);
+				for (c = 0; c < 3; c++) {
+					if (channel[c][i] < 0.0f)
+						channel[c][i] = get_avg_surrounds(channel[c], vp->res, x, y, z);
+				}
+			}
+		}
+	}
+}
+
+#if 0  /* disabled: variant of lightcache_filter() that writes the averages into temporary buffers */
+static void lightcache_filter2(VolumePrecache *vp)
+{
+	int x, y, z;
+	float *new_r, *new_g, *new_b;
+	int field_size = vp->res[0]*vp->res[1]*vp->res[2]*sizeof(float);
+
+	new_r = MEM_mallocN(field_size, "temp buffer for light cache filter r channel");
+	new_g = MEM_mallocN(field_size, "temp buffer for light cache filter g channel");
+	new_b = MEM_mallocN(field_size, "temp buffer for light cache filter b channel");
+
+	memcpy(new_r, vp->data_r, field_size);
+	memcpy(new_g, vp->data_g, field_size);
+	memcpy(new_b, vp->data_b, field_size);
+
+	for (z=0; z < vp->res[2]; z++) {
+		for (y=0; y < vp->res[1]; y++) {
+			for (x=0; x < vp->res[0]; x++) {
+				/* trigger for outside mesh */
+				const int64_t i = BLI_VOXEL_INDEX(x, y, z, vp->res);
+				if (vp->data_r[i] < -0.f)
+					new_r[i] = get_avg_surrounds(vp->data_r, vp->res, x, y, z);
+				if (vp->data_g[i] < -0.f)
+					new_g[i] = get_avg_surrounds(vp->data_g, vp->res, x, y, z);
+				if (vp->data_b[i] < -0.f)
+					new_b[i] = get_avg_surrounds(vp->data_b, vp->res, x, y, z);
+			}
+		}
+	}
+
+	SWAP(float *, vp->data_r, new_r);	/* after the swaps new_* point at the old cache buffers */
+	SWAP(float *, vp->data_g, new_g);
+	SWAP(float *, vp->data_b, new_b);
+
+	if (new_r) { MEM_freeN(new_r); new_r=NULL; }
+	if (new_g) { MEM_freeN(new_g); new_g=NULL; }
+	if (new_b) { MEM_freeN(new_b); new_b=NULL; }
+}
+#endif
+
+/* index into a grid padded by 1 voxel on every side (n is the unpadded resolution), for boundary simulation */
+BLI_INLINE int64_t ms_I(int x, int y, int z, const int *n)
+{
+	/* different ordering to light cache: x has the largest stride here, z the smallest */
+	return ((int64_t)x * (int64_t)(n[1] + 2) * (int64_t)(n[2] + 2) +
+	        (int64_t)y * (int64_t)(n[2] + 2) +
+	        (int64_t)z);
+}
+
+/* index into a grid padded by 1 voxel on every side (n is the unpadded resolution), for boundary simulation */
+BLI_INLINE int64_t v_I_pad(int x, int y, int z, const int *n)
+{
+	/* same ordering as the light cache, with padding: z has the largest stride, x the smallest */
+	return ((int64_t)z * (int64_t)(n[1] + 2) * (int64_t)(n[0] + 2) +
+	        (int64_t)y * (int64_t)(n[0] + 2) +
+	        (int64_t)x);
+}
+
+BLI_INLINE int64_t lc_to_ms_I(int x, int y, int z, const int *n)
+{
+	/* unpadded index for 1-based coordinates; x has the largest stride. NOTE(review): this ordering differs from v_I_pad() - confirm for non-cubic grids */
+	return ((int64_t)(x - 1) * ((int64_t)n[1] * (int64_t)n[2]) +
+	        (int64_t)(y - 1) * ((int64_t)n[2]) +
+	        (int64_t)(z - 1));
+}
+
+/* *** multiple scattering approximation *** */
+
+/* get the total amount of positive light energy in the light cache. used to normalize after multiple scattering */
+static float total_ss_energy(Render *re, int do_test_break, VolumePrecache *vp)
+{
+	int x, y, z;
+	const int *res = vp->res;
+	/* summed in double: a float total loses the small per-voxel terms once it has grown large */
+	double energy = 0.0;
+
+	for (z=0; z < res[2]; z++) {
+		for (y=0; y < res[1]; y++) {
+			for (x=0; x < res[0]; x++) {
+				const int64_t i = BLI_VOXEL_INDEX(x, y, z, res);
+				if (vp->data_r[i] > 0.f) energy += vp->data_r[i];
+				if (vp->data_g[i] > 0.f) energy += vp->data_g[i];
+				if (vp->data_b[i] > 0.f) energy += vp->data_b[i];
+			}
+		}
+
+		if (do_test_break && re->test_break(re->tbh)) break;
+	}
+
+	return (float)energy;
+}
+
+static float total_ms_energy(Render *re, int do_test_break, float *sr, float *sg, float *sb, const int res[3])
+{
+	int x, y, z;
+	/* positive energy of the three padded buffers; summed in double so small terms are not lost */
+	double energy = 0.0;
+
+	for (z=1;z<=res[2];z++) {
+		for (y=1;y<=res[1];y++) {
+			for (x=1;x<=res[0];x++) {
+				const int64_t i = ms_I(x, y, z, res);
+				if (sr[i] > 0.f) energy += sr[i];
+				if (sg[i] > 0.f) energy += sg[i];
+				if (sb[i] > 0.f) energy += sb[i];
+			}
+		}
+
+		if (do_test_break && re->test_break(re->tbh)) break;
+	}
+
+	return (float)energy;
+}
+
+/**
+ * One diffusion sweep: relaxes \a x0 into the interior voxels of \a x.
+ * \param x0, x: source and destination on the padded grid, must not alias
+ * \param n: the unpadded resolution
+ */
+static void ms_diffuse(Render *re, int do_test_break, const float *x0, float *x, float diff, const int n[3])
+{
+	int i, j, k;
+	const float dt = VOL_MS_TIMESTEP;
+	int64_t size = (int64_t)n[0] * (int64_t)n[1] * (int64_t)n[2];
+	const float a = dt * diff * size;
+
+	/* Every value is computed from x0 alone, so repeating the sweep (it used to run 20 times)
+	 * only rewrites identical numbers; a single sweep gives the same result. */
+	for (k=1; k<=n[2]; k++) {
+		for (j=1; j<=n[1]; j++) {
+			for (i=1; i<=n[0]; i++) {
+				x[v_I_pad(i, j, k, n)] =
+				        ((x0[v_I_pad(i, j, k, n)]) + (
+				         (x0[v_I_pad(i - 1, j, k, n)] +
+				          x0[v_I_pad(i + 1, j, k, n)] +
+				          x0[v_I_pad(i, j - 1, k, n)] +
+				          x0[v_I_pad(i, j + 1, k, n)] +
+				          x0[v_I_pad(i, j, k - 1, n)] +
+				          x0[v_I_pad(i, j, k + 1, n)]) * a) / (1 + 6 * a));
+			}
+		}
+
+		if (do_test_break && re->test_break(re->tbh)) break;
+	}
+}
+
+/* Approximates multiple scattering: repeatedly adds the light cache as source and diffuses it, then
+ * blends the energy-normalized result back into vp (a zero contribution if no energy was produced) */
+static void multiple_scattering_diffusion(Render *re, VolumePrecache *vp, Material *ma)
+{
+	const float diff = ma->vol.ms_diff * 0.001f; 	/* compensate for scaling for a nicer UI range */
+	const int simframes = (int)(ma->vol.ms_spread * (float)max_iii(vp->res[0], vp->res[1], vp->res[2]));
+	const int shade_type = ma->vol.shade_type;
+	float fac = ma->vol.ms_intensity;
+
+	int x, y, z, m;
+	const int *n = vp->res;
+	/* counts are kept in size_t/double: the plain int products overflow for large grids */
+	const size_t size = (size_t)(n[0]+2) * (size_t)(n[1]+2) * (size_t)(n[2]+2);
+	const int do_test_break = (size > 100000);
+	const double total = (double)n[0] * (double)n[1] * (double)n[2] * (double)simframes;
+	double time, lasttime= PIL_check_seconds_timer();
+	double done = 0.0;	/* voxels processed so far; a float counter stops counting at 2^24 */
+	float origf;	/* factor for blending in original light cache */
+	float energy_ss, energy_ms;
+
+	float *sr0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+	float *sr=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+	float *sg0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+	float *sg=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+	float *sb0=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+	float *sb=(float *)MEM_callocN(size*sizeof(float), "temporary multiple scattering buffer");
+
+	energy_ss = total_ss_energy(re, do_test_break, vp);
+
+	/* Scattering as diffusion pass */
+	for (m=0; m<simframes; m++) {
+		/* add sources */
+		for (z=1; z<=n[2]; z++) {
+			for (y=1; y<=n[1]; y++) {
+				for (x=1; x<=n[0]; x++) {
+					const int64_t i = lc_to_ms_I(x, y, z, n);	//lc index
+					const int64_t j = ms_I(x, y, z, n);			//ms index
+					if (vp->data_r[i] > 0.0f)
+						sr[j] += vp->data_r[i];
+					if (vp->data_g[i] > 0.0f)
+						sg[j] += vp->data_g[i];
+					if (vp->data_b[i] > 0.0f)
+						sb[j] += vp->data_b[i];
+				}
+				/* Displays progress every second; the timer is read once per row of voxels */
+				done += (double)n[0];
+				time= PIL_check_seconds_timer();
+				if (time-lasttime>1.0) {
+					char str[64];
+					BLI_snprintf(str, sizeof(str), IFACE_("Simulating multiple scattering: %d%%"),
+					             (int)(100.0 * (done / total)));
+					re->i.infostr = str;
+					re->stats_draw(re->sdh, &re->i);
+					re->i.infostr = NULL;
+					lasttime= time;
+				}
+			}
+
+			if (do_test_break && re->test_break(re->tbh)) break;
+		}
+
+		if (re->test_break(re->tbh)) break;
+
+		SWAP(float *, sr, sr0);
+		SWAP(float *, sg, sg0);
+		SWAP(float *, sb, sb0);
+
+		/* main diffusion simulation */
+		ms_diffuse(re, do_test_break, sr0, sr, diff, n);
+		ms_diffuse(re, do_test_break, sg0, sg, diff, n);
+		ms_diffuse(re, do_test_break, sb0, sb, diff, n);
+
+		if (re->test_break(re->tbh)) break;
+	}
+
+	/* normalization factor to conserve energy */
+	energy_ms = total_ms_energy(re, do_test_break, sr, sg, sb, n);
+	/* no scattered energy (e.g. simframes == 0): dividing by 0 would fill the cache with inf/NaN */
+	fac = (energy_ms > 0.0f) ? fac * (energy_ss / energy_ms) : 0.0f;
+
+	/* blend multiple scattering back in the light cache */
+	if (shade_type == MA_VOL_SHADE_SHADEDPLUSMULTIPLE) {
+		/* conserve energy - half single, half multiple */
+		origf = 0.5f;
+		fac *= 0.5f;
+	}
+	else {
+		origf = 0.0f;
+	}
+
+	for (z=1;z<=n[2];z++) {
+		for (y=1;y<=n[1];y++) {
+			for (x=1;x<=n[0];x++) {
+				const int64_t i = lc_to_ms_I(x, y, z, n);	//lc index
+				const int64_t j = ms_I(x, y, z, n);			//ms index
+
+				vp->data_r[i] = origf * vp->data_r[i] + fac * sr[j];
+				vp->data_g[i] = origf * vp->data_g[i] + fac * sg[j];
+				vp->data_b[i] = origf * vp->data_b[i] + fac * sb[j];
+			}
+		}
+
+		if (do_test_break && re->test_break(re->tbh)) break;
+	}
+
+	MEM_freeN(sr0);
+	MEM_freeN(sr);
+	MEM_freeN(sg0);
+	MEM_freeN(sg);
+	MEM_freeN(sb0);
+	MEM_freeN(sb);
+}
+
+
+
+#if 0  /* debug stuff: prints the number, done flag and voxel ranges of a VolPrecachePart */
+static void *vol_precache_part_test(void *data)
+{
+	VolPrecachePart *pa = data;
+
+	printf("part number: %d\n", pa->num);
+	printf("done: %d\n", pa->done);
+	printf("x min: %d   x max: %d\n", pa->minx, pa->maxx);
+	printf("y min: %d   y max: %d\n", pa->miny, pa->maxy);
+	printf("z min: %d   z max: %d\n", pa->minz, pa->maxz);
+
+	return NULL;
+}
+#endif
+
+/* Iterate over the 3d voxel grid, and fill the voxels with scattering information
+ *
+ * It's stored in memory as 3 big float grids next to each other, one for each RGB channel.
+ * I'm guessing the memory alignment may work out better this way for the purposes
+ * of doing linear interpolation, but I haven't actually tested this theory! :)
+ */
+typedef struct VolPrecacheState {
+	double lasttime;	/* compared against PIL_check_seconds_timer() to limit progress output to once a second */
+	unsigned int doneparts;	/* number of finished parts, incremented atomically by every task */
+	unsigned int totparts;	/* total number of parts, the divisor of the progress ratio */
+} VolPrecacheState;
+
+static void vol_precache_part(TaskPool * __restrict pool, void *taskdata, int UNUSED(threadid))
+{
+	VolPrecacheState *state = (VolPrecacheState *)BLI_task_pool_userdata(pool);
+	VolPrecachePart *pa = (VolPrecachePart *)taskdata;
+	Render *re = pa->re;
+	/* Task callback: fills the voxels of one sub-box (pa) of the precache grid. */
+	ObjectInstanceRen *obi = pa->obi;
+	RayObject *tree = pa->tree;
+	ShadeInput *shi = pa->shi;
+	float scatter_col[3] = {0.f, 0.f, 0.f};
+	float co[3], cco[3], view[3];
+	int x, y, z;
+	int res[3];
+	double time;
+
+	if (re->test_break && re->test_break(re->tbh))
+		return;
+
+	/* NOTE: the early return above also skips the doneparts increment at the end */
+
+	res[0]= pa->res[0];
+	res[1]= pa->res[1];
+	res[2]= pa->res[2];
+
+	for (z= pa->minz; z < pa->maxz; z++) {
+		co[2] = pa->bbmin[2] + (pa->voxel[2] * (z + 0.5f));
+
+		for (y= pa->miny; y < pa->maxy; y++) {
+			co[1] = pa->bbmin[1] + (pa->voxel[1] * (y + 0.5f));
+
+			for (x=pa->minx; x < pa->maxx; x++) {
+				int64_t i;
+				co[0] = pa->bbmin[0] + (pa->voxel[0] * (x + 0.5f));
+				/* cancel check: this break only leaves the innermost (x) loop */
+				if (re->test_break && re->test_break(re->tbh))
+					break;
+
+				/* convert from world->camera space for shading */
+				mul_v3_m4v3(cco, pa->viewmat, co);
+
+				i = BLI_VOXEL_INDEX(x, y, z, res);
+
+				/* don't bother if the point is not inside the volume mesh, store -1 in all channels */
+				if (!point_inside_obi(tree, obi, cco)) {
+					obi->volume_precache->data_r[i] = -1.0f;
+					obi->volume_precache->data_g[i] = -1.0f;
+					obi->volume_precache->data_b[i] = -1.0f;
+					continue;
+				}
+
+				copy_v3_v3(view, cco);
+				normalize_v3(view);
+				vol_get_scattering(shi, scatter_col, cco, view);
+
+				obi->volume_precache->data_r[i] = scatter_col[0];
+				obi->volume_precache->data_g[i] = scatter_col[1];
+				obi->volume_precache->data_b[i] = scatter_col[2];
+
+			}
+		}
+	}
+
+	unsigned int doneparts = atomic_add_and_fetch_u(&state->doneparts, 1);
+	/* redraw the progress text when more than a second passed since the last redraw */
+	time = PIL_check_seconds_timer();
+	if (time - state->lasttime > 1.0) {
+		ThreadMutex *mutex = BLI_task_pool_user_mutex(pool);
+		/* if the trylock fails this task simply skips the redraw */
+		if (BLI_mutex_trylock(mutex)) {
+			char str[64];
+			float ratio = (float)doneparts/(float)state->totparts;
+			BLI_snprintf(str, sizeof(str), IFACE_("Precaching volume: %d%%"), (int)(100.0f * ratio));
+			re->i.infostr = str;
+			re->stats_draw(re->sdh, &re->i);
+			re->i.infostr = NULL;
+			state->lasttime = time;
+
+			BLI_mutex_unlock(mutex);
+		}
+	}
+}
+
+static void precache_setup_shadeinput(Render *re, ObjectInstanceRen *obi, Material *ma, ShadeInput *shi)
+{
+	/* start from an all-zero ShadeInput, then set the few fields below */
+	memset(shi, 0, sizeof(*shi));
+	shi->obi = obi;
+	shi->obr = obi->obr;
+	shi->vlr = NULL;
+	shi->lay = re->lay;
+	shi->mat = ma;
+	shi->har = ma->har;
+	shi->mask = shi->depth = 1;
+	memcpy(&shi->r, &ma->r, 23 * sizeof(float));  /* note, keep this synced with render_types.h */
+}
+
+static void precache_launch_parts(Render *re, RayObject *tree, ShadeInput *shi, ObjectInstanceRen *obi)
+{
+	/* Splits the precache grid of obi into parts (threads^3 sub-boxes) and
+	 * pushes one vol_precache_part task per part, then works and waits
+	 * until the pool is done. Does nothing when obi has no precache. */
+	VolumePrecache *vp = obi->volume_precache;
+	const int totthread = re->r.threads;
+	TaskScheduler *task_scheduler;
+	TaskPool *task_pool;
+	VolPrecacheState state;
+	float bbmin[3], bbmax[3];
+	float voxel[3];
+	const int *res;
+	int parts[3];
+	int px, py, pz;
+	int num = 0;
+
+	if (!vp) return;
+
+	/* currently we just subdivide the box, number of threads per side */
+	parts[0] = parts[1] = parts[2] = totthread;
+	res = vp->res;
+
+	/* progress state shared by all tasks through the pool userdata */
+	memset(&state, 0, sizeof(state));
+	state.doneparts = 0;
+	state.totparts = parts[0]*parts[1]*parts[2];
+	state.lasttime = PIL_check_seconds_timer();
+
+	task_scheduler = BLI_task_scheduler_create(totthread);
+	task_pool = BLI_task_pool_create(task_scheduler, &state);
+
+	/* using boundbox in worldspace */
+	global_bounds_obi(re, obi, bbmin, bbmax);
+	sub_v3_v3v3(voxel, bbmax, bbmin);
+
+	/* size of a single voxel along each axis */
+	voxel[0] /= (float)res[0];
+	voxel[1] /= (float)res[1];
+	voxel[2] /= (float)res[2];
+
+	/* each part covers [min, max) voxels per axis, clamped to the grid resolution */
+	for (px = 0; px < parts[0]; px++) {
+		const int sizex = ceil(res[0] / (float)parts[0]);
+		const int minx = px * sizex;
+		int maxx = minx + sizex;
+		if (maxx > res[0]) maxx = res[0];
+
+		for (py = 0; py < parts[1]; py++) {
+			const int sizey = ceil(res[1] / (float)parts[1]);
+			const int miny = py * sizey;
+			int maxy = miny + sizey;
+			if (maxy > res[1]) maxy = res[1];
+
+			for (pz = 0; pz < parts[2]; pz++) {
+				VolPrecachePart *pa = MEM_callocN(sizeof(VolPrecachePart), "new precache part");
+				const int sizez = ceil(res[2] / (float)parts[2]);
+				const int minz = pz * sizez;
+				int maxz = minz + sizez;
+				if (maxz > res[2]) maxz = res[2];
+
+				pa->re = re;
+				pa->num = num++;
+				pa->tree = tree;
+				pa->shi = shi;
+				pa->obi = obi;
+				copy_m4_m4(pa->viewmat, re->viewmat);
+
+				copy_v3_v3(pa->bbmin, bbmin);
+				copy_v3_v3(pa->voxel, voxel);
+				copy_v3_v3_int(pa->res, res);
+
+				pa->minx = minx; pa->maxx = maxx;
+				pa->miny = miny; pa->maxy = maxy;
+				pa->minz = minz; pa->maxz = maxz;
+
+				/* 'true': presumably lets the pool free pa after the task ran - TODO confirm */
+				BLI_task_pool_push(task_pool, vol_precache_part, pa, true, TASK_PRIORITY_HIGH);
+			}
+		}
+	}
+
+	/* work and wait until tasks are done */
+	BLI_task_pool_work_and_wait(task_pool);
+
+	/* free */
+	BLI_task_pool_free(task_pool);
+	BLI_task_scheduler_free(task_scheduler);
+}
+
+/* calculate resolution from bounding box in world space; returns 0 when no valid grid can be made */
+static int precache_resolution(Render *re, VolumePrecache *vp, ObjectInstanceRen *obi, int res)
+{
+	float dim[3], div;
+	float bbmin[3], bbmax[3];
+
+	global_bounds_obi(re, obi, bbmin, bbmax);
+	sub_v3_v3v3(dim, bbmax, bbmin);
+
+	/* a degenerate (zero-size) bound box would divide by zero and feed NaN to ceil() */
+	div = max_fff(dim[0], dim[1], dim[2]);
+	if (!(div > 0.0f)) return 0;
+
+	dim[0] /= div;
+	dim[1] /= div;
+	dim[2] /= div;
+	vp->res[0] = ceil(dim[0] * res);
+	vp->res[1] = ceil(dim[1] * res);
+	vp->res[2] = ceil(dim[2] * res);
+
+	if ((vp->res[0] < 1) || (vp->res[1] < 1) || (vp->res[2] < 1))
+		return 0;
+	return 1;
+}
+
+/* Precache a volume into a 3D voxel grid.
+ * The voxel grid is stored in the ObjectInstanceRen, aligned with the
+ * instance's global (world space) bounding box.
+ * Resolution is defined by the user. When the grid cannot be set up,
+ * obi->volume_precache is reset to NULL so no dangling pointer remains. */
+static void vol_precache_objectinstance_threads(Render *re, ObjectInstanceRen *obi, Material *ma)
+{
+	VolumePrecache *vp;
+	RayObject *tree;
+	ShadeInput shi;
+
+	R = *re;
+
+	/* create a raytree with just the faces of the instanced ObjectRen,
+	 * used for checking if the cached point is inside or outside. */
+	tree = makeraytree_object(&R, obi);
+	if (!tree) return;
+
+	vp = MEM_callocN(sizeof(VolumePrecache), "volume light cache");
+	obi->volume_precache = vp;
+
+	if (!precache_resolution(re, vp, obi, ma->vol.precache_resolution)) {
+		/* free_volume_precache() frees whatever this pointer holds, so clear it */
+		obi->volume_precache = NULL;
+		MEM_freeN(vp);
+		return;
+	}
+
+	vp->data_r = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data red channel");
+	vp->data_g = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data green channel");
+	vp->data_b = MEM_callocN(sizeof(float)*vp->res[0]*vp->res[1]*vp->res[2], "volume light cache data blue channel");
+	if (vp->data_r==NULL || vp->data_g==NULL || vp->data_b==NULL) {
+		/* free the channels that did get allocated, they would leak otherwise */
+		if (vp->data_r) MEM_freeN(vp->data_r);
+		if (vp->data_g) MEM_freeN(vp->data_g);
+		if (vp->data_b) MEM_freeN(vp->data_b);
+		obi->volume_precache = NULL;
+		MEM_freeN(vp);
+		return;
+	}
+
+	/* Need a shadeinput to calculate scattering */
+	precache_setup_shadeinput(re, obi, ma, &shi);
+	precache_launch_parts(re, tree, &shi, obi);
+
+	/* TODO: makeraytree_object saves the tree on OBI; freeing it here needs clearing those pointers too */
+
+	if (ELEM(ma->vol.shade_type, MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE)) {
+		/* this should be before the filtering */
+		multiple_scattering_diffusion(re, obi->volume_precache, ma);
+	}
+
+	lightcache_filter(obi->volume_precache);
+}
+
+static int using_lightcache(Material *ma)
+{
+	const int multiple = ELEM(ma->vol.shade_type, MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE);  /* these modes always use the cache */
+	return (multiple || ((ma->vol.shade_type == MA_VOL_SHADE_SHADED) && (ma->vol.shadeflag & MA_VOL_PRECACHESHADING)));
+}
+
+/* Pre-cache every object instance whose volume material uses the light cache.
+ * The objects/materials to consider were collected in re->volumes (convertblender.c). */
+void volume_precache(Render *re)
+{
+	ObjectInstanceRen *obi;
+	VolumeOb *vo;
+
+	re->i.infostr = IFACE_("Volume preprocessing");
+	re->stats_draw(re->sdh, &re->i);
+
+	for (vo = re->volumes.first; vo; vo = vo->next) {
+		if (!using_lightcache(vo->ma))
+			continue;
+
+		for (obi = re->instancetable.first; obi; obi = obi->next) {
+			if (obi->obr != vo->obr)
+				continue;
+
+			vol_precache_objectinstance_threads(re, obi, vo->ma);
+			if (re->test_break && re->test_break(re->tbh))
+				break;
+		}
+
+		if (re->test_break && re->test_break(re->tbh))
+			break;
+	}
+
+	re->i.infostr = NULL;
+	re->stats_draw(re->sdh, &re->i);
+}
+
+void free_volume_precache(Render *re)
+{
+	ObjectInstanceRen *obi;
+	/* free the cache of every instance that has one, then empty the re->volumes list */
+	for (obi = re->instancetable.first; obi; obi = obi->next) {
+		VolumePrecache *vp = obi->volume_precache;
+		if (vp == NULL) continue;
+		MEM_freeN(vp->data_r);
+		MEM_freeN(vp->data_g);
+		MEM_freeN(vp->data_b);
+		MEM_freeN(vp->bbmin);  /* NOTE(review): bbmin/bbmax are not assigned in the precache code visible here - confirm they are allocated */
+		MEM_freeN(vp->bbmax);
+		MEM_freeN(vp);
+		obi->volume_precache = NULL;
+	}
+
+	BLI_freelistN(&re->volumes);
+}
+
+int point_inside_volume_objectinstance(Render *re, ObjectInstanceRen *obi, const float co[3])
+{
+	/* Returns the result of point_inside_obi() for co, or 0 when no raytree could be made. */
+	RayObject *tree = makeraytree_object(re, obi);
+
+	if (tree == NULL)
+		return 0;
+
+	/* TODO: makeraytree_object creates a tree and saves it on OBI, if we free this tree
+	 * we should also clear other pointers to it, so for now it is not freed here:
+	 *   RE_rayobject_free(tree);
+	 *   tree = NULL;
+	 */
+
+	return point_inside_obi(tree, obi, co);
+}
+
diff --git a/source/blender/render/intern/source/volumetric.c b/source/blender/render/intern/source/volumetric.c
new file mode 100644
index 00000000000..583353ed8cf
--- /dev/null
+++ b/source/blender/render/intern/source/volumetric.c
@@ -0,0 +1,836 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): Matt Ebb, Raul Fernandez Hernandez (Farsthary)
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/render/intern/source/volumetric.c
+ *  \ingroup render
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+#include "BLI_math.h"
+#include "BLI_rand.h"
+#include "BLI_voxel.h"
+#include "BLI_utildefines.h"
+
+#include "RE_shader_ext.h"
+
+#include "IMB_colormanagement.h"
+
+#include "DNA_material_types.h"
+#include "DNA_group_types.h"
+#include "DNA_lamp_types.h"
+#include "DNA_meta_types.h"
+
+
+#include "render_types.h"
+#include "pixelshading.h"
+#include "rayintersection.h"
+#include "rayobject.h"
+#include "renderdatabase.h"
+#include "shading.h"
+#include "shadbuf.h"
+#include "texture.h"
+#include "volumetric.h"
+#include "volume_precache.h"
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
+/* only to be used here in this file, it's for speed */
+extern struct Render R;
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* tracing: lamp visibility factor at co; stays 1.0 when the lamp has no shadow buffer and no ray shadows */
+static float vol_get_shadow(ShadeInput *shi, LampRen *lar, const float co[3])
+{
+	float visibility = 1.f;
+
+	if (lar->shb) {
+		float dxco[3] = {0.f, 0.f, 0.f}, dyco[3] = {0.f, 0.f, 0.f};
+
+		visibility = testshadowbuf(&R, lar->shb, co, dxco, dyco, 1.0, 0.0);
+	}
+	else if (lar->mode & LA_SHAD_RAY) {
+		/* trace shadow manually, no good lamp api atm */
+		Isect is;
+		/* ray from co: opposite to lar->vec for sun/hemi, otherwise aimed at lar->co */
+		copy_v3_v3(is.start, co);
+		if (lar->type == LA_SUN || lar->type == LA_HEMI) {
+			is.dir[0] = -lar->vec[0];
+			is.dir[1] = -lar->vec[1];
+			is.dir[2] = -lar->vec[2];
+			is.dist = R.maxdist;
+		}
+		else {
+			sub_v3_v3v3(is.dir, lar->co, is.start);
+			is.dist = normalize_v3(is.dir);
+		}
+
+		is.mode = RE_RAY_MIRROR;
+		is.check = RE_CHECK_VLR_NON_SOLID_MATERIAL;
+		is.skip = 0;
+
+		if (lar->mode & (LA_LAYER | LA_LAYER_SHADOW))
+			is.lay = lar->lay;
+		else
+			is.lay = -1;
+
+		is.orig.ob = NULL;
+		is.orig.face = NULL;
+		is.last_hit = lar->last_hit[shi->thread];
+
+		RE_instance_rotate_ray(shi->obi, &is);
+
+		if (RE_rayobject_raycast(R.raytree, &is)) {
+			RE_instance_rotate_ray_restore(shi->obi, &is);
+			/* any hit blocks the lamp completely */
+			visibility = 0.f;
+		}
+		/* store last_hit back per thread; presumably a hint for the next shadow ray - TODO confirm */
+		lar->last_hit[shi->thread] = is.last_hit;
+	}
+	return visibility;
+}
+
+static int vol_get_bounds(ShadeInput *shi, const float co[3], const float vec[3], float hitco[3], Isect *isect, int intersect_type)
+{
+	/* Casts a ray from co along vec against R.raytree; on a hit stores the hit point in hitco and returns 1, else 0. */
+	copy_v3_v3(isect->start, co);
+	copy_v3_v3(isect->dir, vec);
+	isect->dist = FLT_MAX;
+	isect->mode = RE_RAY_MIRROR;
+	isect->last_hit = NULL;
+	isect->lay = -1;
+	isect->check = RE_CHECK_VLR_NONE;
+
+	if (intersect_type == VOL_BOUNDS_DEPTH) {
+		isect->skip = RE_SKIP_VLR_NEIGHBOUR;
+		isect->orig.face = (void *)shi->vlr;
+		isect->orig.ob = (void *)shi->obi;
+	}
+	else { // if (intersect_type == VOL_BOUNDS_SS) {
+		isect->skip = 0;
+		isect->orig.face = NULL;
+		isect->orig.ob = NULL;
+	}
+
+	RE_instance_rotate_ray(shi->obi, isect);
+
+	if (RE_rayobject_raycast(R.raytree, isect)) {
+		RE_instance_rotate_ray_restore(shi->obi, isect);
+		/* hit point = start + dist * dir */
+		hitco[0] = isect->start[0] + isect->dist * isect->dir[0];
+		hitco[1] = isect->start[1] + isect->dist * isect->dir[1];
+		hitco[2] = isect->start[2] + isect->dist * isect->dir[2];
+		return 1;
+	}
+	else {
+		return 0;
+	}
+}
+
+static void shade_intersection(ShadeInput *shi, float col_r[4], Isect *is)
+{
+	/* Shade the hit described by 'is' using a new ShadeInput based on shi; result goes to col_r (rgb + alpha). */
+	ShadeInput hit_shi;
+	ShadeResult hit_shr;
+
+	memset(&hit_shr, 0, sizeof(hit_shr));
+	memset(&hit_shi, 0, sizeof(hit_shi));
+
+	/* taken over from the shading point we came from */
+	hit_shi.thread = shi->thread;
+	hit_shi.mask = shi->mask;
+	hit_shi.osatex = shi->osatex;
+	hit_shi.xs = shi->xs;
+	hit_shi.ys = shi->ys;
+	hit_shi.lay = shi->lay;
+	hit_shi.light_override = shi->light_override;
+	hit_shi.mat_override = shi->mat_override;
+	hit_shi.depth = shi->depth + 1;
+	hit_shi.volume_depth = shi->volume_depth + 1;
+	copy_v3_v3(hit_shi.camera_co, is->start);
+
+	hit_shi.passflag = SCE_PASS_COMBINED; /* result of tracing needs no pass info */
+	hit_shi.combinedflag = 0xFFFFFF;      /* ray trace does all options */
+
+	/* hardcoded limit of 100 for now - prevents problems in weird geometry */
+	if (shi->volume_depth < 100)
+		shade_ray(is, &hit_shi, &hit_shr);
+
+	copy_v3_v3(col_r, hit_shr.combined);
+	col_r[3] = hit_shr.alpha;
+}
+
+static void vol_trace_behind(ShadeInput *shi, VlakRen *vlr, const float co[3], float col_r[4])
+{
+	Isect isect;
+	/* Continues the view ray from co: shades what it hits into col_r, or the sky/sun when nothing is hit. */
+	copy_v3_v3(isect.start, co);
+	copy_v3_v3(isect.dir, shi->view);
+	isect.dist = FLT_MAX;
+
+	isect.mode = RE_RAY_MIRROR;
+	isect.check = RE_CHECK_VLR_NONE;
+	isect.skip = RE_SKIP_VLR_NEIGHBOUR;
+	isect.orig.ob = (void *) shi->obi;
+	isect.orig.face = (void *)vlr;
+	isect.last_hit = NULL;
+	isect.lay = -1;
+
+	/* check to see if there's anything behind the volume, otherwise shade the sky */
+	RE_instance_rotate_ray(shi->obi, &isect);
+
+	if (RE_rayobject_raycast(R.raytree, &isect)) {
+		RE_instance_rotate_ray_restore(shi->obi, &isect);
+
+		shade_intersection(shi, col_r, &isect);
+	}
+	else {
+		shadeSkyView(col_r, co, shi->view, NULL, shi->thread);
+		shadeSunView(col_r, shi->view);
+	}
+}
+
+
+/* sample the precached scattering grid at co using triquadratic interpolation */
+static void vol_get_precached_scattering(Render *re, ShadeInput *shi, float scatter_col[3], const float co[3])
+{
+	VolumePrecache *vp = shi->obi->volume_precache;
+	float bbmin[3], bbmax[3], dim[3];
+	float world_co[3], sample_co[3];
+	/* NOTE: scatter_col is left untouched when the instance has no precache */
+	if (!vp) return;
+
+	/* find sample point in global space bounding box 0.0-1.0 */
+	global_bounds_obi(re, shi->obi, bbmin, bbmax);
+	sub_v3_v3v3(dim, bbmax, bbmin);
+	mul_v3_m4v3(world_co, re->viewinv, co);
+
+	/* sample_co in 0.0-1.0 */
+	sample_co[0] = (world_co[0] - bbmin[0]) / dim[0];
+	sample_co[1] = (world_co[1] - bbmin[1]) / dim[1];
+	sample_co[2] = (world_co[2] - bbmin[2]) / dim[2];
+
+	scatter_col[0] = BLI_voxel_sample_triquadratic(vp->data_r, vp->res, sample_co);
+	scatter_col[1] = BLI_voxel_sample_triquadratic(vp->data_g, vp->res, sample_co);
+	scatter_col[2] = BLI_voxel_sample_triquadratic(vp->data_b, vp->res, sample_co);
+}
+
+/* Meta object density, brute force for now
+ * (might be good enough anyway, don't need huge number of metaobs to model volumetric objects) */
+static float metadensity(Object *ob, const float co[3])
+{
+	float mat[4][4], imat[4][4], dens = 0.f;
+	MetaBall *mb = (MetaBall *)ob->data;
+	MetaElem *ml;
+
+	/* transform co to meta-element */
+	float tco[3] = {co[0], co[1], co[2]};
+	mul_m4_m4m4(mat, R.viewmat, ob->obmat);
+	invert_m4_m4(imat, mat);
+	mul_m4_v3(imat, tco);
+
+	for (ml = mb->elems.first; ml; ml = ml->next) {
+		float bmat[3][3], dist2;
+
+		/* element rotation transform */
+		float tp[3] = {ml->x - tco[0], ml->y - tco[1], ml->z - tco[2]};
+		quat_to_mat3(bmat, ml->quat);
+		transpose_m3(bmat); /* rot.only, so inverse == transpose */
+		mul_m3_v3(bmat, tp);
+
+		/* MB_BALL default: types not listed below leave tp unchanged */
+		switch (ml->type) {
+			case MB_ELIPSOID:
+				tp[0] /= ml->expx;
+				tp[1] /= ml->expy;
+				tp[2] /= ml->expz;
+				break;
+			case MB_CUBE:
+				tp[2] = (tp[2] > ml->expz) ? (tp[2] - ml->expz) : ((tp[2] < -ml->expz) ? (tp[2] + ml->expz) : 0.f);
+				/* no break, xy as plane */
+				ATTR_FALLTHROUGH;
+			case MB_PLANE:
+				tp[1] = (tp[1] > ml->expy) ? (tp[1] - ml->expy) : ((tp[1] < -ml->expy) ? (tp[1] + ml->expy) : 0.f);
+				/* no break, x as tube */
+				ATTR_FALLTHROUGH;
+			case MB_TUBE:
+				tp[0] = (tp[0] > ml->expx) ? (tp[0] - ml->expx) : ((tp[0] < -ml->expx) ? (tp[0] + ml->expx) : 0.f);
+		}
+
+		/* ml->rad2 is not set */
+		dist2 = 1.0f - (dot_v3v3(tp, tp) / (ml->rad * ml->rad));
+		if (dist2 > 0.f)
+			dens += (ml->flag & MB_NEGATIVE) ? -ml->s * dist2 * dist2 * dist2 : ml->s * dist2 * dist2 * dist2;
+	}
+	/* subtract the threshold and clamp the result at zero */
+	dens -= mb->thresh;
+	return (dens < 0.f) ? 0.f : dens;
+}
+
+float vol_get_density(struct ShadeInput *shi, const float co[3])
+{
+	const float density_scale = shi->mat->vol.density_scale;
+	float density = shi->mat->vol.density;
+	Object *ob = shi->obi->obr->ob;
+
+	if (shi->mat->mapto_textured & MAP_DENSITY)
+		do_volume_tex(shi, co, MAP_DENSITY, NULL, &density, &R);
+
+	/* if meta-object, modulate by metadensity without increasing it */
+	if (ob->type == OB_MBALL) {
+		const float md = metadensity(ob, co);
+		if (md < 1.f) density *= md;
+	}
+	return density * density_scale;  /* (possibly textured, metaball-modulated) density times the material's scale */
+}
+
+
+/* Color of light that gets scattered out by the volume.
+ * Uses same physically based scattering parameter as in transmission calculations,
+ * along with artificial reflection scale/reflection color tint */
+static void vol_get_reflection_color(ShadeInput *shi, float ref_col[3], const float co[3])
+{
+	float reflection = shi->mat->vol.reflection;
+	float scatter = shi->mat->vol.scattering;
+	int i;
+
+	copy_v3_v3(ref_col, shi->mat->vol.reflection_col);
+	if (shi->mat->mapto_textured & (MAP_SCATTERING + MAP_REFLECTION_COL))
+		do_volume_tex(shi, co, MAP_SCATTERING + MAP_REFLECTION_COL, ref_col, &scatter, &R);
+
+	/* only one single float parameter at a time... :s */
+	if (shi->mat->mapto_textured & (MAP_REFLECTION))
+		do_volume_tex(shi, co, MAP_REFLECTION, NULL, &reflection, &R);
+
+	for (i = 0; i < 3; i++)
+		ref_col[i] = reflection * ref_col[i] * scatter;
+}
+
+/* Emission color at co scaled by the emission strength: the radiance to add per segment.
+ * Can be textured with 'emit'. */
+static void vol_get_emission(ShadeInput *shi, float emission_col[3], const float co[3])
+{
+	float strength = shi->mat->vol.emission;
+
+	copy_v3_v3(emission_col, shi->mat->vol.emission_col);
+
+	if (shi->mat->mapto_textured & (MAP_EMISSION + MAP_EMISSION_COL))
+		do_volume_tex(shi, co, MAP_EMISSION + MAP_EMISSION_COL, emission_col, &strength, &R);
+
+	/* scale all three channels by the (possibly textured) strength */
+	mul_v3_fl(emission_col, strength);
+}
+
+
+/* Sigma T per color channel: a combination of absorption and scattering.
+ * A dedicated scattering color and an absorption multiplier could be added,
+ * but are left out for simplicity; these two parameters
+ * already give a good wide range of results. */
+static void vol_get_sigma_t(ShadeInput *shi, float sigma_t[3], const float co[3])
+{
+	/* technically absorption, named transmission color as it describes the coloring *after* absorption */
+	float transmission_col[3];
+	float scattering = shi->mat->vol.scattering;
+	int i;
+
+	copy_v3_v3(transmission_col, shi->mat->vol.transmission_col);
+	if (shi->mat->mapto_textured & (MAP_SCATTERING + MAP_TRANSMISSION_COL))
+		do_volume_tex(shi, co, MAP_SCATTERING + MAP_TRANSMISSION_COL, transmission_col, &scattering, &R);
+
+	for (i = 0; i < 3; i++)
+		sigma_t[i] = (1.0f - transmission_col[i]) + scattering;
+}
+
+/* phase function - determines in which directions the light
+ * is scattered in the volume relative to incoming direction
+ * and view direction; g == 0 is isotropic, otherwise the Schlick approximation is used */
+static float vol_get_phasefunc(ShadeInput *UNUSED(shi), float g, const float w[3], const float wp[3])
+{
+	const float normalize = 0.25f; // = 1.f/4.f = M_PI/(4.f*M_PI)
+
+	/* normalization constant is 1/4 rather than 1/4pi, since
+	 * Blender's shading system doesn't normalize for
+	 * energy conservation - eg. multiplying by pdf ( 1/pi for a lambert brdf ).
+	 * This means that lambert surfaces in Blender are pi times brighter than they 'should be'
+	 * and therefore, with correct energy conservation, volumes will be darker than other solid objects,
+	 * for the same lighting intensity.
+	 * To correct this, scale up the phase function values by pi
+	 * until Blender's shading system supports this better. --matt
+	 */
+
+	if (g == 0.f) { /* isotropic */
+		return normalize * 1.f;
+	}
+	else {      /* schlick */
+		const float k = 1.55f * g - 0.55f * g * g * g;
+		const float kcostheta = k * dot_v3v3(w, wp);
+		return normalize * (1.f - k * k) / ((1.f - kcostheta) * (1.f - kcostheta));
+	}
+
+	/* not used (both branches above return, and the block is disabled), but here for reference: */
+#if 0
+	switch (phasefunc_type) {
+		case MA_VOL_PH_MIEHAZY:
+			return normalize * (0.5f + 4.5f * powf(0.5 * (1.f + costheta), 8.f));
+		case MA_VOL_PH_MIEMURKY:
+			return normalize * (0.5f + 16.5f * powf(0.5 * (1.f + costheta), 32.f));
+		case MA_VOL_PH_RAYLEIGH:
+			return normalize * 3.f / 4.f * (1 + costheta * costheta);
+		case MA_VOL_PH_HG:
+			return normalize * (1.f - g * g) / powf(1.f + g * g - 2.f * g * costheta, 1.5f);
+		case MA_VOL_PH_SCHLICK:
+		{
+			const float k = 1.55f * g - 0.55f * g * g * g;
+			const float kcostheta = k * costheta;
+			return normalize * (1.f - k * k) / ((1.f - kcostheta) * (1.f - kcostheta));
+		}
+		case MA_VOL_PH_ISOTROPIC:
+		default:
+			return normalize * 1.f;
+	}
+#endif
+}
+
+/* Attenuate tr by the transmittance e^(-tau) of a single segment,
+ * where tau = sigma_t * density * stepsize for each color channel.
+ * tr is multiplied in place. */
+static void vol_get_transmittance_seg(ShadeInput *shi, float tr[3], float stepsize, const float co[3], float density)
+{
+	/* input density = density at co */
+	const float stepd = density * stepsize;
+	float sigma_t[3];
+	int i;
+
+	vol_get_sigma_t(shi, sigma_t, co);
+
+	/* homogeneous volume within the sampled distance */
+	for (i = 0; i < 3; i++) {
+		const float tau = stepd * sigma_t[i];
+
+		tr[i] *= expf(-tau);
+	}
+}
+
+/* Compute transmittance = e^(-attenuation) between co and endco, marching in steps of the material stepsize */
+static void vol_get_transmittance(ShadeInput *shi, float tr[3], const float co[3], const float endco[3])
+{
+	float p[3] = {co[0], co[1], co[2]};
+	float step_vec[3] = {endco[0] - co[0], endco[1] - co[1], endco[2] - co[2]};
+	float tau[3] = {0.f, 0.f, 0.f};
+
+	float t0 = 0.f;
+	float t1 = normalize_v3(step_vec);
+	float pt0 = t0;
+	/* first sample offset: half a step for constant stepping, else stepsize * BLI_thread_frand() */
+	t0 += shi->mat->vol.stepsize * ((shi->mat->vol.stepsize_type == MA_VOL_STEP_CONSTANT) ? 0.5f : BLI_thread_frand(shi->thread));
+	p[0] += t0 * step_vec[0];
+	p[1] += t0 * step_vec[1];
+	p[2] += t0 * step_vec[2];
+	mul_v3_fl(step_vec, shi->mat->vol.stepsize);
+	/* accumulate tau = sum of sigma_t * density * step length over the samples */
+	for (; t0 < t1; pt0 = t0, t0 += shi->mat->vol.stepsize) {
+		const float d = vol_get_density(shi, p);
+		const float stepd = (t0 - pt0) * d;
+		float sigma_t[3];
+
+		vol_get_sigma_t(shi, sigma_t, p);
+
+		tau[0] += stepd * sigma_t[0];
+		tau[1] += stepd * sigma_t[1];
+		tau[2] += stepd * sigma_t[2];
+
+		add_v3_v3(p, step_vec);
+	}
+
+	/* return transmittance */
+	tr[0] = expf(-tau[0]);
+	tr[1] = expf(-tau[1]);
+	tr[2] = expf(-tau[2]);
+}
+
+static void vol_shade_one_lamp(struct ShadeInput *shi, const float co[3], const float view[3], LampRen *lar, float lacol[3])
+{
+	float visifac, lv[3], lampdist;
+	float tr[3] = {1.0, 1.0, 1.0};
+	float hitco[3], *atten_co;
+	float p, ref_col[3];
+	/* Writes the light of lamp lar scattered at co into lacol; the early returns below leave lacol unwritten. */
+	if (lar->mode & LA_LAYER) if ((lar->lay & shi->obi->lay) == 0) return;
+	if ((lar->lay & shi->lay) == 0) return;
+	if (lar->energy == 0.0f) return;
+
+	if ((visifac = lamp_get_visibility(lar, co, lv, &lampdist)) == 0.f) return;
+
+	copy_v3_v3(lacol, &lar->r);
+
+	if (lar->mode & LA_TEXTURE) {
+		shi->osatex = 0;
+		do_lamp_tex(lar, lv, shi, lacol, LA_TEXTURE);
+	}
+
+	mul_v3_fl(lacol, visifac);
+
+	if (ELEM(lar->type, LA_SUN, LA_HEMI))
+		copy_v3_v3(lv, lar->vec);
+	negate_v3(lv);
+
+	if (shi->mat->vol.shade_type == MA_VOL_SHADE_SHADOWED) {
+		mul_v3_fl(lacol, vol_get_shadow(shi, lar, co));
+	}
+	else if (ELEM(shi->mat->vol.shade_type, MA_VOL_SHADE_SHADED, MA_VOL_SHADE_MULTIPLE, MA_VOL_SHADE_SHADEDPLUSMULTIPLE)) {
+		Isect is;
+
+		if (shi->mat->vol.shadeflag & MA_VOL_RECV_EXT_SHADOW) {
+			mul_v3_fl(lacol, vol_get_shadow(shi, lar, co));
+			if (IMB_colormanagement_get_luminance(lacol) < 0.001f) return;
+		}
+
+		/* find minimum of volume bounds, or lamp coord */
+		if (vol_get_bounds(shi, co, lv, hitco, &is, VOL_BOUNDS_SS)) {
+			float dist = len_v3v3(co, hitco);
+			VlakRen *vlr = (VlakRen *)is.hit.face;
+
+			/* simple internal shadowing */
+			if (vlr->mat->material_type == MA_TYPE_SURFACE) {
+				lacol[0] = lacol[1] = lacol[2] = 0.0f;
+				return;
+			}
+
+			if (ELEM(lar->type, LA_SUN, LA_HEMI))
+				/* infinite lights, can never be inside volume */
+				atten_co = hitco;
+			else if (lampdist < dist) {
+				atten_co = lar->co;
+			}
+			else
+				atten_co = hitco;
+			/* attenuate the lamp color by the volume between co and atten_co */
+			vol_get_transmittance(shi, tr, co, atten_co);
+
+			mul_v3_v3v3(lacol, lacol, tr);
+		}
+		else {
+			/* Point is on the outside edge of the volume,
+			 * therefore no attenuation, full transmission.
+			 * Radiance from lamp remains unchanged */
+		}
+	}
+	/* NOTE: this return leaves lacol as computed so far, without the phase/reflection factors below */
+	if (IMB_colormanagement_get_luminance(lacol) < 0.001f) return;
+
+	normalize_v3(lv);
+	p = vol_get_phasefunc(shi, shi->mat->vol.asymmetry, view, lv);
+
+	/* physically based scattering with non-physically based RGB gain */
+	vol_get_reflection_color(shi, ref_col, co);
+
+	lacol[0] *= p * ref_col[0];
+	lacol[1] *= p * ref_col[1];
+	lacol[2] *= p * ref_col[2];
+}
+
+/* single scattering only for now: sums the contribution of every lamp at co into scatter_col */
+void vol_get_scattering(ShadeInput *shi, float scatter_col[3], const float co[3], const float view[3])
+{
+	ListBase *lights = get_lights(shi);
+	GroupObject *go;
+
+	zero_v3(scatter_col);
+
+	for (go = lights->first; go; go = go->next) {
+		LampRen *lar = go->lampren;
+		float lacol[3] = {0.f, 0.f, 0.f};
+
+		if (lar == NULL)
+			continue;
+
+		/* lacol is pre-zeroed since vol_shade_one_lamp may return without writing it */
+		vol_shade_one_lamp(shi, co, view, lar, lacol);
+		add_v3_v3(scatter_col, lacol);
+	}
+}
+
+
+/*
+ * The main volumetric integrator, using an emission/absorption/scattering model.
+ *
+ * Incoming radiance =
+ *
+ * outgoing radiance from behind surface * beam transmittance/attenuation
+ * + added radiance from all points along the ray due to participating media
+ *     --> radiance for each segment =
+ *         (radiance added by scattering + radiance added by emission) * beam transmittance/attenuation
+ */
+
+/* For ease of use, I've also introduced a 'reflection' and 'reflection color' parameter, which isn't
+ * physically correct. This works as an RGB tint/gain on out-scattered light, but doesn't affect the light
+ * that is transmitted through the volume. While having wavelength dependent absorption/scattering is more correct,
+ * it also makes it harder to control the overall look of the volume since coloring the outscattered light results
+ * in the inverse color being transmitted through the rest of the volume.
+ */
+static void volumeintegrate(struct ShadeInput *shi, float col[4], const float co[3], const float endco[3])
+{
+	float radiance[3] = {0.f, 0.f, 0.f};
+	float tr[3] = {1.f, 1.f, 1.f};
+	float p[3] = {co[0], co[1], co[2]};
+	float step_vec[3] = {endco[0] - co[0], endco[1] - co[1], endco[2] - co[2]};
+	const float stepsize = shi->mat->vol.stepsize;
+	/* t0: distance of the current sample along the ray, pt0: previous sample, t1: ray length */
+	float t0 = 0.f;
+	float pt0 = t0;
+	float t1 = normalize_v3(step_vec);  /* returns vector length */
+	/* first sample offset: half a step for constant stepping, else stepsize * BLI_thread_frand() */
+	t0 += stepsize * ((shi->mat->vol.stepsize_type == MA_VOL_STEP_CONSTANT) ? 0.5f : BLI_thread_frand(shi->thread));
+	p[0] += t0 * step_vec[0];
+	p[1] += t0 * step_vec[1];
+	p[2] += t0 * step_vec[2];
+	mul_v3_fl(step_vec, stepsize);
+
+	for (; t0 < t1; pt0 = t0, t0 += stepsize) {
+		const float density = vol_get_density(shi, p);
+
+		if (density > 0.00001f) {
+			float scatter_col[3] = {0.f, 0.f, 0.f}, emit_col[3];
+			const float stepd = (t0 - pt0) * density;
+			/* transmittance component (alpha); sigma_t is evaluated at the sample point p,
+			 * like the density, so textured scattering/transmission vary along the ray */
+			vol_get_transmittance_seg(shi, tr, stepsize, p, density);
+
+			if (t0 > t1 * 0.25f) {
+				/* only use depth cutoff after we've traced a little way into the volume */
+				if (IMB_colormanagement_get_luminance(tr) < shi->mat->vol.depth_cutoff) break;
+			}
+
+			vol_get_emission(shi, emit_col, p);
+
+			if (shi->obi->volume_precache) {
+				float p2[3];
+				/* the cache is sampled half a step further along the ray */
+				p2[0] = p[0] + (step_vec[0] * 0.5f);
+				p2[1] = p[1] + (step_vec[1] * 0.5f);
+				p2[2] = p[2] + (step_vec[2] * 0.5f);
+
+				vol_get_precached_scattering(&R, shi, scatter_col, p2);
+			}
+			else
+				vol_get_scattering(shi, scatter_col, p, shi->view);
+
+			radiance[0] += stepd * tr[0] * (emit_col[0] + scatter_col[0]);
+			radiance[1] += stepd * tr[1] * (emit_col[1] + scatter_col[1]);
+			radiance[2] += stepd * tr[2] * (emit_col[2] + scatter_col[2]);
+		}
+		add_v3_v3(p, step_vec);
+	}
+
+	/* multiply original color (from behind volume) with transmittance over entire distance */
+	mul_v3_v3v3(col, tr, col);
+	add_v3_v3(col, radiance);
+
+	/* alpha <-- transmission luminance */
+	col[3] = 1.0f - IMB_colormanagement_get_luminance(tr);
+}
+
+/* the main entry point for volume shading: writes combined, alpha, diff and diffshad of shr */
+static void volume_trace(struct ShadeInput *shi, struct ShadeResult *shr, int inside_volume)
+{
+	float hitco[3], col[4] = {0.f, 0.f, 0.f, 0.f};
+	const float *startco, *endco;
+	int trace_behind = 1;
+	const int ztransp = ((shi->depth == 0) && (shi->mat->mode & MA_TRANSP) && (shi->mat->mode & MA_ZTRANSP));
+	Isect is;
+
+	/* check for shading an internal face of a volume object directly */
+	if (inside_volume == VOL_SHADE_INSIDE)
+		trace_behind = 0;
+	else if (inside_volume == VOL_SHADE_OUTSIDE) {
+		if (shi->flippednor)
+			inside_volume = VOL_SHADE_INSIDE;
+	}
+
+	if (ztransp && inside_volume == VOL_SHADE_INSIDE) {
+		MatInside *mi;
+		int render_this = 0;
+
+		/* don't render the backfaces of ztransp volume materials.
+		 *
+		 * volume shading renders the internal volume from between the
+		 * view intersection of the solid volume to the
+		 * intersection on the other side, as part of the shading of
+		 * the front face.
+		 *
+		 * Because ztransp renders both front and back faces independently
+		 * this will double up, so here we prevent rendering the backface as well,
+		 * which would otherwise render the volume in between the camera and the backface
+		 * --matt */
+
+		for (mi = R.render_volumes_inside.first; mi; mi = mi->next) {
+			/* weak... */
+			if (mi->ma == shi->mat) render_this = 1;
+		}
+		if (!render_this) return;
+	}
+
+
+	if (inside_volume == VOL_SHADE_INSIDE) {
+		startco = shi->camera_co;
+		endco = shi->co;
+
+		if (trace_behind) {
+			if (!ztransp)
+				/* trace behind the volume object */
+				vol_trace_behind(shi, shi->vlr, endco, col);
+		}
+		else {
+			/* we're tracing through the volume between the camera
+			 * and a solid surface, so use that pre-shaded radiance */
+			copy_v4_v4(col, shr->combined);
+		}
+
+		/* shade volume from 'camera' to 1st hit point */
+		volumeintegrate(shi, col, startco, endco);
+	}
+	/* trace to find a backface, the other side bounds of the volume */
+	/* (ray intersect ignores front faces here) */
+	else if (vol_get_bounds(shi, shi->co, shi->view, hitco, &is, VOL_BOUNDS_DEPTH)) {
+		VlakRen *vlr = (VlakRen *)is.hit.face;
+
+		startco = shi->co;
+		endco = hitco;
+
+		if (!ztransp) {
+			/* if it's another face in the same material */
+			if (vlr->mat == shi->mat) {
+				/* trace behind the 2nd (raytrace) hit point */
+				vol_trace_behind(shi, (VlakRen *)is.hit.face, endco, col);
+			}
+			else {
+				shade_intersection(shi, col, &is);
+			}
+		}
+
+		/* shade volume from 1st hit point to 2nd hit point */
+		volumeintegrate(shi, col, startco, endco);
+	}
+	/* alpha: clamped to 1 for ztransp, otherwise always fully opaque */
+	if (ztransp)
+		col[3] = col[3] > 1.f ? 1.f : col[3];
+	else
+		col[3] = 1.f;
+
+	copy_v3_v3(shr->combined, col);
+	shr->alpha = col[3];
+
+	copy_v3_v3(shr->diff, shr->combined);
+	copy_v3_v3(shr->diffshad, shr->diff);
+}
+
+/* Traces a shadow through the object,
+ * pretty much gets the transmission over a ray path; shr receives tr as rgb and (1 - luminance(tr)) as alpha */
+void shade_volume_shadow(struct ShadeInput *shi, struct ShadeResult *shr, struct Isect *last_is)
+{
+	float hitco[3];
+	float tr[3] = {1.0, 1.0, 1.0};
+	Isect is = {{0}};
+	const float *startco, *endco;
+
+	memset(shr, 0, sizeof(ShadeResult));
+
+	/* if 1st hit normal is facing away from the camera,
+	 * then we're inside the volume already. */
+	if (shi->flippednor) {
+		startco = last_is->start;
+		endco = shi->co;
+	}
+
+	/* trace to find a backface, the other side bounds of the volume */
+	/* (ray intersect ignores front faces here) */
+	else if (vol_get_bounds(shi, shi->co, shi->view, hitco, &is, VOL_BOUNDS_DEPTH)) {
+		startco = shi->co;
+		endco = hitco;
+	}
+	else {
+		/* no far bounds found: return black with alpha 1 */
+		shr->combined[0] = shr->combined[1] = shr->combined[2] = 0.f;
+		shr->alpha = shr->combined[3] = 1.f;
+		return;
+	}
+
+	vol_get_transmittance(shi, tr, startco, endco);
+
+
+	/* if we hit another face in the same volume bounds */
+	/* shift raytrace coordinates to the hit point, to avoid shading volume twice */
+	/* due to idiosyncrasy in ray_trace_shadow_tra() */
+	if (is.hit.ob == shi->obi) {
+		copy_v3_v3(shi->co, hitco); /* note: modifies the caller's shi and last_is */
+		last_is->dist += is.dist;
+		shi->vlr = (VlakRen *)is.hit.face;
+	}
+
+
+	copy_v3_v3(shr->combined, tr);
+	shr->combined[3] = 1.0f - IMB_colormanagement_get_luminance(tr);
+	shr->alpha = shr->combined[3];
+}
+
+
+/* zeroes shr, then shades the volume with VOL_SHADE_OUTSIDE; fields volume_trace() does not write stay zero */
+void shade_volume_outside(ShadeInput *shi, ShadeResult *shr)
+{
+	memset(shr, 0, sizeof(ShadeResult));
+	volume_trace(shi, shr, VOL_SHADE_OUTSIDE);
+}
+
+
+void shade_volume_inside(ShadeInput *shi, ShadeResult *shr)
+{
+	MatInside *m;
+	Material *mat_backup;
+	ObjectInstanceRen *obi_backup;
+	float prev_alpha = shr->alpha; /* alpha on entry, added back after the volume pass */
+
+	/* XXX: only the first entry of R.render_volumes_inside is used; extend to multiple volumes perhaps later */
+	mat_backup = shi->mat;
+	obi_backup = shi->obi;
+
+	m = R.render_volumes_inside.first; /* NOTE(review): not NULL-checked here; presumably callers ensure the list is non-empty -- confirm */
+	shi->mat = m->ma;
+	shi->obi = m->obi;
+	shi->obr = m->obi->obr;
+
+	volume_trace(shi, shr, VOL_SHADE_INSIDE); /* shades with the volume's material/object temporarily set on shi */
+
+	shr->alpha = shr->alpha + prev_alpha;
+	CLAMP(shr->alpha, 0.0f, 1.0f);
+
+	shi->mat = mat_backup; /* restore the caller's material and object instance */
+	shi->obi = obi_backup;
+	shi->obr = obi_backup->obr;
+}
diff --git a/source/blender/render/intern/source/voxeldata.c b/source/blender/render/intern/source/voxeldata.c
index 0d9f7b197e1..2daa4123536 100644
--- a/source/blender/render/intern/source/voxeldata.c
+++ b/source/blender/render/intern/source/voxeldata.c
@@ -88,10 +88,10 @@ static size_t vd_resol_size(VoxelData *vd)
 }
 
 static int load_frame_blendervoxel(VoxelData *vd, FILE *fp, int frame)
-{	
+{
 	const size_t size = vd_resol_size(vd);
 	size_t offset = sizeof(VoxelDataHeader);
-	
+
 	if (is_vd_res_ok(vd) == false)
 		return 0;
 
@@ -102,7 +102,7 @@ static int load_frame_blendervoxel(VoxelData *vd, FILE *fp, int frame)
 		return 0;
 	if (fread(vd->dataset, sizeof(float), size, fp) != size)
 		return 0;
-	
+
 	vd->cachedframe = frame;
 	vd->ok = 1;
 	return 1;
@@ -138,12 +138,12 @@ static int load_frame_raw8(VoxelData *vd, FILE *fp, int frame)
 		vd->dataset = NULL;
 		return 0;
 	}
-	
+
 	for (i = 0; i < size; i++) {
 		vd->dataset[i] = (float)data_c[i] / 255.f;
 	}
 	MEM_freeN(data_c);
-	
+
 	vd->cachedframe = frame;
 	vd->ok = 1;
 	return 1;
@@ -160,7 +160,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex)
 
 	if (!ima) return;
 	if (iuser.frames == 0) return;
-	
+
 	ima->source = IMA_SRC_SEQUENCE;
 	iuser.framenr = 1 + iuser.offset;
 
@@ -173,13 +173,13 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex)
 	}
 	if (!ibuf) return;
 	if (!ibuf->rect_float) IMB_float_from_rect(ibuf);
-	
+
 	vd->flag |= TEX_VD_STILL;
 	vd->resol[0] = ibuf->x;
 	vd->resol[1] = ibuf->y;
 	vd->resol[2] = iuser.frames;
 	vd->dataset = MEM_mapallocN(sizeof(float) * vd_resol_size(vd), "voxel dataset");
-	
+
 	for (z = 0; z < iuser.frames; z++) {
 		/* get a new ibuf for each frame */
 		if (z > 0) {
@@ -190,7 +190,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex)
 			if (!ibuf->rect_float) IMB_float_from_rect(ibuf);
 		}
 		rf = ibuf->rect_float;
-		
+
 		for (y = 0; y < ibuf->y; y++) {
 			for (x = 0; x < ibuf->x; x++) {
 				/* currently averaged to monchrome */
@@ -198,7 +198,7 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex)
 				rf += 4;
 			}
 		}
-		
+
 		BKE_image_free_anim_ibufs(ima, iuser.framenr);
 	}
 
@@ -211,13 +211,13 @@ static void load_frame_image_sequence(VoxelData *vd, Tex *tex)
 static int read_voxeldata_header(FILE *fp, struct VoxelData *vd)
 {
 	VoxelDataHeader *h = (VoxelDataHeader *)MEM_mallocN(sizeof(VoxelDataHeader), "voxel data header");
-	
+
 	rewind(fp);
 	if (fread(h, sizeof(VoxelDataHeader), 1, fp) != 1) {
 		MEM_freeN(h);
 		return 0;
 	}
-	
+
 	vd->resol[0] = h->resolX;
 	vd->resol[1] = h->resolY;
 	vd->resol[2] = h->resolZ;
@@ -231,16 +231,16 @@ static void init_frame_smoke(VoxelData *vd, int cfra)
 #ifdef WITH_SMOKE
 	Object *ob;
 	ModifierData *md;
-	
+
 	vd->dataset = NULL;
 	if (vd->object == NULL) return;
 	ob = vd->object;
-	
+
 	/* draw code for smoke */
 	if ((md = (ModifierData *)modifiers_findByType(ob, eModifierType_Smoke))) {
 		SmokeModifierData *smd = (SmokeModifierData *)md;
 		SmokeDomainSettings *sds = smd->domain;
-		
+
 		if (sds && sds->fluid) {
 			BLI_rw_mutex_lock(sds->fluid_mutex, THREAD_LOCK_READ);
 
@@ -356,7 +356,7 @@ static void init_frame_smoke(VoxelData *vd, int cfra)
 			BLI_rw_mutex_unlock(sds->fluid_mutex);
 		}
 	}
-	
+
 	vd->ok = 1;
 
 #else // WITH_SMOKE
@@ -371,14 +371,14 @@ static void init_frame_hair(VoxelData *vd, int UNUSED(cfra))
 {
 	Object *ob;
 	ModifierData *md;
-	
+
 	vd->dataset = NULL;
 	if (vd->object == NULL) return;
 	ob = vd->object;
-	
+
 	if ((md = (ModifierData *)modifiers_findByType(ob, eModifierType_ParticleSystem))) {
 		ParticleSystemModifierData *pmd = (ParticleSystemModifierData *)md;
-		
+
 		if (pmd->psys && pmd->psys->clmd) {
 			vd->ok |= BPH_cloth_solver_get_texture_data(ob, pmd->psys->clmd, vd);
 		}
@@ -386,16 +386,16 @@ static void init_frame_hair(VoxelData *vd, int UNUSED(cfra))
 }
 
 void cache_voxeldata(Tex *tex, int scene_frame)
-{	
+{
 	VoxelData *vd = tex->vd;
 	FILE *fp;
 	int curframe;
 	char path[sizeof(vd->source_path)];
-	
+
 	/* only re-cache if dataset needs updating */
 	if ((vd->flag & TEX_VD_STILL) || (vd->cachedframe == scene_frame))
 		if (vd->ok) return;
-	
+
 	/* clear out old cache, ready for new */
 	if (vd->dataset) {
 		MEM_freeN(vd->dataset);
@@ -408,9 +408,9 @@ void cache_voxeldata(Tex *tex, int scene_frame)
 		curframe = vd->still_frame;
 	else
 		curframe = scene_frame;
-	
+
 	BLI_strncpy(path, vd->source_path, sizeof(path));
-	
+
 	/* each type is responsible for setting to true */
 	vd->ok = false;
 
@@ -428,7 +428,7 @@ void cache_voxeldata(Tex *tex, int scene_frame)
 			BLI_path_abs(path, BKE_main_blendfile_path_from_global());
 			fp = BLI_fopen(path, "rb");
 			if (!fp) return;
-			
+
 			if (read_voxeldata_header(fp, vd))
 				load_frame_blendervoxel(vd, fp, curframe - 1);
 
@@ -438,7 +438,7 @@ void cache_voxeldata(Tex *tex, int scene_frame)
 			BLI_path_abs(path, BKE_main_blendfile_path_from_global());
 			fp = BLI_fopen(path, "rb");
 			if (!fp) return;
-			
+
 			load_frame_raw8(vd, fp, curframe);
 			fclose(fp);
 			return;
@@ -448,24 +448,24 @@ void cache_voxeldata(Tex *tex, int scene_frame)
 void make_voxeldata(struct Render *re)
 {
 	Tex *tex;
-	
+
 	re->i.infostr = IFACE_("Loading voxel datasets");
 	re->stats_draw(re->sdh, &re->i);
-	
+
 	/* XXX: should be doing only textures used in this render */
 	for (tex = re->main->tex.first; tex; tex = tex->id.next) {
 		if (tex->id.us && tex->type == TEX_VOXELDATA) {
 			cache_voxeldata(tex, re->r.cfra);
 		}
 	}
-	
+
 	re->i.infostr = NULL;
 	re->stats_draw(re->sdh, &re->i);
-	
+
 }
 
 int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texres)
-{	 
+{
 	VoxelData *vd = tex->vd;
 	float co[3], offset[3] = {0.5, 0.5, 0.5}, a;
 	int retval = (vd->data_type == TEX_VD_RGBA_PREMUL) ? TEX_RGB : TEX_INT;
@@ -476,7 +476,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre
 		texres->tin = 0.0f;
 		return 0;
 	}
-	
+
 	/* scale lookup from 0.0-1.0 (original location) to -1.0, 1.0, consistent with image texture tex coords */
 	/* in implementation this works backwards, bringing sample locations from -1.0, 1.0
 	 * to the range 0.0, 1.0, before looking up in the voxel structure. */
@@ -531,7 +531,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre
 		switch (vd->interp_type) {
 			case TEX_VD_NEARESTNEIGHBOR:
 				*result = BLI_voxel_sample_nearest(dataset, vd->resol, co);
-				break;  
+				break;
 			case TEX_VD_LINEAR:
 				*result = BLI_voxel_sample_trilinear(dataset, vd->resol, co);
 				break;
@@ -548,7 +548,7 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre
 	a = texres->tin;
 	texres->tin *= vd->int_multiplier;
 	BRICONT;
-	
+
 	if (vd->data_type == TEX_VD_RGBA_PREMUL) {
 		/* unmultiply */
 		if (a>0.001f) {
@@ -566,6 +566,6 @@ int voxeldatatex(struct Tex *tex, const float texvec[3], struct TexResult *texre
 
 	texres->ta = texres->tin;
 	BRICONTRGB;
-	
+
 	return retval;
 }
diff --git a/source/blender/render/intern/source/zbuf.c b/source/blender/render/intern/source/zbuf.c
index 3837383c4c7..436ee590f5c 100644
--- a/source/blender/render/intern/source/zbuf.c
+++ b/source/blender/render/intern/source/zbuf.c
@@ -54,10 +54,10 @@
 void zbuf_alloc_span(ZSpan *zspan, int rectx, int recty)
 {
 	memset(zspan, 0, sizeof(ZSpan));
-	
+
 	zspan->rectx= rectx;
 	zspan->recty= recty;
-	
+
 	zspan->span1= MEM_mallocN(recty*sizeof(float), "zspan");
 	zspan->span2= MEM_mallocN(recty*sizeof(float), "zspan");
 }
@@ -85,27 +85,27 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2])
 	float *span;
 	float xx1, dx0, xs0;
 	int y, my0, my2;
-	
+
 	if (v1[1]<v2[1]) {
 		minv= v1; maxv= v2;
 	}
 	else {
 		minv= v2; maxv= v1;
 	}
-	
+
 	my0= ceil(minv[1]);
 	my2= floor(maxv[1]);
-	
+
 	if (my2<0 || my0>= zspan->recty) return;
-	
+
 	/* clip top */
 	if (my2>=zspan->recty) my2= zspan->recty-1;
 	/* clip bottom */
 	if (my0<0) my0= 0;
-	
+
 	if (my0>my2) return;
 	/* if (my0>my2) should still fill in, that way we get spans that skip nicely */
-	
+
 	xx1= maxv[1]-minv[1];
 	if (xx1>FLT_EPSILON) {
 		dx0= (minv[0]-maxv[0])/xx1;
@@ -115,7 +115,7 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2])
 		dx0 = 0.0f;
 		xs0 = min_ff(minv[0], maxv[0]);
 	}
-	
+
 	/* empty span */
 	if (zspan->maxp1 == NULL) {
 		span= zspan->span1;
@@ -158,9 +158,9 @@ static void zbuf_add_to_span(ZSpan *zspan, const float v1[2], const float v2[2])
 	}
 }
 
-/*-----------------------------------------------------------*/ 
+/*-----------------------------------------------------------*/
 /* Functions                                                 */
-/*-----------------------------------------------------------*/ 
+/*-----------------------------------------------------------*/
 
 /* scanconvert for strand triangles, calls func for each x, y coordinate and gives UV barycentrics and z */
 
@@ -170,30 +170,30 @@ void zspan_scanconvert(ZSpan *zspan, void *handle, float *v1, float *v2, float *
 	float u, v, uxd, uyd, vxd, vyd, uy0, vy0, xx1;
 	const float *span1, *span2;
 	int i, j, x, y, sn1, sn2, rectx = zspan->rectx, my0, my2;
-	
+
 	/* init */
 	zbuf_init_span(zspan);
-	
+
 	/* set spans */
 	zbuf_add_to_span(zspan, v1, v2);
 	zbuf_add_to_span(zspan, v2, v3);
 	zbuf_add_to_span(zspan, v3, v1);
-	
+
 	/* clipped */
 	if (zspan->minp2==NULL || zspan->maxp2==NULL) return;
-	
+
 	my0 = max_ii(zspan->miny1, zspan->miny2);
 	my2 = min_ii(zspan->maxy1, zspan->maxy2);
-	
+
 	//	printf("my %d %d\n", my0, my2);
 	if (my2<my0) return;
-	
+
 	/* ZBUF DX DY, in floats still */
 	x1= v1[0]- v2[0];
 	x2= v2[0]- v3[0];
 	y1= v1[1]- v2[1];
 	y2= v2[1]- v3[1];
-	
+
 	z1= 1.0f; /* (u1 - u2) */
 	z2= 0.0f; /* (u2 - u3) */
 
@@ -213,28 +213,28 @@ void zspan_scanconvert(ZSpan *zspan, void *handle, float *v1, float *v2, float *
 
 	x0= y1*z2-z1*y2;
 	y0= z1*x2-x1*z2;
-	
+
 	xx1= (x0*v1[0] + y0*v1[1])/z0;
 	vxd= -(double)x0/(double)z0;
 	vyd= -(double)y0/(double)z0;
 	vy0= ((double)my2)*vyd + (double)xx1;
-	
+
 	/* correct span */
 	span1= zspan->span1+my2;
 	span2= zspan->span2+my2;
-	
+
 	for (i = 0, y = my2; y >= my0; i++, y--, span1--, span2--) {
-		
+
 		sn1= floor(min_ff(*span1, *span2));
 		sn2= floor(max_ff(*span1, *span2));
-		sn1++; 
-		
+		sn1++;
+
 		if (sn2>=rectx) sn2= rectx-1;
 		if (sn1<0) sn1= 0;
-		
+
 		u = (((double)sn1 * uxd) + uy0) - (i * uyd);
 		v = (((double)sn1 * vxd) + vy0) - (i * vyd);
-		
+
 		for (j = 0, x = sn1; x <= sn2; j++, x++) {
 			func(handle, x, y, u + (j * uxd), v + (j * vxd));
 		}