22 files changed, 1617 insertions, 419 deletions
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
new file mode 100644
index 00000000000..b7abc1ec507
--- /dev/null
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
+{
+	/* Hand out the next free slot at the front of sd's closure array, or
+	 * NULL once regular plus extra slots have reached MAX_CLOSURE. */
+	kernel_assert(size <= sizeof(ShaderClosure));
+
+	const int used = ccl_fetch(sd, num_closure);
+	const int used_extra = ccl_fetch(sd, num_closure_extra);
+	if(used + used_extra >= MAX_CLOSURE) {
+		return NULL;
+	}
+
+	ShaderClosure *slot = ccl_fetch(sd, closure) + used;
+	slot->type = type;
+	slot->weight = weight;
+	ccl_fetch(sd, num_closure) = used + 1;
+	return slot;
+}
+
+ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
+{
+	/* Allocate extra space for closures that need more parameters. Space is
+	 * handed out in chunks of sizeof(ShaderClosure), starting from the end of
+	 * the closure array; NULL is returned when the request would push the
+	 * combined closure + extra count past MAX_CLOSURE.
+	 * This lets us keep the same fast array iteration over closures, as we
+	 * found linked list iteration and iteration with skipping to be slower. */
+	int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); /* size rounded up to whole slots */
+	int num_closure = ccl_fetch(sd, num_closure);
+	int num_closure_extra = ccl_fetch(sd, num_closure_extra) + num_extra;
+
+	if(num_closure + num_closure_extra > MAX_CLOSURE) {
+		/* Remove previous closure (presumably the one the caller just allocated and wanted to extend). */
+		ccl_fetch(sd, num_closure)--;
+		ccl_fetch(sd, num_closure_extra)++; /* NOTE(review): keeps the combined count unchanged, so the slot is not freed for reuse - confirm intended */
+		return NULL;
+	}
+
+	ccl_fetch(sd, num_closure_extra) = num_closure_extra;
+	return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra); /* chunk sits num_closure_extra slots before the array's end */
+}
+
+ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
+{
+	/* Take a closure slot and record |average(weight)| as its sample weight. */
+	ShaderClosure *closure = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+	if(closure == NULL)
+		return NULL;
+
+	const float abs_weight = fabsf(average(weight));
+	closure->sample_weight = abs_weight;
+	return (abs_weight >= CLOSURE_WEIGHT_CUTOFF) ? closure : NULL;
+}
+
+#ifdef __OSL__
+ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data)
+{
+	/* Same flow as bsdf_alloc(), with the new slot first filled from `data`. */
+	ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+	if(!sc)
+		return NULL;
+	/* Copies `size` bytes from `data` over the start of the slot. */
+	memcpy(sc, data, size);
+
+	float sample_weight = fabsf(average(weight));
+	sc->weight = weight; /* stored again after the copy; presumably memcpy overwrote what closure_alloc() set - verify layout */
+	sc->sample_weight = sample_weight;
+	return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 1f225e1e96c..72096f4d873 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -20,6 +20,7 @@
 #include "../closure/bsdf_phong_ramp.h"
 #include "../closure/bsdf_diffuse_ramp.h"
 #include "../closure/bsdf_microfacet.h"
+#include "../closure/bsdf_microfacet_multi.h"
 #include "../closure/bsdf_reflection.h"
 #include "../closure/bsdf_refraction.h"
 #include "../closure/bsdf_transparent.h"
@@ -39,15 +40,10 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
 {
 	int label;
 
-#ifdef __OSL__
-	if(kg->osl && sc->prim)
-		return OSLShader::bsdf_sample(sd, sc, randu, randv, *eval, *omega_in, *domega_in, *pdf);
-#endif
-
 	switch(sc->type) {
 		case CLOSURE_BSDF_DIFFUSE_ID:
 		case CLOSURE_BSDF_BSSRDF_ID:
@@ -59,14 +55,16 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
 			label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
-		/*case CLOSURE_BSDF_PHONG_RAMP_ID:
+#ifdef __OSL__
+		case CLOSURE_BSDF_PHONG_RAMP_ID:
 			label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
 		case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
 			label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;*/
+			break;
+#endif
 		case CLOSURE_BSDF_TRANSLUCENT_ID:
 			label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
@@ -89,6 +87,14 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
 			label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+			label = bsdf_microfacet_multi_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
+			        eval, omega_in,  &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
+			break;
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+			label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
+			        eval, omega_in,  &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state));
+			break;
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
@@ -129,10 +135,10 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
 			label = bsdf_disney_sheen_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
-		case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
+		/*case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
 			label = bsdf_disney_specular_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
-			break;
+			break;*/
 		case CLOSURE_BSDF_DISNEY_CLEARCOAT_ID:
 			label = bsdf_disney_clearcoat_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
@@ -151,15 +157,10 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
 	return label;
 }
 
-ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
 {
 	float3 eval;
 
-#ifdef __OSL__
-	if(kg->osl && sc->prim)
-		return OSLShader::bsdf_eval(sd, sc, omega_in, *pdf);
-#endif
-
 	if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) {
 		switch(sc->type) {
 			case CLOSURE_BSDF_DIFFUSE_ID:
@@ -170,12 +171,14 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 			case CLOSURE_BSDF_OREN_NAYAR_ID:
 				eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
-			/*case CLOSURE_BSDF_PHONG_RAMP_ID:
+#ifdef __OSL__
+			case CLOSURE_BSDF_PHONG_RAMP_ID:
 				eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
 			case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
 				eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
-				break;*/
+				break;
+#endif
 			case CLOSURE_BSDF_TRANSLUCENT_ID:
 				eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
@@ -193,6 +196,12 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 			case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
 				eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
+			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+				eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
+				break;
+			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+				eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
+				break;
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
@@ -224,9 +233,9 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 			case CLOSURE_BSDF_DISNEY_SHEEN_ID:
 				eval = bsdf_disney_sheen_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
-			case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
+			/*case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
 				eval = bsdf_disney_specular_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
-				break;
+				break;*/
 			case CLOSURE_BSDF_DISNEY_CLEARCOAT_ID:
 				eval = bsdf_disney_clearcoat_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
@@ -268,6 +277,12 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 			case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
 				eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
+			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+				eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
+				break;
+			case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+				eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state));
+				break;
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
 			case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
@@ -299,9 +314,9 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 			case CLOSURE_BSDF_DISNEY_SHEEN_ID:
 				eval = bsdf_disney_sheen_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
-			case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
+			/*case CLOSURE_BSDF_DISNEY_SPECULAR_ID:
 				eval = bsdf_disney_specular_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
-				break;
+				break;*/
 			case CLOSURE_BSDF_DISNEY_CLEARCOAT_ID:
 				eval = bsdf_disney_clearcoat_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
 				break;
@@ -322,17 +337,13 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
 
 ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
 {
-/* ToDo: do we want to blur volume closures? */
-
-#ifdef __OSL__
-	if(kg->osl && sc->prim) {
-		OSLShader::bsdf_blur(sc, roughness);
-		return;
-	}
-#endif
-
+	/* ToDo: do we want to blur volume closures? */
 #ifdef __SVM__
 	switch(sc->type) {
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+			bsdf_microfacet_multi_ggx_blur(sc, roughness);
+			break;
 		case CLOSURE_BSDF_MICROFACET_GGX_ID:
 		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
 		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
@@ -353,5 +364,48 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
 #endif
 }
 
+ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
+{	/* Returns whether closure b may be merged into a; the per-type helpers compare the closure parameters. */
+	switch(a->type) { /* dispatches on a's type only; b->type is not inspected here */
+#ifdef __SVM__
+		case CLOSURE_BSDF_TRANSPARENT_ID:
+			return true; /* no parameters are compared for transparent closures */
+		case CLOSURE_BSDF_DIFFUSE_ID:
+		case CLOSURE_BSDF_BSSRDF_ID:
+		case CLOSURE_BSDF_TRANSLUCENT_ID:
+			return bsdf_diffuse_merge(a, b);
+		case CLOSURE_BSDF_OREN_NAYAR_ID:
+			return bsdf_oren_nayar_merge(a, b);
+		case CLOSURE_BSDF_REFLECTION_ID:
+		case CLOSURE_BSDF_REFRACTION_ID:
+		case CLOSURE_BSDF_MICROFACET_GGX_ID:
+		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
+		case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+		case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+			return bsdf_microfacet_merge(a, b);
+		case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+			return bsdf_ashikhmin_velvet_merge(a, b);
+		case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+		case CLOSURE_BSDF_GLOSSY_TOON_ID:
+			return bsdf_toon_merge(a, b);
+		case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+		case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+			return bsdf_hair_merge(a, b);
+#ifdef __VOLUME__
+		case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
+			return volume_henyey_greenstein_merge(a, b);
+#endif
+#endif /* __SVM__ */
+		default: /* kept outside the __SVM__ guard so the function returns a value in every build */
+			return false;
+	}
+}
+
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 8d7d533d6f8..8ed76bea525 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -31,28 +31,30 @@ Other than that, the implementation directly follows the paper.
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_ashikhmin_shirley_setup(ShaderClosure *sc)
+ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
 {
-	sc->data0 = clamp(sc->data0, 1e-4f, 1.0f);
-	sc->data1 = sc->data0;
+	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+	bsdf->alpha_y = bsdf->alpha_x;
 
-	sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
+	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_ashikhmin_shirley_aniso_setup(ShaderClosure *sc)
+ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	sc->data0 = clamp(sc->data0, 1e-4f, 1.0f);
-	sc->data1 = clamp(sc->data1, 1e-4f, 1.0f);
+	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+	bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
 
-	sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
 {
-	sc->data0 = fmaxf(roughness, sc->data0); /* clamp roughness */
-	sc->data1 = fmaxf(roughness, sc->data1);
+	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+
+	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
 ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness)
@@ -62,14 +64,15 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
 
 ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 N = bsdf->N;
 
 	float NdotI = dot(N, I);           /* in Cycles/OSL convention I is omega_out    */
 	float NdotO = dot(N, omega_in);    /* and consequently we use for O omaga_in ;)  */
 
 	float out = 0.0f;
 
-	if(fmaxf(sc->data0, sc->data1) <= 1e-4f)
+	if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	if(NdotI > 0.0f && NdotO > 0.0f) {
@@ -82,8 +85,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c
 		float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
 		/*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
 
-		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0);
-		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1);
+		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
 
 		if(n_x == n_y) {
 			/* isotropic */
@@ -97,12 +100,18 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c
 		else {
 			/* anisotropic */
 			float3 X, Y;
-			make_orthonormals_tangent(N, sc->T, &X, &Y);
+			make_orthonormals_tangent(N, bsdf->T, &X, &Y);
 
 			float HdotX = dot(H, X);
 			float HdotY = dot(H, Y);
-			float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN);
-			float lobe = powf(HdotN, e);
+			float lobe;
+			if(HdotN < 1.0f) {
+				float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN);
+				lobe = powf(HdotN, e);
+			}
+			else {
+				lobe = 1.0f;
+			}
 			float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F);
 			
 			out = NdotO * norm * lobe * pump;
@@ -128,13 +137,14 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, f
 
 ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 N = bsdf->N;
 
 	float NdotI = dot(N, I);
 	if(NdotI > 0.0f) {
 
-		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0);
-		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1);
+		float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+		float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
 
 		/* get x,y basis on the surface for anisotropy */
 		float3 X, Y;
@@ -142,7 +152,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
 		if(n_x == n_y)
 			make_orthonormals(N, &X, &Y);
 		else
-			make_orthonormals_tangent(N, sc->T, &X, &Y);
+			make_orthonormals_tangent(N, bsdf->T, &X, &Y);
 
 		/* sample spherical coords for h in tangent space */
 		float phi;
@@ -193,7 +203,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
 		/* reflect I on H to get omega_in */
 		*omega_in = -I + (2.0f * HdotI) * H;
 
-		if(fmaxf(sc->data0, sc->data1) <= 1e-4f) {
+		if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
 			/* Some high number for MIS. */
 			*pdf = 1e6f;
 			*eval = make_float3(1e6f, 1e6f, 1e6f);
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index f1a26650078..7e0f5a7ec75 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -35,20 +35,38 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_ashikhmin_velvet_setup(ShaderClosure *sc)
+typedef ccl_addr_space struct VelvetBsdf {
+	SHADER_CLOSURE_BASE;
+	/* Parameters of the Ashikhmin velvet closure. */
+	float sigma;     /* as supplied by the caller; compared by bsdf_ashikhmin_velvet_merge() */
+	float invsigma2; /* 1/sigma^2 with sigma floored at 0.01; written by bsdf_ashikhmin_velvet_setup() */
+	float3 N;        /* normal that eval/sample take dot products against */
+} VelvetBsdf;
+
+ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
 {
-	float sigma = fmaxf(sc->data0, 0.01f);
-	sc->data0 = 1.0f/(sigma * sigma); /* m_invsigma2 */
+	float sigma = fmaxf(bsdf->sigma, 0.01f); /* floor of 0.01 keeps the division below away from zero */
+	bsdf->invsigma2 = 1.0f/(sigma * sigma); /* read back by eval/sample; bsdf->sigma itself is left unclamped */
 	
-	sc->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
+	bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
+ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* Two velvet closures merge only when normal and sigma both match. */
+	const VelvetBsdf *lhs = (const VelvetBsdf*)a;
+	const VelvetBsdf *rhs = (const VelvetBsdf*)b;
+	const bool same_normal = isequal_float3(lhs->N, rhs->N);
+	return same_normal && (lhs->sigma == rhs->sigma);
+}
+
 ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float m_invsigma2 = sc->data0;
-	float3 N = sc->N;
+	const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
+	float m_invsigma2 = bsdf->invsigma2;
+	float3 N = bsdf->N;
 
 	float cosNO = dot(N, I);
 	float cosNI = dot(N, omega_in);
@@ -90,8 +108,9 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, c
 
 ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float m_invsigma2 = sc->data0;
-	float3 N = sc->N;
+	const VelvetBsdf *bsdf = (const VelvetBsdf*)sc;
+	float m_invsigma2 = bsdf->invsigma2;
+	float3 N = bsdf->N;
 
 	// we are viewing the surface from above - send a ray out with uniform
 	// distribution over the hemisphere
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 4b29bb096d1..dcd187f9305 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -35,17 +35,31 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct DiffuseBsdf { /* used by both the diffuse and the translucent setup functions */
+	SHADER_CLOSURE_BASE;
+	float3 N; /* normal; the only field bsdf_diffuse_merge() compares */
+} DiffuseBsdf;
+
 /* DIFFUSE */
 
-ccl_device int bsdf_diffuse_setup(ShaderClosure *sc)
+ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_DIFFUSE_ID;
+	bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
+ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* N is the only DiffuseBsdf parameter, so equal normals are enough. */
+	const float3 normal_a = ((const DiffuseBsdf*)a)->N;
+	const float3 normal_b = ((const DiffuseBsdf*)b)->N;
+	return isequal_float3(normal_a, normal_b);
+}
+
 ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
+	float3 N = bsdf->N;
 
 	float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
 	*pdf = cos_pi;
@@ -59,7 +73,8 @@ ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, const floa
 
 ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
+	float3 N = bsdf->N;
 
 	// distribution over the hemisphere
 	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
@@ -80,9 +95,9 @@ ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I,
 
 /* TRANSLUCENT */
 
-ccl_device int bsdf_translucent_setup(ShaderClosure *sc)
+ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_TRANSLUCENT_ID;
+	bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
@@ -93,7 +108,8 @@ ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const f
 
 ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
+	float3 N = bsdf->N;
 
 	float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
 	*pdf = cos_pi;
@@ -102,7 +118,8 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const
 
 ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
+	float3 N = bsdf->N;
 
 	// we are viewing the surface from the right side - send a ray out with cosine
 	// distribution over the hemisphere
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index e0287e7655a..2d982a95fe4 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -35,7 +35,16 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos)
+#ifdef __OSL__
+
+typedef ccl_addr_space struct DiffuseRampBsdf {
+	SHADER_CLOSURE_BASE;
+	/* Only compiled when __OSL__ is defined. */
+	float3 N;       /* normal used for the cosine term in eval/sample */
+	float3 *colors; /* ramp table; bsdf_diffuse_ramp_get_color() reads it as 8 entries. NOTE(review): owner/lifetime not visible here */
+} DiffuseRampBsdf;
+
+ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos)
 {
 	int MAXCOLORS = 8;
 	
@@ -49,11 +58,9 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo
 	return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
 }
 
-ccl_device int bsdf_diffuse_ramp_setup(ShaderClosure *sc)
+ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
-	sc->data0 = 0.0f;
-	sc->data1 = 0.0f;
+	bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
@@ -61,29 +68,31 @@ ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness)
 {
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
+	float3 N = bsdf->N;
 
 	float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
 	*pdf = cos_pi * M_1_PI_F;
-	return bsdf_diffuse_ramp_get_color(sc, colors, cos_pi) * M_1_PI_F;
+	return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
 	return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, const float3 colors[8], float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float3 N = sc->N;
+	const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc;
+	float3 N = bsdf->N;
 
 	// distribution over the hemisphere
 	sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
 
 	if(dot(Ng, *omega_in) > 0.0f) {
-		*eval = bsdf_diffuse_ramp_get_color(sc, colors, *pdf * M_PI_F) * M_1_PI_F;
+		*eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
 #ifdef __RAY_DIFFERENTIALS__
 		*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
 		*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
@@ -95,6 +104,8 @@ ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, const float3 co
 	return LABEL_REFLECT|LABEL_DIFFUSE;
 }
 
+#endif /* __OSL__ */
+
 CCL_NAMESPACE_END
 
 #endif /* __BSDF_DIFFUSE_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_disney_clearcoat.h b/intern/cycles/kernel/closure/bsdf_disney_clearcoat.h
index 377d21939b7..cdb8b3e0cff 100644
--- a/intern/cycles/kernel/closure/bsdf_disney_clearcoat.h
+++ b/intern/cycles/kernel/closure/bsdf_disney_clearcoat.h
@@ -35,22 +35,30 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct DisneyClearcoatBsdf {
+	SHADER_CLOSURE_BASE;
 
-ccl_device int bsdf_disney_clearcoat_setup(ShaderClosure *sc)
+	float clearcoat, clearcoatGloss, clearcoatRoughness;
+	float3 N;
+} DisneyClearcoatBsdf;
+
+ccl_device int bsdf_disney_clearcoat_setup(DisneyClearcoatBsdf *bsdf)
 {
 	/* clearcoat roughness */
-	sc->custom1 = 0.1f * (1.0f - sc->data1/*clearcoatGloss*/) + 0.001f * sc->data1/*clearcoatGloss*/; // lerp(0.1f, 0.001f, sc->data1/*clearcoatGloss*/); // 
+	bsdf->clearcoatRoughness = 0.1f * (1.0f - bsdf->clearcoatGloss) + 0.001f * bsdf->clearcoatGloss; // lerp(0.1f, 0.001f, sc->data1/*clearcoatGloss*/); // 
 
-    sc->type = CLOSURE_BSDF_DISNEY_CLEARCOAT_ID;
+    bsdf->type = CLOSURE_BSDF_DISNEY_CLEARCOAT_ID;
     return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device float3 bsdf_disney_clearcoat_eval_reflect(const ShaderClosure *sc, const float3 I,
     const float3 omega_in, float *pdf)
 {
-	if (sc->data0 > 0.0f) {
-		float alpha = sc->custom1;
-		float3 N = sc->N;
+	const DisneyClearcoatBsdf *bsdf = (const DisneyClearcoatBsdf *)sc;
+
+	if (bsdf->clearcoat > 0.0f) {
+		float alpha = bsdf->clearcoatRoughness;
+		float3 N = bsdf->N;
 
 		if (alpha <= 1e-4f)
 			return make_float3(0.0f, 0.0f, 0.0f);
@@ -81,7 +89,7 @@ ccl_device float3 bsdf_disney_clearcoat_eval_reflect(const ShaderClosure *sc, co
 			float common = D * 0.25f / cosNO;
 
 			float FH = schlick_fresnel(dot(omega_in, m));
-			float3 F = (0.04f * (1.0f - FH) + 1.0f * FH) * 0.25f * sc->data0/*clearcoat*/ * make_float3(1.0f, 1.0f, 1.0f); // lerp(make_float3(0.04f, 0.04f, 0.04f), make_float3(1.0f, 1.0f, 1.0f), FH);
+			float3 F = (0.04f * (1.0f - FH) + 1.0f * FH) * 0.25f * bsdf->clearcoat * make_float3(1.0f, 1.0f, 1.0f); // lerp(make_float3(0.04f, 0.04f, 0.04f), make_float3(1.0f, 1.0f, 1.0f), FH);
 
 			float3 out = F * G * common;
 
@@ -110,9 +118,11 @@ ccl_device int bsdf_disney_clearcoat_sample(const ShaderClosure *sc,
     float3 *eval, float3 *omega_in, float3 *domega_in_dx,
     float3 *domega_in_dy, float *pdf)
 {
-	if (sc->data0 > 0.0f) {
-		float alpha = sc->custom1;
-		float3 N = sc->N;
+	const DisneyClearcoatBsdf *bsdf = (const DisneyClearcoatBsdf *)sc;
+
+	if (bsdf->clearcoat > 0.0f) {
+		float alpha = bsdf->clearcoatRoughness;
+		float3 N = bsdf->N;
 
 		float cosNO = dot(N, I);
 		if (cosNO > 0) {
@@ -168,8 +178,8 @@ ccl_device int bsdf_disney_clearcoat_sample(const ShaderClosure *sc,
 
 						float FH = schlick_fresnel(dot(*omega_in, m));
 						float3 F = make_float3(0.04f, 0.04f, 0.04f) * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(make_float3(0.04f, 0.04f, 0.04f), make_float3(1.0f, 1.0f, 1.0f), FH);
-
-						*eval = G1i * common * F * 0.25f * sc->data0/*clearcoat*/;
+						
+						*eval = G1i * common * F * 0.25f * bsdf->clearcoat;
 					}
 
 #ifdef __RAY_DIFFERENTIALS__
diff --git a/intern/cycles/kernel/closure/bsdf_disney_diffuse.h b/intern/cycles/kernel/closure/bsdf_disney_diffuse.h
index ac5032f3377..faeb00197c7 100644
--- a/intern/cycles/kernel/closure/bsdf_disney_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_disney_diffuse.h
@@ -36,8 +36,15 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct DisneyDiffuseBsdf {
+	SHADER_CLOSURE_BASE;
 
-ccl_device float3 calculate_disney_diffuse_brdf(const ShaderClosure *sc,
+	float roughness;
+	float3 N;
+	float3 baseColor;
+} DisneyDiffuseBsdf;
+
+ccl_device float3 calculate_disney_diffuse_brdf(const DisneyDiffuseBsdf *bsdf,
 	float3 N, float3 V, float3 L, float3 H, float *pdf)
 {
 	float NdotL = max(dot(N, L), 0.0f);
@@ -51,10 +58,10 @@ ccl_device float3 calculate_disney_diffuse_brdf(const ShaderClosure *sc,
 	float LdotH = dot(L, H);
 
 	float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
-    const float Fd90 = 0.5f + 2.0f * LdotH*LdotH * sc->data0/*roughness*/;
+    const float Fd90 = 0.5f + 2.0f * LdotH*LdotH * bsdf->roughness;
 	float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV); //lerp(1.0f, Fd90, FL) * lerp(1.0f, Fd90, FV);
 
-	float3 value = M_1_PI_F * Fd * sc->color0/*baseColor*/;
+	float3 value = M_1_PI_F * Fd * bsdf->baseColor;
 
 	*pdf = M_1_PI_F * 0.5f;
 
@@ -63,22 +70,24 @@ ccl_device float3 calculate_disney_diffuse_brdf(const ShaderClosure *sc,
 	return value;
 }
 
-ccl_device int bsdf_disney_diffuse_setup(ShaderClosure *sc)
+ccl_device int bsdf_disney_diffuse_setup(DisneyDiffuseBsdf *bsdf) /* Tags the closure as Disney diffuse; returns SD_BSDF|SD_BSDF_HAS_EVAL. */
 {
-	sc->type = CLOSURE_BSDF_DISNEY_DIFFUSE_ID;
+	bsdf->type = CLOSURE_BSDF_DISNEY_DIFFUSE_ID; /* only field written here; roughness, N and baseColor stay as supplied */
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device float3 bsdf_disney_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
 	const float3 omega_in, float *pdf)
 {
-	float3 N = normalize(sc->N);
+	const DisneyDiffuseBsdf *bsdf = (const DisneyDiffuseBsdf *)sc;
+
+	float3 N = normalize(bsdf->N);
 	float3 V = I; // outgoing
 	float3 L = omega_in; // incoming
 	float3 H = normalize(L + V);
 
-    if (dot(sc->N, omega_in) > 0.0f) {
-        float3 value = calculate_disney_diffuse_brdf(sc, N, V, L, H, pdf);
+    if (dot(bsdf->N, omega_in) > 0.0f) {
+        float3 value = calculate_disney_diffuse_brdf(bsdf, N, V, L, H, pdf);
 
 		return value;
     }
@@ -99,14 +108,16 @@ ccl_device int bsdf_disney_diffuse_sample(const ShaderClosure *sc,
 	float3 *eval, float3 *omega_in, float3 *domega_in_dx,
 	float3 *domega_in_dy, float *pdf)
 {
-	float3 N = normalize(sc->N);
+	const DisneyDiffuseBsdf *bsdf = (const DisneyDiffuseBsdf *)sc;
+
+	float3 N = normalize(bsdf->N);
 
 	sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
 
 	if (dot(Ng, *omega_in) > 0) {
 		float3 H = normalize(I + *omega_in);
 
-		*eval = calculate_disney_diffuse_brdf(sc, N, I, *omega_in, H, pdf);
+		*eval = calculate_disney_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
 
 #ifdef __RAY_DIFFERENTIALS__
 		// TODO: find a better approximation for the diffuse bounce
diff --git a/intern/cycles/kernel/closure/bsdf_disney_sheen.h b/intern/cycles/kernel/closure/bsdf_disney_sheen.h
index 2806ecfc951..478ecaba324 100644
--- a/intern/cycles/kernel/closure/bsdf_disney_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_disney_sheen.h
@@ -36,14 +36,21 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct DisneySheenBsdf { /* per-closure parameters read by the Disney sheen functions in this header */
+	SHADER_CLOSURE_BASE; /* macro defined elsewhere; supplies at least the `type` member written by bsdf_disney_sheen_setup() */
 
-ccl_device float3 calculate_disney_sheen_brdf(const ShaderClosure *sc,
+	float sheen, sheenTint; /* sheen: lobe strength (0 makes the BRDF return black); sheenTint: blend from white toward the base-colour tint */
+	float3 N; /* normal; eval_reflect and sample normalize it again before use */
+	float3 baseColor, csheen0; /* baseColor is an input; csheen0 is derived from it by bsdf_disney_sheen_setup() */
+} DisneySheenBsdf;
+
+ccl_device float3 calculate_disney_sheen_brdf(const DisneySheenBsdf *bsdf,
 	float3 N, float3 V, float3 L, float3 H, float *pdf)
 {
 	float NdotL = dot(N, L);
 	float NdotV = dot(N, V);
 
-    if (NdotL < 0 || NdotV < 0 || sc->data0 == 0.0f) {
+    if (NdotL < 0 || NdotV < 0 || bsdf->sheen == 0.0f) {
         *pdf = 0.0f;
         return make_float3(0.0f, 0.0f, 0.0f);
     }
@@ -54,36 +61,38 @@ ccl_device float3 calculate_disney_sheen_brdf(const ShaderClosure *sc,
 
 	float FH = schlick_fresnel(LdotH);
 
-	float3 value = FH * sc->data0/*sheen*/ * sc->custom_color0/*csheen0*/;
+	float3 value = FH * bsdf->sheen * bsdf->csheen0;
 
 	value *= NdotL;
 
 	return value;
 }
 
-ccl_device int bsdf_disney_sheen_setup(ShaderClosure *sc)
+ccl_device int bsdf_disney_sheen_setup(DisneySheenBsdf *bsdf) /* Derives csheen0 from baseColor and sheenTint, tags the closure, returns SD_BSDF|SD_BSDF_HAS_EVAL. */
 {
-	float m_cdlum = 0.3f * sc->color0.x + 0.6f * sc->color0.y + 0.1f * sc->color0.z; // luminance approx.
+	float m_cdlum = 0.3f * bsdf->baseColor.x + 0.6f * bsdf->baseColor.y + 0.1f * bsdf->baseColor.z; // approximate luminance of the base colour
 
-	float3 m_ctint = m_cdlum > 0.0f ? sc->color0 / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat
+	float3 m_ctint = m_cdlum > 0.0f ? bsdf->baseColor / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // divide out luminance to isolate hue+saturation; falls back to white when luminance is not positive
 
 	/* csheen0 */
-	sc->custom_color0 = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sc->data1/*sheenTint*/) + m_ctint * sc->data1/*sheenTint*/; // lerp(make_float3(1.0f, 1.0f, 1.0f), m_ctint, sc->data1/*sheenTint*/);
+	bsdf->csheen0 = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - bsdf->sheenTint) + m_ctint * bsdf->sheenTint; // lerp(white, m_ctint, bsdf->sheenTint)
 
-	sc->type = CLOSURE_BSDF_DISNEY_SHEEN_ID;
+	bsdf->type = CLOSURE_BSDF_DISNEY_SHEEN_ID; /* tag the closure as Disney sheen */
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device float3 bsdf_disney_sheen_eval_reflect(const ShaderClosure *sc, const float3 I,
 	const float3 omega_in, float *pdf)
 {
-	float3 N = normalize(sc->N);
+	const DisneySheenBsdf *bsdf = (const DisneySheenBsdf *)sc;
+
+	float3 N = normalize(bsdf->N);
 	float3 V = I; // outgoing
 	float3 L = omega_in; // incoming
 	float3 H = normalize(L + V);
 
-    if (dot(sc->N, omega_in) > 0.0f) {
-        float3 value = calculate_disney_sheen_brdf(sc, N, V, L, H, pdf);
+    if (dot(bsdf->N, omega_in) > 0.0f) {
+        float3 value = calculate_disney_sheen_brdf(bsdf, N, V, L, H, pdf);
 
 		return value;
     }
@@ -104,14 +113,16 @@ ccl_device int bsdf_disney_sheen_sample(const ShaderClosure *sc,
 	float3 *eval, float3 *omega_in, float3 *domega_in_dx,
 	float3 *domega_in_dy, float *pdf)
 {
-	float3 N = normalize(sc->N);
+	const DisneySheenBsdf *bsdf = (const DisneySheenBsdf *)sc;
+
+	float3 N = normalize(bsdf->N);
 
 	sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
 
 	if (dot(Ng, *omega_in) > 0) {
 		float3 H = normalize(I + *omega_in);
 
-		*eval = calculate_disney_sheen_brdf(sc, N, I, *omega_in, H, pdf);
+		*eval = calculate_disney_sheen_brdf(bsdf, N, I, *omega_in, H, pdf);
 
 #ifdef __RAY_DIFFERENTIALS__
 		// TODO: find a better approximation for the diffuse bounce
diff --git a/intern/cycles/kernel/closure/bsdf_disney_specular.h b/intern/cycles/kernel/closure/bsdf_disney_specular.h
index cbcfc4106c5..7e8139fd040 100644
--- a/intern/cycles/kernel/closure/bsdf_disney_specular.h
+++ b/intern/cycles/kernel/closure/bsdf_disney_specular.h
@@ -35,40 +35,47 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct DisneySpecularBsdf { /* per-closure parameters read by the Disney specular functions in this header */
+	SHADER_CLOSURE_BASE; /* macro defined elsewhere; supplies at least the `type` member written by bsdf_disney_specular_setup() */
 
-ccl_device int bsdf_disney_specular_setup(ShaderClosure *sc)
+	float specular, specularTint, roughness, metallic, anisotropic, alpha_x, alpha_y, rough_g; /* alpha_x, alpha_y and rough_g are derived in bsdf_disney_specular_setup(); the others are inputs */
+	float3 N, T; /* normal and tangent; T is passed to make_orthonormals_tangent() when alpha_x != alpha_y */
+	float3 baseColor, cspec0; /* cspec0 is derived in setup and is the FH = 0 end of the Fresnel blend in eval/sample */
+} DisneySpecularBsdf;
+
+ccl_device int bsdf_disney_specular_setup(DisneySpecularBsdf *bsdf) /* Derives cspec0, alpha_x, alpha_y and rough_g from the input fields, tags the closure, returns SD_BSDF|SD_BSDF_HAS_EVAL. */
 {
-	float m_cdlum = 0.3f * sc->color0.x + 0.6f * sc->color0.y + 0.1f * sc->color0.z; // luminance approx.
+	float m_cdlum = 0.3f * bsdf->baseColor.x + 0.6f * bsdf->baseColor.y + 0.1f * bsdf->baseColor.z; // approximate luminance of the base colour
 
-	float3 m_ctint = m_cdlum > 0.0f ? sc->color0/*baseColor*/ / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat
+	float3 m_ctint = m_cdlum > 0.0f ? bsdf->baseColor / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // divide out luminance to isolate hue+saturation; falls back to white when luminance is not positive
 
-	float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sc->data2/*specularTint*/) + m_ctint * sc->data2/*specularTint*/; // lerp(make_float3(1.0f, 1.0f, 1.0f), m_ctint, sc->data2/*specularTint*/);
-	sc->custom_color0/*cspec0*/ = (sc->data1/*specular*/ * 0.08f * tmp_col) * (1.0f - sc->data0/*metallic*/) + sc->color0/*baseColor*/ * sc->data0/*metallic*/; // lerp(sc->data1/*specular*/ * 0.08f * tmp_col, sc->color0/*baseColor*/, sc->data0/*metallic*/);
+	float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - bsdf->specularTint) + m_ctint * bsdf->specularTint; // lerp(white, m_ctint, bsdf->specularTint)
+	bsdf->cspec0 = (bsdf->specular * 0.08f * tmp_col) * (1.0f - bsdf->metallic) + bsdf->baseColor * bsdf->metallic; // lerp(bsdf->specular * 0.08f * tmp_col, bsdf->baseColor, bsdf->metallic)
 
-	float aspect = safe_sqrtf(1.0f - sc->data4/*anisotropic*/ * 0.9f);
-	float r2 = sqr(sc->data3/*roughness*/);
+	float aspect = safe_sqrtf(1.0f - bsdf->anisotropic * 0.9f); /* equals 1 when anisotropic is 0, which makes alpha_x == alpha_y below */
+	float r2 = sqr(bsdf->roughness); /* squared roughness is the base value for both alpha axes */
 	
 	/* ax */
-	sc->custom1 = fmaxf(0.001f, r2 / aspect);
+	bsdf->alpha_x = fmaxf(0.001f, r2 / aspect); /* never below 0.001 */
 
 	/* ay */
-	sc->custom2 = fmaxf(0.001f, r2 * aspect);
+	bsdf->alpha_y = fmaxf(0.001f, r2 * aspect); /* never below 0.001 */
 
 	/* rough_g */
-	sc->custom3 = sqr(sc->data3/*roughness*/ * 0.5f + 0.5f);
+	bsdf->rough_g = sqr(bsdf->roughness * 0.5f + 0.5f); /* (roughness/2 + 1/2)^2 */
 
-    sc->type = CLOSURE_BSDF_DISNEY_SPECULAR_ID;
+    bsdf->type = CLOSURE_BSDF_DISNEY_SPECULAR_ID; /* tag the closure as Disney specular */
     return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, const float3 I,
     const float3 omega_in, float *pdf)
 {
-	float alpha_x = sc->custom1;
-	float alpha_y = sc->custom2;
-	float3 N = sc->N;
+	const DisneySpecularBsdf *bsdf = (const DisneySpecularBsdf *)sc;
+
+	float3 N = bsdf->N;
 
-	if (fmaxf(alpha_x, alpha_y) <= 1e-4f)
+	if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	float cosNO = dot(N, I);
@@ -77,10 +84,10 @@ ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, con
 	if (cosNI > 0 && cosNO > 0) {
 		/* get half vector */
 		float3 m = normalize(omega_in + I);
-		float alpha2 = alpha_x * alpha_y;
+		float alpha2 = bsdf->alpha_x * bsdf->alpha_y;
 		float D, G1o, G1i;
 
-		if (alpha_x == alpha_y) {
+		if (bsdf->alpha_x == bsdf->alpha_y) {
 			/* isotropic
 			 * eq. 20: (F*G*D)/(4*in*on)
 			 * eq. 33: first we calculate D(m) */
@@ -97,12 +104,12 @@ ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, con
 		else {
 			/* anisotropic */
             float3 X, Y, Z = N;
-            make_orthonormals_tangent(Z, sc->T, &X, &Y);
+            make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
             // distribution
             float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
-            float slope_x = -local_m.x/(local_m.z*alpha_x);
-            float slope_y = -local_m.y/(local_m.z*alpha_y);
+            float slope_x = -local_m.x/(local_m.z*bsdf->alpha_x);
+            float slope_y = -local_m.y/(local_m.z*bsdf->alpha_y);
             float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
 
             float cosThetaM = local_m.z;
@@ -116,7 +123,7 @@ ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, con
             float cosPhiO = dot(I, X);
             float sinPhiO = dot(I, Y);
 
-            float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
+            float alphaO2 = (cosPhiO*cosPhiO)*(bsdf->alpha_x*bsdf->alpha_x) + (sinPhiO*sinPhiO)*(bsdf->alpha_y*bsdf->alpha_y);
             alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
 
             G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
@@ -125,7 +132,7 @@ ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, con
             float cosPhiI = dot(omega_in, X);
             float sinPhiI = dot(omega_in, Y);
 
-            float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+			float alphaI2 = (cosPhiI*cosPhiI)*(bsdf->alpha_x*bsdf->alpha_x) + (sinPhiI*sinPhiI)*(bsdf->alpha_y*bsdf->alpha_y);
             alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
 
             G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
@@ -137,7 +144,7 @@ ccl_device float3 bsdf_disney_specular_eval_reflect(const ShaderClosure *sc, con
 		float common = D * 0.25f / cosNO;
 
         float FH = schlick_fresnel(dot(omega_in, m));
-		float3 F = sc->custom_color0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+		float3 F = bsdf->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
 
 		float3 out = F * G * common;
 
@@ -166,18 +173,18 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
     float3 *eval, float3 *omega_in, float3 *domega_in_dx,
     float3 *domega_in_dy, float *pdf)
 {
-	float alpha_x = sc->custom1;
-	float alpha_y = sc->custom2;
-	float3 N = sc->N;
+	const DisneySpecularBsdf *bsdf = (const DisneySpecularBsdf *)sc;
+
+	float3 N = bsdf->N;
 
 	float cosNO = dot(N, I);
 	if(cosNO > 0) {
 		float3 X, Y, Z = N;
 
-		if(alpha_x == alpha_y)
+		if (bsdf->alpha_x == bsdf->alpha_y)
 			make_orthonormals(Z, &X, &Y);
 		else
-			make_orthonormals_tangent(Z, sc->T, &X, &Y);
+			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
 		/* importance sampling with distribution of visible normals. vectors are
 		 * transformed to local space before and after */
@@ -186,7 +193,7 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
         float3 m;
 		float G1o;
 
-		local_m = importance_sample_microfacet_stretched(local_I, alpha_x, alpha_y,
+		local_m = importance_sample_microfacet_stretched(local_I, bsdf->alpha_x, bsdf->alpha_y,
 			    randu, randv, false, &G1o);
 
 		m = X*local_m.x + Y*local_m.y + Z*local_m.z;
@@ -200,7 +207,7 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
             *omega_in = 2 * cosMO * m - I;
 
             if(dot(Ng, *omega_in) > 0) {
-                if(fmaxf(alpha_x, alpha_y) <= 1e-4f) {
+				if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
                     /* some high number for MIS */
                     *pdf = 1e6f;
                     *eval = make_float3(1e6f, 1e6f, 1e6f);
@@ -208,10 +215,10 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
                 else {
                     /* microfacet normal is visible to this ray */
                     /* eq. 33 */
-                    float alpha2 = alpha_x * alpha_y;
+					float alpha2 = bsdf->alpha_x * bsdf->alpha_y;
                     float D, G1i;
 
-                    if(alpha_x == alpha_y) {
+					if (bsdf->alpha_x == bsdf->alpha_y) {
                         float cosThetaM2 = cosThetaM * cosThetaM;
                         float cosThetaM4 = cosThetaM2 * cosThetaM2;
                         float tanThetaM2 = 1/(cosThetaM2) - 1;
@@ -225,8 +232,8 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
                     }
                     else {
                         /* anisotropic distribution */
-                        float slope_x = -local_m.x/(local_m.z*alpha_x);
-                        float slope_y = -local_m.y/(local_m.z*alpha_y);
+						float slope_x = -local_m.x / (local_m.z*bsdf->alpha_x);
+						float slope_y = -local_m.y / (local_m.z*bsdf->alpha_y);
                         float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
 
                         float cosThetaM = local_m.z;
@@ -242,7 +249,7 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
                         float cosPhiI = dot(*omega_in, X);
                         float sinPhiI = dot(*omega_in, Y);
 
-                        float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+						float alphaI2 = (cosPhiI*cosPhiI)*(bsdf->alpha_x*bsdf->alpha_x) + (sinPhiI*sinPhiI)*(bsdf->alpha_y*bsdf->alpha_y);
                         alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
 
                         G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
@@ -253,7 +260,7 @@ ccl_device int bsdf_disney_specular_sample(const ShaderClosure *sc,
                     *pdf = common;
 
 					float FH = schlick_fresnel(dot(*omega_in, m));
-					float3 F = sc->custom_color0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+					float3 F = bsdf->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
 
                     *eval = G1i * common * F;
                 }
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 1e81617a7d3..bede5f45e7e 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -35,29 +35,49 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct HairBsdf { /* per-closure parameters shared by the hair reflection and transmission closures */
+	SHADER_CLOSURE_BASE; /* macro defined elsewhere; supplies at least the `type` member written by the hair setup functions */
 
-ccl_device int bsdf_hair_reflection_setup(ShaderClosure *sc)
+	float3 T; /* tangent; eval/sample read it as Tg and build their local frame around it */
+	float roughness1; /* clamped to [0.001, 1] by the setup functions */
+	float roughness2; /* clamped to [0.001, 1] by the setup functions */
+	float offset; /* read by eval/sample; NOTE(review): presumably an angular offset - its use is outside this view, confirm */
+} HairBsdf;
+
+ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf) /* Tags the closure as hair reflection, clamps both roughness values, returns SD_BSDF|SD_BSDF_HAS_EVAL. */
 {
-	sc->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
-	sc->data0 = clamp(sc->data0, 0.001f, 1.0f);
-	sc->data1 = clamp(sc->data1, 0.001f, 1.0f);
+	bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
+	bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); /* keep within [0.001, 1] */
+	bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); /* keep within [0.001, 1] */
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc)
+ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf) /* Tags the closure as hair transmission, clamps both roughness values, returns SD_BSDF|SD_BSDF_HAS_EVAL. */
 {
-	sc->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
-	sc->data0 = clamp(sc->data0, 0.001f, 1.0f);
-	sc->data1 = clamp(sc->data1, 0.001f, 1.0f);
+	bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
+	bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); /* keep within [0.001, 1] */
+	bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); /* keep within [0.001, 1] */
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 
+ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* Two hair closures are mergeable only when tangent, both roughness values and offset all match exactly. */
+	const HairBsdf *lhs = (const HairBsdf*)a, *rhs = (const HairBsdf*)b;
+	if(!isequal_float3(lhs->T, rhs->T))
+		return false;
+	if(lhs->roughness1 != rhs->roughness1 || lhs->roughness2 != rhs->roughness2)
+		return false;
+	return lhs->offset == rhs->offset;
+}
+
 ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float offset = sc->data2;
-	float3 Tg = sc->T;
-	float roughness1 = sc->data0;
-	float roughness2 = sc->data1;
+	const HairBsdf *bsdf = (const HairBsdf*)sc;
+	float offset = bsdf->offset;
+	float3 Tg = bsdf->T;
+	float roughness1 = bsdf->roughness1;
+	float roughness2 = bsdf->roughness2;
 
 	float Iz = dot(Tg, I);
 	float3 locy = normalize(I - Tg * Iz);
@@ -107,10 +127,11 @@ ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, co
 
 ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float offset = sc->data2;
-	float3 Tg = sc->T;
-	float roughness1 = sc->data0;
-	float roughness2 = sc->data1;
+	const HairBsdf *bsdf = (const HairBsdf*)sc;
+	float offset = bsdf->offset;
+	float3 Tg = bsdf->T;
+	float roughness1 = bsdf->roughness1;
+	float roughness2 = bsdf->roughness2;
 	float Iz = dot(Tg, I);
 	float3 locy = normalize(I - Tg * Iz);
 
@@ -148,10 +169,11 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
 
 ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float offset = sc->data2;
-	float3 Tg = sc->T;
-	float roughness1 = sc->data0;
-	float roughness2 = sc->data1;
+	const HairBsdf *bsdf = (const HairBsdf*)sc;
+	float offset = bsdf->offset;
+	float3 Tg = bsdf->T;
+	float roughness1 = bsdf->roughness1;
+	float roughness2 = bsdf->roughness2;
 	float Iz = dot(Tg, I);
 	float3 locy = normalize(I - Tg * Iz);
 	float3 locx = cross(locy, Tg);
@@ -198,10 +220,11 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f
 
 ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float offset = sc->data2;
-	float3 Tg = sc->T;
-	float roughness1 = sc->data0;
-	float roughness2 = sc->data1;
+	const HairBsdf *bsdf = (const HairBsdf*)sc;
+	float offset = bsdf->offset;
+	float3 Tg = bsdf->T;
+	float roughness1 = bsdf->roughness1;
+	float roughness2 = bsdf->roughness2;
 	float Iz = dot(Tg, I);
 	float3 locy = normalize(I - Tg * Iz);
 	float3 locx = cross(locy, Tg);
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 2a0e8f62e7c..7173ecd64de 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -35,6 +35,20 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct MicrofacetExtra { /* optional side data reached through MicrofacetBsdf::extra */
+	float3 color, cspec0; /* color: compared by bsdf_microfacet_merge(), other uses are outside this view; cspec0: FH = 0 end of the Fresnel blend, saturated by the GGX setup functions */
+	bool use_fresnel; /* written by the GGX setup functions; when true the GGX eval/sample code applies the Schlick Fresnel blend */
+} MicrofacetExtra;
+
+typedef ccl_addr_space struct MicrofacetBsdf { /* per-closure parameters shared by the GGX and Beckmann closures in this header */
+	SHADER_CLOSURE_BASE; /* macro defined elsewhere; supplies at least the `type` member that the setup functions write and eval/sample test */
+
+	float alpha_x, alpha_y, ior; /* roughness along the two axes (equal means isotropic); ior is read as m_eta by the transmit/refraction code */
+	MicrofacetExtra *extra; /* may be NULL - every use in this header checks it first */
+	float3 T; /* tangent; passed to make_orthonormals_tangent() on the anisotropic path */
+	float3 N; /* normal used for cosNO and as the Z axis of the local frame */
+} MicrofacetBsdf;
+
 /* Beckmann and GGX microfacet importance sampling. */
 
 ccl_device_inline void microfacet_beckmann_sample_slopes(
@@ -233,50 +247,92 @@ ccl_device_inline float3 microfacet_sample_stretched(
  * Anisotropy is only supported for reflection currently, but adding it for
  * transmission is just a matter of copying code from reflection if needed. */
 
-ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
+ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false) /* Isotropic GGX setup. NOTE(review): default arguments are C++ syntax - confirm every kernel target accepts them. */
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = sc->data0; /* alpha_y */
+	if (bsdf->extra) { /* extra is optional; skip the Fresnel bookkeeping when it is absent */
+		bsdf->extra->use_fresnel = use_fresnel; /* read later by the GGX eval/sample code */
+
+		bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); /* saturate() presumably clamps to [0, 1] - defined elsewhere */
+		bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+		bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+	}
+
+	bsdf->alpha_x = saturate(bsdf->alpha_x);
+	bsdf->alpha_y = bsdf->alpha_x; /* isotropic: both axes share the same roughness */
 	
-	sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc)
+ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b)
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = saturate(sc->data1); /* alpha_y */
+	const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf*)a;
+	const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf*)b;
+	/* Mergeable only if every field eval/sample reads matches; `extra` must be NULL in both or equal in both (incl. the Fresnel inputs cspec0/use_fresnel). */
+	return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (isequal_float3(bsdf_a->T, bsdf_b->T)) &&
+	       (bsdf_a->alpha_x == bsdf_b->alpha_x) && (bsdf_a->alpha_y == bsdf_b->alpha_y) &&
+	       (bsdf_a->ior == bsdf_b->ior) &&
+	       ((!bsdf_a->extra && !bsdf_b->extra) ||
+	        ((bsdf_a->extra && bsdf_b->extra) &&
+	         (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) &&
+	         (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) &&
+	         (bsdf_a->extra->use_fresnel == bsdf_b->extra->use_fresnel)));
+}
+
+ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false) /* Anisotropic GGX setup. NOTE(review): default arguments are C++ syntax - confirm every kernel target accepts them. */
+{
+	if (bsdf->extra) { /* extra is optional; skip the Fresnel bookkeeping when it is absent */
+		bsdf->extra->use_fresnel = use_fresnel; /* read later by the GGX eval/sample code */
+
+		bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); /* saturate() presumably clamps to [0, 1] - defined elsewhere */
+		bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+		bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+	}
+
+	bsdf->alpha_x = saturate(bsdf->alpha_x); /* the two axes are saturated independently, so they may differ */
+	bsdf->alpha_y = saturate(bsdf->alpha_y);
 	
-	sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc)
+ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false) /* GGX refraction setup. NOTE(review): default arguments are C++ syntax - confirm every kernel target accepts them. */
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = sc->data0; /* alpha_y */
+	if (bsdf->extra) { /* extra is optional; skip the Fresnel bookkeeping when it is absent */
+		bsdf->extra->use_fresnel = use_fresnel; /* read later by the GGX eval/sample code */
+
+		bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); /* saturate() presumably clamps to [0, 1] - defined elsewhere */
+		bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+		bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+	}
+
+	bsdf->alpha_x = saturate(bsdf->alpha_x);
+	bsdf->alpha_y = bsdf->alpha_x; /* isotropic: both axes share the same roughness */
 
-	sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; /* eval/sample test this id to choose the refractive path */
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
 {
-	sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */
-	sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */
+	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+	/* Raise each roughness axis to at least `roughness`; values already above it are left unchanged. */
+	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
 ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+	float3 N = bsdf->N;
 
-	if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+	if(m_refractive || alpha_x*alpha_y <= 1e-7f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	float cosNO = dot(N, I);
@@ -305,7 +361,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
 		else {
 			/* anisotropic */
 			float3 X, Y, Z = N;
-			make_orthonormals_tangent(Z, sc->T, &X, &Y);
+			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
 			/* distribution */
 			float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
@@ -343,7 +399,17 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
 
 		/* eq. 20 */
 		float common = D * 0.25f / cosNO;
-		float out = G * common;
+
+		float3 F = make_float3(1.0f, 1.0f, 1.0f);
+		if (bsdf->extra) {
+			if (bsdf->extra->use_fresnel) {
+				float FH = schlick_fresnel(dot(omega_in, m));
+
+				F = bsdf->extra->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+			}
+		}
+
+		float3 out = F * G * common;
 
 		/* eq. 2 in distribution of visible normals sampling
 		 * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
@@ -353,7 +419,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
 		 * pdf = pm * 0.25 / dot(m, I); */
 		*pdf = G1o * common;
 
-		return make_float3(out, out, out);
+		return out;
 	}
 
 	return make_float3(0.0f, 0.0f, 0.0f);
@@ -361,13 +427,14 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
 
 ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	float m_eta = sc->data2;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = sc->N;
-
-	if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	float m_eta = bsdf->ior;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+	float3 N = bsdf->N;
+
+	if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	float cosNO = dot(N, I);
@@ -407,18 +474,29 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con
 	/* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
 	 * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
 	float common = D * (m_eta * m_eta) / (cosNO * Ht2);
-	float out = G * fabsf(cosHI * cosHO) * common;
+
+	float3 F = make_float3(1.0f, 1.0f, 1.0f);
+	if (bsdf->extra) {
+		if (bsdf->extra->use_fresnel) {
+			float FH = schlick_fresnel(dot(omega_in, Ht));
+
+			F = bsdf->extra->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+		}
+	}
+
+	float3 out = G * fabsf(cosHI * cosHO) * common * F;
 	*pdf = G1o * fabsf(cosHO * cosHI) * common;
 
-	return make_float3(out, out, out);
+	return out;
 }
 
 ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+	float3 N = bsdf->N;
 
 	float cosNO = dot(N, I);
 	if(cosNO > 0) {
@@ -427,7 +505,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 		if(alpha_x == alpha_y)
 			make_orthonormals(Z, &X, &Y);
 		else
-			make_orthonormals_tangent(Z, sc->T, &X, &Y);
+			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
 		/* importance sampling with distribution of visible normals. vectors are
 		 * transformed to local space before and after */
@@ -450,7 +528,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 				*omega_in = 2 * cosMO * m - I;
 
 				if(dot(Ng, *omega_in) > 0) {
-					if(fmaxf(alpha_x, alpha_y) <= 1e-4f) {
+					if(alpha_x*alpha_y <= 1e-7f) {
 						/* some high number for MIS */
 						*pdf = 1e6f;
 						*eval = make_float3(1e6f, 1e6f, 1e6f);
@@ -502,10 +580,18 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 
 						/* see eval function for derivation */
 						float common = (G1o * D) * 0.25f / cosNO;
-						float out = G1i * common;
 						*pdf = common;
 
-						*eval = make_float3(out, out, out);
+						float3 F = make_float3(1.0f, 1.0f, 1.0f);
+						if (bsdf->extra) {
+							if (bsdf->extra->use_fresnel) {
+								float FH = schlick_fresnel(dot(*omega_in, m));
+
+								F = bsdf->extra->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+							}
+						}
+
+						*eval = G1i * common * F;
 					}
 
 #ifdef __RAY_DIFFERENTIALS__
@@ -522,7 +608,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 #ifdef __RAY_DIFFERENTIALS__
 			float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-			float m_eta = sc->data2, fresnel;
+			float m_eta = bsdf->ior, fresnel;
 			bool inside;
 
 			fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
@@ -539,7 +625,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 				*domega_in_dy = dTdy;
 #endif
 
-				if(fmaxf(alpha_x, alpha_y) <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
+				if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
 					/* some high number for MIS */
 					*pdf = 1e6f;
 					*eval = make_float3(1e6f, 1e6f, 1e6f);
@@ -566,10 +652,20 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
 
 					/* see eval function for derivation */
 					float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
-					float out = G1i * fabsf(cosHI * cosHO) * common;
+
+					float3 F = make_float3(1.0f, 1.0f, 1.0f);
+					if (bsdf->extra) {
+						if (bsdf->extra->use_fresnel) {
+							float FH = schlick_fresnel(dot(*omega_in, m));
+
+							F = bsdf->extra->cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; // lerp(sc->custom_color0, make_float3(1.0f, 1.0f, 1.0f), FH);
+						}
+					}
+
+					float3 out = G1i * fabsf(cosHI * cosHO) * common * F;
 					*pdf = cosHO * fabsf(cosHI) * common;
 
-					*eval = make_float3(out, out, out);
+					*eval = out;
 				}
 			}
 		}
@@ -582,47 +678,80 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
  * Microfacet Models for Refraction through Rough Surfaces
  * B. Walter, S. R. Marschner, H. Li, K. E. Torrance, EGSR 2007 */
 
-ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc)
+ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = sc->data0; /* alpha_y */
+	bsdf->alpha_x = saturate(bsdf->alpha_x); /* only alpha_x is read as input */
+	bsdf->alpha_y = bsdf->alpha_x; /* isotropic closure: alpha_y mirrors the saturated alpha_x */
 
-	sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc)
+ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf)
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = saturate(sc->data1); /* alpha_y */
+	bsdf->alpha_x = saturate(bsdf->alpha_x); /* anisotropic closure: each axis keeps its own saturated roughness */
+	bsdf->alpha_y = saturate(bsdf->alpha_y);
 
-	sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc)
+ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	sc->data0 = saturate(sc->data0); /* alpha_x */
-	sc->data1 = sc->data0; /* alpha_y */
+	bsdf->alpha_x = saturate(bsdf->alpha_x); /* only alpha_x is read as input */
+	bsdf->alpha_y = bsdf->alpha_x; /* refraction closure is isotropic: alpha_y mirrors alpha_x */
 
-	sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+	bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
 {
-	sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */
-	sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */
+	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+	/* Raise both roughness values to at least `roughness`; larger existing values are kept. */
+	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+}
+
+ccl_device_inline float bsdf_beckmann_G1(float alpha, float cos_n)
+{
+	/* Rational fit of the Beckmann G1 term in a = 1 / (alpha * tan(theta)); exactly 1 once a exceeds 1.6. */
+	const float cos2 = cos_n * cos_n;
+	const float inv_a = alpha * safe_sqrtf((1.0f - cos2) / cos2);
+	if(!(inv_a < 0.625f)) {
+		const float a = 1.0f / inv_a;
+		return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+	}
+	return 1.0f;
+}
+
+ccl_device_inline float bsdf_beckmann_aniso_G1(float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi)
+{
+	const float cos2_n = cos_n * cos_n;
+	const float sin2_phi = sin_phi * sin_phi;
+	const float cos2_phi = cos_phi * cos_phi;
+	const float ax2 = alpha_x * alpha_x;
+	const float ay2 = alpha_y * alpha_y;
+	/* Effective squared roughness along the azimuth: weighted mean of ax2 and ay2. */
+	const float alpha2 = (cos2_phi*ax2 + sin2_phi*ay2) / (cos2_phi + sin2_phi);
+	const float inv_a = safe_sqrtf(alpha2 * (1 - cos2_n) / cos2_n);
+	if(!(inv_a < 0.625f)) {
+		const float a = 1.0f / inv_a;
+		return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f);
+	}
+
+	return 1.0f;
 }
 
 ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+	float3 N = bsdf->N;
 
-	if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+	if(m_refractive || alpha_x*alpha_y <= 1e-7f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	float cosNO = dot(N, I);
@@ -646,15 +775,13 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
 			D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
 
 			/* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
-			float ao = 1 / (alpha_x * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
-			float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
-			G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
-			G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+			G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+			G1i = bsdf_beckmann_G1(alpha_x, cosNI);
 		}
 		else {
 			/* anisotropic */
 			float3 X, Y, Z = N;
-			make_orthonormals_tangent(Z, sc->T, &X, &Y);
+			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
 			/* distribution */
 			float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
@@ -668,24 +795,8 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
 			D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
 
 			/* G1(i,m) and G1(o,m) */
-			float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
-			float cosPhiO = dot(I, X);
-			float sinPhiO = dot(I, Y);
-
-			float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
-			alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
-
-			float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
-			float cosPhiI = dot(omega_in, X);
-			float sinPhiI = dot(omega_in, Y);
-
-			float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
-			alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
-			float ao = 1 / (safe_sqrtf(alphaO2 * tanThetaO2));
-			float ai = 1 / (safe_sqrtf(alphaI2 * tanThetaI2));
-			G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
-			G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+			G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
+			G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
 		}
 
 		float G = G1o * G1i;
@@ -710,13 +821,14 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
 
 ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	float m_eta = sc->data2;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = sc->N;
-
-	if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	float m_eta = bsdf->ior;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+	float3 N = bsdf->N;
+
+	if(!m_refractive || alpha_x*alpha_y <= 1e-7f)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
 	float cosNO = dot(N, I);
@@ -740,10 +852,8 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
 	float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 *  cosThetaM4);
 
 	/* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
-	float ao = 1 / (alpha_x * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
-	float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
-	float G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
-	float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+	float G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+	float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
 	float G = G1o * G1i;
 
 	/* probability */
@@ -763,10 +873,11 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
 
 ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float alpha_x = sc->data0;
-	float alpha_y = sc->data1;
-	bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float alpha_x = bsdf->alpha_x;
+	float alpha_y = bsdf->alpha_y;
+	bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+	float3 N = bsdf->N;
 
 	float cosNO = dot(N, I);
 	if(cosNO > 0) {
@@ -775,7 +886,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 		if(alpha_x == alpha_y)
 			make_orthonormals(Z, &X, &Y);
 		else
-			make_orthonormals_tangent(Z, sc->T, &X, &Y);
+			make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
 
 		/* importance sampling with distribution of visible normals. vectors are
 		 * transformed to local space before and after */
@@ -798,7 +909,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 				*omega_in = 2 * cosMO * m - I;
 
 				if(dot(Ng, *omega_in) > 0) {
-					if(fmaxf(alpha_x, alpha_y) <= 1e-4f) {
+					if(alpha_x*alpha_y <= 1e-7f) {
 						/* some high number for MIS */
 						*pdf = 1e6f;
 						*eval = make_float3(1e6f, 1e6f, 1e6f);
@@ -820,8 +931,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 							float cosNI = dot(N, *omega_in);
 
 							/* eq. 26, 27: now calculate G1(i,m) */
-							float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
-							G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+							G1i = bsdf_beckmann_G1(alpha_x, cosNI);
 						}
 						else {
 							/* anisotropic distribution */
@@ -836,16 +946,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 							D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
 
 							/* G1(i,m) */
-							float cosNI = dot(N, *omega_in);
-							float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
-							float cosPhiI = dot(*omega_in, X);
-							float sinPhiI = dot(*omega_in, Y);
-
-							float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
-							alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
-
-							float ai = 1 / (safe_sqrtf(alphaI2 * tanThetaI2));
-							G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+							G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y));
 						}
 
 						float G = G1o * G1i;
@@ -872,7 +973,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 #ifdef __RAY_DIFFERENTIALS__
 			float3 dRdx, dRdy, dTdx, dTdy;
 #endif
-			float m_eta = sc->data2, fresnel;
+			float m_eta = bsdf->ior, fresnel;
 			bool inside;
 
 			fresnel = fresnel_dielectric(m_eta, m, I, &R, &T,
@@ -889,7 +990,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 				*domega_in_dy = dTdy;
 #endif
 
-				if(fmaxf(alpha_x, alpha_y) <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
+				if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
 					/* some high number for MIS */
 					*pdf = 1e6f;
 					*eval = make_float3(1e6f, 1e6f, 1e6f);
@@ -906,8 +1007,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
 					float cosNI = dot(N, *omega_in);
 
 					/* eq. 26, 27: now calculate G1(i,m) */
-					float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
-					float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+					float G1i = bsdf_beckmann_G1(alpha_x, cosNI);
 					float G = G1o * G1i;
 
 					/* eq. 21 */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
new file mode 100644
index 00000000000..38c645cba87
--- /dev/null
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -0,0 +1,507 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Most of the code is based on the supplemental implementations from https://eheitzresearch.wordpress.com/240-2/. */
+
+/* === GGX Microfacet distribution functions === */
+
+/* Isotropic GGX microfacet distribution for roughness alpha; only wm.z is used and the denominator is clamped to 1e-7. */
+ccl_device_inline float D_ggx(float3 wm, float alpha)
+{
+	const float cos2 = wm.z * wm.z;
+	const float alpha2 = alpha * alpha;
+	const float denom = (1.0f - cos2) + alpha2 * cos2;
+	return alpha2 / max(M_PI_F * denom*denom, 1e-7f);
+}
+
+/* Anisotropic GGX microfacet distribution with per-axis roughness alpha.x / alpha.y; the denominator is clamped to 1e-7. */
+ccl_device_inline float D_ggx_aniso(const float3 wm, const float2 alpha)
+{
+	const float sx = -wm.x/alpha.x;
+	const float sy = -wm.y/alpha.y;
+	const float norm = wm.z*wm.z + sx*sx + sy*sy;
+
+	return 1.0f / max(M_PI_F * norm*norm * alpha.x*alpha.y, 1e-7f);
+}
+
+/* Sample slope distribution (based on page 14 of the supplemental implementation). Returns a slope pair (x, y) drawn from the two random numbers in randU for the incidence cosine cosI. */
+ccl_device_inline float2 mf_sampleP22_11(const float cosI, const float2 randU)
+{
+	if(cosI > 0.9999f || cosI < 1e-6f) { /* special case: radius/angle sampling that does not depend on cosI */
+		const float r = sqrtf(randU.x / (1.0f - randU.x));
+		const float phi = M_2PI_F * randU.y;
+		return make_float2(r*cosf(phi), r*sinf(phi));
+	}
+
+	const float sinI = sqrtf(1.0f - cosI*cosI);
+	const float tanI = sinI/cosI;
+	const float projA = 0.5f * (cosI + 1.0f);
+	if(projA < 0.0001f)
+		return make_float2(0.0f, 0.0f);
+	const float A = 2.0f*randU.x*projA / cosI - 1.0f;
+	float tmp = A*A-1.0f;
+	if(fabsf(tmp) < 1e-7f) /* tmp is inverted right below; bail out instead of dividing by ~0 */
+		return make_float2(0.0f, 0.0f);
+	tmp = 1.0f / tmp;
+	const float D = safe_sqrtf(tanI*tanI*tmp*tmp - (A*A-tanI*tanI)*tmp);
+	/* Two candidate roots for the x slope; the choice depends on the sign of A and on the 1/tanI bound. */
+	const float slopeX2 = tanI*tmp + D;
+	const float slopeX = (A < 0.0f || slopeX2 > 1.0f/tanI)? (tanI*tmp - D) : slopeX2;
+	/* Fold randU.y around 0.5 into U2; the half it came from selects the sign of the y slope at the end. */
+	float U2;
+	if(randU.y >= 0.5f)
+		U2 = 2.0f*(randU.y - 0.5f);
+	else
+		U2 = 2.0f*(0.5f - randU.y);
+	const float z = (U2*(U2*(U2*0.27385f-0.73369f)+0.46341f)) / (U2*(U2*(U2*0.093073f+0.309420f)-1.0f)+0.597999f);
+	const float slopeY = z * sqrtf(1.0f + slopeX*slopeX);
+
+	if(randU.y >= 0.5f)
+		return make_float2(slopeX, slopeY);
+	else
+		return make_float2(slopeX, -slopeY);
+}
+
+/* Sample a GGX microfacet normal visible from wi for roughness alpha (based on page 7 of the supplemental implementation). */
+ccl_device_inline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float2 randU)
+{
+	const float3 stretched = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
+	const float2 unit_slope = mf_sampleP22_11(stretched.z, randU);
+
+	const float2 rot = normalize(make_float2(stretched.x, stretched.y));
+	const float slope_x = alpha.x*(rot.x * unit_slope.x - rot.y * unit_slope.y);
+	const float slope_y = alpha.y*(rot.y * unit_slope.x + rot.x * unit_slope.y);
+
+	kernel_assert(isfinite(slope_x));
+	return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+}
+
+/* === Phase functions: Glossy, Diffuse and Glass === */
+
+/* Phase function for reflective materials, either without a fresnel term (for compatibility) or with the conductive fresnel term. When both n and k are non-NULL, *weight is multiplied by fresnel_conductor(); the return value is wi mirrored about wm. */
+ccl_device_inline float3 mf_sample_phase_glossy(const float3 wi, float3 *n, float3 *k, float3 *weight, const float3 wm)
+{
+	if(n && k)
+		*weight *= fresnel_conductor(dot(wi, wm), *n, *k);
+
+	return -wi + 2.0f * wm * dot(wi, wm);
+}
+
+ccl_device_inline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha, float3 *n, float3 *k)
+{
+	/* Glossy phase function value for scattering from w into wo; lambda is supplied by the caller.
+	 * Returns zero when w points (almost) straight up, when the half vector faces downwards,
+	 * or when -w lies behind the half vector. */
+	const float3 zero = make_float3(0.0f, 0.0f, 0.0f);
+	if(w.z > 0.9999f)
+		return zero;
+
+	const float3 half_vec = normalize(wo - w);
+	if(half_vec.z < 0.0f)
+		return zero;
+
+	const float proj_area = (w.z < -0.9999f)? 1.0f: lambda*w.z;
+	const float cos_wh = dot(-w, half_vec);
+	if(cos_wh < 0.0f)
+		return zero;
+
+	const float ndf = (alpha.x == alpha.y)? D_ggx(half_vec, alpha.x): D_ggx_aniso(half_vec, alpha);
+	const float phase = (max(0.0f, cos_wh) * 0.25f / max(proj_area * cos_wh, 1e-7f)) * ndf;
+
+	/* Apply conductive fresnel term when both n and k are given. */
+	if(n && k)
+		return phase * fresnel_conductor(cos_wh, *n, *k);
+
+	/* No fresnel: same value on all three channels. */
+	return make_float3(phase, phase, phase);
+}
+
+/* Phase function for rough lambertian diffuse surfaces: lifts a concentric disk sample onto the hemisphere around wm. */
+ccl_device_inline float3 mf_sample_phase_diffuse(const float3 wm, const float randu, const float randv)
+{
+	float3 tangent, bitangent;
+	make_orthonormals(wm, &tangent, &bitangent);
+	const float2 d = concentric_sample_disk(randu, randv);
+	const float height = safe_sqrtf(1.0f - d.x*d.x - d.y*d.y);
+	return d.x*tangent + d.y*bitangent + height*wm;
+}
+
+ccl_device_inline float3 mf_eval_phase_diffuse(const float3 w, const float3 wm)
+{
+	const float v = max(0.0f, dot(w, wm)) * M_1_PI_F; /* dot(w, wm) clamped at zero, scaled by M_1_PI_F */
+	return make_float3(v, v, v); /* same value on all three channels */
+}
+
+/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. randV picks the branch: reflection when randV < f (*outside = true), refraction otherwise (*outside = false). */
+ccl_device_inline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
+{
+	float cosI = dot(wi, wm);
+	float f = fresnel_dielectric_cos(cosI, eta);
+	if(randV < f) {
+		*outside = true;
+		return -wi + 2.0f * wm * cosI; /* wi mirrored about wm */
+	}
+	*outside = false;
+	float inv_eta = 1.0f/eta; /* NOTE(review): assumes eta != 0 — confirm callers guarantee it */
+	float cosT = -safe_sqrtf(1.0f - (1.0f - cosI*cosI) * inv_eta*inv_eta);
+	return normalize(wm*(cosI*inv_eta + cosT) - wi*inv_eta);
+}
+
+/* Glass phase function value for scattering from w into wo; wo_outside selects the reflection (true) or the refraction (false) lobe. Only alpha.x is used. */
+ccl_device_inline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta)
+{
+	if(w.z > 0.9999f)
+		return make_float3(0.0f, 0.0f, 0.0f);
+	float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z;
+	float v;
+	if(wo_outside) {
+		const float3 wh = normalize(wo - w);
+		if(wh.z < 0.0f)
+			return make_float3(0.0f, 0.0f, 0.0f);
+		/* Denominators are clamped to 1e-7 as in mf_eval_phase_glossy, so a vanishing pArea or cosine cannot produce inf/NaN. */
+		const float dotW_WH = dot(-w, wh);
+		v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / max(pArea * dotW_WH, 1e-7f);
+	}
+	else {
+		float3 wh = normalize(wo*eta - w);
+		if(wh.z < 0.0f)
+			wh = -wh;
+		const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh);
+		if(dotW_WH < 0.0f)
+			return make_float3(0.0f, 0.0f, 0.0f);
+
+		float temp = dotW_WH + eta*dotWO_WH;
+		v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * D_ggx(wh, alpha.x) / max(pArea * temp * temp, 1e-7f);
+	}
+
+	return make_float3(v, v, v);
+}
+
+/* === Utility functions for the random walks === */
+
+/* Smith Lambda function for GGX (based on page 12 of the supplemental implementation). Returns 0 for w (almost) along +z and the constant -0.9999 for w (almost) along -z. */
+ccl_device_inline float mf_lambda(const float3 w, const float2 alpha)
+{
+	if(w.z > 0.9999f)
+		return 0.0f;
+	else if(w.z < -0.9999f)
+		return -0.9999f;
+	/* General case: 0.5 * (+/- sqrt(1 + |w.xy * alpha|^2 / w.z^2) - 1), with w.z^2 clamped to at least 1e-7. */
+	const float inv_wz2 = 1.0f / max(w.z*w.z, 1e-7f);
+	const float2 wa = make_float2(w.x, w.y)*alpha;
+	float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2);
+	if(w.z <= 0.0f) /* the square root is negated for directions at or below the horizon */
+		v = -v;
+
+	return 0.5f*(v - 1.0f);
+}
+
+/* Height distribution CDF (based on page 4 of the supplemental implementation). mf_invC1 saturates its argument and maps it linearly via 2x - 1. */
+ccl_device_inline float mf_invC1(const float h)
+{
+	return 2.0f * saturate(h) - 1.0f;
+}
+
+ccl_device_inline float mf_C1(const float h)
+{
+	return saturate(0.5f * (h + 1.0f)); /* linear map (h + 1) / 2, passed through saturate() */
+}
+
+/* Masking function (based on page 16 of the supplemental implementation): 1 for w.z > 0.9999, 0 for w.z < 1e-5, C1^lambda otherwise. */
+ccl_device_inline float mf_G1(const float3 w, const float C1, const float lambda)
+{
+	const bool at_pole = (w.z > 0.9999f);
+	const bool below_horizon = (w.z < 1e-5f);
+	if(at_pole || below_horizon)
+		return at_pole? 1.0f: 0.0f;
+	return powf(C1, lambda);
+}
+
+/* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). Updates *h, *C1 and *G1 in place and only reads *lambda; returns false when w is (almost) along +z or when U > 1 - *G1. */
+ccl_device_inline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
+{
+	if(w.z > 0.9999f)
+		return false;
+	if(w.z < -0.9999f) { /* w (almost) along -z: scale C1 directly by U */
+		*C1 *= U;
+		*h = mf_invC1(*C1);
+		*G1 = mf_G1(w, *C1, *lambda);
+	}
+	else if(fabsf(w.z) >= 0.0001f) { /* for |w.z| < 0.0001 the state is left unchanged and true is returned */
+		if(U > 1.0f - *G1)
+			return false;
+		if(*lambda >= 0.0f) {
+			*C1 = 1.0f;
+		}
+		else {
+			*C1 *= powf(1.0f-U, -1.0f / *lambda);
+		}
+		*h = mf_invC1(*C1);
+		*G1 = mf_G1(w, *C1, *lambda);
+	}
+	return true;
+}
+
+/* === PDF approximations for the different phase functions. ===
+ * As explained in bsdf_microfacet_multi_impl.h, using approximations with MIS still produces an unbiased result. */
+
+/* Approximation for the albedo of the single-scattering GGX distribution,
+ * the missing energy is then approximated as a diffuse reflection for the PDF. The callers in this file pass the roughness alpha as r. */
+ccl_device_inline float mf_ggx_albedo(float r)
+{
+	float albedo = 0.806495f*expf(-1.98712f*r*r) + 0.199531f; /* exponential term of the fit */
+	albedo -= ((((((1.76741f*r - 8.43891f)*r + 15.784f)*r - 14.398f)*r + 6.45221f)*r - 1.19722f)*r + 0.027803f)*r + 0.00568739f; /* polynomial correction in r, Horner form */
+	return saturate(albedo);
+}
+
+ccl_device_inline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
+{
+	/* Approximate PDF: a term built from D_ggx and mf_lambda(wi), plus (1 - albedo) * wo.z. */
+	const float ndf = D_ggx(normalize(wi+wo), alpha);
+	const float visible_area = max((1.0f + mf_lambda(wi, make_float2(alpha, alpha))) * wi.z, 1e-7f);
+	return 0.25f * ndf / visible_area + (1.0f - mf_ggx_albedo(alpha)) * wo.z;
+}
+
+/* Anisotropic variant of mf_ggx_pdf; the denominator is clamped to 1e-7 like there, so a grazing wi (wi.z near 0) cannot yield an inf/NaN PDF. */
+ccl_device_inline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha) {
+	return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / max((1.0f + mf_lambda(wi, alpha)) * wi.z, 1e-7f) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z;
+}
+
+ccl_device_inline float mf_diffuse_pdf(const float3 wo)
+{
+	return M_1_PI_F * wo.z; /* wo.z scaled by M_1_PI_F; not clamped for wo.z < 0 */
+}
+
+ccl_device_inline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
+{
+	/* Approximate PDF for the glass closure: a reflection or refraction term plus |wo.z|.
+	 * The denominator is clamped like in mf_ggx_pdf so a grazing wi cannot produce an inf/NaN PDF. */
+	const bool is_reflection = (wi.z*wo.z > 0.0f);
+
+	/* Half vector: plain sum for reflection, eta-weighted for refraction; flipped to the upper hemisphere below. */
+	float3 wh = normalize(is_reflection? wi + wo: wi + wo*eta);
+	const float f = fresnel_dielectric_cos(dot(wi, wh), eta);
+	const float fresnel = is_reflection? f: 1.0f - f;
+	if(wh.z < 0.0f)
+		wh = -wh;
+
+	const float3 r_wi = (wi.z < 0.0f)? -wi: wi;
+	const float denom = max((1.0f + mf_lambda(r_wi, make_float2(alpha, alpha))) * r_wi.z, 1e-7f);
+	return fresnel * max(0.0f, dot(r_wi, wh)) * D_ggx(wh, alpha) / denom + fabsf(wo.z);
+}
+
+/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
+/* MF_FUNCTION_FULL_NAME(prefix) expands to prefix_<MF_PHASE_FUNCTION> (e.g. mf_eval_glass); the extra MF_NAME_EVAL level expands MF_PHASE_FUNCTION before token pasting. */
+#define MF_NAME_JOIN(x,y) x ## _ ## y
+#define MF_NAME_EVAL(x,y) MF_NAME_JOIN(x,y)
+#define MF_FUNCTION_FULL_NAME(prefix) MF_NAME_EVAL(prefix, MF_PHASE_FUNCTION)
+/* NOTE(review): MF_PHASE_FUNCTION is defined again further down without a visible #undef — presumably the impl header undefines it; confirm. */
+#define MF_PHASE_FUNCTION glass
+#define MF_MULTI_GLASS
+#include "bsdf_microfacet_multi_impl.h"
+
+/* The diffuse phase function is not implemented as a node yet. */
+#if 0
+#define MF_PHASE_FUNCTION diffuse
+#define MF_MULTI_DIFFUSE
+#include "bsdf_microfacet_multi_impl.h"
+#endif
+
+#define MF_PHASE_FUNCTION glossy
+#define MF_MULTI_GLOSSY
+#include "bsdf_microfacet_multi_impl.h"
+
+ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness)
+{
+	MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc;
+	/* Raise both roughness values to at least `roughness`; larger existing values are kept. */
+	bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
+	bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
+}
+
+/* === Closure implementations === */
+
+/* Multiscattering GGX Glossy closure */
+
+ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false)
+{
+	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); /* roughness bounds passed to clamp(): 1e-4 and 1 */
+	bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
+	bsdf->extra->color.x = saturate(bsdf->extra->color.x); /* NOTE(review): bsdf->extra is dereferenced unconditionally — assumes the caller allocated it; confirm */
+	bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+	bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+	bsdf->extra->use_fresnel = use_fresnel;
+	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+	/* The isotropic and the anisotropic setup both end here, so they share one closure type. */
+	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;
+
+	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false)
+{
+	if(is_zero(bsdf->T)) /* no usable tangent: fall back to (1, 0, 0) */
+		bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
+
+	return bsdf_microfacet_multi_ggx_common_setup(bsdf, use_fresnel);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false)
+{
+	bsdf->alpha_y = bsdf->alpha_x; /* isotropic: force alpha_y to alpha_x before the shared setup */
+
+	return bsdf_microfacet_multi_ggx_common_setup(bsdf, use_fresnel);
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
+	*pdf = 0.0f; /* this closure never reports transmission: zero PDF and zero value */
+	return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+	float3 X, Y, Z;
+	Z = bsdf->N;
+	if(is_aniso)
+		make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+	else
+		make_orthonormals(Z, &X, &Y);
+	/* Express I and omega_in in the local frame (X, Y, Z), where Z is the closure normal. */
+	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+	/* The PDF uses the approximation matching the anisotropy; the value comes from mf_eval_glossy() without fresnel (n = k = NULL). */
+	if(is_aniso)
+		*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+	else
+		*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+	return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+{
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
+	float3 X, Y, Z;
+	Z = bsdf->N;
+	if(is_aniso)
+		make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+	else
+		make_orthonormals(Z, &X, &Y);
+	/* Work in the local frame (X, Y, Z), where Z is the closure normal; localO is filled in by mf_sample_glossy(). */
+	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+	float3 localO;
+	/* Randomness comes from lcg_state; randu and randv are not used here. */
+	*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL);
+	if(is_aniso)
+		*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
+	else
+		*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
+	*eval *= *pdf; /* presumably mf_sample_glossy() returns a weight rather than a BSDF value — verify in the impl header */
+	/* Back to world space; the differentials are those of a mirror reflection about Z. */
+	*omega_in = X*localO.x + Y*localO.y + Z*localO.z;
+#ifdef __RAY_DIFFERENTIALS__
+	*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+	*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+#endif
+	return LABEL_REFLECT|LABEL_GLOSSY;
+}
+
+/* Multiscattering GGX Glass closure */
+
+ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf, bool use_fresnel = false)
+{
+	bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
+	bsdf->alpha_y = bsdf->alpha_x; /* the glass closure is isotropic: alpha_y mirrors alpha_x */
+	bsdf->ior = max(0.0f, bsdf->ior); /* NOTE(review): ior == 0 stays allowed; code computing 1/eta (e.g. mf_sample_phase_glass) would divide by zero if it receives it — confirm */
+	bsdf->extra->color.x = saturate(bsdf->extra->color.x); /* NOTE(review): bsdf->extra is dereferenced unconditionally — assumes the caller allocated it; confirm */
+	bsdf->extra->color.y = saturate(bsdf->extra->color.y);
+	bsdf->extra->color.z = saturate(bsdf->extra->color.z);
+	bsdf->extra->use_fresnel = use_fresnel;
+	bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
+	bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
+	bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+
+	bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
+
+	return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 X, Y, Z;
+	Z = bsdf->N;
+	make_orthonormals(Z, &X, &Y);
+	/* Express I and omega_in in the local frame (X, Y, Z), where Z is the closure normal. */
+	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+	/* Transmission: mf_eval_glass() is called with wo_outside = false. */
+	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+	return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior);
+}
+
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 X, Y, Z;
+	Z = bsdf->N;
+	make_orthonormals(Z, &X, &Y);
+	/* Express I and omega_in in the local frame (X, Y, Z), where Z is the closure normal. */
+	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+	float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+	/* Reflection: mf_eval_glass() is called with wo_outside = true. */
+	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+	return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior);
+}
+
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
+{
+	/* Sample the multiscatter glass closure in the local frame of N; the label reports whether the result reflected or transmitted. */
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 X, Y, Z;
+	Z = bsdf->N;
+	make_orthonormals(Z, &X, &Y);
+	float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
+	float3 localO;
+
+	*eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior);
+	*pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior);
+	*eval *= *pdf;
+
+	*omega_in = X*localO.x + Y*localO.y + Z*localO.z;
+	if(localO.z*localI.z > 0.0f) {
+#ifdef __RAY_DIFFERENTIALS__
+		*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
+		*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
+#endif
+		return LABEL_REFLECT|LABEL_GLOSSY;
+	}
+	else {
+#ifdef __RAY_DIFFERENTIALS__
+		float cosI = dot(Z, I);
+		/* safe_sqrtf: the radicand goes negative for ior > 1 at shallow angles; never hand a NaN to max(). */
+		float dnp = max(safe_sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI*cosI))), 1e-7f);
+		*domega_in_dx = -(bsdf->ior * dIdx) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
+		*domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
+#endif
+		return LABEL_TRANSMIT|LABEL_GLOSSY;
+	}
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
new file mode 100644
index 00000000000..afd4a8da62a
--- /dev/null
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Evaluate the BSDF from wi to wo.
+ * Evaluation is split into the analytical single-scattering BSDF and the multi-scattering BSDF,
+ * which is evaluated stochastically through a random walk. At each bounce (except for the first one),
+ * the amount of reflection from here towards wo is evaluated before bouncing again.
+ *
+ * Because of the random walk, the evaluation is not deterministic, but its expected value is equal to
+ * the correct BSDF, which is enough for Monte-Carlo rendering. The PDF also can't be determined
+ * analytically, so the single-scattering PDF plus a diffuse term to account for the multi-scattered
+ * energy is used. In combination with MIS, that is enough to produce an unbiased result, although
+ * the balance heuristic isn't necessarily optimal anymore.
+ */
+ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state
+#ifdef MF_MULTI_GLASS
+	, const float eta
+#elif defined(MF_MULTI_GLOSSY)
+	, float3 *n, float3 *k
+#endif
+)
+{
+	/* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
+	bool swapped = false;
+#ifdef MF_MULTI_GLASS
+	if(wi.z*wo.z < 0.0f) {
+		/* Glass transmission is a special case and requires the directions to change hemisphere. */
+		if(-wo.z < wi.z) {
+			swapped = true;
+			float3 tmp = -wo;
+			wo = -wi;
+			wi = tmp;
+		}
+	}
+	else
+#endif
+	if(wo.z < wi.z) {
+		swapped = true;
+		float3 tmp = wo;
+		wo = wi;
+		wi = tmp;
+	}
+	/* Return zero for grazing directions, or when wo is not on the side indicated by wo_outside. */
+	if(wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside))
+		return make_float3(0.0f, 0.0f, 0.0f);
+
+	const float2 alpha = make_float2(alpha_x, alpha_y);
+	/* lambda_r belongs to the initial walk direction -wi, shadowing_lambda to wo (negated when wo is inside, so its z is positive). */
+	float lambda_r = mf_lambda(-wi, alpha);
+	float shadowing_lambda = mf_lambda(wo_outside? wo: -wo, alpha);
+
+	/* Analytically compute single scattering for lower noise. */
+	float3 eval;
+#ifdef MF_MULTI_GLASS
+	eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
+	if(wo_outside)
+		eval *= -lambda_r / (shadowing_lambda - lambda_r);
+	else
+		eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f);
+#elif defined(MF_MULTI_DIFFUSE)
+	/* Diffuse has no special closed form for the single scattering bounce */
+	eval = make_float3(0.0f, 0.0f, 0.0f);
+#else /* MF_MULTI_GLOSSY */
+	const float3 wh = normalize(wi+wo);
+	const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
+	float val = G2 * 0.25f / wi.z;
+	if(alpha.x == alpha.y)
+		val *= D_ggx(wh, alpha.x);
+	else
+		val *= D_ggx_aniso(wh, alpha);
+	if(n && k) {
+		eval = fresnel_conductor(dot(wh, wi), *n, *k) * val;
+	}
+	else {
+		eval = make_float3(val, val, val);
+	}
+#endif
+	/* State of the random walk: it starts along -wi, on the outside, with unit throughput. */
+	float3 wr = -wi;
+	float hr = 1.0f;
+	float C1_r = 1.0f;
+	float G1_r = 0.0f;
+	bool outside = true;
+	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+	/* Follow at most 10 scattering events; the loop ends early when mf_sample_height() returns false. */
+	for(int order = 0; order < 10; order++) {
+		/* Sample microfacet height and normal */
+		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state)))
+			break;
+		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+		                                                   lcg_step_float_addrspace(lcg_state)));
+
+#ifdef MF_MULTI_DIFFUSE
+		if(order == 0) {
+			/* Compute single-scattering for diffuse. */
+			const float G2_G1 = -lambda_r / (shadowing_lambda - lambda_r);
+			eval += throughput * G2_G1 * mf_eval_phase_diffuse(wo, wm);
+		}
+#endif
+		if(order > 0) {
+			/* Evaluate amount of scattering towards wo on this microfacet. */
+			float3 phase;
+#ifdef MF_MULTI_GLASS
+			if(outside)
+				phase = mf_eval_phase_glass(wr, lambda_r,  wo,  wo_outside, alpha, eta);
+			else
+				phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
+#elif defined(MF_MULTI_DIFFUSE)
+			phase = mf_eval_phase_diffuse(wo, wm);
+#else /* MF_MULTI_GLOSSY */
+			phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha, n, k) * throughput; /* NOTE(review): throughput is applied here and again in the eval update below - confirm the squared factor is intended. */
+#endif
+			eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda);
+		}
+		if(order+1 < 10) { /* the last iteration only evaluates, it does not bounce again */
+			/* Bounce from the microfacet. */
+#ifdef MF_MULTI_GLASS
+			bool next_outside;
+			wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
+			if(!next_outside) {
+				outside = !outside;
+				wr = -wr;
+				hr = -hr;
+			}
+#elif defined(MF_MULTI_DIFFUSE)
+			wr = mf_sample_phase_diffuse(wm,
+			                             lcg_step_float_addrspace(lcg_state),
+			                             lcg_step_float_addrspace(lcg_state));
+#else /* MF_MULTI_GLOSSY */
+			wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
+#endif
+
+			lambda_r = mf_lambda(wr, alpha);
+
+			throughput *= color;
+
+			C1_r = mf_C1(hr);
+			G1_r = mf_G1(wr, C1_r, lambda_r);
+		}
+	}
+	/* The directions were exchanged above; rescale the result by |wi.z / wo.z| of the exchanged pair. */
+	if(swapped)
+		eval *= fabsf(wi.z / wo.z);
+	return eval;
+}
+
+/* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk
+ * escaped the surface in wo. The function returns the throughput between wi and wo.
+ * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
+ */
+ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint *lcg_state
+#ifdef MF_MULTI_GLASS
+	, const float eta
+#elif defined(MF_MULTI_GLOSSY)
+	, float3 *n, float3 *k
+#endif
+)
+{
+	const float2 alpha = make_float2(alpha_x, alpha_y);
+
+	float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+	float3 wr = -wi;
+	float lambda_r = mf_lambda(wr, alpha);
+	float hr = 1.0f;
+	float C1_r = 1.0f;
+	float G1_r = 0.0f;
+	bool outside = true;
+	/* Follow at most 10 scattering events; a walk that is still on the surface afterwards returns zero. */
+	for(int order = 0; order < 10; order++) {
+		/* Sample microfacet height. */
+		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state))) {
+			/* The random walk has left the surface. */
+			*wo = outside? wr: -wr;
+			return throughput;
+		}
+		/* Sample microfacet normal. */
+		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+		                                                   lcg_step_float_addrspace(lcg_state)));
+
+		/* First-bounce color is already accounted for in mix weight. */
+		if(order > 0)
+			throughput *= color;
+
+		/* Bounce from the microfacet. */
+#ifdef MF_MULTI_GLASS
+		bool next_outside;
+		wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
+		if(!next_outside) {
+			hr = -hr;
+			wr = -wr;
+			outside = !outside;
+			C1_r = mf_C1(hr); /* keep C1 in sync with the mirrored height, as mf_eval does after each bounce */
+		}
+#elif defined(MF_MULTI_DIFFUSE)
+		wr = mf_sample_phase_diffuse(wm,
+		                             lcg_step_float_addrspace(lcg_state),
+		                             lcg_step_float_addrspace(lcg_state));
+#else /* MF_MULTI_GLOSSY */
+		wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
+#endif
+
+		/* Update random walk parameters. */
+		lambda_r = mf_lambda(wr, alpha);
+		G1_r = mf_G1(wr, C1_r, lambda_r);
+	}
+	*wo = make_float3(0.0f, 0.0f, 1.0f);
+	return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+#undef MF_MULTI_GLASS
+#undef MF_MULTI_DIFFUSE
+#undef MF_MULTI_GLOSSY
+#undef MF_PHASE_FUNCTION
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 61b7cb11b02..cb342a026ef 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -19,39 +19,59 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct OrenNayarBsdf {
+	SHADER_CLOSURE_BASE;
+	/* Oren-Nayar diffuse closure data. */
+	float3 N; /* normal the BSDF is evaluated and sampled around */
+	float roughness; /* input roughness; read as sigma by bsdf_oren_nayar_setup() */
+	float a; /* written by setup: 1 / (pi + ((3*pi - 4) / 6) * sigma), with sigma = saturate(roughness) */
+	float b; /* written by setup: sigma * a */
+} OrenNayarBsdf;
+
 ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l)
 {
+	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
 	float nl = max(dot(n, l), 0.0f);
 	float nv = max(dot(n, v), 0.0f);
 	float t = dot(l, v) - nl * nv;
 
 	if(t > 0.0f)
 		t /= max(nl, nv) + FLT_MIN;
-	float is = nl * (sc->data0 + sc->data1 * t);
+	float is = nl * (bsdf->a + bsdf->b * t);
 	return make_float3(is, is, is);
 }
 
-ccl_device int bsdf_oren_nayar_setup(ShaderClosure *sc)
+ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
 {
-	float sigma = sc->data0;
+	float sigma = bsdf->roughness;
 
-	sc->type = CLOSURE_BSDF_OREN_NAYAR_ID;
+	bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;
 
 	sigma = saturate(sigma);
 
 	float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
 
-	sc->data0 = 1.0f * div;
-	sc->data1 = sigma * div;
+	bsdf->a = 1.0f * div;
+	bsdf->b = sigma * div;
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
+ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* True when both Oren-Nayar closures have the same normal and the same roughness. */
+	const OrenNayarBsdf *lhs = (const OrenNayarBsdf*)a;
+	const OrenNayarBsdf *rhs = (const OrenNayarBsdf*)b;
+	const bool same_normal = isequal_float3(lhs->N, rhs->N);
+	return same_normal && (lhs->roughness == rhs->roughness);
+}
+
 ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	if(dot(sc->N, omega_in) > 0.0f) {
+	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
+	if(dot(bsdf->N, omega_in) > 0.0f) {
 		*pdf = 0.5f * M_1_PI_F;
-		return bsdf_oren_nayar_get_intensity(sc, sc->N, I, omega_in);
+		return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
 	}
 	else {
 		*pdf = 0.0f;
@@ -66,15 +86,16 @@ ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, const f
 
 ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	sample_uniform_hemisphere(sc->N, randu, randv, omega_in, pdf);
+	const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
+	sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
 
 	if(dot(Ng, *omega_in) > 0.0f) {
-		*eval = bsdf_oren_nayar_get_intensity(sc, sc->N, I, *omega_in);
+		*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
 
 #ifdef __RAY_DIFFERENTIALS__
 		// TODO: find a better approximation for the bounce
-		*domega_in_dx = (2.0f * dot(sc->N, dIdx)) * sc->N - dIdx;
-		*domega_in_dy = (2.0f * dot(sc->N, dIdy)) * sc->N - dIdy;
+		*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+		*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
 	}
 	else {
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 1ab15eee954..e152a8780db 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -35,7 +35,17 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos)
+#ifdef __OSL__
+
+typedef ccl_addr_space struct PhongRampBsdf {
+	SHADER_CLOSURE_BASE;
+	/* Phong closure whose color is looked up from a color ramp. */
+	float3 N; /* normal used for the reflection vector and the hemisphere tests */
+	float exponent; /* Phong exponent; clamped to >= 0 by bsdf_phong_ramp_setup() */
+	float3 *colors; /* ramp colors handed to bsdf_phong_ramp_get_color(), which declares 8 entries */
+} PhongRampBsdf;
+
+ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
 {
 	int MAXCOLORS = 8;
 	
@@ -49,57 +59,54 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float
 	return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset;
 }
 
-ccl_device int bsdf_phong_ramp_setup(ShaderClosure *sc)
+ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_PHONG_RAMP_ID;
-	sc->data0 = max(sc->data0, 0.0f);
-	sc->data1 = 0.0f;
+	bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
+	bsdf->exponent = max(bsdf->exponent, 0.0f);
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_phong_ramp_blur(ShaderClosure *sc, float roughness)
-{
-}
-
-ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float m_exponent = sc->data0;
-	float cosNI = dot(sc->N, omega_in);
-	float cosNO = dot(sc->N, I);
+	const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
+	float m_exponent = bsdf->exponent;
+	float cosNI = dot(bsdf->N, omega_in);
+	float cosNO = dot(bsdf->N, I);
 	
 	if(cosNI > 0 && cosNO > 0) {
 		// reflect the view vector
-		float3 R = (2 * cosNO) * sc->N - I;
+		float3 R = (2 * cosNO) * bsdf->N - I;
 		float cosRI = dot(R, omega_in);
 		if(cosRI > 0) {
 			float cosp = powf(cosRI, m_exponent);
 			float common = 0.5f * M_1_PI_F * cosp;
 			float out = cosNI * (m_exponent + 2) * common;
 			*pdf = (m_exponent + 1) * common;
-			return bsdf_phong_ramp_get_color(sc, colors, cosp) * out;
+			return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
 		}
 	}
 	
 	return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 colors[8], const float3 I, const float3 omega_in, float *pdf)
+ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
 	return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colors[8], float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float cosNO = dot(sc->N, I);
-	float m_exponent = sc->data0;
+	const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc;
+	float cosNO = dot(bsdf->N, I);
+	float m_exponent = bsdf->exponent;
 	
 	if(cosNO > 0) {
 		// reflect the view vector
-		float3 R = (2 * cosNO) * sc->N - I;
+		float3 R = (2 * cosNO) * bsdf->N - I;
 
 #ifdef __RAY_DIFFERENTIALS__
-		*domega_in_dx = (2 * dot(sc->N, dIdx)) * sc->N - dIdx;
-		*domega_in_dy = (2 * dot(sc->N, dIdy)) * sc->N - dIdy;
+		*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+		*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
 		
 		float3 T, B;
@@ -114,7 +121,7 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo
 		if(dot(Ng, *omega_in) > 0.0f)
 		{
 			// common terms for pdf and eval
-			float cosNI = dot(sc->N, *omega_in);
+			float cosNI = dot(bsdf->N, *omega_in);
 			// make sure the direction we chose is still in the right hemisphere
 			if(cosNI > 0)
 			{
@@ -122,13 +129,14 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo
 				float common = 0.5f * M_1_PI_F * cosp;
 				*pdf = (m_exponent + 1) * common;
 				float out = cosNI * (m_exponent + 2) * common;
-				*eval = bsdf_phong_ramp_get_color(sc, colors, cosp) * out;
+				*eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
 			}
 		}
 	}
 	return LABEL_REFLECT|LABEL_GLOSSY;
 }
 
+#endif /* __OSL__ */
 
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index 303f4c9ce34..1d21614ecee 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -37,9 +37,9 @@ CCL_NAMESPACE_BEGIN
 
 /* REFLECTION */
 
-ccl_device int bsdf_reflection_setup(ShaderClosure *sc)
+ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_REFLECTION_ID;
+	bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
 	return SD_BSDF;
 }
 
@@ -55,7 +55,8 @@ ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc, const f
 
 ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float3 N = bsdf->N;
 
 	// only one direction is possible
 	float cosNO = dot(N, I);
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index c78a4b67134..050a4e76fa9 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -37,9 +37,9 @@ CCL_NAMESPACE_BEGIN
 
 /* REFRACTION */
 
-ccl_device int bsdf_refraction_setup(ShaderClosure *sc)
+ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_REFRACTION_ID;
+	bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
 	return SD_BSDF;
 }
 
@@ -55,8 +55,9 @@ ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, const f
 
 ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float m_eta = sc->data0;
-	float3 N = sc->N;
+	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
+	float m_eta = bsdf->ior;
+	float3 N = bsdf->N;
 
 	float3 R, T;
 #ifdef __RAY_DIFFERENTIALS__
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index e5b6ab93a64..28e775bcbc8 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -35,17 +35,35 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct ToonBsdf {
+	SHADER_CLOSURE_BASE;
+	/* Shared by the diffuse and glossy toon closures. */
+	float3 N; /* normal used by the toon eval and sample functions */
+	float size; /* saturated by setup; multiplied by M_PI_2_F to get the maximum angle */
+	float smooth; /* saturated by setup; multiplied by M_PI_2_F to get the smoothing angle */
+} ToonBsdf;
+
 /* DIFFUSE TOON */
 
-ccl_device int bsdf_diffuse_toon_setup(ShaderClosure *sc)
+ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
-	sc->data0 = saturate(sc->data0);
-	sc->data1 = saturate(sc->data1);
+	bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
+	bsdf->size = saturate(bsdf->size);
+	bsdf->smooth = saturate(bsdf->smooth);
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
+ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* True when both toon closures have the same normal, size and smooth values. */
+	const ToonBsdf *lhs = (const ToonBsdf*)a;
+	const ToonBsdf *rhs = (const ToonBsdf*)b;
+	if(!isequal_float3(lhs->N, rhs->N))
+		return false;
+	return (lhs->size == rhs->size) && (lhs->smooth == rhs->smooth);
+}
+
 ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
 {
 	float is;
@@ -67,9 +85,10 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
 
 ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float max_angle = sc->data0*M_PI_2_F;
-	float smooth = sc->data1*M_PI_2_F;
-	float angle = safe_acosf(fmaxf(dot(sc->N, omega_in), 0.0f));
+	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
+	float max_angle = bsdf->size*M_PI_2_F;
+	float smooth = bsdf->smooth*M_PI_2_F;
+	float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
 
 	float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
 	
@@ -90,21 +109,22 @@ ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, const
 
 ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float max_angle = sc->data0*M_PI_2_F;
-	float smooth = sc->data1*M_PI_2_F;
+	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
+	float max_angle = bsdf->size*M_PI_2_F;
+	float smooth = bsdf->smooth*M_PI_2_F;
 	float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
 	float angle = sample_angle*randu;
 
 	if(sample_angle > 0.0f) {
-		sample_uniform_cone(sc->N, sample_angle, randu, randv, omega_in, pdf);
+		sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
 
 		if(dot(Ng, *omega_in) > 0.0f) {
 			*eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
 
 #ifdef __RAY_DIFFERENTIALS__
 			// TODO: find a better approximation for the bounce
-			*domega_in_dx = (2.0f * dot(sc->N, dIdx)) * sc->N - dIdx;
-			*domega_in_dy = (2.0f * dot(sc->N, dIdy)) * sc->N - dIdy;
+			*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+			*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
 		}
 		else
@@ -117,25 +137,26 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, floa
 
 /* GLOSSY TOON */
 
-ccl_device int bsdf_glossy_toon_setup(ShaderClosure *sc)
+ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
 {
-	sc->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
-	sc->data0 = saturate(sc->data0);
-	sc->data1 = saturate(sc->data1);
+	bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
+	bsdf->size = saturate(bsdf->size);
+	bsdf->smooth = saturate(bsdf->smooth);
 
 	return SD_BSDF|SD_BSDF_HAS_EVAL;
 }
 
 ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
 {
-	float max_angle = sc->data0*M_PI_2_F;
-	float smooth = sc->data1*M_PI_2_F;
-	float cosNI = dot(sc->N, omega_in);
-	float cosNO = dot(sc->N, I);
+	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
+	float max_angle = bsdf->size*M_PI_2_F;
+	float smooth = bsdf->smooth*M_PI_2_F;
+	float cosNI = dot(bsdf->N, omega_in);
+	float cosNO = dot(bsdf->N, I);
 	
 	if(cosNI > 0 && cosNO > 0) {
 		/* reflect the view vector */
-		float3 R = (2 * cosNO) * sc->N - I;
+		float3 R = (2 * cosNO) * bsdf->N - I;
 		float cosRI = dot(R, omega_in);
 
 		float angle = safe_acosf(fmaxf(cosRI, 0.0f));
@@ -157,13 +178,14 @@ ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, const
 
 ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float max_angle = sc->data0*M_PI_2_F;
-	float smooth = sc->data1*M_PI_2_F;
-	float cosNO = dot(sc->N, I);
+	const ToonBsdf *bsdf = (const ToonBsdf*)sc;
+	float max_angle = bsdf->size*M_PI_2_F;
+	float smooth = bsdf->smooth*M_PI_2_F;
+	float cosNO = dot(bsdf->N, I);
 	
 	if(cosNO > 0) {
 		/* reflect the view vector */
-		float3 R = (2 * cosNO) * sc->N - I;
+		float3 R = (2 * cosNO) * bsdf->N - I;
 
 		float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
 		float angle = sample_angle*randu;
@@ -171,15 +193,15 @@ ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float
 		sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf);
 
 		if(dot(Ng, *omega_in) > 0.0f) {
-			float cosNI = dot(sc->N, *omega_in);
+			float cosNI = dot(bsdf->N, *omega_in);
 
 			/* make sure the direction we chose is still in the right hemisphere */
 			if(cosNI > 0) {
 				*eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
 
 #ifdef __RAY_DIFFERENTIALS__
-				*domega_in_dx = (2 * dot(sc->N, dIdx)) * sc->N - dIdx;
-				*domega_in_dy = (2 * dot(sc->N, dIdy)) * sc->N - dIdy;
+				*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
+				*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
 #endif
 			}
 			else
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index f99e0c3dca7..397d642c35a 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -80,7 +80,7 @@ ccl_device float fresnel_dielectric(
 		return 1; // total internal reflection
 	}
 	else {
-		float dnp = sqrtf(arg);
+		float dnp = max(sqrtf(arg), 1e-7f);
 		float nK = (neta * cos)- dnp;
 		*T = -(neta * I)+(nK * Nn);
 #ifdef __RAY_DIFFERENTIALS__
@@ -111,10 +111,9 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta)
 	return 1.0f; // TIR(no refracted component)
 }
 
-#if 0
 ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
 {
-	float3 cosi2 = make_float3(cosi*cosi);
+	float3 cosi2 = make_float3(cosi*cosi, cosi*cosi, cosi*cosi);
 	float3 one = make_float3(1.0f, 1.0f, 1.0f);
 	float3 tmp_f = eta * eta + k * k;
 	float3 tmp = tmp_f * cosi2;
@@ -124,7 +123,6 @@ ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k
 					(tmp_f + (2.0f * eta * cosi) + cosi2);
 	return(Rparl2 + Rperp2) * 0.5f;
 }
-#endif
 
 ccl_device float schlick_fresnel(float u)
 {
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index a1920841e6e..e31d790dd84 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -19,6 +19,19 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct Bssrdf {
+	SHADER_CLOSURE_BASE;
+	/* Subsurface scattering closure data. */
+	float radius; /* scattering radius; below BSSRDF_MIN_RADIUS bssrdf_setup() falls back to a diffuse BSDF */
+	float sharpness; /* sharpness of the cubic profile; saturated by bssrdf_setup() */
+	float d; /* Burley profile parameter, computed by bssrdf_burley_setup() */
+	float texture_blur; /* saturated by bssrdf_setup() */
+	float albedo; /* surface albedo fed to the Burley fit in bssrdf_burley_setup() */
+	float roughness; /* forwarded to the Disney diffuse BSDF fallback */
+	float3 N; /* normal; forwarded to the diffuse BSDF fallback */
+	float3 baseColor; /* forwarded to the Disney diffuse BSDF fallback */
+} Bssrdf;
+
 /* Planar Truncated Gaussian
  *
  * Note how this is different from the typical gaussian, this one integrates
@@ -28,11 +41,12 @@ CCL_NAMESPACE_BEGIN
 /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
 #define GAUSS_TRUNCATE 12.46f
 
-ccl_device float bssrdf_gaussian_eval(ShaderClosure *sc, float r)
+ccl_device float bssrdf_gaussian_eval(const ShaderClosure *sc, float r)
 {
 	/* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
 	 * = 1 - exp(-Rm*Rm/(2*v)) */
-	const float v = sc->data0*sc->data0*(0.25f*0.25f);
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float v = bssrdf->radius*bssrdf->radius*(0.25f*0.25f);
 	const float Rm = sqrtf(v*GAUSS_TRUNCATE);
 
 	if(r >= Rm)
@@ -41,7 +55,7 @@ ccl_device float bssrdf_gaussian_eval(ShaderClosure *sc, float r)
 	return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
 }
 
-ccl_device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r)
+ccl_device float bssrdf_gaussian_pdf(const ShaderClosure *sc, float r)
 {
 	/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
 	const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
@@ -49,12 +63,12 @@ ccl_device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r)
 	return bssrdf_gaussian_eval(sc, r) * (1.0f/(area_truncated));
 }
 
-ccl_device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, float *h)
+ccl_device void bssrdf_gaussian_sample(const ShaderClosure *sc, float xi, float *r, float *h)
 {
 	/* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
 	 * r = sqrt(-2*v*logf(xi)) */
-
-	const float v = sc->data0*sc->data0*(0.25f*0.25f);
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float v = bssrdf->radius*bssrdf->radius*(0.25f*0.25f);
 	const float Rm = sqrtf(v*GAUSS_TRUNCATE);
 
 	/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
@@ -75,12 +89,13 @@ ccl_device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, fl
  * far as I can tell has no closed form solution. So we get an iterative solution
  * instead with newton-raphson. */
 
-ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r)
+ccl_device float bssrdf_cubic_eval(const ShaderClosure *sc, float r)
 {
-	const float sharpness = sc->T.x;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float sharpness = bssrdf->sharpness;
 
 	if(sharpness == 0.0f) {
-		const float Rm = sc->data0;
+		const float Rm = bssrdf->radius;
 
 		if(r >= Rm)
 			return 0.0f;
@@ -94,7 +109,7 @@ ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r)
 
 	}
 	else {
-		float Rm = sc->data0*(1.0f + sharpness);
+		float Rm = bssrdf->radius*(1.0f + sharpness);
 
 		if(r >= Rm)
 			return 0.0f;
@@ -122,7 +137,7 @@ ccl_device float bssrdf_cubic_eval(ShaderClosure *sc, float r)
 	}
 }
 
-ccl_device float bssrdf_cubic_pdf(ShaderClosure *sc, float r)
+ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r)
 {
 	return bssrdf_cubic_eval(sc, r);
 }
@@ -155,12 +170,13 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi)
 	return x;
 }
 
-ccl_device void bssrdf_cubic_sample(ShaderClosure *sc, float xi, float *r, float *h)
+ccl_device void bssrdf_cubic_sample(const ShaderClosure *sc, float xi, float *r, float *h)
 {
-	float Rm = sc->data0;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float sharpness = bssrdf->sharpness;
+	float Rm = bssrdf->radius;
 	float r_ = bssrdf_cubic_quintic_root_find(xi);
 
-	const float sharpness = sc->T.x;
 	if(sharpness != 0.0f) {
 		r_ = powf(r_, 1.0f + sharpness);
 		Rm *= (1.0f + sharpness);
@@ -198,21 +214,22 @@ ccl_device_inline float bssrdf_burley_compatible_mfp(float r)
 	return 0.25f * M_1_PI_F * r;
 }
 
-ccl_device void bssrdf_burley_setup(ShaderClosure *sc)
+ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
 {
 	/* Mean free path length. */
-	const float l = bssrdf_burley_compatible_mfp(sc->data0);
+	const float l = bssrdf_burley_compatible_mfp(bssrdf->radius);
 	/* Surface albedo. */
-	const float A = sc->data2;
+	const float A = bssrdf->albedo;
 	const float s = bssrdf_burley_fitting(A);
 	const float d = l / s;
 
-	sc->custom1 = d;
+	bssrdf->d = d;
 }
 
-ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r)
+ccl_device float bssrdf_burley_eval(const ShaderClosure *sc, float r)
 {
-	const float d = sc->custom1;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float d = bssrdf->d;
 	const float Rm = BURLEY_TRUNCATE * d;
 
 	if(r >= Rm)
@@ -231,7 +248,7 @@ ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r)
 	return (exp_r_d + exp_r_3_d) / (4.0f*d);
 }
 
-ccl_device float bssrdf_burley_pdf(ShaderClosure *sc, float r)
+ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r)
 {
 	return bssrdf_burley_eval(sc, r) * (1.0f/BURLEY_TRUNCATE_CDF);
 }
@@ -276,12 +293,13 @@ ccl_device float bssrdf_burley_root_find(float xi)
 	return r;
 }
 
-ccl_device void bssrdf_burley_sample(ShaderClosure *sc,
+ccl_device void bssrdf_burley_sample(const ShaderClosure *sc,
                                      float xi,
                                      float *r,
                                      float *h)
 {
-	const float d = sc->custom1;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float d = bssrdf->d;
 	const float Rm = BURLEY_TRUNCATE * d;
 	const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
 
@@ -295,26 +313,29 @@ ccl_device void bssrdf_burley_sample(ShaderClosure *sc,
  *
  * Samples distributed over disk with no falloff, for reference. */
 
-ccl_device float bssrdf_none_eval(ShaderClosure *sc, float r)
+ccl_device float bssrdf_none_eval(const ShaderClosure *sc, float r)
 {
-	const float Rm = sc->data0;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float Rm = bssrdf->radius;
 	return (r < Rm)? 1.0f: 0.0f;
 }
 
-ccl_device float bssrdf_none_pdf(ShaderClosure *sc, float r)
+ccl_device float bssrdf_none_pdf(const ShaderClosure *sc, float r)
 {
 	/* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
-	const float Rm = sc->data0;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float Rm = bssrdf->radius;
 	const float area = (M_PI_F*Rm*Rm);
 
 	return bssrdf_none_eval(sc, r) / area;
 }
 
-ccl_device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float *h)
+ccl_device void bssrdf_none_sample(const ShaderClosure *sc, float xi, float *r, float *h)
 {
 	/* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
 	 * r = sqrt(xi)*Rm */
-	const float Rm = sc->data0;
+	const Bssrdf *bssrdf = (const Bssrdf*)sc;
+	const float Rm = bssrdf->radius;
 	const float r_ = sqrtf(xi)*Rm;
 
 	*r = r_;
@@ -325,40 +346,54 @@ ccl_device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float
 
 /* Generic */
 
-ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
+ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
+{
+	/* Reserve a closure slot, typed CLOSURE_NONE_ID until setup; NULL when no slot is available. */
+	Bssrdf *result = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+	if(result == NULL)
+		return NULL;
+
+	/* The sample weight is stored first; a closure below the cutoff is reported as NULL afterwards. */
+	const float avg_weight = fabsf(average(weight));
+	result->sample_weight = avg_weight;
+	return (avg_weight >= CLOSURE_WEIGHT_CUTOFF) ? result : NULL;
+}
+
+ccl_device int bssrdf_setup(Bssrdf *bssrdf, ClosureType type) /* finalizes the closure and returns its shader flags; falls back to a diffuse BSDF for tiny radii */
 {
-	if(sc->data0 < BSSRDF_MIN_RADIUS) {
+	if(bssrdf->radius < BSSRDF_MIN_RADIUS) {
 		/* revert to diffuse BSDF if radius too small */
 		int flag;
 		if (type == CLOSURE_BSSRDF_DISNEY_ID) {
-			sc->data0 = sc->data3;
-			sc->data1 = 0.0f;
-			flag = bsdf_disney_diffuse_setup(sc);
-			sc->type = CLOSURE_BSDF_BSSRDF_DISNEY_ID;
+			DisneyDiffuseBsdf *bsdf = (DisneyDiffuseBsdf*)bssrdf; /* same storage, reinterpreted in place as a Disney diffuse BSDF */
+			bsdf->N = bssrdf->N; /* NOTE(review): bsdf aliases bssrdf, so each copy assumes its destination does not overlap a field that is read later -- confirm against both struct layouts */
+			bsdf->roughness = bssrdf->roughness;
+			bsdf->baseColor = bssrdf->baseColor;
+			flag = bsdf_disney_diffuse_setup(bsdf);
+			bsdf->type = CLOSURE_BSDF_BSSRDF_DISNEY_ID; /* assigned after the setup call, presumably overriding whatever type the setup wrote */
 		}
 		else {
-			sc->data0 = 0.0f;
-			sc->data1 = 0.0f;
-			flag = bsdf_diffuse_setup(sc);
-			sc->type = CLOSURE_BSDF_BSSRDF_ID;
+			DiffuseBsdf *bsdf = (DiffuseBsdf*)bssrdf; /* same storage, reinterpreted in place as a plain diffuse BSDF */
+			bsdf->N = bssrdf->N; /* NOTE(review): a no-op if N sits at the same offset in both structs -- confirm layouts */
+			flag = bsdf_diffuse_setup(bsdf);
+			bsdf->type = CLOSURE_BSDF_BSSRDF_ID; /* assigned after the setup call, presumably overriding whatever type the setup wrote */
 		}
 		
 		return flag;
 	}
 	else {
-		sc->data1 = saturate(sc->data1); /* texture blur */
-		sc->T.x = saturate(sc->T.x); /* sharpness */
-		sc->type = type;
+		bssrdf->texture_blur = saturate(bssrdf->texture_blur); /* saturate() presumably clamps to [0, 1] */
+		bssrdf->sharpness = saturate(bssrdf->sharpness); /* named field replaces the old T.x slot */
+		bssrdf->type = type; /* replaces the CLOSURE_NONE_ID placeholder passed to closure_alloc by bssrdf_alloc */
 
 		if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_DISNEY_ID) {
-			bssrdf_burley_setup(sc);
+			bssrdf_burley_setup(bssrdf); /* extra setup shared by the Burley and Disney profiles */
 		}
 
 		return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
 	}
 }
 
-ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
+ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
 {
 	if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
 		bssrdf_cubic_sample(sc, xi, r, h);
@@ -368,7 +403,7 @@ ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
 		bssrdf_burley_sample(sc, xi, r, h);
 }
 
-ccl_device float bssrdf_pdf(ShaderClosure *sc, float r)
+ccl_device float bssrdf_pdf(const ShaderClosure *sc, float r)
 {
 	if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
 		return bssrdf_cubic_pdf(sc, r);
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 4d71ba50ec3..01e67c7c2fd 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -19,6 +19,12 @@
 
 CCL_NAMESPACE_BEGIN
 
+typedef ccl_addr_space struct HenyeyGreensteinVolume { /* closure storage for the Henyey-Greenstein volume closure */
+	SHADER_CLOSURE_BASE; /* macro defined elsewhere; presumably the common closure header (volume->type is written by the setup function) -- confirm */
+
+	float g; /* anisotropy; volume_henyey_greenstein_setup limits |g| to at most 1 - 1e-3 */
+} HenyeyGreensteinVolume;
+
 /* HENYEY-GREENSTEIN CLOSURE */
 
 /* Given cosine between rays, return probability density that a photon bounces
@@ -29,19 +35,28 @@ ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g)
 	return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f);
 };
 
-ccl_device int volume_henyey_greenstein_setup(ShaderClosure *sc)
+ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume) /* tags the closure type, clamps g, and returns SD_SCATTER */
 {
-	sc->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
+	volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; /* type now written through the typed struct rather than the generic closure */
 	
 	/* clamp anisotropy to avoid delta function */
-	sc->data0 = signf(sc->data0) * min(fabsf(sc->data0), 1.0f - 1e-3f);
+	volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f); /* |g| is capped at 1 - 1e-3 and multiplied by signf(g) */
 
 	return SD_SCATTER;
 }
 
+ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+	/* True only when both closures carry exactly the same anisotropy value. */
+	const float g_a = ((const HenyeyGreensteinVolume*)a)->g;
+	const float g_b = ((const HenyeyGreensteinVolume*)b)->g;
+	return (g_a == g_b);
+}
+
 ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf)
 {
-	float g = sc->data0;
+	const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
+	float g = volume->g;
 
 	/* note that I points towards the viewer */
 	if(fabsf(g) < 1e-3f) {
@@ -58,7 +73,8 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, c
 ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I, float3 dIdx, float3 dIdy, float randu, float randv,
 	float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
-	float g = sc->data0;
+	const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc;
+	float g = volume->g;
 	float cos_phi, sin_phi, cos_theta;
 
 	/* match pdf for small g */