Cycles: volume light sampling

* Volume multiple importace sampling support to combine equiangular and distance sampling, for both homogeneous and heterogeneous volumes. * Branched path "Sample All Direct Lights" and "Sample All Indirect Lights" now apply to volumes as well as surfaces. Implementation note: For simplicity this is all done with decoupled ray marching, the only case we do not use decoupled is for distance only sampling with one light sample. The homogeneous case should still compile on the GPU because it only requires fixed size storage, but the heterogeneous case will be trickier to get working.
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-04-04 18:45:49 +0400
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-14 15:49:56 +0400
commit: a29807cd63b0cba62e664c54ce34e5717ca51a3e (patch)
tree: ac3a32e4393638711686e93879c6c218a17713eb
parent: d644753319b65a25ae043900cca235070bcdd181 (diff)
10 files changed, 676 insertions, 288 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 7205a272395..12babd95ed8 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -111,6 +111,7 @@ enum_integrator = (
 enum_volume_homogeneous_sampling = (
     ('DISTANCE', "Distance", "Use Distance Sampling"),
     ('EQUI_ANGULAR', "Equi-angular", "Use Equi-angular Sampling"),
+    ('MULTIPLE_IMPORTANCE', "Multiple Importance", "Combine distance and equi-angular sampling"),
     )
 
 
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index b382e2c833b..bda98b84da8 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -194,6 +194,17 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st
 
 		float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce);
 
+#ifdef __VOLUME__
+		if(state->volume_stack[0].shader != SHADER_NONE) {
+			/* shadow attenuation */
+			Ray volume_ray = *ray;
+			volume_ray.t = ls.t;
+			float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
+			kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+			L *= volume_tp;
+		}
+#endif
+
 		if(!(state->flag & PATH_RAY_MIS_SKIP)) {
 			/* multiple importance sampling, get regular light pdf,
 			 * and compute weight with respect to BSDF pdf */
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index ac432d3fe04..0adf9ed4666 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -208,8 +208,8 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
 	return t*t/cos_pi;
 }
 
-ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
-	float randu, float randv, float3 P, LightSample *ls)
+ccl_device bool lamp_light_sample(KernelGlobals *kg, int lamp,
+	float randu, float randv, float3 P, LightSample *ls, bool for_volume)
 {
 	float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
 	float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
@@ -224,6 +224,11 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
 	ls->v = randv;
 
 	if(type == LIGHT_DISTANT) {
+#ifdef __VOLUME__
+		if(for_volume)
+			return false;
+#endif
+
 		/* distant light */
 		float3 lightD = make_float3(data0.y, data0.z, data0.w);
 		float3 D = lightD;
@@ -244,6 +249,11 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
 	}
 #ifdef __BACKGROUND_MIS__
 	else if(type == LIGHT_BACKGROUND) {
+#ifdef __VOLUME__
+		if(for_volume)
+			return false;
+#endif
+
 		/* infinite area light (e.g. light dome or env light) */
 		float3 D = background_light_sample(kg, randu, randv, &ls->pdf);
 
@@ -299,6 +309,8 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
 		ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
 		ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
 	}
+
+	return true;
 }
 
 ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
@@ -514,7 +526,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt)
 
 /* Generic Light */
 
-ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls)
+ccl_device bool light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls, bool for_volume)
 {
 	/* sample index */
 	int index = light_distribution_sample(kg, randt);
@@ -533,10 +545,12 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float
 		ls->D = normalize_len(ls->P - P, &ls->t);
 		ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t);
 		ls->shader |= shader_flag;
+
+		return true;
 	}
 	else {
 		int lamp = -prim-1;
-		lamp_light_sample(kg, lamp, randu, randv, P, ls);
+		return lamp_light_sample(kg, lamp, randu, randv, P, ls, for_volume);
 	}
 }
 
@@ -546,9 +560,9 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
 	return __float_as_int(data3.x);
 }
 
-ccl_device void light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls)
+ccl_device bool light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls, bool for_volume)
 {
-	lamp_light_sample(kg, index, randu, randv, P, ls);
+	return lamp_light_sample(kg, index, randu, randv, P, ls, for_volume);
 }
 
 ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt)
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 9a5a85abae1..22024483bfc 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -29,7 +29,6 @@
 #include "kernel_accumulate.h"
 #include "kernel_shader.h"
 #include "kernel_light.h"
-#include "kernel_emission.h"
 #include "kernel_passes.h"
 
 #ifdef __SUBSURFACE__
@@ -42,6 +41,7 @@
 
 #include "kernel_path_state.h"
 #include "kernel_shadow.h"
+#include "kernel_emission.h"
 #include "kernel_path_surface.h"
 #include "kernel_path_volume.h"
 
@@ -88,17 +88,73 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
 			Ray volume_ray = ray;
 			volume_ray.t = (hit)? isect.t: FLT_MAX;
 
-			ShaderData volume_sd;
-			VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
-				&volume_sd, &volume_ray, L, &throughput, rng);
+			bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+			bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false);
 
-			if(result == VOLUME_PATH_SCATTERED) {
-				kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L, 1.0f);
+			if(decoupled) {
+				/* cache steps along volume for repeated sampling */
+				VolumeSegment volume_segment;
+				ShaderData volume_sd;
 
-				if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
-					continue;
-				else
-					break;
+				shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+				kernel_volume_decoupled_record(kg, &state,
+					&volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+				/* emission */
+				if(volume_segment.closure_flag & SD_EMISSION)
+					path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce);
+
+				/* scattering */
+				VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+				bool scatter = false;
+
+				if(volume_segment.closure_flag & SD_SCATTER) {
+					bool all = kernel_data.integrator.sample_all_lights_indirect;
+
+					/* direct light sampling */
+					kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+						throughput, &state, L, 1.0f, all, &volume_ray, &volume_segment);
+
+					/* indirect sample. if we use distance sampling and take just
+					 * one sample for direct and indirect light, we could share
+					 * this computation, but makes code a bit complex */
+					float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
+					float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+
+					result = kernel_volume_decoupled_scatter(kg,
+						&state, &volume_ray, &volume_sd, &throughput,
+						rphase, rscatter, &volume_segment, NULL, true);
+
+					if(result == VOLUME_PATH_SCATTERED)
+						scatter = kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f);
+				}
+
+				/* free cached steps */
+				kernel_volume_decoupled_free(kg, &volume_segment);
+
+				if(result == VOLUME_PATH_SCATTERED) {
+					if(scatter)
+						continue;
+					else
+						break;
+				}
+			}
+			else {
+				/* integrate along volume segment with distance sampling */
+				ShaderData volume_sd;
+				VolumeIntegrateResult result = kernel_volume_integrate(
+					kg, &state, &volume_sd, &volume_ray, L, &throughput, rng);
+
+				if(result == VOLUME_PATH_SCATTERED) {
+					/* direct lighting */
+					kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L, 1.0f);
+
+					/* indirect light bounce */
+					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
+						continue;
+					else
+						break;
+				}
 			}
 		}
 #endif
@@ -411,17 +467,73 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			Ray volume_ray = ray;
 			volume_ray.t = (hit)? isect.t: FLT_MAX;
 
-			ShaderData volume_sd;
-			VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
-				&volume_sd, &volume_ray, &L, &throughput, rng);
+			bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+			bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true);
+
+			if(decoupled) {
+				/* cache steps along volume for repeated sampling */
+				VolumeSegment volume_segment;
+				ShaderData volume_sd;
 
-			if(result == VOLUME_PATH_SCATTERED) {
-				kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, 1.0f);
+				shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+				kernel_volume_decoupled_record(kg, &state,
+					&volume_ray, &volume_sd, &volume_segment, heterogeneous);
 
-				if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
-					continue;
-				else
-					break;
+				/* emission */
+				if(volume_segment.closure_flag & SD_EMISSION)
+					path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+
+				/* scattering */
+				VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+				bool scatter = false;
+
+				if(volume_segment.closure_flag & SD_SCATTER) {
+					bool all = false;
+
+					/* direct light sampling */
+					kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+						throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment);
+
+					/* indirect sample. if we use distance sampling and take just
+					 * one sample for direct and indirect light, we could share
+					 * this computation, but makes code a bit complex */
+					float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
+					float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+
+					result = kernel_volume_decoupled_scatter(kg,
+						&state, &volume_ray, &volume_sd, &throughput,
+						rphase, rscatter, &volume_segment, NULL, true);
+
+					if(result == VOLUME_PATH_SCATTERED)
+						scatter = kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f);
+				}
+
+				/* free cached steps */
+				kernel_volume_decoupled_free(kg, &volume_segment);
+
+				if(result == VOLUME_PATH_SCATTERED) {
+					if(scatter)
+						continue;
+					else
+						break;
+				}
+			}
+			else {
+				/* integrate along volume segment with distance sampling */
+				ShaderData volume_sd;
+				VolumeIntegrateResult result = kernel_volume_integrate(
+					kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng);
+
+				if(result == VOLUME_PATH_SCATTERED) {
+					/* direct lighting */
+					kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, 1.0f);
+
+					/* indirect light bounce */
+					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
+						continue;
+					else
+						break;
+				}
 			}
 		}
 #endif
@@ -700,37 +812,47 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 			kernel_volume_decoupled_record(kg, &state,
 				&volume_ray, &volume_sd, &volume_segment, heterogeneous);
 
-			/* sample scattering */
-			int num_samples = kernel_data.integrator.volume_samples;
-			float num_samples_inv = 1.0f/num_samples;
-
-			for(int j = 0; j < num_samples; j++) {
-				/* workaround to fix correlation bug in T38710, can find better solution
-				 * in random number generator later, for now this is done here to not impact
-				 * performance of rendering without volumes */
-				RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
-
-				PathState ps = state;
-				Ray pray = ray;
-				float3 tp = throughput;
-
-				/* branch RNG state */
-				path_state_branch(&ps, j, num_samples);
-
-				VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-					&ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment);
-				
-				if(result == VOLUME_PATH_SCATTERED) {
-					/* todo: use all-light sampling */
-					kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L, 1.0f);
-
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
-						kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
-
-						/* for render passes, sum and reset indirect light pass variables
-						 * for the next samples */
-						path_radiance_sum_indirect(&L);
-						path_radiance_reset_indirect(&L);
+			/* direct light sampling */
+			if(volume_segment.closure_flag & SD_SCATTER) {
+				bool all = kernel_data.integrator.sample_all_lights_direct;
+				kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+					throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment);
+
+				/* indirect light sampling */
+				int num_samples = kernel_data.integrator.volume_samples;
+				float num_samples_inv = 1.0f/num_samples;
+
+				for(int j = 0; j < num_samples; j++) {
+					/* workaround to fix correlation bug in T38710, can find better solution
+					 * in random number generator later, for now this is done here to not impact
+					 * performance of rendering without volumes */
+					RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+
+					PathState ps = state;
+					Ray pray = ray;
+					float3 tp = throughput;
+
+					/* branch RNG state */
+					path_state_branch(&ps, j, num_samples);
+
+					/* scatter sample. if we use distance sampling and take just one
+					 * sample for direct and indirect light, we could share this
+					 * computation, but makes code a bit complex */
+					float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
+					float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
+
+					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+						&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+					if(result == VOLUME_PATH_SCATTERED) {
+						if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
+							kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+
+							/* for render passes, sum and reset indirect light pass variables
+							 * for the next samples */
+							path_radiance_sum_indirect(&L);
+							path_radiance_reset_indirect(&L);
+						}
 					}
 				}
 			}
@@ -759,12 +881,15 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 				/* branch RNG state */
 				path_state_branch(&ps, j, num_samples);
 
-				VolumeIntegrateResult result = kernel_volume_integrate(kg, &ps,
-					&volume_sd, &volume_ray, &L, &tp, rng);
+				VolumeIntegrateResult result = kernel_volume_integrate(
+					kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng);
 				
 				if(result == VOLUME_PATH_SCATTERED) {
-					/* todo: use all-light sampling */
-					if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
+					/* todo: support equiangular, MIS and all light sampling.
+					 * alternatively get decoupled ray marching working on the GPU */
+					kernel_path_volume_connect_light(kg, rng, &volume_sd, &volume_ray, throughput, &state, &L, num_samples_inv);
+
+					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
 						kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
 
 						/* for render passes, sum and reset indirect light pass variables
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 85bdebf195e..11fadcc6bcf 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -22,97 +22,101 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
 	ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, bool sample_all_lights)
 {
+#ifdef __EMISSION__
 	/* sample illumination from lights to find path contribution */
-	if(sd->flag & SD_BSDF_HAS_EVAL) {
-		Ray light_ray;
-		BsdfEval L_light;
-		bool is_lamp;
+	if(!(sd->flag & SD_BSDF_HAS_EVAL))
+		return;
+
+	Ray light_ray;
+	BsdfEval L_light;
+	bool is_lamp;
 
 #ifdef __OBJECT_MOTION__
-		light_ray.time = sd->time;
+	light_ray.time = sd->time;
 #endif
 
-		if(sample_all_lights) {
-			/* lamp sampling */
-			for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-				int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
-				float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
-				RNG lamp_rng = cmj_hash(*rng, i);
+	if(sample_all_lights) {
+		/* lamp sampling */
+		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+			int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
+			float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+			RNG lamp_rng = cmj_hash(*rng, i);
 
-				if(kernel_data.integrator.pdf_triangles != 0.0f)
-					num_samples_inv *= 0.5f;
+			if(kernel_data.integrator.pdf_triangles != 0.0f)
+				num_samples_inv *= 0.5f;
 
-				for(int j = 0; j < num_samples; j++) {
-					float light_u, light_v;
-					path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+			for(int j = 0; j < num_samples; j++) {
+				float light_u, light_v;
+				path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
 
-					LightSample ls;
-					light_select(kg, i, light_u, light_v, sd->P, &ls);
+				LightSample ls;
+				light_select(kg, i, light_u, light_v, sd->P, &ls, false);
 
-					if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
-						/* trace shadow ray */
-						float3 shadow;
+				if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+					/* trace shadow ray */
+					float3 shadow;
 
-						if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
-						}
+					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+						/* accumulate */
+						path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
 				}
 			}
+		}
 
-			/* mesh light sampling */
-			if(kernel_data.integrator.pdf_triangles != 0.0f) {
-				int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
-				float num_samples_inv = num_samples_adjust/num_samples;
+		/* mesh light sampling */
+		if(kernel_data.integrator.pdf_triangles != 0.0f) {
+			int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+			float num_samples_inv = num_samples_adjust/num_samples;
 
-				if(kernel_data.integrator.num_all_lights)
-					num_samples_inv *= 0.5f;
+			if(kernel_data.integrator.num_all_lights)
+				num_samples_inv *= 0.5f;
 
-				for(int j = 0; j < num_samples; j++) {
-					float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
-					float light_u, light_v;
-					path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+			for(int j = 0; j < num_samples; j++) {
+				float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
+				float light_u, light_v;
+				path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
 
-					/* only sample triangle lights */
-					if(kernel_data.integrator.num_all_lights)
-						light_t = 0.5f*light_t;
+				/* only sample triangle lights */
+				if(kernel_data.integrator.num_all_lights)
+					light_t = 0.5f*light_t;
 
-					LightSample ls;
-					light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+				LightSample ls;
+				light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
 
-					if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
-						/* trace shadow ray */
-						float3 shadow;
+				if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+					/* trace shadow ray */
+					float3 shadow;
 
-						if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
-							/* accumulate */
-							path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
-						}
+					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+						/* accumulate */
+						path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
 					}
 				}
 			}
 		}
-		else {
-			float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
-			float light_u, light_v;
-			path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
-			LightSample ls;
-			light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
-
-			/* sample random light */
-			if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
-				/* trace shadow ray */
-				float3 shadow;
-
-				if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
-					/* accumulate */
-					path_radiance_accum_light(L, throughput, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
-				}
+	}
+	else {
+		/* sample one light at random */
+		float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+		float light_u, light_v;
+		path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+		LightSample ls;
+		light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
+
+		/* sample random light */
+		if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+			/* trace shadow ray */
+			float3 shadow;
+
+			if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+				/* accumulate */
+				path_radiance_accum_light(L, throughput, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
 			}
 		}
 	}
+#endif
 }
 
 /* branched path tracing: bounce off or through surface to with new direction stored in ray */
@@ -196,7 +200,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG
 #endif
 
 	LightSample ls;
-	light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+	light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, false);
 
 	if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
 		/* trace shadow ray */
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 6196a35a184..6cc3085b96e 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -18,11 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __VOLUME__
 
-ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-	ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L, float num_samples_adjust)
+ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
+	ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+	float num_samples_adjust)
 {
 #ifdef __EMISSION__
-	if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
+	if(!kernel_data.integrator.use_direct_light)
 		return;
 
 	/* sample illumination from lights to find path contribution */
@@ -32,15 +33,19 @@ ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *
 
 	Ray light_ray;
 	BsdfEval L_light;
+	LightSample ls;
 	bool is_lamp;
 
+	/* connect to light from given point where shader has been evaluated */
 #ifdef __OBJECT_MOTION__
 	light_ray.time = sd->time;
 #endif
 
-	LightSample ls;
-	light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
-
+	if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
+		return;
+	else if(ls.pdf == 0.0f)
+		return;
+	
 	if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
 		/* trace shadow ray */
 		float3 shadow;
@@ -53,7 +58,7 @@ ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *
 #endif
 }
 
-ccl_device_inline bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
+ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
 	ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray,
 	float num_samples_adjust)
 {
@@ -98,6 +103,178 @@ ccl_device_inline bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
 	return true;
 }
 
+#ifdef __KERNEL_CPU__
+
+ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
+	ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+	float num_samples_adjust, bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
+{
+#ifdef __EMISSION__
+	if(!kernel_data.integrator.use_direct_light)
+		return;
+
+	Ray light_ray;
+	BsdfEval L_light;
+	bool is_lamp;
+
+#ifdef __OBJECT_MOTION__
+	light_ray.time = sd->time;
+#endif
+
+	if(sample_all_lights) {
+		/* lamp sampling */
+		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+			int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
+			float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+			RNG lamp_rng = cmj_hash(*rng, i);
+
+			if(kernel_data.integrator.pdf_triangles != 0.0f)
+				num_samples_inv *= 0.5f;
+
+			for(int j = 0; j < num_samples; j++) {
+				/* sample random position on given light */
+				float light_u, light_v;
+				path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+				LightSample ls;
+				if(!light_select(kg, i, light_u, light_v, ray->P, &ls, true))
+					continue;
+
+				float3 tp = throughput;
+
+				/* sample position on volume segment */
+				if(segment) {
+					float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+					float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+						state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+					if(result != VOLUME_PATH_SCATTERED)
+						continue;
+
+					/* todo: split up light_sample so we don't have to call it again with new position */
+					if(!light_select(kg, i, light_u, light_v, sd->P, &ls, true))
+						continue;
+				}
+
+				if(ls.pdf == 0.0f)
+					continue;
+
+				if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+					/* trace shadow ray */
+					float3 shadow;
+
+					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+						/* accumulate */
+						path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+					}
+				}
+			}
+		}
+
+		/* mesh light sampling */
+		if(kernel_data.integrator.pdf_triangles != 0.0f) {
+			int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+			float num_samples_inv = num_samples_adjust/num_samples;
+
+			if(kernel_data.integrator.num_all_lights)
+				num_samples_inv *= 0.5f;
+
+			for(int j = 0; j < num_samples; j++) {
+				/* sample random position on random triangle */
+				float light_t = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_LIGHT);
+				float light_u, light_v;
+				path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+				/* only sample triangle lights */
+				if(kernel_data.integrator.num_all_lights)
+					light_t = 0.5f*light_t;
+
+				LightSample ls;
+				if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true))
+					continue;
+
+				float3 tp = throughput;
+
+				/* sample position on volume segment */
+				if(segment) {
+					float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+					float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+						state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+					if(result != VOLUME_PATH_SCATTERED)
+						continue;
+
+					/* todo: split up light_sample so we don't have to call it again with new position */
+					if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
+						continue;
+				}
+
+				if(ls.pdf == 0.0f)
+					continue;
+
+				if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+					/* trace shadow ray */
+					float3 shadow;
+
+					if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+						/* accumulate */
+						path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+					}
+				}
+			}
+		}
+	}
+	else {
+		/* sample random position on random light */
+		float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+		float light_u, light_v;
+		path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+		LightSample ls;
+		if(!light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls, true))
+			return;
+
+		float3 tp = throughput;
+
+		/* sample position on volume segment */
+		if(segment) {
+			float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+			float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+
+			VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+				state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+			if(result != VOLUME_PATH_SCATTERED)
+				return;
+
+			/* todo: split up light_sample so we don't have to call it again with new position */
+			if(!light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls, true))
+				return;
+		}
+
+		if(ls.pdf == 0.0f)
+			return;
+
+		/* sample random light */
+		if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+			/* trace shadow ray */
+			float3 shadow;
+
+			if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+				/* accumulate */
+				path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+			}
+		}
+	}
+#endif
+}
+
+#endif
+
 #endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index ac04b3168a1..236f74c0a82 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -261,12 +261,12 @@ ccl_device uint lcg_init(uint seed)
  * For branches in the path we must be careful not to reuse the same number
  * in a sequence and offset accordingly. */
 
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
 {
 	return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
 }
 
-ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
 {
 	/* the rng_offset is not increased for transparent bounces. if we do then
 	 * fully transparent objects can become subtly visible by the different
@@ -279,17 +279,23 @@ ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *r
 	return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension, float *fx, float *fy)
 {
 	path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
 }
 
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
 {
 	return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+{
+	int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
+	return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
+}
+
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
 {
 	path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
 }
@@ -303,7 +309,7 @@ ccl_device_inline void path_state_branch(PathState *state, int branch, int num_b
 	state->num_samples = state->num_samples*num_branches;
 }
 
-ccl_device_inline uint lcg_state_init(RNG *rng, PathState *state, uint scramble)
+ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
 {
 	return lcg_init(*rng + state->rng_offset + state->sample*scramble);
 }
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 0ea21a85fcb..842b9f68840 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -858,7 +858,7 @@ ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, con
 
 			if(phase_pdf != 0.0f) {
 				bsdf_eval_accum(result_eval, sc->type, eval);
-				sum_pdf += phase_pdf;
+				sum_pdf += phase_pdf*sc->sample_weight;
 			}
 
 			sum_sample_weight += sc->sample_weight;
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 49e4cf64de4..61e4989147b 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -136,7 +136,7 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s
 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
 {
 	float3 tp = *throughput;
-	const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
+	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
 
 	/* prepare for stepping */
 	int max_steps = kernel_data.integrator.volume_max_steps;
@@ -226,25 +226,6 @@ ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float s
 	return pdf;
 }
 
-ccl_device bool kernel_volume_equiangular_light_position(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float3 *light_P, bool *distant)
-{
-	/* light RNGs */
-	float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
-	float light_u, light_v;
-	path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
-	/* light sample */
-	LightSample ls;
-	light_sample(kg, light_t, light_u, light_v, ray->time, ray->P, &ls);
-	if(ls.pdf == 0.0f)
-		return false;
-	
-	*light_P = ls.P;
-	*distant = ls.t == FLT_MAX;
-
-	return true;
-}
-
 /* Distance sampling */
 
 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
@@ -304,7 +285,7 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
  * the volume shading coefficient for the entire line segment */
 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
 	PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
-	RNG *rng)
+	RNG *rng, bool probalistic_scatter)
 {
 	VolumeShaderCoefficients coeff;
 
@@ -326,47 +307,37 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
 		int channel = (int)(rphase*3.0f);
 		sd->randb_closure = rphase*3.0f - channel;
 
+		/* decide if we will hit or miss */
+		bool scatter = true;
 		float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
 
-		/* decide if we will hit or miss */
-		float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
-		float sample_transmittance = expf(-sample_sigma_t * t);
+		if(probalistic_scatter) {
+			float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+			float sample_transmittance = expf(-sample_sigma_t * t);
 
-		if(xi >= sample_transmittance) {
-			/* scattering */
-			float3 pdf;
-			float3 transmittance;
-			float sample_t;
+			if(1.0f - xi >= sample_transmittance) {
+				scatter = true;
 
-			/* rescale random number so we can reuse it */
-			xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
+				/* rescale random number so we can reuse it */
+				xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
 
-			if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
-				/* distance sampling */
-				sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
 			}
-			else {
-				/* equiangular sampling */
-				float3 light_P;
-				float equi_pdf;
-				bool light_distant;
+			else
+				scatter = false;
+		}
 
-				if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P, &light_distant))
-					return VOLUME_PATH_MISSED;
+		if(scatter) {
+			/* scattering */
+			float3 pdf;
+			float3 transmittance;
+			float sample_t;
 
-				if(light_distant) {
-					/* distant light, revert to distance sampling because position is infinitely far away */
-					sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
-				}
-				else {
-					sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf);
-					transmittance = volume_color_transmittance(sigma_t, sample_t);
-					pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
-				}
-			}
+			/* distance sampling */
+			sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
 
 			/* modifiy pdf for hit/miss decision */
-			pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
+			if(probalistic_scatter)
+				pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
 
 			new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
 			t = sample_t;
@@ -385,7 +356,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
 	}
 
 	/* integrate emission attenuated by extinction */
-	if(closure_flag & SD_EMISSION) {
+	if(L && (closure_flag & SD_EMISSION)) {
 		float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
 		float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
 		float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
@@ -408,13 +379,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
 	return VOLUME_PATH_ATTENUATED;
 }
 
-/* heterogeneous volume: integrate stepping through the volume until we
- * reach the end, get absorbed entirely, or run out of iterations */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlobals *kg,
+/* heterogeneous volume distance sampling: integrate stepping through the
+ * volume until we reach the end, get absorbed entirely, or run out of
+ * iterations. this does probalistically scatter or get transmitted through
+ * for path tracing where we don't want to branch. */
+ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
 	PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
 {
 	float3 tp = *throughput;
-	const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
+	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
 
 	/* prepare for stepping */
 	int max_steps = kernel_data.integrator.volume_max_steps;
@@ -425,9 +398,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 	float t = 0.0f;
 	float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
 
-	/* cache some constant variables */
-	float xi;
-	int channel = -1;
+	/* pick random color channel, we use the Veach one-sample
+	 * model with balance heuristic for the channels */
+	float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+	float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+	int channel = (int)(rphase*3.0f);
+	sd->randb_closure = rphase*3.0f - channel;
 	bool has_scatter = false;
 
 	for(int i = 0; i < max_steps; i++) {
@@ -449,25 +425,13 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 			float3 transmittance;
 			bool scatter = false;
 
-			/* randomly scatter, and if we do dt and new_t are shortened */
+			/* distance sampling */
 			if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
 				has_scatter = true;
 
-				/* average sigma_t and sigma_s over segment */
 				float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
 				float3 sigma_s = coeff.sigma_s;
 
-				/* lazily set up variables for sampling */
-				if(channel == -1) {
-					/* pick random color channel, we use the Veach one-sample
-					 * model with balance heuristic for the channels */
-					xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
-
-					float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
-					channel = (int)(rphase*3.0f);
-					sd->randb_closure = rphase*3.0f - channel;
-				}
-
 				/* compute transmittance over full step */
 				transmittance = volume_color_transmittance(sigma_t, dt);
 
@@ -480,10 +444,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 					float new_dt = -logf(1.0f - xi)/sample_sigma_t;
 					new_t = t + new_dt;
 
-					/* transmittance, throughput */
+					/* transmittance and pdf */
 					float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
-					float pdf = average(sigma_t * new_transmittance);
-					new_tp = tp * sigma_s * new_transmittance / pdf;
+					float3 pdf = sigma_t * new_transmittance;
+
+					/* throughput */
+					new_tp = tp * sigma_s * new_transmittance / average(pdf);
 					scatter = true;
 				}
 				else {
@@ -504,7 +470,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 			}
 
 			/* integrate emission attenuated by absorption */
-			if(closure_flag & SD_EMISSION) {
+			if(L && (closure_flag & SD_EMISSION)) {
 				float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
 				path_radiance_accum_emission(L, tp, emission, state->bounce);
 			}
@@ -518,19 +484,19 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 					tp = make_float3(0.0f, 0.0f, 0.0f);
 					break;
 				}
+			}
 
-				/* prepare to scatter to new direction */
-				if(scatter) {
-					/* adjust throughput and move to new location */
-					sd->P = ray->P + new_t*ray->D;
-					*throughput = tp;
+			/* prepare to scatter to new direction */
+			if(scatter) {
+				/* adjust throughput and move to new location */
+				sd->P = ray->P + new_t*ray->D;
+				*throughput = tp;
 
-					return VOLUME_PATH_SCATTERED;
-				}
-				else {
-					/* accumulate transmittance */
-					accum_transmittance *= transmittance;
-				}
+				return VOLUME_PATH_SCATTERED;
+			}
+			else {
+				/* accumulate transmittance */
+				accum_transmittance *= transmittance;
 			}
 		}
 
@@ -545,14 +511,35 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
 	return VOLUME_PATH_ATTENUATED;
 }
 
+/* get the volume attenuation and emission over line segment defined by
+ * ray, with the assumption that there are no surfaces blocking light
+ * between the endpoints. distance sampling is used to decide if we will
+ * scatter or not. */
+ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
+	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
+{
+	/* workaround to fix correlation bug in T38710, can find better solution
+	 * in random number generator later, for now this is done here to not impact
+	 * performance of rendering without volumes */
+	RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
+	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+	shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+
+	if(heterogeneous)
+		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
+	else
+		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
+}
+
 /* Decoupled Volume Sampling
  *
  * VolumeSegment is list of coefficients and transmittance stored at all steps
  * through a volume. This can then latter be used for decoupled sampling as in:
- * "Importance Sampling Techniques for Path Tracing in Participating Media" */
-
-/* CPU only because of malloc/free */
-#ifdef __KERNEL_CPU__
+ * "Importance Sampling Techniques for Path Tracing in Participating Media"
+ *
+ * On the GPU this is only supported for homogeneous volumes (1 step), due to
+ * no support for malloc/free and too much stack usage with a fix size array. */
 
 typedef struct VolumeStep {
 	float3 sigma_s;				/* scatter coefficient */
@@ -565,7 +552,11 @@ typedef struct VolumeStep {
 } VolumeStep;
 
 typedef struct VolumeSegment {
+#ifdef __KERNEL_CPU__
 	VolumeStep *steps;			/* recorded steps */
+#else
+	VolumeStep steps[1];		/* recorded steps */
+#endif
 	int numsteps;				/* number of steps */
 	int closure_flag;			/* accumulated closure flags from all steps */
 
@@ -582,6 +573,8 @@ typedef struct VolumeSegment {
 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
 	Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
 {
+	const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
 	/* prepare for volume stepping */
 	int max_steps;
 	float step_size, random_jitter_offset;
@@ -608,7 +601,11 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 
 	segment->closure_flag = 0;
 	segment->numsteps = 0;
+#ifdef __KERNEL_CPU__
 	segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+#else
+	kernel_assert(max_steps == 1);
+#endif
 
 	VolumeStep *step = segment->steps;
 
@@ -669,6 +666,10 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 		t = new_t;
 		if(t == ray->t)
 			break;
+
+		/* stop if nearly all light blocked */
+		if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
+			break;
 	}
 
 	/* store total emission and transmittance */
@@ -690,7 +691,9 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 
 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
 {
+#ifdef __KERNEL_CPU__
 	free(segment->steps);
+#endif
 }
 
 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
@@ -701,7 +704,8 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s
  * these also do not do emission or modify throughput. */
 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 	KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
-	float3 *throughput, RNG *rng, VolumeSegment *segment)
+	float3 *throughput, float rphase, float rscatter,
+	const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
 {
 	int closure_flag = segment->closure_flag;
 
@@ -710,38 +714,56 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 
 	/* pick random color channel, we use the Veach one-sample
 	 * model with balance heuristic for the channels */
-	float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
 	int channel = (int)(rphase*3.0f);
 	sd->randb_closure = rphase*3.0f - channel;
+	float xi = rscatter;
 
-	float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+	/* probalistic scattering decision based on transmittance */
+	if(probalistic_scatter) {
+		float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
+
+		if(1.0f - xi >= sample_transmittance) {
+			/* rescale random number so we can reuse it */
+			xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
+		}
+		else
+			return VOLUME_PATH_MISSED;
+	}
 
 	VolumeStep *step;
 	float3 transmittance;
 	float pdf, sample_t;
+	float mis_weight = 1.0f;
+	bool distance_sample = true;
+	bool use_mis = false;
+
+	if(kernel_data.integrator.volume_homogeneous_sampling && light_P) {
+		if(kernel_data.integrator.volume_homogeneous_sampling == 2) {
+			/* multiple importance sample: randomly pick between
+			 * equiangular and distance sampling strategy */
+			if(xi < 0.5f) {
+				xi *= 2.0f;
+			}
+			else {
+				xi = (xi - 0.5f)*2.0f;
+				distance_sample = false;
+			}
 
-	/* pick position on light for equiangular */
-	bool equiangular = (kernel_data.integrator.volume_homogeneous_sampling != 0 && kernel_data.integrator.num_all_lights);
-	float3 light_P;
-
-	if(equiangular) {
-		bool light_distant;
-
-		if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P, &light_distant))
-			return VOLUME_PATH_MISSED;
-
-		/* distant light, revert to distance sampling because position is infinitely far away */
-		if(light_distant)
-			equiangular = false;
+			use_mis = true;
+		}
+		else {
+			/* only equiangular sampling */
+			distance_sample = false;
+		}
 	}
 
 	/* distance sampling */
-	if(!equiangular) {
+	if(distance_sample) {
 		/* find step in cdf */
 		step = segment->steps;
 
 		float prev_t = 0.0f;
-		float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f);
+		float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
 
 		if(segment->numsteps > 1) {
 			float prev_cdf = 0.0f;
@@ -764,7 +786,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 			xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
 
 			/* pdf for picking step */
-			step_pdf = step->cdf_distance - prev_cdf_distance;
+			step_pdf_distance = step->cdf_distance - prev_cdf_distance;
 		}
 
 		/* determine range in which we will sample */
@@ -773,30 +795,59 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 		/* sample distance and compute transmittance */
 		float3 distance_pdf;
 		sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
-		pdf = average(distance_pdf * step_pdf);
+
+		/* modifiy pdf for hit/miss decision */
+		if(probalistic_scatter)
+			distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
+
+		pdf = average(distance_pdf * step_pdf_distance);
+
+		/* multiple importance sampling */
+		if(use_mis) {
+			float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
+			mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
+		}
 	}
 	/* equi-angular sampling */
 	else {
 		/* sample distance */
-		sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf);
+		sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
 
 		/* find step in which sampled distance is located */
 		step = segment->steps;
 
 		float prev_t = 0.0f;
+		float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
 
 		if(segment->numsteps > 1) {
 			/* todo: optimize using binary search */
+			float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
 			for(int i = 0; i < segment->numsteps-1; i++, step++) {
 				if(sample_t < step->t)
 					break;
 
 				prev_t = step->t;
+				prev_cdf_distance = step->cdf_distance;
 			}
+
+			/* pdf for picking step with distance sampling */
+			step_pdf_distance = step->cdf_distance - prev_cdf_distance;
 		}
-		
+
+		/* determine range in which we will sample */
+		float step_t = step->t - prev_t;
+		float step_sample_t = sample_t - prev_t;
+
 		/* compute transmittance */
-		transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t);
+		transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
+
+		/* multiple importance sampling */
+		if(use_mis) {
+			float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
+			float distance_pdf = average(distance_pdf3 * step_pdf_distance);
+			mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
+		}
 	}
 
 	/* compute transmittance up to this step */
@@ -804,7 +855,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 		transmittance *= (step-1)->accum_transmittance;
 
 	/* modify throughput */
-	*throughput *= step->sigma_s * transmittance / pdf;
+	*throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
 
 	/* evaluate shader to create closures at shading point */
 	if(segment->numsteps > 1) {
@@ -820,40 +871,28 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 	return VOLUME_PATH_SCATTERED;
 }
 
-#endif
-
-/* get the volume attenuation and emission over line segment defined by
- * ray, with the assumption that there are no surfaces blocking light
- * between the endpoints */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
-	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
+/* decide if we need to use decoupled or not */
+ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct)
 {
-	/* workaround to fix correlation bug in T38710, can find better solution
-	 * in random number generator later, for now this is done here to not impact
-	 * performance of rendering without volumes */
-	RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
-	bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-#if 0
-	/* debugging code to compare decoupled ray marching */
-	VolumeSegment segment;
-
-	shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
-	kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous);
-
-	VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment);
-
-	kernel_volume_decoupled_free(kg, &segment);
+	/* decoupled ray marching for heterogenous volumes not supported on the GPU,
+	 * which also means equiangular and multiple importance sampling is not
+	 * support for that case */
+#ifdef __KERNEL_GPU__
+	if(heterogeneous)
+		return false;
+#endif
 
-	return result;
-#else
-	shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+	/* equiangular sampling only implemented for decoupled */
+	bool equiangular = kernel_data.integrator.volume_homogeneous_sampling != 0;
+	if(equiangular)
+		return true;
 
-	if(heterogeneous)
-		return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
+	/* for all light sampling use decoupled, reusing shader evaluations is
+	 * typically faster in that case */
+	if(direct)
+		return kernel_data.integrator.sample_all_lights_direct;
 	else
-		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
-#endif
+		return kernel_data.integrator.sample_all_lights_indirect;
 }
 
 /* Volume Stack
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 051ba1baf3a..ee3419b055c 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -101,7 +101,11 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
 	if(!transparent_shadows)
 		kintegrator->transparent_shadows = false;
 
-	kintegrator->volume_homogeneous_sampling = volume_homogeneous_sampling;
+	if(kintegrator->num_all_lights > 0)
+		kintegrator->volume_homogeneous_sampling = volume_homogeneous_sampling;
+	else
+		kintegrator->volume_homogeneous_sampling = 0;
+
 	kintegrator->volume_max_steps = volume_max_steps;
 	kintegrator->volume_step_size = volume_step_size;
 
@@ -125,8 +129,15 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
 	kintegrator->mesh_light_samples = mesh_light_samples;
 	kintegrator->subsurface_samples = subsurface_samples;
 	kintegrator->volume_samples = volume_samples;
-	kintegrator->sample_all_lights_direct = sample_all_lights_direct;
-	kintegrator->sample_all_lights_indirect = sample_all_lights_indirect;
+
+	if(method == BRANCHED_PATH) {
+		kintegrator->sample_all_lights_direct = sample_all_lights_direct;
+		kintegrator->sample_all_lights_indirect = sample_all_lights_indirect;
+	}
+	else {
+		kintegrator->sample_all_lights_direct = false;
+		kintegrator->sample_all_lights_indirect = false;
+	}
 
 	kintegrator->sampling_pattern = sampling_pattern;
 	kintegrator->aa_samples = aa_samples;
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2014-04-04 18:45:49 +0400
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2014-06-14 15:49:56 +0400
commit	a29807cd63b0cba62e664c54ce34e5717ca51a3e (patch)
tree	ac3a32e4393638711686e93879c6c218a17713eb
parent	d644753319b65a25ae043900cca235070bcdd181 (diff)