Cycles: Delay shooting SSS indirect rays

The idea is to delay shooting indirect rays for the SSS sampling and trace them after the main integration loop has finished. This reduces GPU stack usage even further and brings it down to around 652MB (compared to 722MB before the change and 946MB with the previous stable release). This also solves the speed regression that was introduced in the previous commit, and now a simple SSS scene (SSS Suzanne on the floor) renders in 0:50 (compared to 1:16 with the previous commit and 1:03 with the official release).
author: Sergey Sharybin <sergey.vfx@gmail.com> 2015-11-22 13:48:33 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2015-11-25 11:01:22 +0300
commit: 2a5c1fc9ccbabfaef4eeaf90093dfb2ac0acfc90 (patch)
tree: 98ee26786419e78d9fa0e325c527bf2e1fa3d905 /intern/cycles/kernel/kernel_path_branched.h
parent: 8bca34fe326d10cc2f20df7fa541179e9ba835d2 (diff)
1 files changed, 51 insertions, 8 deletions
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index b6f95d6b0d2..ee507364b30 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -91,10 +91,27 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
 			float3 tp = throughput;
 			Ray bsdf_ray;
 
-			if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray))
+			if(!kernel_branched_path_surface_bounce(kg,
+			                                        &bsdf_rng,
+			                                        sd,
+			                                        sc,
+			                                        j,
+			                                        num_samples,
+			                                        &tp,
+			                                        &ps,
+			                                        L,
+			                                        &bsdf_ray))
+			{
 				continue;
+			}
 
-			kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
+			kernel_path_indirect(kg,
+			                     rng,
+			                     &bsdf_ray,
+			                     tp*num_samples_inv,
+			                     num_samples,
+			                     &ps,
+			                     L);
 
 			/* for render passes, sum and reset indirect light pass variables
 			 * for the next samples */
@@ -312,12 +329,25 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
 					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
 						&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-						
+
 					(void)result;
 					kernel_assert(result == VOLUME_PATH_SCATTERED);
 
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
-						kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+					if(kernel_path_volume_bounce(kg,
+					                             rng,
+					                             &volume_sd,
+					                             &tp,
+					                             &ps,
+					                             &L,
+					                             &pray))
+					{
+						kernel_path_indirect(kg,
+						                     rng,
+						                     &pray,
+						                     tp*num_samples_inv,
+						                     num_samples,
+						                     &ps,
+						                     &L);
 
 						/* for render passes, sum and reset indirect light pass variables
 						 * for the next samples */
@@ -353,15 +383,28 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
 				VolumeIntegrateResult result = kernel_volume_integrate(
 					kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
-				
+
 #ifdef __VOLUME_SCATTER__
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* todo: support equiangular, MIS and all light sampling.
 					 * alternatively get decoupled ray marching working on the GPU */
 					kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
 
-					if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
-						kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
+					if(kernel_path_volume_bounce(kg,
+					                             rng,
+					                             &volume_sd,
+					                             &tp,
+					                             &ps,
+					                             &L,
+					                             &pray))
+					{
+						kernel_path_indirect(kg,
+						                     rng,
+						                     &pray,
+						                     tp,
+						                     num_samples,
+						                     &ps,
+						                     &L);
 
 						/* for render passes, sum and reset indirect light pass variables
 						 * for the next samples */
author	Sergey Sharybin <sergey.vfx@gmail.com>	2015-11-22 13:48:33 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2015-11-25 11:01:22 +0300
commit	2a5c1fc9ccbabfaef4eeaf90093dfb2ac0acfc90 (patch)
tree	98ee26786419e78d9fa0e325c527bf2e1fa3d905 /intern/cycles/kernel/kernel_path_branched.h
parent	8bca34fe326d10cc2f20df7fa541179e9ba835d2 (diff)