Cycles: Fully roll-back to non-delayed SSS indirect rays for CPU

There are some issues to be solved with the recent optimization we did for the indirect rays for the SSS. Those issues will take a bit of a time to be fully solved still and we need to unlock Caminandes team now, so let's revert some changes back. CUDA will still use delayed indirect rays since it's an experimental feature. For the details about what's to be done still please refer to T46880.
author: Sergey Sharybin <sergey.vfx@gmail.com> 2015-11-27 15:12:44 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2015-11-27 15:15:02 +0300
commit: 20fc9c00fddeabc524ba91815108e52edd85ce4b (patch)
tree: 1601a0bbbf6d13fd68597d919c130f289290c9e2 /intern
parent: 175f00c89a88056b5307ec385968333c9327b63f (diff)
3 files changed, 45 insertions, 4 deletions
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 3efd7ecca5f..e9cf02caaf3 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -74,6 +74,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 			                                  &throughput,
 			                                  &ss_indirect))
 			{
+#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				while(ss_indirect.num_rays) {
 					kernel_path_subsurface_setup_indirect(kg,
 					                                      &ss_indirect,
@@ -90,6 +91,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 					                     &state,
 					                     &L_sample);
 				}
+#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 				is_sss_sample = true;
 			}
 		}
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 885782f1a67..251d7809941 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -508,7 +508,38 @@ ccl_device bool kernel_path_subsurface_scatter(
 #ifdef __LAMP_MIS__
 				hit_state->ray_t = 0.0f;
 #endif
+
+#ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				ss_indirect->num_rays++;
+#else
+#  ifdef __VOLUME__
+				if(ss_indirect->need_update_volume_stack) {
+					Ray volume_ray = *ray;
+
+					/* Setup ray from previous surface point to the new one. */
+					volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
+					                             &volume_ray.t);
+
+					kernel_volume_stack_update_for_subsurface(kg,
+					                                          &volume_ray,
+					                                          hit_state->volume_stack);
+				}
+#  endif  /* __VOLUME__ */
+
+				kernel_path_indirect(kg,
+				                     rng,
+				                     hit_ray,
+				                     *hit_tp,
+				                     hit_state->num_samples,
+				                     hit_state,
+				                     L);
+
+				/* For render passes, sum and reset indirect light pass variables
+				 * for the next samples.
+				 */
+				path_radiance_sum_indirect(L);
+				path_radiance_reset_indirect(L);
+#endif
 			}
 		}
 		return true;
@@ -516,6 +547,7 @@ ccl_device bool kernel_path_subsurface_scatter(
 	return false;
 }
 
+#ifdef __SUBSURFACE_DELAYED_INDIRECT__
 ccl_device void kernel_path_subsurface_setup_indirect(
         KernelGlobals *kg,
         SubsurfaceIndirectRays *ss_indirect,
@@ -545,7 +577,7 @@ ccl_device void kernel_path_subsurface_setup_indirect(
 		                                          &volume_ray,
 		                                          state->volume_stack);
 	}
-#endif
+#endif  /* __VOLUME__ */
 
 	*ray = *indirect_ray;
 
@@ -555,6 +587,8 @@ ccl_device void kernel_path_subsurface_setup_indirect(
 	path_radiance_sum_indirect(L);
 	path_radiance_reset_indirect(L);
 }
+#endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
+
 #endif
 
 ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
@@ -578,6 +612,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 	SubsurfaceIndirectRays ss_indirect;
 	ss_indirect.num_rays = 0;
 
+#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 	/* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter
 	 * ray by storing an updated version of state in the ss_indirect which will
 	 * be updated to the new volume stack.
@@ -585,6 +620,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 	Ray ss_orig_ray;
 
 	for(;;) {
+#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 #endif
 
 	/* path iteration */
@@ -833,11 +869,13 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			                                  &throughput,
 			                                  &ss_indirect))
 			{
+#  ifdef __SUBSURFACE_DELAYED_INDIRECT__
 				ss_orig_ray = ray;
+#  endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 				break;
 			}
 		}
-#endif
+#endif  /* __SUBSURFACE__ */
 
 		/* direct lighting */
 		kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
@@ -847,7 +885,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			break;
 	}
 
-#ifdef __SUBSURFACE__
+#ifdef __SUBSURFACE_DELAYED_INDIRECT__
 		/* Trace indirect subsurface rays by restarting the loop. this uses less
 		 * stack memory than invoking kernel_path_indirect.
 		 */
@@ -864,7 +902,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
 			break;
 		}
 	}
-#endif
+#endif  /* __SUBSURFACE_DELAYED_INDIRECT__ */
 
 	float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
 
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index bef00355c5e..b9869886a8f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -87,6 +87,7 @@ CCL_NAMESPACE_BEGIN
 /* Experimental on GPU */
 #ifdef __KERNEL_EXPERIMENTAL__
 #define __SUBSURFACE__
+#define __SUBSURFACE_DELAYED_INDIRECT__
 #define __CMJ__
 #endif
author	Sergey Sharybin <sergey.vfx@gmail.com>	2015-11-27 15:12:44 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2015-11-27 15:15:02 +0300
commit	20fc9c00fddeabc524ba91815108e52edd85ce4b (patch)
tree	1601a0bbbf6d13fd68597d919c130f289290c9e2 /intern
parent	175f00c89a88056b5307ec385968333c9327b63f (diff)