From 5af00a3d12cbce49cb199509373a252c2a0eda85 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Thu, 14 Aug 2014 20:09:25 +0200 Subject: Cycles: Optimization for Heterogeneous Volume Shadows. * Don't compute expf() for every step, instead sum the intermediate values and calculate it every N (8 for now) steps. This helps a few percent (~5% on a cube with wave texture) in my tests here. --- intern/cycles/kernel/kernel_volume.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index cc4b2e3edf1..ed73ac8088c 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -176,6 +176,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute extinction at the start */ float t = 0.0f; + float3 sum = make_float3(0.0f, 0.0f, 0.0f); + for(int i = 0; i < max_steps; i++) { /* advance to new position */ float new_t = min(ray->t, (i+1) * step); @@ -190,20 +192,26 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute attenuation over segment */ if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { - /* todo: we could avoid computing expf() for each step by summing, - * because exp(a)*exp(b) = exp(a+b), but we still want a quick - * tp_eps check too */ - tp *= volume_color_transmittance(sigma_t, new_t - t); - - /* stop if nearly all light blocked */ - if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) - break; + /* Compute expf() only for every Nth step, to save some calculations + * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */ + + sum += (-sigma_t * (new_t - t)); + if((i & 0x07) == 0) { /* ToDo: Other interval? */ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); + + /* stop if nearly all light is blocked */ + if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) + break; + } } /* stop if at the end of the volume */ t = new_t; - if(t == ray->t) + if(t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); break; + } } *throughput = tp; -- cgit v1.2.3