diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-22 13:00:29 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-11-25 11:01:22 +0300 |
commit | 8bca34fe326d10cc2f20df7fa541179e9ba835d2 (patch) | |
tree | aeab22e5e0ec3d4ee1a5fe8c37daee0be4a89bee /intern/cycles/kernel/geom/geom_motion_triangle.h | |
parent | e6fff424dbcd02c3fed25036a7feb7f59d427843 (diff) |
Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the
old logic of having separate arrays for intersections and shader data, and
encapsulates all the data needed for SSS evaluation.
This gives a huge stack memory saving on GPU. In my own experiments it gave a 25%
memory usage reduction on a GTX560Ti (722MB vs. 946MB).
Unfortunately, this gave a performance loss of 20%, which only happens on GPU.
This is perhaps due to a different memory access pattern. It will hopefully be
solved in the future.
Famous saying: won in memory - lost in time (which is also valid the other way
around).
Diffstat (limited to 'intern/cycles/kernel/geom/geom_motion_triangle.h')
-rw-r--r-- | intern/cycles/kernel/geom/geom_motion_triangle.h | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 86f93f242a1..a7b3f5cad28 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection * multiple hits we pick a single random primitive as the intersection point. */ #ifdef __SUBSURFACE__ -ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, - float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) +ccl_device_inline void motion_triangle_intersect_subsurface( + KernelGlobals *kg, + SubsurfaceIntersection *ss_isect, + float3 P, + float3 dir, + float time, + int object, + int triAddr, + float tmax, + uint *lcg_state, + int max_hits) { /* primitive index for vertex location lookup */ int prim = kernel_tex_fetch(__prim_index, triAddr); @@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I float t, u, v; if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) { - (*num_hits)++; + ss_isect->num_hits++; int hit; - if(*num_hits <= max_hits) { - hit = *num_hits - 1; + if(ss_isect->num_hits <= max_hits) { + hit = ss_isect->num_hits - 1; } else { /* reservoir sampling: if we are at the maximum number of * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % *num_hits; + hit = lcg_step_uint(lcg_state) % ss_isect->num_hits; if(hit >= max_hits) return; } /* record intersection */ - Intersection *isect = &isect_array[hit]; + Intersection *isect = &ss_isect->hits[hit]; isect->t = t; isect->u = u; isect->v = v; isect->prim = triAddr; isect->object = object; isect->type = PRIMITIVE_MOTION_TRIANGLE; + + /* Record geometric normal. 
*/ + ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], + verts[2] - verts[0])); } } #endif |