Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-11-22 13:00:29 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2015-11-25 11:01:22 +0300
commit8bca34fe326d10cc2f20df7fa541179e9ba835d2 (patch)
treeaeab22e5e0ec3d4ee1a5fe8c37daee0be4a89bee /intern
parente6fff424dbcd02c3fed25036a7feb7f59d427843 (diff)
Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the old logic of having separate arrays for intersections and shader data, and encapsulates all the data needed for SSS evaluation. This gives a huge stack memory saving on GPU. In my own experiments it gave a 25% memory usage reduction on a GTX560Ti (722MB vs. 946MB). Unfortunately, this also gave a performance loss of 20%, which only happens on GPU. This is perhaps due to a different memory access pattern. Will be solved in the future, hopefully. Famous saying: won in memory - lost in time (which is also valid the other way around).
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h67
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h37
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h27
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h32
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h20
-rw-r--r--intern/cycles/kernel/kernel_path.h27
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h43
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h134
-rw-r--r--intern/cycles/kernel/kernel_types.h12
9 files changed, 290 insertions, 109 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 3d0d406dd0b..cea505002e2 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -255,38 +255,81 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, con
}
#ifdef __SUBSURFACE__
-ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
+ const Ray *ray,
+ SubsurfaceIntersection *ss_isect,
+ int subsurface_object,
+ uint *lcg_state,
+ int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ if(kernel_data.bvh.have_curves) {
+ return bvh_intersect_subsurface_hair_motion(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __HAIR__ */
- return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface_motion(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
}
#endif /* __OBJECT_MOTION__ */
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves) {
+ return bvh_intersect_subsurface_hair(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ if(kernel_data.bvh.have_instancing) {
+ return bvh_intersect_subsurface_instancing(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __INSTANCING__ */
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface_instancing(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#else
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a093b9b55aa..b9f1a46afb6 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -30,9 +30,9 @@
*
*/
-ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@@ -60,7 +60,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
- uint num_hits = 0;
+ ss_isect->num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -210,7 +210,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg,
+ &isect_precalc,
+ ss_isect,
+ P,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -223,7 +231,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ motion_triangle_intersect_subsurface(kg,
+ ss_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -301,13 +318,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
}
-ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
+ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
const Ray *ray,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@@ -316,7 +331,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
if(kernel_data.bvh.use_qbvh) {
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
- isect_array,
+ ss_isect,
subsurface_object,
lcg_state,
max_hits);
@@ -327,7 +342,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
kernel_assert(kernel_data.bvh.use_qbvh == false);
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
- isect_array,
+ ss_isect,
subsurface_object,
lcg_state,
max_hits);
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 86f93f242a1..a7b3f5cad28 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
* multiple hits we pick a single random primitive as the intersection point. */
#ifdef __SUBSURFACE__
-ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
- float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+ccl_device_inline void motion_triangle_intersect_subsurface(
+ KernelGlobals *kg,
+ SubsurfaceIntersection *ss_isect,
+ float3 P,
+ float3 dir,
+ float time,
+ int object,
+ int triAddr,
+ float tmax,
+ uint *lcg_state,
+ int max_hits)
{
/* primitive index for vertex location lookup */
int prim = kernel_tex_fetch(__prim_index, triAddr);
@@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
- (*num_hits)++;
+ ss_isect->num_hits++;
int hit;
- if(*num_hits <= max_hits) {
- hit = *num_hits - 1;
+ if(ss_isect->num_hits <= max_hits) {
+ hit = ss_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % *num_hits;
+ hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* record intersection */
- Intersection *isect = &isect_array[hit];
+ Intersection *isect = &ss_isect->hits[hit];
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
+
+ /* Record geometric normal. */
+ ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
+ verts[2] - verts[0]));
}
}
#endif
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index fe231720cf7..98e1d27b79e 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -26,9 +26,9 @@
*
*/
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@@ -55,7 +55,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float3 idir = bvh_inverse_direction(dir);
int object = OBJECT_NONE;
float isect_t = ray->t;
- uint num_hits = 0;
+
+ ss_isect->num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -63,7 +64,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#ifndef __KERNEL_SSE41__
if(!isfinite(P.x)) {
- return 0;
+ return;
}
#endif
@@ -226,7 +227,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg,
+ &isect_precalc,
+ ss_isect,
+ P,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -240,7 +249,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
- motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ motion_triangle_intersect_subsurface(kg,
+ ss_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -321,6 +339,4 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
}
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 970616eb894..d37e593005c 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -204,12 +204,11 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
ccl_device_inline void triangle_intersect_subsurface(
KernelGlobals *kg,
const IsectPrecalc *isect_precalc,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
float3 P,
int object,
int triAddr,
float tmax,
- uint *num_hits,
uint *lcg_state,
int max_hits)
{
@@ -272,29 +271,36 @@ ccl_device_inline void triangle_intersect_subsurface(
/* Normalize U, V, W, and T. */
const float inv_det = 1.0f / det;
- (*num_hits)++;
+ ss_isect->num_hits++;
int hit;
- if(*num_hits <= max_hits) {
- hit = *num_hits - 1;
+ if(ss_isect->num_hits <= max_hits) {
+ hit = ss_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % *num_hits;
+ hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* record intersection */
- Intersection *isect = &isect_array[hit];
+ Intersection *isect = &ss_isect->hits[hit];
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
isect->v = V * inv_det;
isect->t = T * inv_det;
+
+ /* Record geometric normal. */
+ /* TODO(sergey): Use float4_to_float3() on just an edges. */
+ const float3 v0 = float4_to_float3(tri_a);
+ const float3 v1 = float4_to_float3(tri_b);
+ const float3 v2 = float4_to_float3(tri_c);
+ ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0));
}
#endif
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 9794ad1d180..87d36efa4d4 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -338,10 +338,16 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
if(sc) {
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+ int num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
@@ -350,15 +356,26 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
+ /* NOTE: We reuse the existing ShaderData, we assume the path
+ * integration loop stops when this function returns true.
+ */
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect,
+ hit,
+ sd,
+ state->flag,
+ sc,
+ false);
+
float3 tp = *throughput;
PathState hit_state = *state;
Ray hit_ray = *ray;
hit_state.rng_offset += PRNG_BOUNCE_NUM;
-
- kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L);
- if(kernel_path_surface_bounce(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) {
+ kernel_path_surface_connect_light(kg, rng, sd, tp, state, L);
+
+ if(kernel_path_surface_bounce(kg, rng, sd, &tp, &hit_state, L, &hit_ray)) {
#ifdef __LAMP_MIS__
hit_state.ray_t = 0.0f;
#endif
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index b6d64985f6a..b6f95d6b0d2 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -128,10 +128,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+ int num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ true);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
@@ -140,6 +146,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
+ ShaderData bssrdf_sd = *sd;
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect,
+ hit,
+ &bssrdf_sd,
+ state->flag,
+ sc,
+ true);
+
PathState hit_state = *state;
path_state_branch(&hit_state, j, num_samples);
@@ -147,7 +162,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
#ifdef __VOLUME__
if(need_update_volume_stack) {
/* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
+ float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
volume_ray.D = normalize_len(P - volume_ray.P,
&volume_ray.t);
@@ -165,15 +180,27 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* direct light */
if(kernel_data.integrator.use_direct_light) {
bool all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_surface_connect_light(kg, rng,
- &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
+ kernel_branched_path_surface_connect_light(
+ kg,
+ rng,
+ &bssrdf_sd,
+ &hit_state,
+ throughput,
+ num_samples_inv,
+ L,
+ all);
}
#endif
/* indirect light */
- kernel_branched_path_surface_indirect_light(kg, rng,
- &bssrdf_sd[hit], throughput, num_samples_inv,
- &hit_state, L);
+ kernel_branched_path_surface_indirect_light(
+ kg,
+ rng,
+ &bssrdf_sd,
+ throughput,
+ num_samples_inv,
+ &hit_state,
+ L);
}
}
}
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 2da060c32a2..b9928561791 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -179,19 +179,23 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
return color;
}
-ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
+ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
+ ShaderData *sd,
+ int state_flag,
+ float3 *eval,
+ float3 *N)
{
/* average color and texture blur at outgoing point */
float texture_blur;
- float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
+ float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
/* do we have bump mapping? */
- bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+ bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
if(bump || texture_blur > 0.0f) {
/* average color and normal at incoming point */
- shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
- float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL);
+ shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
+ float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
/* we simply divide out the average color and multiply with the average
* of the other one. we could try to do this per closure but it's quite
@@ -206,14 +210,23 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd
}
}
-/* subsurface scattering step, from a point on the surface to other nearby points on the same object */
-ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
- int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+/* Subsurface scattering step, from a point on the surface to other
+ * nearby points on the same object.
+ */
+ccl_device int subsurface_scatter_multi_intersect(
+ KernelGlobals *kg,
+ SubsurfaceIntersection* ss_isect,
+ ShaderData *sd,
+ ShaderClosure *sc,
+ uint *lcg_state,
+ float disk_u,
+ float disk_v,
+ bool all)
{
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
+
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@@ -259,70 +272,89 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
/* create ray */
- Ray ray;
- ray.P = sd->P + disk_N*disk_height + disk_P;
- ray.D = -disk_N;
- ray.t = 2.0f*disk_height;
- ray.dP = sd->dP;
- ray.dD = differential3_zero();
- ray.time = sd->time;
+ Ray *ray = &ss_isect->ray;
+ ray->P = sd->P + disk_N*disk_height + disk_P;
+ ray->D = -disk_N;
+ ray->t = 2.0f*disk_height;
+ ray->dP = sd->dP;
+ ray->dD = differential3_zero();
+ ray->time = sd->time;
/* intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
- Intersection isect[BSSRDF_MAX_HITS];
- uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
-
- /* evaluate bssrdf */
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
+ scene_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ sd->object,
+ lcg_state,
+ BSSRDF_MAX_HITS);
+ /* TODO(sergey): Investigate whether scene_intersect_subsurface() could
+ * indeed return more than BSSRDF_MAX_HITS hits.
+ */
+ int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
for(int hit = 0; hit < num_eval_hits; hit++) {
- ShaderData *bsd = &bssrdf_sd[hit];
-
- /* setup new shading point */
- *bsd = *sd;
- shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
+ /* Quickly retrieve P and Ng without setting up ShaderData. */
+ float3 hit_P = ray->P + ss_isect->hits[hit].t * ray->D;
+ float3 hit_Ng = ss_isect->Ng[hit];
+ if(ss_isect->hits[hit].object != OBJECT_NONE) {
+ object_normal_transform(kg, sd, &hit_Ng);
+ }
/* probability densities for local frame axes */
- float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
- float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
- float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
-
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
/* real distance to sampled point */
- float r = len(bsd->P - sd->P);
+ float r = len(hit_P - sd->P);
/* evaluate */
float w = mis_weight / pdf_N;
- if(num_hits > BSSRDF_MAX_HITS)
- w *= num_hits/(float)BSSRDF_MAX_HITS;
- eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
+ if(ss_isect->num_hits > BSSRDF_MAX_HITS)
+ w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
+ float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
- /* optionally blur colors and bump mapping */
- float3 N = bsd->N;
- subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
-
- /* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
+ ss_isect->weight[hit] = eval;
}
return num_eval_hits;
}
+ccl_device void subsurface_scatter_multi_setup(KernelGlobals *kg,
+ SubsurfaceIntersection* ss_isect,
+ int hit,
+ ShaderData *sd,
+ int state_flag,
+ ShaderClosure *sc,
+ bool all)
+{
+ /* Setup new shading point. */
+ shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
+
+ /* Optionally blur colors and bump mapping. */
+ float3 weight = ss_isect->weight[hit];
+ float3 N = sd->N;
+ subsurface_color_bump_blur(kg, sd, state_flag, &weight, &N);
+
+ /* Setup diffuse BSDF. */
+ subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
+}
+
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- uint num_hits = 0;
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
+
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@@ -368,21 +400,21 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
- Intersection isect;
- num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
+ SubsurfaceIntersection ss_isect;
+ scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
/* evaluate bssrdf */
- if(num_hits > 0) {
+ if(ss_isect.num_hits > 0) {
float3 origP = sd->P;
/* setup new shading point */
- shader_setup_from_subsurface(kg, sd, &isect, &ray);
+ shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
/* probability densities for local frame axes */
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
-
+
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
@@ -391,16 +423,16 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
float r = len(sd->P - origP);
/* evaluate */
- float w = (mis_weight * num_hits) / pdf_N;
+ float w = (mis_weight * ss_isect.num_hits) / pdf_N;
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
}
/* optionally blur colors and bump mapping */
float3 N = sd->N;
- subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
+ subsurface_color_bump_blur(kg, sd, state_flag, &eval, &N);
/* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
+ subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 5ccbc1de94c..e04f500e410 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -520,6 +520,18 @@ typedef ccl_addr_space struct Intersection {
#endif
} Intersection;
+/* Subsurface Intersection result */
+
+struct SubsurfaceIntersection
+{
+ Ray ray;
+ float3 weight[BSSRDF_MAX_HITS];
+
+ int num_hits;
+ struct Intersection hits[BSSRDF_MAX_HITS];
+ float3 Ng[BSSRDF_MAX_HITS];
+};
+
/* Primitives */
typedef enum PrimitiveType {