Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-11-22 13:00:29 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2015-11-25 11:01:22 +0300
commit8bca34fe326d10cc2f20df7fa541179e9ba835d2 (patch)
treeaeab22e5e0ec3d4ee1a5fe8c37daee0be4a89bee /intern/cycles/kernel/kernel_subsurface.h
parente6fff424dbcd02c3fed25036a7feb7f59d427843 (diff)
Cysles: Avoid having ShaderData on the stack
This commit introduces a SSS-oriented intersection structure which is replacing old logic of having separate arrays for just intersections and shader data and encapsulates all the data needed for SSS evaluation. This giver a huge stack memory saving on GPU. In own experiments it gave 25% memory usage reduction on GTX560Ti (722MB vs. 946MB). Unfortunately, this gave some performance loss of 20% which only happens on GPU. This is perhaps due to different memory access pattern. Will be solved in the future, hopefully. Famous saying: won in memory - lost in time (which is also valid in other way around).
Diffstat (limited to 'intern/cycles/kernel/kernel_subsurface.h')
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h134
1 files changed, 83 insertions, 51 deletions
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 2da060c32a2..b9928561791 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -179,19 +179,23 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
return color;
}
-ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
+ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
+ ShaderData *sd,
+ int state_flag,
+ float3 *eval,
+ float3 *N)
{
/* average color and texture blur at outgoing point */
float texture_blur;
- float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
+ float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
/* do we have bump mapping? */
- bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+ bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
if(bump || texture_blur > 0.0f) {
/* average color and normal at incoming point */
- shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
- float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL);
+ shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
+ float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
/* we simply divide out the average color and multiply with the average
* of the other one. we could try to do this per closure but it's quite
@@ -206,14 +210,23 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd
}
}
-/* subsurface scattering step, from a point on the surface to other nearby points on the same object */
-ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
- int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+/* Subsurface scattering step, from a point on the surface to other
+ * nearby points on the same object.
+ */
+ccl_device int subsurface_scatter_multi_intersect(
+ KernelGlobals *kg,
+ SubsurfaceIntersection* ss_isect,
+ ShaderData *sd,
+ ShaderClosure *sc,
+ uint *lcg_state,
+ float disk_u,
+ float disk_v,
+ bool all)
{
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
+
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@@ -259,70 +272,89 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
/* create ray */
- Ray ray;
- ray.P = sd->P + disk_N*disk_height + disk_P;
- ray.D = -disk_N;
- ray.t = 2.0f*disk_height;
- ray.dP = sd->dP;
- ray.dD = differential3_zero();
- ray.time = sd->time;
+ Ray *ray = &ss_isect->ray;
+ ray->P = sd->P + disk_N*disk_height + disk_P;
+ ray->D = -disk_N;
+ ray->t = 2.0f*disk_height;
+ ray->dP = sd->dP;
+ ray->dD = differential3_zero();
+ ray->time = sd->time;
/* intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
- Intersection isect[BSSRDF_MAX_HITS];
- uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
-
- /* evaluate bssrdf */
- float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
+ scene_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ sd->object,
+ lcg_state,
+ BSSRDF_MAX_HITS);
+ /* TODO(sergey): Investigate whether scene_intersect_subsurface() could
+ * indeed return more than BSSRDF_MAX_HITS hits.
+ */
+ int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
for(int hit = 0; hit < num_eval_hits; hit++) {
- ShaderData *bsd = &bssrdf_sd[hit];
-
- /* setup new shading point */
- *bsd = *sd;
- shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
+ /* Quickly retrieve P and Ng without setting up ShaderData. */
+ float3 hit_P = ray->P + ss_isect->hits[hit].t * ray->D;
+ float3 hit_Ng = ss_isect->Ng[hit];
+ if(ss_isect->hits[hit].object != OBJECT_NONE) {
+ object_normal_transform(kg, sd, &hit_Ng);
+ }
/* probability densities for local frame axes */
- float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
- float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
- float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
-
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
+
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
/* real distance to sampled point */
- float r = len(bsd->P - sd->P);
+ float r = len(hit_P - sd->P);
/* evaluate */
float w = mis_weight / pdf_N;
- if(num_hits > BSSRDF_MAX_HITS)
- w *= num_hits/(float)BSSRDF_MAX_HITS;
- eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
+ if(ss_isect->num_hits > BSSRDF_MAX_HITS)
+ w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
+ float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
- /* optionally blur colors and bump mapping */
- float3 N = bsd->N;
- subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
-
- /* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
+ ss_isect->weight[hit] = eval;
}
return num_eval_hits;
}
+ccl_device void subsurface_scatter_multi_setup(KernelGlobals *kg,
+ SubsurfaceIntersection* ss_isect,
+ int hit,
+ ShaderData *sd,
+ int state_flag,
+ ShaderClosure *sc,
+ bool all)
+{
+ /* Setup new shading point. */
+ shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
+
+ /* Optionally blur colors and bump mapping. */
+ float3 weight = ss_isect->weight[hit];
+ float3 N = sd->N;
+ subsurface_color_bump_blur(kg, sd, state_flag, &weight, &N);
+
+ /* Setup diffuse BSDF. */
+ subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
+}
+
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- uint num_hits = 0;
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
+
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@@ -368,21 +400,21 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
- Intersection isect;
- num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
+ SubsurfaceIntersection ss_isect;
+ scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
/* evaluate bssrdf */
- if(num_hits > 0) {
+ if(ss_isect.num_hits > 0) {
float3 origP = sd->P;
/* setup new shading point */
- shader_setup_from_subsurface(kg, sd, &isect, &ray);
+ shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
/* probability densities for local frame axes */
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
-
+
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
@@ -391,16 +423,16 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
float r = len(sd->P - origP);
/* evaluate */
- float w = (mis_weight * num_hits) / pdf_N;
+ float w = (mis_weight * ss_isect.num_hits) / pdf_N;
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
}
/* optionally blur colors and bump mapping */
float3 N = sd->N;
- subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
+ subsurface_color_bump_blur(kg, sd, state_flag, &eval, &N);
/* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
+ subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
}
CCL_NAMESPACE_END