diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-08-11 17:53:11 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-08-15 14:48:02 +0300 |
commit | e949d6da5bd347663addd583ff3f0211c96b81c8 (patch) | |
tree | 4a7478de4eb8fbd4c67989d94bc8e7c0353bca94 /intern/cycles/kernel/svm | |
parent | d8841d0aa341e05c8cb9559b116b7e2a9ec11882 (diff) |
Cycles: simplify handling of ray differentials
* Store compact ray differentials in ShaderData and compute full differentials
on demand. This reduces register pressure on the GPU.
* Remove BSDF differential code that was effectively doing nothing as the
differential orientation was discarded when making it compact.
This gives a 1-5% speedup with RTX A6000 + OptiX in our benchmarks, with the
bigger speedups in simpler scenes.
Renders appear to be identical except for the Both displacement option that
does both displacement and bump.
Differential Revision: https://developer.blender.org/D15677
Diffstat (limited to 'intern/cycles/kernel/svm')
-rw-r--r-- | intern/cycles/kernel/svm/attribute.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/bump.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/displace.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/geometry.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/tex_coord.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/types.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/wireframe.h | 16 |
7 files changed, 47 insertions, 39 deletions
diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h index a3609d8b4b0..5f0d1609f08 100644 --- a/intern/cycles/kernel/svm/attribute.h +++ b/intern/cycles/kernel/svm/attribute.h @@ -140,6 +140,16 @@ ccl_device_noinline void svm_node_attr(KernelGlobals kg, } } +ccl_device_forceinline float3 svm_node_bump_P_dx(const ccl_private ShaderData *sd) +{ + return sd->P + differential_from_compact(sd->Ng, sd->dP).dx; +} + +ccl_device_forceinline float3 svm_node_bump_P_dy(const ccl_private ShaderData *sd) +{ + return sd->P + differential_from_compact(sd->Ng, sd->dP).dy; +} + ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, @@ -167,7 +177,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ - float3 f = sd->P + sd->dP.dx; + float3 f = svm_node_bump_P_dx(sd); if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &f); } @@ -265,7 +275,7 @@ ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ - float3 f = sd->P + sd->dP.dy; + float3 f = svm_node_bump_P_dy(sd); if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &f); } diff --git a/intern/cycles/kernel/svm/bump.h b/intern/cycles/kernel/svm/bump.h index 566c45f5f25..1009a6a4241 100644 --- a/intern/cycles/kernel/svm/bump.h +++ b/intern/cycles/kernel/svm/bump.h @@ -14,23 +14,21 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, { /* save state */ stack_store_float3(stack, offset + 0, sd->P); - stack_store_float3(stack, offset + 3, sd->dP.dx); - stack_store_float3(stack, offset + 6, sd->dP.dy); + stack_store_float(stack, offset + 3, sd->dP); /* set state as if undisplaced */ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); if (desc.offset != ATTR_STD_NOT_FOUND) { - float3 P, dPdx, dPdy; - P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); + differential3 dP; + float3 P = primitive_surface_attribute_float3(kg, sd, desc, &dP.dx, &dP.dy); object_position_transform(kg, sd, &P); - object_dir_transform(kg, sd, &dPdx); - object_dir_transform(kg, sd, &dPdy); + object_dir_transform(kg, sd, &dP.dx); + object_dir_transform(kg, sd, &dP.dy); sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; + sd->dP = differential_make_compact(dP); } } @@ -41,8 +39,7 @@ ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg, { /* restore state */ sd->P = stack_load_float3(stack, offset + 0); - sd->dP.dx = stack_load_float3(stack, offset + 3); - sd->dP.dy = stack_load_float3(stack, offset + 6); + sd->dP = stack_load_float(stack, offset + 3); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h index 128023263fd..230f8c73820 100644 --- a/intern/cycles/kernel/svm/displace.h +++ b/intern/cycles/kernel/svm/displace.h @@ -24,18 +24,17 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; + differential3 dP = differential_from_compact(sd->Ng, sd->dP); if (use_object_space) { object_inverse_normal_transform(kg, sd, &normal_in); - object_inverse_dir_transform(kg, sd, &dPdx); - object_inverse_dir_transform(kg, sd, &dPdy); + object_inverse_dir_transform(kg, sd, &dP.dx); + object_inverse_dir_transform(kg, sd, &dP.dy); } /* get surface tangents from normal */ - float3 Rx = cross(dPdy, normal_in); - float3 Ry = cross(normal_in, dPdx); + float3 Rx = cross(dP.dy, normal_in); + float3 Ry = cross(normal_in, dP.dx); /* get bump values */ uint c_offset, x_offset, y_offset, strength_offset; @@ -46,7 +45,7 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, float h_y = stack_load_float(stack, y_offset); /* compute surface gradient and determinant */ - float det = dot(dPdx, Rx); + float det = dot(dP.dx, Rx); float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry; float absdet = fabsf(det); diff --git a/intern/cycles/kernel/svm/geometry.h b/intern/cycles/kernel/svm/geometry.h index bbefdcfa755..cbd87d84409 100644 --- a/intern/cycles/kernel/svm/geometry.h +++ b/intern/cycles/kernel/svm/geometry.h @@ -54,7 +54,7 @@ ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg, switch (type) { case NODE_GEOM_P: - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); break; case NODE_GEOM_uv: data = make_float3(1.0f - sd->u - sd->du.dx - sd->v - sd->dv.dx, sd->u + sd->du.dx, 0.0f); @@ -81,7 +81,7 @@ ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg, switch (type) { case NODE_GEOM_P: - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); break; case NODE_GEOM_uv: data = make_float3(1.0f - sd->u - sd->du.dy - sd->v - sd->dv.dy, sd->u + sd->du.dy, 0.0f); diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h index 2a0130e11d4..8154c542e6f 100644 --- a/intern/cycles/kernel/svm/tex_coord.h +++ b/intern/cycles/kernel/svm/tex_coord.h @@ -106,7 +106,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, switch (type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); if (node.w == 0) { if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); @@ -130,9 +130,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, Transform tfm = kernel_data.cam.worldtocamera; if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dx); + data = transform_point(&tfm, svm_node_bump_P_dx(sd)); else - data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); + data = transform_point(&tfm, svm_node_bump_P_dx(sd) + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { @@ -140,7 +140,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) data = camera_world_to_ndc(kg, sd, sd->ray_P); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); + data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dx(sd)); data.z = 0.0f; break; } @@ -160,7 +160,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dx; + data = svm_node_bump_P_dx(sd); # ifdef __VOLUME__ if (sd->object != OBJECT_NONE) @@ -191,7 +191,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, switch (type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); if (node.w == 0) { if (sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); @@ -215,9 +215,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, Transform tfm = kernel_data.cam.worldtocamera; if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dy); + data = transform_point(&tfm, svm_node_bump_P_dy(sd)); else - data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); + data = transform_point(&tfm, svm_node_bump_P_dy(sd) + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { @@ -225,7 +225,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) data = camera_world_to_ndc(kg, sd, sd->ray_P); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); + data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dy(sd)); data.z = 0.0f; break; } @@ -245,7 +245,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dy; + data = svm_node_bump_P_dy(sd); # ifdef __VOLUME__ if (sd->object != OBJECT_NONE) diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index 12d0ec141e6..98dfe6a4375 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN /* SVM stack offsets with this value indicate that it's not on the stack */ #define SVM_STACK_INVALID 255 -#define SVM_BUMP_EVAL_STATE_SIZE 9 +#define SVM_BUMP_EVAL_STATE_SIZE 4 /* Nodes */ diff --git a/intern/cycles/kernel/svm/wireframe.h b/intern/cycles/kernel/svm/wireframe.h index e5fe08e5d04..91fadf4cfc4 100644 --- a/intern/cycles/kernel/svm/wireframe.h +++ b/intern/cycles/kernel/svm/wireframe.h @@ -14,6 +14,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float wireframe(KernelGlobals kg, ccl_private ShaderData *sd, + const differential3 dP, float size, int pixel_size, ccl_private float3 *P) @@ -46,8 +47,8 @@ ccl_device_inline float wireframe(KernelGlobals kg, if (pixel_size) { // Project the derivatives of P to the viewing plane defined // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); - float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); + float pixelwidth_x = len(dP.dx - dot(dP.dx, sd->I) * sd->I); + float pixelwidth_y = len(dP.dy - dot(dP.dy, sd->I) * sd->I); // Take the average of both axis' length pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; } @@ -86,16 +87,17 @@ ccl_device_noinline void svm_node_wireframe(KernelGlobals kg, int pixel_size = (int)use_pixel_size; /* Calculate wireframe */ - float f = wireframe(kg, sd, size, pixel_size, &sd->P); + const differential3 dP = differential_from_compact(sd->Ng, sd->dP); + float f = wireframe(kg, sd, dP, size, pixel_size, &sd->P); /* TODO(sergey): Think of faster way to calculate derivatives. */ if (bump_offset == NODE_BUMP_OFFSET_DX) { - float3 Px = sd->P - sd->dP.dx; - f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); + float3 Px = sd->P - dP.dx; + f += (f - wireframe(kg, sd, dP, size, pixel_size, &Px)) / len(dP.dx); } else if (bump_offset == NODE_BUMP_OFFSET_DY) { - float3 Py = sd->P - sd->dP.dy; - f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); + float3 Py = sd->P - dP.dy; + f += (f - wireframe(kg, sd, dP, size, pixel_size, &Py)) / len(dP.dy); } if (stack_valid(out_fac)) |