From ee6b989f8e2ae99c28441ab8663a99bfd16b6c65 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 26 Oct 2020 18:13:53 +0100 Subject: Cycles: refactor to split surface and volume attribute lookup more This avoids OpenCL inlining heavy volume interpolation code once for every data type, which could cause a performance regression when we add a float4 data type in the next commit. Ref D2057 --- intern/cycles/kernel/geom/geom_primitive.h | 172 ++++++++-------------------- intern/cycles/kernel/geom/geom_volume.h | 45 ++++---- intern/cycles/kernel/osl/osl_services.cpp | 44 +++++-- intern/cycles/kernel/svm/svm_attribute.h | 66 +++++++++-- intern/cycles/kernel/svm/svm_vertex_color.h | 6 +- 5 files changed, 163 insertions(+), 170 deletions(-) diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 997abf438d0..2c31e5cee03 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -21,38 +21,11 @@ CCL_NAMESPACE_BEGIN -/* Generic primitive attribute reading functions */ -ccl_device_inline float primitive_attribute_float( - KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) -{ - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } -#endif -#ifdef __VOLUME__ - else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - return volume_attribute_float(kg, sd, desc); - } -#endif - else { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - return 0.0f; - } -} +/* Surface Attributes + * + * Read geometry attributes for surface shading. This is distinct from volume + * attributes for performance, mainly for GPU performance to avoid bringing in + * heavy volume interpolation code. */ ccl_device_inline float primitive_surface_attribute_float( KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) @@ -77,25 +50,11 @@ ccl_device_inline float primitive_surface_attribute_float( } } -#ifdef __VOLUME__ -ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc) -{ - if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, desc); - } - else { - return 0.0f; - } -} -#endif - -ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float2 *dx, - float2 *dy) +ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { if (sd->type & PRIMITIVE_ALL_TRIANGLE) { if (subd_triangle_patch(kg, sd) == ~0) @@ -107,16 +66,6 @@ ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg, else if (sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float2(kg, sd, desc, dx, dy); } -#endif -#ifdef __VOLUME__ - else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - kernel_assert(0); - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } #endif else { if (dx) @@ -127,11 +76,11 @@ ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg, } } -ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float3 *dx, - float3 *dy) +ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { if (sd->type & PRIMITIVE_ALL_TRIANGLE) { if (subd_triangle_patch(kg, sd) == ~0) @@ -143,15 +92,6 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, else if (sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float3(kg, sd, desc, dx, dy); } -#endif -#ifdef __VOLUME__ - else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); - return volume_attribute_float3(kg, sd, desc); - } #endif else { if (dx) @@ -162,11 +102,11 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, } } -ccl_device_inline float4 primitive_attribute_float4(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float4 *dx, - float4 *dy) +ccl_device_inline float4 primitive_surface_attribute_float4(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float4 *dx, + float4 *dy) { if (sd->type & PRIMITIVE_ALL_TRIANGLE) { if (subd_triangle_patch(kg, sd) == ~0) @@ -188,68 +128,52 @@ ccl_device_inline float4 primitive_attribute_float4(KernelGlobals *kg, } } -ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float2 *dx, - float2 *dy) +#ifdef __VOLUME__ +/* Volume Attributes + * + * Read geometry attributes for volume shading. This is distinct from surface + * attributes for performance, mainly for GPU performance to avoid bringing in + * heavy volume interpolation code. */ + +ccl_device_inline bool primitive_is_volume_attribute(const ShaderData *sd, + const AttributeDescriptor desc) { - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); + return (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL); +} + +ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) +{ + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_value_to_float(volume_attribute_float4(kg, sd, desc)); } -#endif else { - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); + return 0.0f; } } -ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float3 *dx, - float3 *dy) +ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_value_to_float3(volume_attribute_float4(kg, sd, desc)); } -#endif else { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); return make_float3(0.0f, 0.0f, 0.0f); } } -#ifdef __VOLUME__ -ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, +ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { - if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, desc); + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_float4(kg, sd, desc); } else { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } } #endif diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index f43a7841b46..13b027a5f6c 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -47,38 +47,39 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc) +ccl_device float volume_attribute_value_to_float(const float4 value) { - /* todo: optimize this so we don't have to transform both here and in - * kernel_tex_image_interp_3d when possible. Also could optimize for the - * common case where transform is translation/scale only. */ - float3 P = sd->P; - object_inverse_position_transform(kg, sd, &P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : - INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp); - return average(float4_to_float3(r)); + return average(float4_to_float3(value)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, +ccl_device float volume_attribute_value_to_alpha(const float4 value) +{ + return value.w; +} + +ccl_device float3 volume_attribute_value_to_float3(const float4 value) +{ + if (value.w > 1e-6f && value.w != 1.0f) { + /* For RGBA colors, unpremultiply after interpolation. */ + return float4_to_float3(value) / value.w; + } + else { + return float4_to_float3(value); + } +} + +ccl_device float4 volume_attribute_float4(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { + /* todo: optimize this so we don't have to transform both here and in + * kernel_tex_image_interp_3d when possible. Also could optimize for the + * common case where transform is translation/scale only. */ float3 P = sd->P; object_inverse_position_transform(kg, sd, &P); InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp); - - if (r.w > 1e-6f && r.w != 1.0f) { - /* For RGBA colors, unpremultiply after interpolation. */ - return float4_to_float3(r) / r.w; - } - else { - return float4_to_float3(r); - } + return kernel_tex_image_interp_3d(kg, desc.offset, P, interp); } #endif diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index aee1e3a244e..7d1d1ae20c1 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -675,26 +675,50 @@ static bool get_primitive_attribute(KernelGlobals *kg, if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { float3 fval[3]; - fval[0] = primitive_attribute_float3( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + if (primitive_is_volume_attribute(sd, attr.desc)) { + fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc); + } + else { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_surface_attribute_float3( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + } return set_attribute_float3(fval, type, derivatives, val); } else if (attr.type == TypeFloat2) { - float2 fval[3]; - fval[0] = primitive_attribute_float2( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - return set_attribute_float2(fval, type, derivatives, val); + if (primitive_is_volume_attribute(sd, attr.desc)) { + assert(!"Float2 attribute not support for volumes"); + return false; + } + else { + float2 fval[3]; + fval[0] = primitive_surface_attribute_float2( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + return set_attribute_float2(fval, type, derivatives, val); + } } else if (attr.type == TypeDesc::TypeFloat) { float fval[3]; - fval[0] = primitive_attribute_float( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + if (primitive_is_volume_attribute(sd, attr.desc)) { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc); + } + else { + fval[0] = primitive_surface_attribute_float( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + } return set_attribute_float(fval, type, derivatives, val); } else if (attr.type == TypeRGBA) { float4 fval[3]; - fval[0] = primitive_attribute_float4( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + if (primitive_is_volume_attribute(sd, attr.desc)) { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc); + } + else { + fval[0] = primitive_surface_attribute_float4( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + } return set_attribute_float4(fval, type, derivatives, val); } else { diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index fc7a3ba3f5a..e26a85f5b36 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -50,9 +50,27 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ +#ifdef __VOLUME__ + /* Volumes + * NOTE: moving this into its own node type might help improve performance. */ + if (primitive_is_volume_attribute(sd, desc)) { + const float4 value = volume_attribute_float4(kg, sd, desc); + + if (type == NODE_ATTR_FLOAT) { + const float f = volume_attribute_value_to_float(value); + stack_store_float(stack, out_offset, f); + } + else { + const float3 f = volume_attribute_value_to_float3(value); + stack_store_float3(stack, out_offset, f); + } + return; + } +#endif + + /* Surface */ if (desc.type == NODE_ATTR_FLOAT) { - float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); + float f = primitive_surface_attribute_float(kg, sd, desc, NULL, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f); } @@ -61,7 +79,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } else if (desc.type == NODE_ATTR_FLOAT2) { - float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL); + float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f.x); } @@ -70,7 +88,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } else if (desc.type == NODE_ATTR_RGBA) { - float4 f = primitive_attribute_float4(kg, sd, desc, NULL, NULL); + float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, average(float4_to_float3(f))); } @@ -79,7 +97,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } else { - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, average(f)); } @@ -95,7 +113,20 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float * uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ +#ifdef __VOLUME__ + /* Volume */ + if (primitive_is_volume_attribute(sd, desc)) { + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, 0.0f); + } + else { + stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); + } + return; + } +#endif + + /* Surface */ if (desc.type == NODE_ATTR_FLOAT) { float dx; float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); @@ -108,7 +139,7 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float * } else if (desc.type == NODE_ATTR_FLOAT2) { float2 dx; - float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL); + float2 f = primitive_surface_attribute_float2(kg, sd, desc, &dx, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f.x + dx.x); } @@ -118,7 +149,7 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float * } else if (desc.type == NODE_ATTR_RGBA) { float4 dx; - float4 f = primitive_attribute_float4(kg, sd, desc, &dx, NULL); + float4 f = primitive_surface_attribute_float4(kg, sd, desc, &dx, NULL); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, average(float4_to_float3(f + dx))); } @@ -144,7 +175,20 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float * uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ +#ifdef __VOLUME__ + /* Volume */ + if (primitive_is_volume_attribute(sd, desc)) { + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, 0.0f); + } + else { + stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); + } + return; + } +#endif + + /* Surface */ if (desc.type == NODE_ATTR_FLOAT) { float dy; float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); @@ -157,7 +201,7 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float * } else if (desc.type == NODE_ATTR_FLOAT2) { float2 dy; - float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy); + float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, &dy); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f.x + dy.x); } @@ -167,7 +211,7 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float * } else if (desc.type == NODE_ATTR_RGBA) { float4 dy; - float4 f = primitive_attribute_float4(kg, sd, desc, NULL, &dy); + float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, &dy); if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, average(float4_to_float3(f + dy))); } diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h index 3c105b1cbfa..0aa45835522 100644 --- a/intern/cycles/kernel/svm/svm_vertex_color.h +++ b/intern/cycles/kernel/svm/svm_vertex_color.h @@ -25,7 +25,7 @@ ccl_device void svm_node_vertex_color(KernelGlobals *kg, { AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { - float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, NULL); + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, NULL); stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); stack_store_float(stack, alpha_offset, vertex_color.w); } @@ -51,7 +51,7 @@ ccl_device_noinline AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { float4 dx; - float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, &dx, NULL); + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, &dx, NULL); vertex_color += dx; stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); stack_store_float(stack, alpha_offset, vertex_color.w); @@ -78,7 +78,7 @@ ccl_device_noinline AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { float4 dy; - float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, &dy); + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, &dy); vertex_color += dy; stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); stack_store_float(stack, alpha_offset, vertex_color.w); -- cgit v1.2.3