From e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Wed, 17 Apr 2019 06:17:24 +0200 Subject: ClangFormat: apply to source, most of intern Apply clang format as proposed in T53211. For details on usage and instructions for migrating branches without conflicts, see: https://wiki.blender.org/wiki/Tools/ClangFormat --- intern/cycles/kernel/geom/geom_attribute.h | 110 +- intern/cycles/kernel/geom/geom_curve.h | 441 ++--- intern/cycles/kernel/geom/geom_curve_intersect.h | 1770 ++++++++++---------- intern/cycles/kernel/geom/geom_motion_curve.h | 306 ++-- intern/cycles/kernel/geom/geom_motion_triangle.h | 228 +-- .../kernel/geom/geom_motion_triangle_intersect.h | 418 +++-- .../kernel/geom/geom_motion_triangle_shader.h | 151 +- intern/cycles/kernel/geom/geom_object.h | 503 +++--- intern/cycles/kernel/geom/geom_patch.h | 554 +++--- intern/cycles/kernel/geom/geom_primitive.h | 484 +++--- intern/cycles/kernel/geom/geom_subd_triangle.h | 765 +++++---- intern/cycles/kernel/geom/geom_triangle.h | 353 ++-- .../cycles/kernel/geom/geom_triangle_intersect.h | 1229 +++++++------- intern/cycles/kernel/geom/geom_volume.h | 56 +- 14 files changed, 3823 insertions(+), 3545 deletions(-) (limited to 'intern/cycles/kernel/geom') diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index e991f3d685a..456608bfa22 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -30,81 +30,83 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData * ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { - return ATTR_PRIM_CURVE; - } - else + if (sd->type & PRIMITIVE_ALL_CURVE) { + return ATTR_PRIM_CURVE; + } + else #endif - if(subd_triangle_patch(kg, sd) != ~0) { - return ATTR_PRIM_SUBD; - } - else { - return ATTR_PRIM_TRIANGLE; - } + if 
(subd_triangle_patch(kg, sd) != ~0) { + return ATTR_PRIM_SUBD; + } + else { + return ATTR_PRIM_TRIANGLE; + } } ccl_device_inline AttributeDescriptor attribute_not_found() { - const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; - return desc; + const AttributeDescriptor desc = { + ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; + return desc; } /* Find attribute based on ID */ ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object) { - return kernel_tex_fetch(__objects, object).attribute_map_offset; + return kernel_tex_fetch(__objects, object).attribute_map_offset; } -ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id) +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, + const ShaderData *sd, + uint id) { - if(sd->object == OBJECT_NONE) { - return attribute_not_found(); - } - - /* for SVM, find attribute by unique id */ - uint attr_offset = object_attribute_map_offset(kg, sd->object); - attr_offset += attribute_primitive_type(kg, sd); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - return attribute_not_found(); - } - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - AttributeDescriptor desc; - desc.element = (AttributeElement)attr_map.y; - - if(sd->prim == PRIM_NONE && - desc.element != ATTR_ELEMENT_MESH && - desc.element != ATTR_ELEMENT_VOXEL && - desc.element != ATTR_ELEMENT_OBJECT) - { - return attribute_not_found(); - } - - /* return result */ - desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; - desc.type = (NodeAttributeType)(attr_map.w & 0xff); - desc.flags = (AttributeFlag)(attr_map.w >> 8); - - return desc; + if (sd->object == OBJECT_NONE) { + return attribute_not_found(); + } + + /* for SVM, find attribute by unique id */ + uint attr_offset = object_attribute_map_offset(kg, sd->object); + attr_offset += attribute_primitive_type(kg, sd); + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + return attribute_not_found(); + } + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + AttributeDescriptor desc; + desc.element = (AttributeElement)attr_map.y; + + if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && + desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { + return attribute_not_found(); + } + + /* return result */ + desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.type = (NodeAttributeType)(attr_map.w & 0xff); + desc.flags = (AttributeFlag)(attr_map.w >> 8); + + return desc; } /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - Transform tfm; + Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); - return tfm; + return tfm; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 9b60cf6d56b..e0aacb434eb 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -27,169 +27,199 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) { - float fc = 0.71f; - float data[4]; - float t2 = t * t; - data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; - data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; - data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; - data[3] = 3.0f * fc * t2 - 2.0f * fc * t; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; + float fc = 0.71f; + float data[4]; + float t2 = t * t; + data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; + data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; + data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; + data[3] = 3.0f * fc * t2 - 2.0f * fc * t; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * 
p3; } ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3) { - float data[4]; - float fc = 0.71f; - float t2 = t * t; - float t3 = t2 * t; - data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; - data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; - data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; - data[3] = fc * t3 - fc * t2; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; + float data[4]; + float fc = 0.71f; + float t2 = t * t; + float t3 = t2 * t; + data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; + data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; + data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; + data[3] = fc * t3 - fc * t2; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; } /* Reading attributes on various curve elements */ -ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device float curve_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = 0.0f; -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return 0.0f; - } + if (desc.element == 
ATTR_ELEMENT_CURVE) { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = 0.0f; +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + return 0.0f; + } } -ccl_device float2 curve_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device float2 curve_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? 
*/ -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return make_float2(0.0f, 0.0f); - } + if (desc.element == ATTR_ELEMENT_CURVE) { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? 
*/ +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return make_float2(0.0f, 0.0f); + } } -ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device float3 curve_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? 
*/ -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return make_float3(0.0f, 0.0f, 0.0f); - } + if (desc.element == ATTR_ELEMENT_CURVE) { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? 
*/ +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return make_float3(0.0f, 0.0f, 0.0f); + } } /* Curve thickness */ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { - float r = 0.0f; + float r = 0.0f; - if(sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; + if (sd->type & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; - float4 P_curve[2]; + float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - } - else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); - } + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + 
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } - r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; - } + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + } - return r*2.0f; + return r * 2.0f; } /* Curve location for motion pass, linear interpolation between keys and @@ -197,89 +227,98 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; - float4 P_curve[2]; + float4 P_curve[2]; - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); } /* Curve tangent normal */ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { - float3 tgN = make_float3(0.0f,0.0f,0.0f); + float3 tgN = make_float3(0.0f, 0.0f, 0.0f); - if(sd->type & PRIMITIVE_ALL_CURVE) { + if (sd->type & PRIMITIVE_ALL_CURVE) { - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); - tgN = normalize(tgN); + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu))); + tgN = normalize(tgN); - /* need to find suitable scaled gd for corrected normal */ -#if 0 - tgN = normalize(tgN - gd * sd->dPdu); -#endif - } + /* need to find suitable scaled gd for corrected normal */ +# if 0 + tgN = normalize(tgN - gd * sd->dPdu); +# endif + } - return tgN; + return tgN; } /* Curve 
bounds utility function */ -ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) +ccl_device_inline void curvebounds(float *lower, + float *upper, + float *extremta, + float *extrema, + float *extremtb, + float *extremb, + float p0, + float p1, + float p2, + float p3) { - float halfdiscroot = (p2 * p2 - 3 * p3 * p1); - float ta = -1.0f; - float tb = -1.0f; - - *extremta = -1.0f; - *extremtb = -1.0f; - *upper = p0; - *lower = (p0 + p1) + (p2 + p3); - *extrema = *upper; - *extremb = *lower; - - if(*lower >= *upper) { - *upper = *lower; - *lower = p0; - } - - if(halfdiscroot >= 0) { - float inv3p3 = (1.0f/3.0f)/p3; - halfdiscroot = sqrtf(halfdiscroot); - ta = (-p2 - halfdiscroot) * inv3p3; - tb = (-p2 + halfdiscroot) * inv3p3; - } - - float t2; - float t3; - - if(ta > 0.0f && ta < 1.0f) { - t2 = ta * ta; - t3 = t2 * ta; - *extremta = ta; - *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - - *upper = fmaxf(*extrema, *upper); - *lower = fminf(*extrema, *lower); - } - - if(tb > 0.0f && tb < 1.0f) { - t2 = tb * tb; - t3 = t2 * tb; - *extremtb = tb; - *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - - *upper = fmaxf(*extremb, *upper); - *lower = fminf(*extremb, *lower); - } + float halfdiscroot = (p2 * p2 - 3 * p3 * p1); + float ta = -1.0f; + float tb = -1.0f; + + *extremta = -1.0f; + *extremtb = -1.0f; + *upper = p0; + *lower = (p0 + p1) + (p2 + p3); + *extrema = *upper; + *extremb = *lower; + + if (*lower >= *upper) { + *upper = *lower; + *lower = p0; + } + + if (halfdiscroot >= 0) { + float inv3p3 = (1.0f / 3.0f) / p3; + halfdiscroot = sqrtf(halfdiscroot); + ta = (-p2 - halfdiscroot) * inv3p3; + tb = (-p2 + halfdiscroot) * inv3p3; + } + + float t2; + float t3; + + if (ta > 0.0f && ta < 1.0f) { + t2 = ta * ta; + t3 = t2 * ta; + *extremta = ta; + *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; + + *upper = fmaxf(*extrema, *upper); + *lower = fminf(*extrema, *lower); 
+ } + + if (tb > 0.0f && tb < 1.0f) { + t2 = tb * tb; + t3 = t2 * tb; + *extremtb = tb; + *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; + + *upper = fmaxf(*extremb, *upper); + *lower = fminf(*extremb, *lower); + } } -#endif /* __HAIR__ */ +#endif /* __HAIR__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index 5cf8713e3a8..5fd277c2f99 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -18,484 +18,534 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -#ifdef __KERNEL_SSE2__ +# ifdef __KERNEL_SSE2__ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) { - return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2])); + return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2])); } -#endif +# endif /* On CPU pass P and dir by reference to aligned vector. */ -ccl_device_forceinline bool cardinal_curve_intersect( - KernelGlobals *kg, - Intersection *isect, - const float3 ccl_ref P, - const float3 ccl_ref dir, - uint visibility, - int object, - int curveAddr, - float time, - int type, - uint *lcg_state, - float difl, - float extmax) +ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, + Intersection *isect, + const float3 ccl_ref P, + const float3 ccl_ref dir, + uint visibility, + int object, + int curveAddr, + float time, + int type, + uint *lcg_state, + float difl, + float extmax) { - const bool is_curve_primitive = (type & PRIMITIVE_CURVE); - - if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if(time < prim_time.x || time > prim_time.y) { - return false; - } - } - - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - float epsilon = 0.0f; - float r_st, r_en; - - int depth = kernel_data.curve.subdivisions; - int flags = kernel_data.curve.curveflags; - int prim = 
kernel_tex_fetch(__prim_index, curveAddr); - -#ifdef __KERNEL_SSE2__ - ssef vdir = load4f(dir); - ssef vcurve_coef[4]; - const float3 *curve_coef = (float3 *)vcurve_coef; - - { - ssef dtmp = vdir * vdir; - ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp)); - ssef rd_ss = load1f_first(1.0f) / d_ss; - - ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]); - int2 &v00 = (int2 &)v00vec; - - int k0 = v00.x + segment; - int k1 = k0 + 1; - int ka = max(k0 - 1, v00.x); - int kb = min(k1 + 1, v00.x + v00.y - 1); - -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800) - avxf P_curve_0_1, P_curve_2_3; - if(is_curve_primitive) { - P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); - P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); - } - else { - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; - motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3); - } -#else /* __KERNEL_AVX2__ */ - ssef P_curve[4]; - - if(is_curve_primitive) { - P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); - P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); - P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); - P_curve[3] = load4f(&kg->__curve_keys.data[kb].x); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve); - } -#endif /* __KERNEL_AVX2__ */ - - ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss)); - ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn; - ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy; - ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); - ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); - - ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); - ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); - ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); - -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800) - const avxf vPP = _mm256_broadcast_ps(&P.m128); - const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); - const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); - const avxf htfm22 = avxf(htfm2.m128, htfm2.m128); - - const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP), - htfm00, - madd(shuffle<1>(P_curve_0_1 - vPP), - htfm11, - shuffle<2>(P_curve_0_1 - vPP) * htfm22)); - const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP), - htfm00, - madd(shuffle<1>(P_curve_2_3 - vPP), - htfm11, - shuffle<2>(P_curve_2_3 - vPP)*htfm22)); - - const ssef p0 = _mm256_castps256_ps128(p01); - const ssef p1 = _mm256_extractf128_ps(p01, 1); - const ssef p2 = _mm256_castps256_ps128(p23); - const ssef p3 = _mm256_extractf128_ps(p23, 1); - - const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1); - r_st = ((float4 &)P_curve_1).w; - const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3); - r_en = ((float4 &)P_curve_2).w; -#else /* __KERNEL_AVX2__ */ - ssef htfm[] = { htfm0, htfm1, htfm2 }; - ssef vP = load4f(P); - ssef p0 = transform_point_T3(htfm, P_curve[0] - vP); - ssef p1 = transform_point_T3(htfm, P_curve[1] - vP); - ssef p2 = transform_point_T3(htfm, P_curve[2] - vP); - ssef p3 = transform_point_T3(htfm, P_curve[3] - vP); - - r_st = 
((float4 &)P_curve[1]).w; - r_en = ((float4 &)P_curve[2]).w; -#endif /* __KERNEL_AVX2__ */ - - float fc = 0.71f; - ssef vfc = ssef(fc); - ssef vfcxp3 = vfc * p3; - - vcurve_coef[0] = p1; - vcurve_coef[1] = vfc * (p2 - p0); - vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3))); - vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3)); - - } -#else - float3 curve_coef[4]; - - /* curve Intersection check */ - /* obtain curve parameters */ - { - /* ray transform created - this should be created at beginning of intersection loop */ - Transform htfm; - float d = sqrtf(dir.x * dir.x + dir.z * dir.z); - htfm = make_transform( - dir.z / d, 0, -dir.x /d, 0, - -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, - dir.x, dir.y, dir.z, 0); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P_curve[4]; - - if(is_curve_primitive) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve); - } - - float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); - float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); - float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); - float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); - - float fc = 0.71f; - curve_coef[0] = p1; - curve_coef[1] = -fc*p0 + fc*p2; - curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; - curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; - r_st = P_curve[1].w; - r_en = P_curve[2].w; - } -#endif - - float r_curr = max(r_st, r_en); - - if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) - epsilon = 2 * r_curr; - - /* find bounds - this is slow for cubic curves */ - float upper, lower; - - float zextrem[4]; - curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); - if(lower - r_curr > isect->t || upper + r_curr < epsilon) - return false; - - /* minimum width extension */ - float mw_extension = min(difl * fabsf(upper), extmax); - float r_ext = mw_extension + r_curr; - - float xextrem[4]; - curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); - if(lower > r_ext || upper < -r_ext) - return false; - - float yextrem[4]; - curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); - if(lower > r_ext || upper < -r_ext) - return false; - - /* setup recurrent loop */ - int level = 1 << depth; - int tree = 0; - float resol = 1.0f / (float)level; - bool hit = false; - - /* begin loop */ - while(!(tree >> (depth))) { - const float i_st = tree * resol; - const float i_en 
= i_st + (level * resol); - -#ifdef __KERNEL_SSE2__ - ssef vi_st = ssef(i_st), vi_en = ssef(i_en); - ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); - ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); - - ssef vbmin = min(vp_st, vp_en); - ssef vbmax = max(vp_st, vp_en); - - float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; - float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; - float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; - float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; -#else - float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; - float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; - - float bminx = min(p_st.x, p_en.x); - float bmaxx = max(p_st.x, p_en.x); - float bminy = min(p_st.y, p_en.y); - float bmaxy = max(p_st.y, p_en.y); - float bminz = min(p_st.z, p_en.z); - float bmaxz = max(p_st.z, p_en.z); -#endif - - if(xextrem[0] >= i_st && xextrem[0] <= i_en) { - bminx = min(bminx,xextrem[1]); - bmaxx = max(bmaxx,xextrem[1]); - } - if(xextrem[2] >= i_st && xextrem[2] <= i_en) { - bminx = min(bminx,xextrem[3]); - bmaxx = max(bmaxx,xextrem[3]); - } - if(yextrem[0] >= i_st && yextrem[0] <= i_en) { - bminy = min(bminy,yextrem[1]); - bmaxy = max(bmaxy,yextrem[1]); - } - if(yextrem[2] >= i_st && yextrem[2] <= i_en) { - bminy = min(bminy,yextrem[3]); - bmaxy = max(bmaxy,yextrem[3]); - } - if(zextrem[0] >= i_st && zextrem[0] <= i_en) { - bminz = min(bminz,zextrem[1]); - bmaxz = max(bmaxz,zextrem[1]); - } - if(zextrem[2] >= i_st && zextrem[2] <= i_en) { - bminz = min(bminz,zextrem[3]); - bmaxz = max(bmaxz,zextrem[3]); - } - - float r1 = r_st + (r_en - r_st) * i_st; - float r2 = r_st + (r_en - r_st) * i_en; - r_curr = max(r1, r2); - - mw_extension = min(difl * fabsf(bmaxz), extmax); - float r_ext = mw_extension + 
r_curr; - float coverage = 1.0f; - - if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { - /* the bounding box does not overlap the square centered at O */ - tree += level; - level = tree & -tree; - } - else if(level == 1) { - - /* the maximum recursion depth is reached. - * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. - * dP* is reversed if necessary.*/ - float t = isect->t; - float u = 0.0f; - float gd = 0.0f; - - if(flags & CURVE_KN_RIBBONS) { - float3 tg = (p_en - p_st); -#ifdef __KERNEL_SSE__ - const float3 tg_sq = tg * tg; - float w = tg_sq.x + tg_sq.y; -#else - float w = tg.x * tg.x + tg.y * tg.y; -#endif - if(w == 0) { - tree++; - level = tree & -tree; - continue; - } -#ifdef __KERNEL_SSE__ - const float3 p_sttg = p_st * tg; - w = -(p_sttg.x + p_sttg.y) / w; -#else - w = -(p_st.x * tg.x + p_st.y * tg.y) / w; -#endif - w = saturate(w); - - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r_st + (r_en - r_st) * u; - /* compare x-y distances */ - float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if(dot(tg, dp_st)< 0) - dp_st *= -1; - if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { - tree++; - level = tree & -tree; - continue; - } - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if(dot(tg, dp_en) < 0) - dp_en *= -1; - if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { - tree++; - level = tree & -tree; - continue; - } - - /* compute coverage */ - float r_ext = r_curr; - coverage = 1.0f; - if(difl != 0.0f) { - mw_extension = min(difl * fabsf(bmaxz), extmax); - r_ext = mw_extension + r_curr; -#ifdef __KERNEL_SSE__ - const float3 p_curr_sq = p_curr * p_curr; - const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); - float d = dxxx.x; -#else - float d = sqrtf(p_curr.x * 
p_curr.x + p_curr.y * p_curr.y); -#endif - float d0 = d - r_curr; - float d1 = d + r_curr; - float inv_mw_extension = 1.0f/mw_extension; - if(d0 >= 0) - coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; - else // inside - coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; - } - - if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { - tree++; - level = tree & -tree; - continue; - } - - t = p_curr.z; - - /* stochastic fade from minimum width */ - if(difl != 0.0f && lcg_state) { - if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } - } - else { - float l = len(p_en - p_st); - /* minimum width extension */ - float or1 = r1; - float or2 = r2; - - if(difl != 0.0f) { - mw_extension = min(len(p_st - P) * difl, extmax); - or1 = r1 < mw_extension ? mw_extension : r1; - mw_extension = min(len(p_en - P) * difl, extmax); - or2 = r2 < mw_extension ? 
mw_extension : r2; - } - /* --- */ - float invl = 1.0f/l; - float3 tg = (p_en - p_st) * invl; - gd = (or2 - or1) * invl; - float difz = -dot(p_st,tg); - float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); - float invcyla = 1.0f/cyla; - float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); - float tcentre = -halfb*invcyla; - float zcentre = difz + (tg.z * tcentre); - float3 tdif = - p_st; - tdif.z += tcentre; - float tdifz = dot(tdif,tg); - float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); - float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; - float td = tb*tb - 4*cyla*tc; - if(td < 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float rootd = sqrtf(td); - float correction = (-tb - rootd) * 0.5f * invcyla; - t = tcentre + correction; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if(dot(tg, dp_st)< 0) - dp_st *= -1; - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if(dot(tg, dp_en) < 0) - dp_en *= -1; - - if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { - correction = (-tb + rootd) * 0.5f * invcyla; - t = tcentre + correction; - } - - if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float w = (zcentre + (tg.z * correction)) * invl; - w = saturate(w); - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - - /* stochastic fade from minimum width */ - if(difl != 0.0f && lcg_state) { - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr/r_ext; - - if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } - } - /* we found a new intersection */ - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. 
we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->t = t; - isect->u = u; - isect->v = gd; - isect->prim = curveAddr; - isect->object = object; - isect->type = type; - hit = true; - } - - tree++; - level = tree & -tree; - } - else { - /* split the curve into two curves and process */ - level = level >> 1; - } - } - - return hit; + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if (time < prim_time.x || time > prim_time.y) { + return false; + } + } + + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + float epsilon = 0.0f; + float r_st, r_en; + + int depth = kernel_data.curve.subdivisions; + int flags = kernel_data.curve.curveflags; + int prim = kernel_tex_fetch(__prim_index, curveAddr); + +# ifdef __KERNEL_SSE2__ + ssef vdir = load4f(dir); + ssef vcurve_coef[4]; + const float3 *curve_coef = (float3 *)vcurve_coef; + + { + ssef dtmp = vdir * vdir; + ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp)); + ssef rd_ss = load1f_first(1.0f) / d_ss; + + ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]); + int2 &v00 = (int2 &)v00vec; + + int k0 = v00.x + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, v00.x); + int kb = min(k1 + 1, v00.x + v00.y - 1); + +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \ + (!defined(_MSC_VER) || _MSC_VER > 1800) + avxf P_curve_0_1, P_curve_2_3; + if (is_curve_primitive) { + P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); + P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys_avx( + kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3); + } +# else /* __KERNEL_AVX2__ */ + ssef P_curve[4]; + + if (is_curve_primitive) { + P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); + P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); + P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); + P_curve[3] = load4f(&kg->__curve_keys.data[kb].x); + } + else { + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve); + } +# endif /* __KERNEL_AVX2__ */ + + ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss)); + ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn; + ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy; + ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); + ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); + + ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); + ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); + ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); + +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \ + (!defined(_MSC_VER) || _MSC_VER > 1800) + const avxf vPP = _mm256_broadcast_ps(&P.m128); + const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); + const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); + const avxf htfm22 = avxf(htfm2.m128, htfm2.m128); + + const avxf p01 = madd( + shuffle<0>(P_curve_0_1 - vPP), + htfm00, + madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22)); + const avxf p23 = madd( + shuffle<0>(P_curve_2_3 - vPP), + htfm00, + madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22)); + + const ssef p0 = _mm256_castps256_ps128(p01); + const ssef p1 = _mm256_extractf128_ps(p01, 1); + const ssef p2 = _mm256_castps256_ps128(p23); + const ssef p3 = _mm256_extractf128_ps(p23, 1); + + 
const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1); + r_st = ((float4 &)P_curve_1).w; + const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3); + r_en = ((float4 &)P_curve_2).w; +# else /* __KERNEL_AVX2__ */ + ssef htfm[] = {htfm0, htfm1, htfm2}; + ssef vP = load4f(P); + ssef p0 = transform_point_T3(htfm, P_curve[0] - vP); + ssef p1 = transform_point_T3(htfm, P_curve[1] - vP); + ssef p2 = transform_point_T3(htfm, P_curve[2] - vP); + ssef p3 = transform_point_T3(htfm, P_curve[3] - vP); + + r_st = ((float4 &)P_curve[1]).w; + r_en = ((float4 &)P_curve[2]).w; +# endif /* __KERNEL_AVX2__ */ + + float fc = 0.71f; + ssef vfc = ssef(fc); + ssef vfcxp3 = vfc * p3; + + vcurve_coef[0] = p1; + vcurve_coef[1] = vfc * (p2 - p0); + vcurve_coef[2] = madd( + ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3))); + vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3)); + } +# else + float3 curve_coef[4]; + + /* curve Intersection check */ + /* obtain curve parameters */ + { + /* ray transform created - this should be created at beginning of intersection loop */ + Transform htfm; + float d = sqrtf(dir.x * dir.x + dir.z * dir.z); + htfm = make_transform(dir.z / d, + 0, + -dir.x / d, + 0, + -dir.x * dir.y / d, + d, + -dir.y * dir.z / d, + 0, + dir.x, + dir.y, + dir.z, + 0); + + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + segment; + int k1 = k0 + 1; + + int ka = max(k0 - 1, __float_as_int(v00.x)); + int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if (is_curve_primitive) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve); + } + + float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); + float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); + float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); + float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); + + float fc = 0.71f; + curve_coef[0] = p1; + curve_coef[1] = -fc * p0 + fc * p2; + curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; + curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; + r_st = P_curve[1].w; + r_en = P_curve[2].w; + } +# endif + + float r_curr = max(r_st, r_en); + + if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) + epsilon = 2 * r_curr; + + /* find bounds - this is slow for cubic curves */ + float upper, lower; + + float zextrem[4]; + curvebounds(&lower, + &upper, + &zextrem[0], + &zextrem[1], + &zextrem[2], + &zextrem[3], + curve_coef[0].z, + curve_coef[1].z, + curve_coef[2].z, + curve_coef[3].z); + if (lower - r_curr > isect->t || upper + r_curr < epsilon) + return false; + + /* minimum width extension */ + float mw_extension = min(difl * fabsf(upper), extmax); + float r_ext = mw_extension + r_curr; + + float xextrem[4]; + curvebounds(&lower, + &upper, + &xextrem[0], + &xextrem[1], + &xextrem[2], + &xextrem[3], + curve_coef[0].x, + curve_coef[1].x, + curve_coef[2].x, + curve_coef[3].x); + if (lower > r_ext || upper < -r_ext) + return false; + + float yextrem[4]; + curvebounds(&lower, + &upper, + &yextrem[0], + &yextrem[1], + &yextrem[2], + &yextrem[3], + curve_coef[0].y, + curve_coef[1].y, + curve_coef[2].y, + curve_coef[3].y); + if (lower > r_ext || upper < -r_ext) + return false; + + /* setup recurrent loop */ + int level = 1 << depth; + int tree = 0; + float resol = 1.0f / (float)level; + bool hit = false; + + /* begin loop */ + while (!(tree >> 
(depth))) { + const float i_st = tree * resol; + const float i_en = i_st + (level * resol); + +# ifdef __KERNEL_SSE2__ + ssef vi_st = ssef(i_st), vi_en = ssef(i_en); + ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), + vi_st, + vcurve_coef[0]); + ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), + vi_en, + vcurve_coef[0]); + + ssef vbmin = min(vp_st, vp_en); + ssef vbmax = max(vp_st, vp_en); + + float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; + float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; + float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; + float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; +# else + float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + + curve_coef[0]; + float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + + curve_coef[0]; + + float bminx = min(p_st.x, p_en.x); + float bmaxx = max(p_st.x, p_en.x); + float bminy = min(p_st.y, p_en.y); + float bmaxy = max(p_st.y, p_en.y); + float bminz = min(p_st.z, p_en.z); + float bmaxz = max(p_st.z, p_en.z); +# endif + + if (xextrem[0] >= i_st && xextrem[0] <= i_en) { + bminx = min(bminx, xextrem[1]); + bmaxx = max(bmaxx, xextrem[1]); + } + if (xextrem[2] >= i_st && xextrem[2] <= i_en) { + bminx = min(bminx, xextrem[3]); + bmaxx = max(bmaxx, xextrem[3]); + } + if (yextrem[0] >= i_st && yextrem[0] <= i_en) { + bminy = min(bminy, yextrem[1]); + bmaxy = max(bmaxy, yextrem[1]); + } + if (yextrem[2] >= i_st && yextrem[2] <= i_en) { + bminy = min(bminy, yextrem[3]); + bmaxy = max(bmaxy, yextrem[3]); + } + if (zextrem[0] >= i_st && zextrem[0] <= i_en) { + bminz = min(bminz, zextrem[1]); + bmaxz = max(bmaxz, zextrem[1]); + } + if (zextrem[2] >= i_st && zextrem[2] <= i_en) { + bminz = min(bminz, zextrem[3]); + bmaxz = max(bmaxz, zextrem[3]); + } + + float r1 = r_st + (r_en - r_st) * i_st; + float r2 = r_st + (r_en - r_st) * i_en; + r_curr 
= max(r1, r2); + + mw_extension = min(difl * fabsf(bmaxz), extmax); + float r_ext = mw_extension + r_curr; + float coverage = 1.0f; + + if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext || + bminy > r_ext || bmaxy < -r_ext) { + /* the bounding box does not overlap the square centered at O */ + tree += level; + level = tree & -tree; + } + else if (level == 1) { + + /* the maximum recursion depth is reached. + * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. + * dP* is reversed if necessary.*/ + float t = isect->t; + float u = 0.0f; + float gd = 0.0f; + + if (flags & CURVE_KN_RIBBONS) { + float3 tg = (p_en - p_st); +# ifdef __KERNEL_SSE__ + const float3 tg_sq = tg * tg; + float w = tg_sq.x + tg_sq.y; +# else + float w = tg.x * tg.x + tg.y * tg.y; +# endif + if (w == 0) { + tree++; + level = tree & -tree; + continue; + } +# ifdef __KERNEL_SSE__ + const float3 p_sttg = p_st * tg; + w = -(p_sttg.x + p_sttg.y) / w; +# else + w = -(p_st.x * tg.x + p_st.y * tg.y) / w; +# endif + w = saturate(w); + + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + r_curr = r_st + (r_en - r_st) * u; + /* compare x-y distances */ + float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + + curve_coef[0]; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st) < 0) + dp_st *= -1; + if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + tree++; + level = tree & -tree; + continue; + } + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + tree++; + level = tree & -tree; + continue; + } + + /* compute coverage */ + float r_ext = r_curr; + coverage = 1.0f; + if (difl != 0.0f) { + mw_extension = min(difl * fabsf(bmaxz), extmax); + r_ext = mw_extension + r_curr; +# ifdef __KERNEL_SSE__ + const float3 p_curr_sq = p_curr * p_curr; + const 
float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); + float d = dxxx.x; +# else + float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); +# endif + float d0 = d - r_curr; + float d1 = d + r_curr; + float inv_mw_extension = 1.0f / mw_extension; + if (d0 >= 0) + coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * + 0.5f; + else // inside + coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * + 0.5f; + } + + if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || + isect->t < p_curr.z) { + tree++; + level = tree & -tree; + continue; + } + + t = p_curr.z; + + /* stochastic fade from minimum width */ + if (difl != 0.0f && lcg_state) { + if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + else { + float l = len(p_en - p_st); + /* minimum width extension */ + float or1 = r1; + float or2 = r2; + + if (difl != 0.0f) { + mw_extension = min(len(p_st - P) * difl, extmax); + or1 = r1 < mw_extension ? mw_extension : r1; + mw_extension = min(len(p_en - P) * difl, extmax); + or2 = r2 < mw_extension ? 
mw_extension : r2; + } + /* --- */ + float invl = 1.0f / l; + float3 tg = (p_en - p_st) * invl; + gd = (or2 - or1) * invl; + float difz = -dot(p_st, tg); + float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd)); + float invcyla = 1.0f / cyla; + float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1))); + float tcentre = -halfb * invcyla; + float zcentre = difz + (tg.z * tcentre); + float3 tdif = -p_st; + tdif.z += tcentre; + float tdifz = dot(tdif, tg); + float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1))); + float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 - + 2 * or1 * tdifz * gd; + float td = tb * tb - 4 * cyla * tc; + if (td < 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float rootd = sqrtf(td); + float correction = (-tb - rootd) * 0.5f * invcyla; + t = tcentre + correction; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st) < 0) + dp_st *= -1; + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + + if (flags & CURVE_KN_BACKFACING && + (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || + isect->t < t || t <= 0.0f)) { + correction = (-tb + rootd) * 0.5f * invcyla; + t = tcentre + correction; + } + + if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || + isect->t < t || t <= 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float w = (zcentre + (tg.z * correction)) * invl; + w = saturate(w); + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + + /* stochastic fade from minimum width */ + if (difl != 0.0f && lcg_state) { + r_curr = r1 + (r2 - r1) * w; + r_ext = or1 + (or2 - or1) * w; + coverage = r_curr / r_ext; + + if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + /* we found a new intersection */ + +# ifdef __VISIBILITY_FLAG__ + /* visibility flag 
test. we do it here under the assumption + * that most triangles are culled by node flags */ + if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +# endif + { + /* record intersection */ + isect->t = t; + isect->u = u; + isect->v = gd; + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + hit = true; + } + + tree++; + level = tree & -tree; + } + else { + /* split the curve into two curves and process */ + level = level >> 1; + } + } + + return hit; } ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, @@ -511,245 +561,247 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, float difl, float extmax) { - /* define few macros to minimize code duplication for SSE */ -#ifndef __KERNEL_SSE2__ -# define len3_squared(x) len_squared(x) -# define len3(x) len(x) -# define dot3(x, y) dot(x, y) -#endif - - const bool is_curve_primitive = (type & PRIMITIVE_CURVE); - - if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if(time < prim_time.x || time > prim_time.y) { - return false; - } - } - - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - /* curve Intersection check */ - int flags = kernel_data.curve.curveflags; - - int prim = kernel_tex_fetch(__prim_index, curveAddr); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int cnum = __float_as_int(v00.x); - int k0 = cnum + segment; - int k1 = k0 + 1; - -#ifndef __KERNEL_SSE2__ - float4 P_curve[2]; - - if(is_curve_primitive) { - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); - } - - float or1 = P_curve[0].w; - float or2 = P_curve[1].w; - float3 p1 = float4_to_float3(P_curve[0]); - float3 p2 = float4_to_float3(P_curve[1]); - - /* minimum width extension */ - float r1 = or1; - float r2 = or2; - float3 dif = P - p1; - float3 dif_second = P - p2; - if(difl != 0.0f) { - float pixelsize = min(len3(dif) * difl, extmax); - r1 = or1 < pixelsize ? pixelsize : or1; - pixelsize = min(len3(dif_second) * difl, extmax); - r2 = or2 < pixelsize ? pixelsize : or2; - } - /* --- */ - - float3 p21_diff = p2 - p1; - float3 sphere_dif1 = (dif + dif_second) * 0.5f; - float3 dir = direction; - float sphere_b_tmp = dot3(dir, sphere_dif1); - float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; -#else - ssef P_curve[2]; - - if(is_curve_primitive) { - P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); - P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve); - } - - const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); - - ssef r12 = or12; - const ssef vP = load4f(P); - const ssef dif = vP - P_curve[0]; - const ssef dif_second = vP - P_curve[1]; - if(difl != 0.0f) { - const ssef len1_sq = len3_squared_splat(dif); - const ssef len2_sq = len3_squared_splat(dif_second); - const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); - const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); - r12 = max(or12, pixelsize12); - } - float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); - float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); - - const ssef p21_diff = P_curve[1] - P_curve[0]; - const ssef sphere_dif1 = (dif + dif_second) * 0.5f; - const ssef dir = load4f(direction); - const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); - const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); -#endif - - float mr = max(r1, r2); - float l = len3(p21_diff); - float invl = 1.0f / l; - float sp_r = mr + 0.5f * l; - - float sphere_b = dot3(dir, sphere_dif2); - float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; - - if(sdisc < 0.0f) - return false; - - /* obtain parameters and test midpoint distance for suitable modes */ -#ifndef __KERNEL_SSE2__ - float3 tg = p21_diff * invl; -#else - const ssef tg = p21_diff * invl; -#endif - float gd = (r2 - r1) * invl; - - float dirz = dot3(dir, tg); - float difz = dot3(dif, tg); - - float a = 1.0f - (dirz*dirz*(1 + gd*gd)); - - float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1)); - - float tcentre = -halfb/a; - float zcentre = difz + (dirz * tcentre); - - if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) - return false; - if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) - return false; - - /* test minimum separation */ -#ifndef 
__KERNEL_SSE2__ - float3 cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross(tg, dif)); -#else - const ssef cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross_zxy(tg, dif)); -#endif - float cprodsq = len3_squared(cprod); - float distscaled = dot3(cprod, dif); - - if(cprodsq == 0) - distscaled = cprod2sq; - else - distscaled = (distscaled*distscaled)/cprodsq; - - if(distscaled > mr*mr) - return false; - - /* calculate true intersection */ -#ifndef __KERNEL_SSE2__ - float3 tdif = dif + tcentre * dir; -#else - const ssef tdif = madd(ssef(tcentre), dir, dif); -#endif - float tdifz = dot3(tdif, tg); - float tdifma = tdifz*gd + r1; - float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma)); - float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; - float td = tb*tb - 4*a*tc; - - if(td < 0.0f) - return false; - - float rootd = 0.0f; - float correction = 0.0f; - if(flags & CURVE_KN_ACCURATE) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - } - - float t = tcentre + correction; - - if(t < isect->t) { - - if(flags & CURVE_KN_INTERSECTCORRECTION) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - t = tcentre + correction; - } - - float z = zcentre + (dirz * correction); - // bool backface = false; - - if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { - // backface = true; - correction = ((-tb + rootd)/(2*a)); - t = tcentre + correction; - z = zcentre + (dirz * correction); - } - - /* stochastic fade from minimum width */ - float adjradius = or1 + z * (or2 - or1) * invl; - adjradius = adjradius / (r1 + z * gd); - if(lcg_state && adjradius != 1.0f) { - if(lcg_step_float(lcg_state) > adjradius) - return false; - } - /* --- */ - - if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - - if(flags & CURVE_KN_ENCLOSEFILTER) { - float enc_ratio = 1.01f; - if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { - float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); - float c2 = dot3(dif, dif) 
- difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; - if(a2*c2 < 0.0f) - return false; - } - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->t = t; - isect->u = z*invl; - isect->v = gd; - isect->prim = curveAddr; - isect->object = object; - isect->type = type; - - return true; - } - } - } - - return false; - -#ifndef __KERNEL_SSE2__ -# undef len3_squared -# undef len3 -# undef dot3 -#endif + /* define few macros to minimize code duplication for SSE */ +# ifndef __KERNEL_SSE2__ +# define len3_squared(x) len_squared(x) +# define len3(x) len(x) +# define dot3(x, y) dot(x, y) +# endif + + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if (time < prim_time.x || time > prim_time.y) { + return false; + } + } + + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + /* curve Intersection check */ + int flags = kernel_data.curve.curveflags; + + int prim = kernel_tex_fetch(__prim_index, curveAddr); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int cnum = __float_as_int(v00.x); + int k0 = cnum + segment; + int k1 = k0 + 1; + +# ifndef __KERNEL_SSE2__ + float4 P_curve[2]; + + if (is_curve_primitive) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); + } + + float or1 = P_curve[0].w; + float or2 = P_curve[1].w; + float3 p1 = float4_to_float3(P_curve[0]); + float3 p2 = float4_to_float3(P_curve[1]); + + /* minimum width extension */ + float r1 = or1; + float r2 = or2; + float3 dif = P - p1; + float3 dif_second = P - p2; + if (difl != 0.0f) { + float pixelsize = min(len3(dif) * difl, extmax); + r1 = or1 < pixelsize ? pixelsize : or1; + pixelsize = min(len3(dif_second) * difl, extmax); + r2 = or2 < pixelsize ? pixelsize : or2; + } + /* --- */ + + float3 p21_diff = p2 - p1; + float3 sphere_dif1 = (dif + dif_second) * 0.5f; + float3 dir = direction; + float sphere_b_tmp = dot3(dir, sphere_dif1); + float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; +# else + ssef P_curve[2]; + + if (is_curve_primitive) { + P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); + P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve); + } + + const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); + + ssef r12 = or12; + const ssef vP = load4f(P); + const ssef dif = vP - P_curve[0]; + const ssef dif_second = vP - P_curve[1]; + if (difl != 0.0f) { + const ssef len1_sq = len3_squared_splat(dif); + const ssef len2_sq = len3_squared_splat(dif_second); + const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); + const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); + r12 = max(or12, pixelsize12); + } + float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); + float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); + + const ssef p21_diff = P_curve[1] - P_curve[0]; + const ssef sphere_dif1 = (dif + dif_second) * 0.5f; + const ssef dir = load4f(direction); + const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); + const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); +# endif + + float mr = max(r1, r2); + float l = len3(p21_diff); + float invl = 1.0f / l; + float sp_r = mr + 0.5f * l; + + float sphere_b = dot3(dir, sphere_dif2); + float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; + + if (sdisc < 0.0f) + return false; + + /* obtain parameters and test midpoint distance for suitable modes */ +# ifndef __KERNEL_SSE2__ + float3 tg = p21_diff * invl; +# else + const ssef tg = p21_diff * invl; +# endif + float gd = (r2 - r1) * invl; + + float dirz = dot3(dir, tg); + float difz = dot3(dif, tg); + + float a = 1.0f - (dirz * dirz * (1 + gd * gd)); + + float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1)); + + float tcentre = -halfb / a; + float zcentre = difz + (dirz * tcentre); + + if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) + return false; + if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && + !(flags & CURVE_KN_INTERSECTCORRECTION)) + return false; + + /* test 
minimum separation */ +# ifndef __KERNEL_SSE2__ + float3 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross(tg, dif)); +# else + const ssef cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross_zxy(tg, dif)); +# endif + float cprodsq = len3_squared(cprod); + float distscaled = dot3(cprod, dif); + + if (cprodsq == 0) + distscaled = cprod2sq; + else + distscaled = (distscaled * distscaled) / cprodsq; + + if (distscaled > mr * mr) + return false; + + /* calculate true intersection */ +# ifndef __KERNEL_SSE2__ + float3 tdif = dif + tcentre * dir; +# else + const ssef tdif = madd(ssef(tcentre), dir, dif); +# endif + float tdifz = dot3(tdif, tg); + float tdifma = tdifz * gd + r1; + float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma)); + float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma; + float td = tb * tb - 4 * a * tc; + + if (td < 0.0f) + return false; + + float rootd = 0.0f; + float correction = 0.0f; + if (flags & CURVE_KN_ACCURATE) { + rootd = sqrtf(td); + correction = ((-tb - rootd) / (2 * a)); + } + + float t = tcentre + correction; + + if (t < isect->t) { + + if (flags & CURVE_KN_INTERSECTCORRECTION) { + rootd = sqrtf(td); + correction = ((-tb - rootd) / (2 * a)); + t = tcentre + correction; + } + + float z = zcentre + (dirz * correction); + // bool backface = false; + + if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { + // backface = true; + correction = ((-tb + rootd) / (2 * a)); + t = tcentre + correction; + z = zcentre + (dirz * correction); + } + + /* stochastic fade from minimum width */ + float adjradius = or1 + z * (or2 - or1) * invl; + adjradius = adjradius / (r1 + z * gd); + if (lcg_state && adjradius != 1.0f) { + if (lcg_step_float(lcg_state) > adjradius) + return false; + } + /* --- */ + + if (t > 0.0f && t < isect->t && z >= 0 && z <= l) { + + if (flags & CURVE_KN_ENCLOSEFILTER) { + float enc_ratio = 1.01f; + if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { + float 
a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio)); + float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) - + r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio; + if (a2 * c2 < 0.0f) + return false; + } + } + +# ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +# endif + { + /* record intersection */ + isect->t = t; + isect->u = z * invl; + isect->v = gd; + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + + return true; + } + } + } + + return false; + +# ifndef __KERNEL_SSE2__ +# undef len3_squared +# undef len3 +# undef dot3 +# endif } ccl_device_inline float3 curve_refine(KernelGlobals *kg, @@ -757,154 +809,154 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - int flag = kernel_data.curve.curveflags; - float t = isect->t; - float3 P = ray->P; - float3 D = ray->D; - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - int prim = kernel_tex_fetch(__prim_index, isect->prim); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float3 tg; - - if(flag & CURVE_KN_INTERPOLATE) { - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P_curve[4]; - - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = 
kernel_tex_fetch(__curve_keys, kb); - } - else { - motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); - } - - float3 p[4]; - p[0] = float4_to_float3(P_curve[0]); - p[1] = float4_to_float3(P_curve[1]); - p[2] = float4_to_float3(P_curve[2]); - p[3] = float4_to_float3(P_curve[3]); - - P = P + D*t; - -#ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; -#endif - - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); - - if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); - } - else { -#ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - sd->Ng = normalize(isect->Ng); - } - else -#endif - { - /* direction from inside to surface of curve */ - float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - p_curr); - - /* adjustment for changing radius */ - float gd = isect->v; - - if(gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); - } - } - } - - /* todo: sometimes the normal is still so that this is detected as - * backfacing even if cull backfaces is enabled */ - - sd->N = sd->Ng; - } - else { - float4 P_curve[2]; - - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - } - else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); - } - - float l = 1.0f; - tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); - - P = P + D*t; - - float3 dif = P - float4_to_float3(P_curve[0]); - -#ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; -#endif - - if(flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); - } - else { - float gd = isect->v; - - /* direction from inside to surface of curve */ - float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f); - sd->Ng = (dif - tg * sd->u * l) / denom; - - /* adjustment for changing radius */ - if(gd != 0.0f) { - sd->Ng = sd->Ng 
- gd * tg; - } - - sd->Ng = normalize(sd->Ng); - } - - sd->N = sd->Ng; - } - -#ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); -#endif - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; + int flag = kernel_data.curve.curveflags; + float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + + if (isect->object != OBJECT_NONE) { +# ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +# else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +# endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + int prim = kernel_tex_fetch(__prim_index, isect->prim); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 tg; + + if (flag & CURVE_KN_INTERPOLATE) { + int ka = max(k0 - 1, __float_as_int(v00.x)); + int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + } + + float3 p[4]; + p[0] = float4_to_float3(P_curve[0]); + p[1] = float4_to_float3(P_curve[1]); + p[2] = float4_to_float3(P_curve[2]); + p[3] = float4_to_float3(P_curve[3]); + + P = P + D * t; + +# ifdef __UV__ + sd->u = isect->u; + sd->v = 0.0f; +# endif + + tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + + if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { + sd->Ng = normalize(-(D - tg * (dot(tg, 
D)))); + } + else { +# ifdef __EMBREE__ + if (kernel_data.bvh.scene) { + sd->Ng = normalize(isect->Ng); + } + else +# endif + { + /* direction from inside to surface of curve */ + float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); + sd->Ng = normalize(P - p_curr); + + /* adjustment for changing radius */ + float gd = isect->v; + + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); + } + } + } + + /* todo: sometimes the normal is still so that this is detected as + * backfacing even if cull backfaces is enabled */ + + sd->N = sd->Ng; + } + else { + float4 P_curve[2]; + + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } + + float l = 1.0f; + tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); + + P = P + D * t; + + float3 dif = P - float4_to_float3(P_curve[0]); + +# ifdef __UV__ + sd->u = dot(dif, tg) / l; + sd->v = 0.0f; +# endif + + if (flag & CURVE_KN_TRUETANGENTGNORMAL) { + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); + } + else { + float gd = isect->v; + + /* direction from inside to surface of curve */ + float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f); + sd->Ng = (dif - tg * sd->u * l) / denom; + + /* adjustment for changing radius */ + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg; + } + + sd->Ng = normalize(sd->Ng); + } + + sd->N = sd->Ng; + } + +# ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); +# endif + + if (isect->object != OBJECT_NONE) { +# ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +# else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +# endif + + P = transform_point(&tfm, P); + } + + return P; } #endif diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 
5cc22ae2155..7380c506bf4 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -25,96 +25,116 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, + int object, + uint id, + AttributeElement *elem) { - /* todo: find a better (faster) solution for this, maybe store offset per object. - * - * NOTE: currently it's not a bottleneck because in test scenes the loop below runs - * zero iterations and rendering is really slow with motion curves. For until other - * areas are speed up it's probably not so crucial to optimize this out. - */ - uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE; - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - *elem = (AttributeElement)attr_map.y; - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + /* todo: find a better (faster) solution for this, maybe store offset per object. + * + * NOTE: currently it's not a bottleneck because in test scenes the loop below runs + * zero iterations and rendering is really slow with motion curves. For until other + * areas are speed up it's probably not so crucial to optimize this out. + */ + uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + *elem = (AttributeElement)attr_map.y; + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } -ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2]) +ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + float4 keys[2]) { - if(step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numkeys; - - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - } + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + } } /* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) +ccl_device_inline void motion_curve_keys( + KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) { - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; + /* figure out which steps we need to fetch and their interpolation factor */ + int 
maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* fetch key coordinates */ - float4 next_keys[2]; + /* fetch key coordinates */ + float4 next_keys[2]; - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys); - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys); - /* interpolate between steps */ - keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; - keys[1] = (1.0f - t)*keys[1] + t*next_keys[1]; + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; } -ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4]) +ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + int k2, + int k3, + float4 keys[4]) { - if(step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - keys[2] = kernel_tex_fetch(__curve_keys, k2); - keys[3] = kernel_tex_fetch(__curve_keys, k3); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numkeys; - - keys[0] = 
kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); - keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); - } + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + keys[2] = kernel_tex_fetch(__curve_keys, k2); + keys[3] = kernel_tex_fetch(__curve_keys, k3); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); + keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); + } } /* return 2 curve key locations */ @@ -122,37 +142,41 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, - int k0, int k1, int k2, int k3, + int k0, + int k1, + int k2, + int k3, float4 keys[4]) { - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch key coordinates */ - float4 next_keys[4]; - - motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); - motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys); - - /* interpolate between steps */ - keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; - keys[1] = 
(1.0f - t)*keys[1] + t*next_keys[1]; - keys[2] = (1.0f - t)*keys[2] + t*next_keys[2]; - keys[3] = (1.0f - t)*keys[3] + t*next_keys[3]; + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[4]; + + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_cardinal_curve_keys_for_step( + kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; + keys[2] = (1.0f - t) * keys[2] + t * next_keys[2]; + keys[3] = (1.0f - t) * keys[3] + t * next_keys[3]; } -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) /* Similar to above, but returns keys as pair of two AVX registers with each * holding two float4. */ @@ -160,56 +184,44 @@ ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg, int object, int prim, float time, - int k0, int k1, - int k2, int k3, + int k0, + int k1, + int k2, + int k3, avxf *out_keys_0_1, avxf *out_keys_2_3) { - /* Get motion info. */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* Figure out which steps we need to fetch and their interpolation factor. */ - int maxstep = numsteps * 2; - int step = min((int)(time*maxstep), maxstep - 1); - float t = time*maxstep - step; - - /* Find attribute. 
*/ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, - object, - ATTR_STD_MOTION_VERTEX_POSITION, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* Fetch key coordinates. */ - float4 next_keys[4]; - float4 keys[4]; - motion_cardinal_curve_keys_for_step(kg, - offset, - numkeys, - numsteps, - step, - k0, k1, k2, k3, - keys); - motion_cardinal_curve_keys_for_step(kg, - offset, - numkeys, - numsteps, - step + 1, - k0, k1, k2, k3, - next_keys); - - const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128); - const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128); - const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128); - const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128); - - /* Interpolate between steps. */ - *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1; - *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3; + /* Get motion info. */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* Fetch key coordinates. */ + float4 next_keys[4]; + float4 keys[4]; + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_cardinal_curve_keys_for_step( + kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); + + const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128); + const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128); + const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128); + const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128); + + /* Interpolate between steps. 
*/ + *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1; + *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3; } -#endif +# endif #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 64f6d027b99..53d6b92dd7e 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -29,127 +29,145 @@ CCL_NAMESPACE_BEGIN /* Time interpolation of vertex positions and normals */ -ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +ccl_device_inline int find_attribute_motion(KernelGlobals *kg, + int object, + uint id, + AttributeElement *elem) { - /* todo: find a better (faster) solution for this, maybe store offset per object */ - uint attr_offset = object_attribute_map_offset(kg, object); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + /* todo: find a better (faster) solution for this, maybe store offset per object */ + uint attr_offset = object_attribute_map_offset(kg, object); + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - while(attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } + while (attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } - *elem = (AttributeElement)attr_map.y; + *elem = (AttributeElement)attr_map.y; - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } -ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3]) +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 verts[3]) { - if(step == numsteps) { - /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - } - else { - /* center step not store in this array */ - if(step > numsteps) - step--; - - offset += step*numverts; - - verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } + if (step == numsteps) { + /* center step: regular vertex location */ + verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + } + else { + /* center step not store in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } } -ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3]) 
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 normals[3]) { - if(step == numsteps) { - /* center step: regular vertex location */ - normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numverts; - - normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } + if (step == numsteps) { + /* center step: regular vertex location */ + normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } } -ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) +ccl_device_inline void motion_triangle_vertices( + KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) { - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure 
out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch vertex coordinates */ - float3 next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); - - /* interpolate between steps */ - verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; - verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; - verts[2] = (1.0f - t)*verts[2] + t*next_verts[2]; + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex coordinates */ + float3 next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + + /* interpolate between steps */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; } -ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int object, int prim, float u, 
float v, float time) +ccl_device_inline float3 motion_triangle_smooth_normal( + KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time) { - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch normals */ - float3 normals[3], next_normals[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals); - - /* interpolate between steps */ - normals[0] = (1.0f - t)*normals[0] + t*next_normals[0]; - normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; - normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; - - /* interpolate between vertices */ - float w = 1.0f - u - v; - float3 N = safe_normalize(u*normals[0] + v*normals[1] + w*normals[2]); - - return is_zero(N)? 
Ng: N; + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch normals */ + float3 normals[3], next_normals[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + + /* interpolate between steps */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + + /* interpolate between vertices */ + float w = 1.0f - u - v; + float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]); + + return is_zero(N) ? Ng : N; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index ec7bfad7349..49d4829af38 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -32,64 +32,57 @@ CCL_NAMESPACE_BEGIN * a closer distance. 
*/ -ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray, - float3 verts[3]) +ccl_device_inline float3 motion_triangle_refine( + KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3]) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; #ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { - if(UNLIKELY(t == 0.0f)) { - return P; - } + if (isect->object != OBJECT_NONE) { + if (UNLIKELY(t == 0.0f)) { + return P; + } # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } - P = P + D*t; + P = P + D * t; - /* Compute refined intersection distance. */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); + /* Compute refined intersection distance. */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); - const float invdivisor = 1.0f/dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2)*invdivisor; + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; - /* Compute refined position. */ - P = P + D*rt; + /* Compute refined position. 
*/ + P = P + D * rt; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; #else - return P + D*t; + return P + D * t; #endif } @@ -103,116 +96,112 @@ ccl_device_noinline # else ccl_device_inline # endif -float3 motion_triangle_refine_local(KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray, - float3 verts[3]) + float3 + motion_triangle_refine_local(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray, + float3 verts[3]) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; # ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } - P = P + D*t; + P = P + D * t; - /* compute refined intersection distance */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); + /* compute refined intersection distance */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); - const float invdivisor = 1.0f/dot(s1, e1); - const float3 d = P - 
verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2)*invdivisor; + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; - P = P + D*rt; + P = P + D * rt; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; # else /* __INTERSECTION_REFINE__ */ - return P + D*t; -# endif /* __INTERSECTION_REFINE__ */ + return P + D * t; +# endif /* __INTERSECTION_REFINE__ */ } -#endif /* __BVH_LOCAL__ */ - +#endif /* __BVH_LOCAL__ */ /* Ray intersection. We simply compute the vertex positions at the given ray * time and do a ray intersection with the resulting triangle. */ -ccl_device_inline bool motion_triangle_intersect( - KernelGlobals *kg, - Intersection *isect, - float3 P, - float3 dir, - float time, - uint visibility, - int object, - int prim_addr) +ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, + Intersection *isect, + float3 P, + float3 dir, + float time, + uint visibility, + int object, + int prim_addr) { - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - int fobject = (object == OBJECT_NONE) - ? kernel_tex_fetch(__prim_object, prim_addr) - : object; - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, fobject, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if(ray_triangle_intersect(P, - dir, - isect->t, + /* Primitive index for vertex location lookup. 
*/ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object; + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, fobject, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (ray_triangle_intersect(P, + dir, + isect->t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef*)verts, + (ssef *)verts, #else - verts[0], verts[1], verts[2], + verts[0], + verts[1], + verts[2], #endif - &u, &v, &t)) - { + &u, + &v, + &t)) { #ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. + */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif - { - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - return true; - } - } - return false; + { + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + return true; + } + } + return false; } /* Special ray intersection routines for local intersections. In that case we @@ -221,101 +210,102 @@ ccl_device_inline bool motion_triangle_intersect( * Returns whether traversal should be stopped. 
*/ #ifdef __BVH_LOCAL__ -ccl_device_inline bool motion_triangle_intersect_local( - KernelGlobals *kg, - LocalIntersection *local_isect, - float3 P, - float3 dir, - float time, - int object, - int local_object, - int prim_addr, - float tmax, - uint *lcg_state, - int max_hits) +ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals *kg, + LocalIntersection *local_isect, + float3 P, + float3 dir, + float time, + int object, + int local_object, + int prim_addr, + float tmax, + uint *lcg_state, + int max_hits) { - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if(object == OBJECT_NONE) { - if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, local_object, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if(!ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef*)verts, -#else - verts[0], verts[1], verts[2], -#endif - &u, &v, &t)) - { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if(max_hits == 0) { - return true; - } - - int hit; - if(lcg_state) { - /* Record up to max_hits intersections. */ - for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if(local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if(local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* Reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it. - */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if(hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. 
*/ - if(local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. */ - Intersection *isect = &local_isect->hits[hit]; - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - - /* Record geometric normal. */ - local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], - verts[2] - verts[0])); - - return false; + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. */ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + /* Primitive index for vertex location lookup. */ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, local_object, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + (ssef *)verts, +# else + verts[0], + verts[1], + verts[2], +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. */ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* Reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it. + */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. 
*/ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. */ + Intersection *isect = &local_isect->hits[hit]; + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + + /* Record geometric normal. */ + local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + + return false; } -#endif /* __BVH_LOCAL__ */ +#endif /* __BVH_LOCAL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index e91a4be96ba..5333e82b346 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -32,91 +32,80 @@ CCL_NAMESPACE_BEGIN * normals */ /* return 3 triangle vertex normals */ -ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, - ShaderData *sd, const - Intersection *isect, - const Ray *ray, - bool is_local) +ccl_device_noinline void motion_triangle_shader_setup( + KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool is_local) { - /* Get shader. */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - /* Get motion info. */ - /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), - * can we de-duplicate something here? - */ - int numsteps, numverts; - object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); - /* Figure out which steps we need to fetch and their interpolation factor. */ - int maxstep = numsteps*2; - int step = min((int)(sd->time*maxstep), maxstep-1); - float t = sd->time*maxstep - step; - /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, - ATTR_STD_MOTION_VERTEX_POSITION, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. 
*/ - float3 verts[3], next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); - /* Interpolate between steps. */ - verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; - verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; - verts[2] = (1.0f - t)*verts[2] + t*next_verts[2]; - /* Compute refined position. */ + /* Get shader. */ + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + /* Get motion info. */ + /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), + * can we de-duplicate something here? + */ + int numsteps, numverts; + object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(sd->time * maxstep), maxstep - 1); + float t = sd->time * maxstep - step; + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 verts[3], next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + /* Interpolate between steps. */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; + /* Compute refined position. 
*/ #ifdef __BVH_LOCAL__ - if(is_local) { - sd->P = motion_triangle_refine_local(kg, - sd, - isect, - ray, - verts); - } - else -#endif /* __BVH_LOCAL__*/ - { - sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); - } - /* Compute face normal. */ - float3 Ng; - if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); - } - else { - Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - } - sd->Ng = Ng; - sd->N = Ng; - /* Compute derivatives of P w.r.t. uv. */ + if (is_local) { + sd->P = motion_triangle_refine_local(kg, sd, isect, ray, verts); + } + else +#endif /* __BVH_LOCAL__*/ + { + sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); + } + /* Compute face normal. */ + float3 Ng; + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); + } + else { + Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + } + sd->Ng = Ng; + sd->N = Ng; + /* Compute derivatives of P w.r.t. uv. */ #ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); + sd->dPdu = (verts[0] - verts[2]); + sd->dPdv = (verts[1] - verts[2]); #endif - /* Compute smooth normal. */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { - /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, - sd->object, - ATTR_STD_MOTION_VERTEX_NORMAL, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. */ - float3 normals[3], next_normals[3]; - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals); - /* Interpolate between steps. 
*/ - normals[0] = (1.0f - t)*normals[0] + t*next_normals[0]; - normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; - normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; - /* Interpolate between vertices. */ - float u = sd->u; - float v = sd->v; - float w = 1.0f - u - v; - sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); - } + /* Compute smooth normal. */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 normals[3], next_normals[3]; + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + /* Interpolate between steps. */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + /* Interpolate between vertices. 
*/ + float u = sd->u; + float v = sd->v; + float w = 1.0f - u - v; + sd->N = (u * normals[0] + v * normals[1] + w * normals[2]); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 669c932d720..2792fd64c61 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -27,131 +27,143 @@ CCL_NAMESPACE_BEGIN /* Object attributes, for now a fixed size and contents */ enum ObjectTransform { - OBJECT_TRANSFORM = 0, - OBJECT_INVERSE_TRANSFORM = 1, + OBJECT_TRANSFORM = 0, + OBJECT_INVERSE_TRANSFORM = 1, }; -enum ObjectVectorTransform { - OBJECT_PASS_MOTION_PRE = 0, - OBJECT_PASS_MOTION_POST = 1 -}; +enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 }; /* Object to world space transformation */ -ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type) +ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, + int object, + enum ObjectTransform type) { - if(type == OBJECT_INVERSE_TRANSFORM) { - return kernel_tex_fetch(__objects, object).itfm; - } - else { - return kernel_tex_fetch(__objects, object).tfm; - } + if (type == OBJECT_INVERSE_TRANSFORM) { + return kernel_tex_fetch(__objects, object).itfm; + } + else { + return kernel_tex_fetch(__objects, object).tfm; + } } /* Lamp to world space transformation */ ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse) { - if(inverse) { - return kernel_tex_fetch(__lights, lamp).itfm; - } - else { - return kernel_tex_fetch(__lights, lamp).tfm; - } + if (inverse) { + return kernel_tex_fetch(__lights, lamp).itfm; + } + else { + return kernel_tex_fetch(__lights, lamp).tfm; + } } /* Object to world space transformation for motion vectors */ -ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type) +ccl_device_inline 
Transform object_fetch_motion_pass_transform(KernelGlobals *kg, + int object, + enum ObjectVectorTransform type) { - int offset = object*OBJECT_MOTION_PASS_SIZE + (int)type; - return kernel_tex_fetch(__object_motion_pass, offset); + int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type; + return kernel_tex_fetch(__object_motion_pass, offset); } /* Motion blurred object transformations */ #ifdef __OBJECT_MOTION__ -ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time) -{ - const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; - const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); - const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; - - Transform tfm; -#ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time); - } - else -#endif - transform_motion_array_interpolate(&tfm, motion, num_steps, time); +ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, + int object, + float time) +{ + const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; + const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); + const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; + + Transform tfm; +# ifdef __EMBREE__ + if (kernel_data.bvh.scene) { + transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time); + } + else +# endif + transform_motion_array_interpolate(&tfm, motion, num_steps, time); - return tfm; + return tfm; } -ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm) +ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, + int object, + float time, + Transform *itfm) { - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & 
SD_OBJECT_MOTION) { - /* if we do motion blur */ - Transform tfm = object_fetch_transform_motion(kg, object, time); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_MOTION) { + /* if we do motion blur */ + Transform tfm = object_fetch_transform_motion(kg, object, time); - if(itfm) - *itfm = transform_quick_inverse(tfm); + if (itfm) + *itfm = transform_quick_inverse(tfm); - return tfm; - } - else { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - if(itfm) - *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + return tfm; + } + else { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + if (itfm) + *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - return tfm; - } + return tfm; + } } #endif /* Transform position from object to world space */ -ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) +ccl_device_inline void object_position_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&sd->ob_tfm, *P); + *P = transform_point_auto(&sd->ob_tfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *P = transform_point(&tfm, *P); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *P = transform_point(&tfm, *P); #endif } /* Transform position from world to object space */ -ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) +ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&sd->ob_itfm, *P); + *P = transform_point_auto(&sd->ob_itfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, *P); + Transform tfm = 
object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *P = transform_point(&tfm, *P); #endif } /* Transform normal from world to object space */ -ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) +ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *N) { #ifdef __OBJECT_MOTION__ - if((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { - *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N)); - } + if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { + *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N)); + } #else - if(sd->object != OBJECT_NONE) { - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } - else if(sd->type == PRIMITIVE_LAMP) { - Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } + if (sd->object != OBJECT_NONE) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } + else if (sd->type == PRIMITIVE_LAMP) { + Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } #endif } @@ -160,10 +172,10 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N)); + *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + 
*N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -172,22 +184,24 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&sd->ob_tfm, *D); + *D = transform_direction_auto(&sd->ob_tfm, *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *D = transform_direction(&tfm, *D); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *D = transform_direction(&tfm, *D); #endif } /* Transform direction vector from world to object space */ -ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) +ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&sd->ob_itfm, *D); + *D = transform_direction_auto(&sd->ob_itfm, *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *D = transform_direction(&tfm, *D); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *D = transform_direction(&tfm, *D); #endif } @@ -195,14 +209,14 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(sd->object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (sd->object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ - return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); + return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); + Transform tfm = object_fetch_transform(kg, sd->object, 
OBJECT_TRANSFORM); + return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } @@ -210,218 +224,211 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd ccl_device_inline float object_surface_area(KernelGlobals *kg, int object) { - return kernel_tex_fetch(__objects, object).surface_area; + return kernel_tex_fetch(__objects, object).surface_area; } /* Pass ID number of object */ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).pass_id; + return kernel_tex_fetch(__objects, object).pass_id; } /* Per lamp random number for shader variation */ ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp) { - if(lamp == LAMP_NONE) - return 0.0f; + if (lamp == LAMP_NONE) + return 0.0f; - return kernel_tex_fetch(__lights, lamp).random; + return kernel_tex_fetch(__lights, lamp).random; } /* Per object random number for shader variation */ ccl_device_inline float object_random_number(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).random_number; + return kernel_tex_fetch(__objects, object).random_number; } /* Particle ID from which this object was generated */ ccl_device_inline int object_particle_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).particle_index; + return kernel_tex_fetch(__objects, object).particle_index; } /* Generated texture coordinate on surface from where object was instanced */ ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); - const ccl_global KernelObject *kobject = 
&kernel_tex_fetch(__objects, object); - return make_float3(kobject->dupli_generated[0], - kobject->dupli_generated[1], - kobject->dupli_generated[2]); + const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3( + kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]); } /* UV texture coordinate on surface from where object was instanced */ ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); - const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); - return make_float3(kobject->dupli_uv[0], - kobject->dupli_uv[1], - 0.0f); + const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f); } /* Information about mesh for motion blurred triangles and curves */ -ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys) +ccl_device_inline void object_motion_info( + KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys) { - if(numkeys) { - *numkeys = kernel_tex_fetch(__objects, object).numkeys; - } + if (numkeys) { + *numkeys = kernel_tex_fetch(__objects, object).numkeys; + } - if(numsteps) - *numsteps = kernel_tex_fetch(__objects, object).numsteps; - if(numverts) - *numverts = kernel_tex_fetch(__objects, object).numverts; + if (numsteps) + *numsteps = kernel_tex_fetch(__objects, object).numsteps; + if (numverts) + *numverts = kernel_tex_fetch(__objects, object).numverts; } /* Offset to an objects patch map */ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).patch_map_offset; + return 
kernel_tex_fetch(__objects, object).patch_map_offset; } /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; + return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } /* Cryptomatte ID */ ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).cryptomatte_object; + return kernel_tex_fetch(__objects, object).cryptomatte_object; } ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).cryptomatte_asset; + return kernel_tex_fetch(__objects, object).cryptomatte_asset; } /* Particle data from which object was instanced */ ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).index; + return kernel_tex_fetch(__particles, particle).index; } ccl_device float particle_age(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).age; + return kernel_tex_fetch(__particles, particle).age; } ccl_device float particle_lifetime(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).lifetime; + return kernel_tex_fetch(__particles, particle).lifetime; } ccl_device float particle_size(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).size; + return kernel_tex_fetch(__particles, particle).size; } ccl_device float4 particle_rotation(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).rotation; + return kernel_tex_fetch(__particles, particle).rotation; } ccl_device float3 particle_location(KernelGlobals *kg, int particle) { - return 
float4_to_float3(kernel_tex_fetch(__particles, particle).location); + return float4_to_float3(kernel_tex_fetch(__particles, particle).location); } ccl_device float3 particle_velocity(KernelGlobals *kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); + return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); } ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); + return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); } /* Object intersection in BVH */ ccl_device_inline float3 bvh_clamp_direction(float3 dir) { - /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ + /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - const ssef oopes(8.271806E-25f,8.271806E-25f,8.271806E-25f,0.0f); - const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes); - const ssef signdir = signmsk(dir.m128) | oopes; + const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f); + const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes); + const ssef signdir = signmsk(dir.m128) | oopes; # ifndef __KERNEL_AVX__ - ssef res = mask & ssef(dir); - res = _mm_or_ps(res,_mm_andnot_ps(mask, signdir)); + ssef res = mask & ssef(dir); + res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir)); # else - ssef res = _mm_blendv_ps(signdir, dir, mask); + ssef res = _mm_blendv_ps(signdir, dir, mask); # endif - return float3(res); + return float3(res); #else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ - const float ooeps = 8.271806E-25f; - return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x), - (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y), - (fabsf(dir.z) > ooeps)? 
dir.z: copysignf(ooeps, dir.z)); -#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ + const float ooeps = 8.271806E-25f; + return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x), + (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y), + (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z)); +#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ } ccl_device_inline float3 bvh_inverse_direction(float3 dir) { - return rcp(dir); + return rcp(dir); } /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline float bvh_instance_push(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t) +ccl_device_inline float bvh_instance_push( + KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, ray->P); + *P = transform_point(&tfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(t != FLT_MAX) { - t *= len; - } + if (t != FLT_MAX) { + t *= len; + } - return t; + return t; } #ifdef __QBVH__ @@ -440,85 +447,85 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, float *t, float *t1) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, ray->P); + *P = transform_point(&tfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = 
bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if (*t != FLT_MAX) + *t *= len; - if(*t1 != -FLT_MAX) - *t1 *= len; + if (*t1 != -FLT_MAX) + *t1 *= len; } #endif /* Transorm ray to exit static object in BVH */ -ccl_device_inline float bvh_instance_pop(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t) +ccl_device_inline float bvh_instance_pop( + KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t) { - if(t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - t /= len(transform_direction(&tfm, ray->D)); - } + if (t != FLT_MAX) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + t /= len(transform_direction(&tfm, ray->D)); + } - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); - return t; + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ -ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac) +ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float *t_fac) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); } - #ifdef 
__OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t, - Transform *itfm) + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float t, + Transform *itfm) { - object_fetch_transform_motion_test(kg, object, ray->time, itfm); + object_fetch_transform_motion_test(kg, object, ray->time, itfm); - *P = transform_point(itfm, ray->P); + *P = transform_point(itfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(t != FLT_MAX) { - t *= len; - } + if (t != FLT_MAX) { + t *= len; + } - return t; + return t; } -#ifdef __QBVH__ +# ifdef __QBVH__ /* Same as above, but optimized for QBVH scene intersection, * which needs to modify two max distances. 
* @@ -535,21 +542,21 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, float *t1, Transform *itfm) { - object_fetch_transform_motion_test(kg, object, ray->time, itfm); + object_fetch_transform_motion_test(kg, object, ray->time, itfm); - *P = transform_point(itfm, ray->P); + *P = transform_point(itfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if (*t != FLT_MAX) + *t *= len; - if(*t1 != -FLT_MAX) - *t1 *= len; + if (*t1 != -FLT_MAX) + *t1 *= len; } -#endif +# endif /* Transorm ray to exit motion blurred object in BVH */ @@ -562,15 +569,15 @@ ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg, float t, Transform *itfm) { - if(t != FLT_MAX) { - t /= len(transform_direction(itfm, ray->D)); - } + if (t != FLT_MAX) { + t /= len(transform_direction(itfm, ray->D)); + } - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); - return t; + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ @@ -584,10 +591,10 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, float *t_fac, Transform *itfm) { - *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); } #endif @@ -599,30 +606,30 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, #ifdef __KERNEL_OPENCL__ ccl_device_inline void 
object_position_transform_addrspace(KernelGlobals *kg, - const ShaderData *sd, - ccl_addr_space float3 *P) + const ShaderData *sd, + ccl_addr_space float3 *P) { - float3 private_P = *P; - object_position_transform(kg, sd, &private_P); - *P = private_P; + float3 private_P = *P; + object_position_transform(kg, sd, &private_P); + *P = private_P; } ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg, const ShaderData *sd, ccl_addr_space float3 *D) { - float3 private_D = *D; - object_dir_transform(kg, sd, &private_D); - *D = private_D; + float3 private_D = *D; + object_dir_transform(kg, sd, &private_D); + *D = private_D; } ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg, const ShaderData *sd, ccl_addr_space float3 *N) { - float3 private_N = *N; - object_normal_transform(kg, sd, &private_N); - *N = private_N; + float3 private_N = *N; + object_normal_transform(kg, sd, &private_N); + *N = private_N; } #endif diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h index edb82172959..df19199f68e 100644 --- a/intern/cycles/kernel/geom/geom_patch.h +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -27,342 +27,394 @@ CCL_NAMESPACE_BEGIN typedef struct PatchHandle { - int array_index, patch_index, vert_index; + int array_index, patch_index, vert_index; } PatchHandle; ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v) { - int quadrant = -1; - - if(*u < median) { - if(*v < median) { - quadrant = 0; - } - else { - quadrant = 1; - *v -= median; - } - } - else { - if(*v < median) { - quadrant = 3; - } - else { - quadrant = 2; - *v -= median; - } - *u -= median; - } - - return quadrant; + int quadrant = -1; + + if (*u < median) { + if (*v < median) { + quadrant = 0; + } + else { + quadrant = 1; + *v -= median; + } + } + else { + if (*v < median) { + quadrant = 3; + } + else { + quadrant = 2; + *v -= median; + } + *u -= median; + } + + return quadrant; } /* retrieve 
PatchHandle from patch coords */ -ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) +ccl_device_inline PatchHandle +patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) { - PatchHandle handle; + PatchHandle handle; - kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); + kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); - int node = (object_patch_map_offset(kg, object) + patch)/2; - float median = 0.5f; + int node = (object_patch_map_offset(kg, object) + patch) / 2; + float median = 0.5f; - for(int depth = 0; depth < 0xff; depth++) { - float delta = median * 0.5f; + for (int depth = 0; depth < 0xff; depth++) { + float delta = median * 0.5f; - int quadrant = patch_map_resolve_quadrant(median, &u, &v); - kernel_assert(quadrant >= 0); + int quadrant = patch_map_resolve_quadrant(median, &u, &v); + kernel_assert(quadrant >= 0); - uint child = kernel_tex_fetch(__patches, node + quadrant); + uint child = kernel_tex_fetch(__patches, node + quadrant); - /* is the quadrant a hole? */ - if(!(child & PATCH_MAP_NODE_IS_SET)) { - handle.array_index = -1; - return handle; - } + /* is the quadrant a hole? 
*/ + if (!(child & PATCH_MAP_NODE_IS_SET)) { + handle.array_index = -1; + return handle; + } - uint index = child & PATCH_MAP_NODE_INDEX_MASK; + uint index = child & PATCH_MAP_NODE_INDEX_MASK; - if(child & PATCH_MAP_NODE_IS_LEAF) { - handle.array_index = kernel_tex_fetch(__patches, index + 0); - handle.patch_index = kernel_tex_fetch(__patches, index + 1); - handle.vert_index = kernel_tex_fetch(__patches, index + 2); + if (child & PATCH_MAP_NODE_IS_LEAF) { + handle.array_index = kernel_tex_fetch(__patches, index + 0); + handle.patch_index = kernel_tex_fetch(__patches, index + 1); + handle.vert_index = kernel_tex_fetch(__patches, index + 2); - return handle; - } else { - node = index; - } + return handle; + } + else { + node = index; + } - median = delta; - } + median = delta; + } - /* no leaf found */ - kernel_assert(0); + /* no leaf found */ + kernel_assert(0); - handle.array_index = -1; - return handle; + handle.array_index = -1; + return handle; } ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv) { - /* The four uniform cubic B-Spline basis functions evaluated at t */ - float inv_6 = 1.0f / 6.0f; - - float t2 = t * t; - float t3 = t * t2; - - point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3); - point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3); - point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3)); - point[3] = inv_6 * t3; - - /* Derivatives of the above four basis functions at t */ - deriv[0] = -0.5f*t2 + t - 0.5f; - deriv[1] = 1.5f*t2 - 2.0f*t; - deriv[2] = -1.5f*t2 + t + 0.5f; - deriv[3] = 0.5f*t2; + /* The four uniform cubic B-Spline basis functions evaluated at t */ + float inv_6 = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3); + point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3); + point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3)); + point[3] = inv_6 * t3; + + /* Derivatives of the above four basis functions at t */ + deriv[0] = -0.5f * t2 + t - 0.5f; + deriv[1] = 1.5f * 
t2 - 2.0f * t; + deriv[2] = -1.5f * t2 + t + 0.5f; + deriv[3] = 0.5f * t2; } ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t) { - int boundary = ((bits >> 8) & 0xf); - - if(boundary & 1) { - t[2] -= t[0]; - t[1] += 2*t[0]; - t[0] = 0; - } - - if(boundary & 2) { - s[1] -= s[3]; - s[2] += 2*s[3]; - s[3] = 0; - } - - if(boundary & 4) { - t[1] -= t[3]; - t[2] += 2*t[3]; - t[3] = 0; - } - - if(boundary & 8) { - s[2] -= s[0]; - s[1] += 2*s[0]; - s[0] = 0; - } + int boundary = ((bits >> 8) & 0xf); + + if (boundary & 1) { + t[2] -= t[0]; + t[1] += 2 * t[0]; + t[0] = 0; + } + + if (boundary & 2) { + s[1] -= s[3]; + s[2] += 2 * s[3]; + s[3] = 0; + } + + if (boundary & 4) { + t[1] -= t[3]; + t[2] += 2 * t[3]; + t[3] = 0; + } + + if (boundary & 8) { + s[2] -= s[0]; + s[1] += 2 * s[0]; + s[0] = 0; + } } ccl_device_inline int patch_eval_depth(uint patch_bits) { - return (patch_bits & 0xf); + return (patch_bits & 0xf); } ccl_device_inline float patch_eval_param_fraction(uint patch_bits) { - bool non_quad_root = (patch_bits >> 4) & 0x1; - int depth = patch_eval_depth(patch_bits); - - if(non_quad_root) { - return 1.0f / (float)(1 << (depth-1)); - } - else { - return 1.0f / (float)(1 << depth); - } + bool non_quad_root = (patch_bits >> 4) & 0x1; + int depth = patch_eval_depth(patch_bits); + + if (non_quad_root) { + return 1.0f / (float)(1 << (depth - 1)); + } + else { + return 1.0f / (float)(1 << depth); + } } ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v) { - float frac = patch_eval_param_fraction(patch_bits); + float frac = patch_eval_param_fraction(patch_bits); - int iu = (patch_bits >> 22) & 0x3ff; - int iv = (patch_bits >> 12) & 0x3ff; + int iu = (patch_bits >> 22) & 0x3ff; + int iv = (patch_bits >> 12) & 0x3ff; - /* top left corner */ - float pu = (float)iu*frac; - float pv = (float)iv*frac; + /* top left corner */ + float pu = (float)iu * frac; + float pv = (float)iv * frac; - /* normalize uv 
coordinates */ - *u = (*u - pu) / frac; - *v = (*v - pv) / frac; + /* normalize uv coordinates */ + *u = (*u - pu) / frac; + *v = (*v - pv) / frac; } /* retrieve patch control indices */ -ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel, +ccl_device_inline int patch_eval_indices(KernelGlobals *kg, + const PatchHandle *handle, + int channel, int indices[PATCH_MAX_CONTROL_VERTS]) { - int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; - /* XXX: regular patches only */ - for(int i = 0; i < 16; i++) { - indices[i] = kernel_tex_fetch(__patches, index_base + i); - } + /* XXX: regular patches only */ + for (int i = 0; i < 16; i++) { + indices[i] = kernel_tex_fetch(__patches, index_base + i); + } - return 16; + return 16; } /* evaluate patch basis functions */ -ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v, - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) +ccl_device_inline void patch_eval_basis(KernelGlobals *kg, + const PatchHandle *handle, + float u, + float v, + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) { - uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ - float d_scale = 1 << patch_eval_depth(patch_bits); + uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + float d_scale = 1 << patch_eval_depth(patch_bits); - bool non_quad_root = (patch_bits >> 4) & 0x1; - if(non_quad_root) { - d_scale *= 0.5f; - } + bool non_quad_root = (patch_bits >> 4) & 0x1; + if (non_quad_root) { + d_scale *= 0.5f; + } - patch_eval_normalize_coords(patch_bits, &u, &v); + 
patch_eval_normalize_coords(patch_bits, &u, &v); - /* XXX: regular patches only for now. */ + /* XXX: regular patches only for now. */ - float s[4], t[4], ds[4], dt[4]; + float s[4], t[4], ds[4], dt[4]; - patch_eval_bspline_weights(u, s, ds); - patch_eval_bspline_weights(v, t, dt); + patch_eval_bspline_weights(u, s, ds); + patch_eval_bspline_weights(v, t, dt); - patch_eval_adjust_boundary_weights(patch_bits, s, t); - patch_eval_adjust_boundary_weights(patch_bits, ds, dt); + patch_eval_adjust_boundary_weights(patch_bits, s, t); + patch_eval_adjust_boundary_weights(patch_bits, ds, dt); - for(int k = 0; k < 4; k++) { - for(int l = 0; l < 4; l++) { - weights[4*k+l] = s[l] * t[k]; - weights_du[4*k+l] = ds[l] * t[k] * d_scale; - weights_dv[4*k+l] = s[l] * dt[k] * d_scale; - } - } + for (int k = 0; k < 4; k++) { + for (int l = 0; l < 4; l++) { + weights[4 * k + l] = s[l] * t[k]; + weights_du[4 * k + l] = ds[l] * t[k] * d_scale; + weights_dv[4 * k + l] = s[l] * dt[k] * d_scale; + } + } } /* generic function for evaluating indices and weights from patch coords */ -ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel, - int indices[PATCH_MAX_CONTROL_VERTS], - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) +ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, + int object, + int patch, + float u, + float v, + int channel, + int indices[PATCH_MAX_CONTROL_VERTS], + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) { - PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); - kernel_assert(handle.array_index >= 0); + PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); + kernel_assert(handle.array_index >= 0); - int num_control = patch_eval_indices(kg, &handle, channel, indices); - patch_eval_basis(kg, &handle, u, 
v, weights, weights_du, weights_dv); + int num_control = patch_eval_indices(kg, &handle, channel, indices); + patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); - return num_control; + return num_control; } /* functions for evaluating attributes on patches */ -ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float *du, float* dv) +ccl_device float patch_eval_float(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float *du, + float *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float val = 0.0f; - if(du) *du = 0.0f; - if(dv) *dv = 0.0f; - - for(int i = 0; i < num_control; i++) { - float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float val = 0.0f; + if (du) + *du = 0.0f; + if (dv) + *dv = 0.0f; + + for (int i = 0; i < num_control; i++) { + float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float2 patch_eval_float2(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float2 *du, float2 *dv) +ccl_device float2 
patch_eval_float2(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float2 *du, + float2 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float2 val = make_float2(0.0f, 0.0f); - if(du) *du = make_float2(0.0f, 0.0f); - if(dv) *dv = make_float2(0.0f, 0.0f); - - for(int i = 0; i < num_control; i++) { - float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float2 val = make_float2(0.0f, 0.0f); + if (du) + *du = make_float2(0.0f, 0.0f); + if (dv) + *dv = make_float2(0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float3 *du, float3 *dv) +ccl_device float3 patch_eval_float3(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float3 *du, + float3 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int 
num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float3 val = make_float3(0.0f, 0.0f, 0.0f); - if(du) *du = make_float3(0.0f, 0.0f, 0.0f); - if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); - - for(int i = 0; i < num_control; i++) { - float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if (du) + *du = make_float3(0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float3 *du, float3 *dv) +ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float3 *du, + float3 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float3 val = make_float3(0.0f, 0.0f, 0.0f); - if(du) *du = make_float3(0.0f, 0.0f, 0.0f); - if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); - 
- for(int i = 0; i < num_control; i++) { - float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if (du) + *du = make_float3(0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 95d9d1050fb..7f2b52a24c4 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -22,57 +22,59 @@ CCL_NAMESPACE_BEGIN /* Generic primitive attribute reading functions */ -ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float *dx, float *dy) +ccl_device_inline float primitive_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return 
subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return volume_attribute_float(kg, sd, desc); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return volume_attribute_float(kg, sd, desc); + } #endif - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return 0.0f; - } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return 0.0f; + } } -ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float *dx, float *dy) +ccl_device_inline float primitive_surface_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return 0.0f; - } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return 0.0f; + } } #ifdef __VOLUME__ @@ -80,120 +82,136 @@ ccl_device_inline float 
primitive_volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { - if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, desc); - } - else { - return 0.0f; - } + if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, desc); + } + else { + return 0.0f; + } } #endif ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float2 *dx, float2 *dy) + float2 *dx, + float2 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float2(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float2(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - kernel_assert(0); - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + kernel_assert(0); + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } #endif - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } } 
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float3 *dx, float3 *dy) + float3 *dx, + float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return volume_attribute_float3(kg, sd, desc); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return volume_attribute_float3(kg, sd, desc); + } #endif - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } } ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float2 *dx, float2 *dy) + float2 *dx, + float2 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, 
desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float2(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float2(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } } ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float3 *dx, float3 *dy) + float3 *dx, + float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } } #ifdef __VOLUME__ @@ -201,12 +219,12 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, const 
ShaderData *sd, const AttributeDescriptor desc) { - if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, desc); - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } + if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, desc); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } } #endif @@ -214,33 +232,33 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); - if(desc.offset == ATTR_STD_NOT_FOUND) - return make_float3(0.0f, 0.0f, 0.0f); + if (desc.offset == ATTR_STD_NOT_FOUND) + return make_float3(0.0f, 0.0f, 0.0f); - float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - return make_float3(uv.x, uv.y, 1.0f); + float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + return make_float3(uv.x, uv.y, 1.0f); } /* Ptex coordinates */ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id) { - /* storing ptex data as attributes is not memory efficient but simple for tests */ - const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); - const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); + /* storing ptex data as attributes is not memory efficient but simple for tests */ + const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); + const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); - if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) - return false; + if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) + return false; - float3 uv3 = 
primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); - float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); + float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); - *uv = make_float2(uv3.x, uv3.y); - *face_id = (int)face_id_f; + *uv = make_float2(uv3.x, uv3.y); + *face_id = (int)face_id_f; - return true; + return true; } /* Surface tangent */ @@ -248,125 +266,125 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) + if (sd->type & PRIMITIVE_ALL_CURVE) # ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(sd->dPdu); # else - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); # endif #endif - /* try to create spherical tangent from generated coordinates */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); - - if(desc.offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); - object_normal_transform(kg, sd, &data); - return cross(sd->N, normalize(cross(data, sd->N))); - } - else { - /* otherwise use surface derivatives */ + /* try to create spherical tangent from generated coordinates */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); + object_normal_transform(kg, sd, &data); + return cross(sd->N, normalize(cross(data, sd->N))); + } + else { + /* otherwise use surface derivatives */ #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(sd->dPdu); #else 
- return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); #endif - } + } } /* Motion vector for motion pass */ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { - /* center position */ - float3 center; + /* center position */ + float3 center; #ifdef __HAIR__ - bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; - if(is_curve_primitive) { - center = curve_motion_center_location(kg, sd); - - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &center); - } - } - else + bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; + if (is_curve_primitive) { + center = curve_motion_center_location(kg, sd); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &center); + } + } + else #endif - center = sd->P; + center = sd->P; - float3 motion_pre = center, motion_post = center; + float3 motion_pre = center, motion_post = center; - /* deformation motion */ - AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); + /* deformation motion */ + AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); - if(desc.offset != ATTR_STD_NOT_FOUND) { - /* get motion info */ - int numverts, numkeys; - object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); + if (desc.offset != ATTR_STD_NOT_FOUND) { + /* get motion info */ + int numverts, numkeys; + object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); - /* lookup attributes */ - motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + /* lookup attributes */ + motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; - motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? 
numverts : numkeys; + motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ - if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { - object_position_transform(kg, sd, &motion_pre); - object_position_transform(kg, sd, &motion_post); - } + if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } #endif - } - - /* object motion. note that depending on the mesh having motion vectors, this - * transformation was set match the world/object space of motion_pre/post */ - Transform tfm; - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); - motion_pre = transform_point(&tfm, motion_pre); - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); - motion_post = transform_point(&tfm, motion_post); - - float3 motion_center; - - /* camera motion, for perspective/orthographic motion.pre/post will be a - * world-to-raster matrix, for panorama it's world-to-camera */ - if(kernel_data.cam.type != CAMERA_PANORAMA) { - ProjectionTransform projection = kernel_data.cam.worldtoraster; - motion_center = transform_perspective(&projection, center); - - projection = kernel_data.cam.perspective_pre; - motion_pre = transform_perspective(&projection, motion_pre); - - projection = kernel_data.cam.perspective_post; - motion_post = transform_perspective(&projection, motion_post); - } - else { - tfm = kernel_data.cam.worldtocamera; - motion_center = normalize(transform_point(&tfm, center)); - motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); - motion_center.x *= kernel_data.cam.width; - motion_center.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_pre; - motion_pre = normalize(transform_point(&tfm, motion_pre)); - motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, 
motion_pre)); - motion_pre.x *= kernel_data.cam.width; - motion_pre.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_post; - motion_post = normalize(transform_point(&tfm, motion_post)); - motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); - motion_post.x *= kernel_data.cam.width; - motion_post.y *= kernel_data.cam.height; - } - - motion_pre = motion_pre - motion_center; - motion_post = motion_center - motion_post; - - return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); + } + + /* object motion. note that depending on the mesh having motion vectors, this + * transformation was set match the world/object space of motion_pre/post */ + Transform tfm; + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); + motion_pre = transform_point(&tfm, motion_pre); + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); + motion_post = transform_point(&tfm, motion_post); + + float3 motion_center; + + /* camera motion, for perspective/orthographic motion.pre/post will be a + * world-to-raster matrix, for panorama it's world-to-camera */ + if (kernel_data.cam.type != CAMERA_PANORAMA) { + ProjectionTransform projection = kernel_data.cam.worldtoraster; + motion_center = transform_perspective(&projection, center); + + projection = kernel_data.cam.perspective_pre; + motion_pre = transform_perspective(&projection, motion_pre); + + projection = kernel_data.cam.perspective_post; + motion_post = transform_perspective(&projection, motion_post); + } + else { + tfm = kernel_data.cam.worldtocamera; + motion_center = normalize(transform_point(&tfm, center)); + motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); + motion_center.x *= kernel_data.cam.width; + motion_center.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_pre; + motion_pre = normalize(transform_point(&tfm, motion_pre)); + motion_pre = 
float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre)); + motion_pre.x *= kernel_data.cam.width; + motion_pre.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_post; + motion_post = normalize(transform_point(&tfm, motion_post)); + motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); + motion_post.x *= kernel_data.cam.width; + motion_post.y *= kernel_data.cam.height; + } + + motion_pre = motion_pre - motion_center; + motion_post = motion_center - motion_post; + + return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index 251e070c21f..8d5b3c12833 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -22,455 +22,492 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd) { - return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0; + return (sd->prim != PRIM_NONE) ? 
kernel_tex_fetch(__tri_patch, sd->prim) : ~0; } /* UV coords of triangle within patch */ -ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3]) +ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, + const ShaderData *sd, + float2 uv[3]) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); - uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); - uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); + uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); + uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); + uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); } /* Vertex indices of patch */ ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals *kg, int patch) { - uint4 indices; + uint4 indices; - indices.x = kernel_tex_fetch(__patches, patch+0); - indices.y = kernel_tex_fetch(__patches, patch+1); - indices.z = kernel_tex_fetch(__patches, patch+2); - indices.w = kernel_tex_fetch(__patches, patch+3); + indices.x = kernel_tex_fetch(__patches, patch + 0); + indices.y = kernel_tex_fetch(__patches, patch + 1); + indices.z = kernel_tex_fetch(__patches, patch + 2); + indices.w = kernel_tex_fetch(__patches, patch + 3); - return indices; + return indices; } /* Originating face for patch */ ccl_device_inline uint subd_triangle_patch_face(KernelGlobals *kg, int patch) { - return kernel_tex_fetch(__patches, patch+4); + return kernel_tex_fetch(__patches, patch + 4); } /* Number of corners on originating face */ ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals *kg, int patch) { - return kernel_tex_fetch(__patches, patch+5) & 0xffff; + return kernel_tex_fetch(__patches, patch + 5) & 0xffff; } /* Indices of the four corners that are used by the patch */ ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, int 
corners[4]) { - uint4 data; - - data.x = kernel_tex_fetch(__patches, patch+4); - data.y = kernel_tex_fetch(__patches, patch+5); - data.z = kernel_tex_fetch(__patches, patch+6); - data.w = kernel_tex_fetch(__patches, patch+7); - - int num_corners = data.y & 0xffff; - - if(num_corners == 4) { - /* quad */ - corners[0] = data.z; - corners[1] = data.z+1; - corners[2] = data.z+2; - corners[3] = data.z+3; - } - else { - /* ngon */ - int c = data.y >> 16; - - corners[0] = data.z + c; - corners[1] = data.z + mod(c+1, num_corners); - corners[2] = data.w; - corners[3] = data.z + mod(c-1, num_corners); - } + uint4 data; + + data.x = kernel_tex_fetch(__patches, patch + 4); + data.y = kernel_tex_fetch(__patches, patch + 5); + data.z = kernel_tex_fetch(__patches, patch + 6); + data.w = kernel_tex_fetch(__patches, patch + 7); + + int num_corners = data.y & 0xffff; + + if (num_corners == 4) { + /* quad */ + corners[0] = data.z; + corners[1] = data.z + 1; + corners[2] = data.z + 2; + corners[3] = data.z + 3; + } + else { + /* ngon */ + int c = data.y >> 16; + + corners[0] = data.z + c; + corners[1] = data.z + mod(c + 1, num_corners); + corners[2] = data.w; + corners[3] = data.z + mod(c - 1, num_corners); + } } /* Reading attributes on various subdivision triangle elements */ -ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device_noinline float subd_triangle_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float a, dads, dadt; - a = patch_eval_float(kg, sd, desc.offset, patch, 
p.x, p.y, 0, &dads, &dadt); + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float a, dads, dadt; + a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); + float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx || 
dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); - return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); - uint4 v = subd_triangle_patch_indices(kg, patch); + float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); + float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); + float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); + float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); - float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + 
} - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); - float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); - float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); - float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + 
sd->dv.dy)*c; -#endif - - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - - return 0.0f; - } + return 0.0f; + } } -ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float2 a, dads, dadt; - - a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } -#endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float2, desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float2 f0 = kernel_tex_fetch(__attributes_float2, 
desc.offset + v.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); - float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float2 a, dads, dadt; + + a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, + desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x); 
+ float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); + float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); + float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); - float2 f0, f1, f2, f3; + float2 f0, f1, f2, f3; - f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); + f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); - 
if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + return make_float2(0.0f, 0.0f); + } } -ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * 
sd->v + uv[2]; - - float3 a, dads, dadt; - - if(desc.element == ATTR_ELEMENT_CORNER_BYTE) { - a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - else { - a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - -#ifdef __RAY_DIFFERENTIALS__ - if(dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } -#endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); - float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + if (desc.flags & 
ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float3 a, dads, dadt; + + if (desc.element == ATTR_ELEMENT_CORNER_BYTE) { + a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + else { + a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); + float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; 
+ } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float3 f0, f1, f2, f3; - - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); - f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); - } - else { - f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); - f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); - f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); - f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); - } - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + return sd->u * a + sd->v * 
b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float3 f0, f1, f2, f3; + + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); + f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); + } + else { + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); + f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); + } + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = 
make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + return make_float3(0.0f, 0.0f, 0.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 300227c38e6..9938c0ba2c3 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -25,227 +25,268 @@ CCL_NAMESPACE_BEGIN /* normal on triangle */ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - - /* return normal */ - if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - return normalize(cross(v2 - v0, v1 - v0)); - } - else { - return normalize(cross(v1 - v0, v2 - v0)); - } + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + + /* return normal */ + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + return normalize(cross(v2 - v0, v1 - v0)); + } + else { + return normalize(cross(v1 - v0, v2 - v0)); + } } /* point and normal on triangle */ -ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) +ccl_device_inline void triangle_point_normal( + KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 
*Ng, int *shader) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - /* compute point */ - float t = 1.0f - u - v; - *P = (u*v0 + v*v1 + t*v2); - /* get object flags */ - int object_flag = kernel_tex_fetch(__object_flag, object); - /* compute normal */ - if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - *Ng = normalize(cross(v2 - v0, v1 - v0)); - } - else { - *Ng = normalize(cross(v1 - v0, v2 - v0)); - } - /* shader`*/ - *shader = kernel_tex_fetch(__tri_shader, prim); + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + /* compute point */ + float t = 1.0f - u - v; + *P = (u * v0 + v * v1 + t * v2); + /* get object flags */ + int object_flag = kernel_tex_fetch(__object_flag, object); + /* compute normal */ + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + *Ng = normalize(cross(v2 - v0, v1 - v0)); + } + else { + *Ng = normalize(cross(v1 - v0, v2 - v0)); + } + /* shader`*/ + *shader = kernel_tex_fetch(__tri_shader, prim); } /* Triangle vertex locations */ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); + const uint4 tri_vindex = 
kernel_tex_fetch(__tri_vindex, prim); + P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); } /* Interpolate smooth vertex normal from vertices */ -ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v) +ccl_device_inline float3 +triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - float3 N = safe_normalize((1.0f - u - v)*n2 + u*n0 + v*n1); + float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); - return is_zero(N)? Ng: N; + return is_zero(N) ? 
Ng : N; } /* Ray differentials on triangle */ -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv) +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, + int prim, + ccl_addr_space float3 *dPdu, + ccl_addr_space float3 *dPdv) { - /* fetch triangle vertex coordinates */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - - /* compute derivatives of P w.r.t. uv */ - *dPdu = (p0 - p2); - *dPdv = (p1 - p2); + /* fetch triangle vertex coordinates */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + + /* compute derivatives of P w.r.t. 
uv */ + *dPdu = (p0 - p2); + *dPdv = (p1 - p2); } /* Reading attributes on various triangle elements */ -ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device float triangle_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - int tri = desc.offset + sd->prim*3; - 
float f0 = kernel_tex_fetch(__attributes_float, tri + 0); - float f1 = kernel_tex_fetch(__attributes_float, tri + 1); - float f2 = kernel_tex_fetch(__attributes_float, tri + 2); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + sd->prim * 3; + float f0 = kernel_tex_fetch(__attributes_float, tri + 0); + float f1 = kernel_tex_fetch(__attributes_float, tri + 1); + float f2 = kernel_tex_fetch(__attributes_float, tri + 2); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - return 0.0f; - } + return 0.0f; + } } -ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); + } + 
else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); + float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - int tri = desc.offset + sd->prim*3; - float2 f0, f1, f2; + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + sd->prim * 3; + float2 f0, f1, f2; - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = kernel_tex_fetch(__attributes_float2, tri + 0); - f1 = kernel_tex_fetch(__attributes_float2, tri + 1); - f2 = kernel_tex_fetch(__attributes_float2, tri + 2); - } + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = kernel_tex_fetch(__attributes_float2, tri + 0); + f1 = kernel_tex_fetch(__attributes_float2, tri + 1); + f2 = kernel_tex_fetch(__attributes_float2, tri + 2); + } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - 
(sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + return make_float2(0.0f, 0.0f); + } } -ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + + float3 f0 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); + 
float3 f1 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); + float3 f2 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - int tri = desc.offset + sd->prim*3; - float3 f0, f1, f2; - - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); - } - else { - f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0)); - f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1)); - f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2)); - } + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + int tri = desc.offset + sd->prim * 3; + float3 f0, f1, f2; + + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); + } + else { + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 
2)); + } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + return make_float3(0.0f, 0.0f, 0.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 56dbc4473fa..bcad03102d2 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -1,4 +1,4 @@ - /* +/* * Copyright 2014, Blender Foundation. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,447 +30,464 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, int object, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); #endif - float t, u, v; - if(ray_triangle_intersect(P, - dir, - isect->t, + float t, u, v; + if (ray_triangle_intersect(P, + dir, + isect->t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, + ssef_verts, #else - float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c), + float4_to_float3(tri_a), + float4_to_float3(tri_b), + float4_to_float3(tri_c), #endif - &u, &v, &t)) - { + &u, + &v, + &t)) { #ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. 
+ */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif - { - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - return false; + { + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + return true; + } + } + return false; } #ifdef __KERNEL_AVX2__ -#define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D)) -ccl_device_inline int ray_triangle_intersect8( - KernelGlobals *kg, - float3 ray_P, - float3 ray_dir, - Intersection **isect, - uint visibility, - int object, - __m256 *triA, - __m256 *triB, - __m256 *triC, - int prim_addr, - int prim_num, - uint *num_hits, - uint max_hits, - int *num_hits_in_instance, - float isect_t) +# define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D)) +ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg, + float3 ray_P, + float3 ray_dir, + Intersection **isect, + uint visibility, + int object, + __m256 *triA, + __m256 *triB, + __m256 *triC, + int prim_addr, + int prim_num, + uint *num_hits, + uint max_hits, + int *num_hits_in_instance, + float isect_t) { - const unsigned char prim_num_mask = (1 << prim_num) - 1; - - const __m256i zero256 = _mm256_setzero_si256(); - - const __m256 Px256 = _mm256_set1_ps(ray_P.x); - const __m256 Py256 = _mm256_set1_ps(ray_P.y); - const __m256 Pz256 = _mm256_set1_ps(ray_P.z); - - const __m256 dirx256 = _mm256_set1_ps(ray_dir.x); - const __m256 diry256 = _mm256_set1_ps(ray_dir.y); - const __m256 dirz256 = _mm256_set1_ps(ray_dir.z); - - /* Calculate vertices relative to ray origin. 
*/ - __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256); - __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256); - __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256); - - __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256); - __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256); - __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256); - - __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256); - __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256); - __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256); - - __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256); - __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256); - __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256); - - __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256); - __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256); - __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256); - - __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256); - __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256); - __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256); - - /* Calculate triangle edges. */ - __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256); - __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256); - __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256); - - __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256); - __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256); - __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256); - - __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256); - __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256); - __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256); - - /* Perform edge tests. 
*/ - /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */ - __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256); - __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256); - __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256); - /* vertical dot */ - __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256); - U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); - U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); - - __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256); - __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256); - __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256); - /* vertical dot */ - __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256); - V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256); - V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256); - - __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256); - __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256); - __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256); - /* vertical dot */ - __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256); - W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256); - W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256); - - __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31); - __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31); - __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31); - __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1); - - const __m256i one256 = _mm256_set1_epi32(1); - const __m256i two256 = _mm256_set1_epi32(2); - - __m256i mask_minmaxUVW_256 = _mm256_or_si256( - _mm256_cmpeq_epi32(one256, UVW_256_1), - _mm256_cmpeq_epi32(two256, UVW_256_1)); - - unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); - if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set - return false; - } - - /* 
Calculate geometry normal and denominator. */ - __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256); - __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256); - __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256); - - Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256); - Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256); - Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256); - - /* vertical dot */ - __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256); - den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256); - den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256); - - /* Perform depth test. */ - __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256); - T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256); - T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256); - - const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000); - __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000); - - __m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256)); - - unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256); - if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) { - return false; - } - - __m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)); - - ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8]; - ccl_align(32) unsigned int mask_minmaxUVW8[8]; - - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256); - __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256); - __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256)); - __m256 rayt_256 = _mm256_set1_ps((*isect)->t); - __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256), - _mm256_castps_si256( - _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256) - ) - 
); - mask0 = _mm256_or_si256(mask1, mask0); - mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) - mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden) - unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256)); - if((mask_final & prim_num_mask) == 0) { - return false; - } - const int i = __bsf(mask_final); - __m256 inv_den_256 = _mm256_rcp_ps(den_256); - U_256 = _mm256_mul_ps(U_256, inv_den_256); - V_256 = _mm256_mul_ps(V_256, inv_den_256); - T_256 = _mm256_mul_ps(T_256, inv_den_256); - _mm256_store_ps(U8, U_256); - _mm256_store_ps(V8, V_256); - _mm256_store_ps(T8, T_256); - /* NOTE: Here we assume visibility for all triangles in the node is - * the same. */ - (*isect)->u = U8[i]; - (*isect)->v = V8[i]; - (*isect)->t = T8[i]; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - return true; - } - else { - _mm256_store_ps(den8, den_256); - _mm256_store_ps(U8, U_256); - _mm256_store_ps(V8, V_256); - _mm256_store_ps(T8, T_256); - - _mm256_store_ps(sign_T8, sign_T_256); - _mm256_store_ps(xor_signmask8, xor_signmask_256); - _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256); - - int ret = false; - - if(visibility == PATH_RAY_SHADOW) { - for(int i = 0; i < prim_num; i++) { - if(mask_minmaxUVW8[i]) { - continue; - } -#ifdef __VISIBILITY_FLAG__ - if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { - continue; - } -#endif - if((sign_T8[i] < 0.0f) || - (sign_T8[i] > (*isect)->t * xor_signmask8[i])) - { - continue; - } - if(!den8[i]) { - continue; - } - const float inv_den = 1.0f / den8[i]; - (*isect)->u = U8[i] * inv_den; - (*isect)->v = V8[i] * inv_den; - (*isect)->t = T8[i] * inv_den; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim); 
- int shader = 0; -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - /* If no transparent shadows, all light is blocked. */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return 2; - } - /* If maximum number of hits reached, block all light. */ - else if(num_hits == NULL || *num_hits == max_hits) { - return 2; - } - /* Move on to next entry in intersections array. */ - ret = true; - (*isect)++; - (*num_hits)++; - (*num_hits_in_instance)++; - (*isect)->t = isect_t; - } - } - else { - for(int i = 0; i < prim_num; i++) { - if(mask_minmaxUVW8[i]) { - continue; - } -#ifdef __VISIBILITY_FLAG__ - if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { - continue; - } -#endif - if((sign_T8[i] < 0.0f) || - (sign_T8[i] > (*isect)->t * xor_signmask8[i])) - { - continue; - } - if(!den8[i]) { - continue; - } - const float inv_den = 1.0f / den8[i]; - (*isect)->u = U8[i] * inv_den; - (*isect)->v = V8[i] * inv_den; - (*isect)->t = T8[i] * inv_den; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - ret = true; - } - } - return ret; - } + const unsigned char prim_num_mask = (1 << prim_num) - 1; + + const __m256i zero256 = _mm256_setzero_si256(); + + const __m256 Px256 = _mm256_set1_ps(ray_P.x); + const __m256 Py256 = _mm256_set1_ps(ray_P.y); + const __m256 Pz256 = _mm256_set1_ps(ray_P.z); + + const __m256 dirx256 = _mm256_set1_ps(ray_dir.x); + const __m256 diry256 = _mm256_set1_ps(ray_dir.y); + const __m256 dirz256 = _mm256_set1_ps(ray_dir.z); + + /* Calculate vertices relative to ray origin. 
*/ + __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256); + __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256); + __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256); + + __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256); + __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256); + __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256); + + __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256); + __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256); + __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256); + + __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256); + __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256); + __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256); + + __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256); + __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256); + __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256); + + __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256); + __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256); + __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256); + + /* Calculate triangle edges. */ + __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256); + __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256); + __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256); + + __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256); + __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256); + __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256); + + __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256); + __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256); + __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256); + + /* Perform edge tests. 
*/ + /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */ + __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256); + __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256); + __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256); + /* vertical dot */ + __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256); + U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); + U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); + + __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256); + __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256); + __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256); + /* vertical dot */ + __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256); + V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256); + V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256); + + __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256); + __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256); + __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256); + /* vertical dot */ + __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256); + W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256); + W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256); + + __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31); + __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31); + __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31); + __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1); + + const __m256i one256 = _mm256_set1_epi32(1); + const __m256i two256 = _mm256_set1_epi32(2); + + __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1), + _mm256_cmpeq_epi32(two256, UVW_256_1)); + + unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); + if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set + return false; + } + + /* 
Calculate geometry normal and denominator. */ + __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256); + __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256); + __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256); + + Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256); + Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256); + Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256); + + /* vertical dot */ + __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256); + den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256); + den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256); + + /* Perform depth test. */ + __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256); + T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256); + T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256); + + const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000); + __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000); + + __m256 sign_T_256 = _mm256_castsi256_ps( + _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256)); + + unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256); + if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) { + return false; + } + + __m256 xor_signmask_256 = _mm256_castsi256_ps( + _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)); + + ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8]; + ccl_align(32) unsigned int mask_minmaxUVW8[8]; + + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256); + __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256); + __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256)); + __m256 rayt_256 = _mm256_set1_ps((*isect)->t); + __m256i mask1 = _mm256_cmpgt_epi32( + _mm256_castps_si256(sign_T_256), + _mm256_castps_si256(_mm256_mul_ps( + _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), 
+ rayt_256))); + mask0 = _mm256_or_si256(mask1, mask0); + mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) + mask_final_256 = _mm256_andnot_si256( + maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden) + unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256)); + if ((mask_final & prim_num_mask) == 0) { + return false; + } + const int i = __bsf(mask_final); + __m256 inv_den_256 = _mm256_rcp_ps(den_256); + U_256 = _mm256_mul_ps(U_256, inv_den_256); + V_256 = _mm256_mul_ps(V_256, inv_den_256); + T_256 = _mm256_mul_ps(T_256, inv_den_256); + _mm256_store_ps(U8, U_256); + _mm256_store_ps(V8, V_256); + _mm256_store_ps(T8, T_256); + /* NOTE: Here we assume visibility for all triangles in the node is + * the same. */ + (*isect)->u = U8[i]; + (*isect)->v = V8[i]; + (*isect)->t = T8[i]; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + return true; + } + else { + _mm256_store_ps(den8, den_256); + _mm256_store_ps(U8, U_256); + _mm256_store_ps(V8, V_256); + _mm256_store_ps(T8, T_256); + + _mm256_store_ps(sign_T8, sign_T_256); + _mm256_store_ps(xor_signmask8, xor_signmask_256); + _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256); + + int ret = false; + + if (visibility == PATH_RAY_SHADOW) { + for (int i = 0; i < prim_num; i++) { + if (mask_minmaxUVW8[i]) { + continue; + } +# ifdef __VISIBILITY_FLAG__ + if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { + continue; + } +# endif + if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) { + continue; + } + if (!den8[i]) { + continue; + } + const float inv_den = 1.0f / den8[i]; + (*isect)->u = U8[i] * inv_den; + (*isect)->v = V8[i] * inv_den; + (*isect)->t = T8[i] * inv_den; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + const int prim = 
kernel_tex_fetch(__prim_index, (*isect)->prim); + int shader = 0; +# ifdef __HAIR__ + if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE) +# endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +# ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +# endif + const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + /* If no transparent shadows, all light is blocked. */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return 2; + } + /* If maximum number of hits reached, block all light. */ + else if (num_hits == NULL || *num_hits == max_hits) { + return 2; + } + /* Move on to next entry in intersections array. */ + ret = true; + (*isect)++; + (*num_hits)++; + (*num_hits_in_instance)++; + (*isect)->t = isect_t; + } + } + else { + for (int i = 0; i < prim_num; i++) { + if (mask_minmaxUVW8[i]) { + continue; + } +# ifdef __VISIBILITY_FLAG__ + if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { + continue; + } +# endif + if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) { + continue; + } + if (!den8[i]) { + continue; + } + const float inv_den = 1.0f / den8[i]; + (*isect)->u = U8[i] * inv_den; + (*isect)->v = V8[i] * inv_den; + (*isect)->t = T8[i] * inv_den; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + ret = true; + } + } + return ret; + } } -ccl_device_inline int triangle_intersect8( - KernelGlobals *kg, - Intersection **isect, - float3 P, - float3 dir, - uint visibility, - int object, - int prim_addr, - int prim_num, - uint *num_hits, - uint max_hits, - int *num_hits_in_instance, - float isect_t) - { - __m128 tri_a[8], tri_b[8], tri_c[8]; - __m256 tritmp[12], tri[12]; - __m256 triA[3], triB[3], triC[3]; - - int i, r; - - uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); - for(i = 0; i < prim_num; i++) { - tri_a[i] = 
*(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - tri_b[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - tri_c[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - } - //create 9 or 12 placeholders - tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256 - tri[1] = _mm256_castps128_ps256(tri_b[0]);//_mm256_zextps128_ps256 - tri[2] = _mm256_castps128_ps256(tri_c[0]);//_mm256_zextps128_ps256 - - tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256 - tri[4] = _mm256_castps128_ps256(tri_b[1]);//_mm256_zextps128_ps256 - tri[5] = _mm256_castps128_ps256(tri_c[1]);//_mm256_zextps128_ps256 - - tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256 - tri[7] = _mm256_castps128_ps256(tri_b[2]);//_mm256_zextps128_ps256 - tri[8] = _mm256_castps128_ps256(tri_c[2]);//_mm256_zextps128_ps256 - - if(prim_num > 3) { - tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256 - tri[10] = _mm256_castps128_ps256(tri_b[3]);//_mm256_zextps128_ps256 - tri[11] = _mm256_castps128_ps256(tri_c[3]);//_mm256_zextps128_ps256 - } - - for(i = 4, r = 0; i < prim_num; i ++, r += 3) { - tri[r] = _mm256_insertf128_ps(tri[r] , tri_a[i], 1); - tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1); - tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1); - } - - //------------------------------------------------ - //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 - //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 - //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 - - //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 - //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 - //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 - - //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 - //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 - //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 - - //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 - //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 - //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 - - //"transpose" - tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 - tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! 
Za0 Za1 1 1 Za4 Za5 1 1 - - tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 - tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1 - - tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 - tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 - - tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 - tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 - - tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 - tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 - - tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 - tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1 - - /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ - triA[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7 - triA[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7 - triA[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7 - - triB[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7 - triB[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7 - triB[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7 - - triC[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 - triC[1] = 
_mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 - triC[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 - - /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ - - int result = ray_triangle_intersect8(kg, P, - dir, - isect, - visibility, object, - triA, - triB, - triC, - prim_addr, - prim_num, - num_hits, - max_hits, - num_hits_in_instance, - isect_t); - return result; +ccl_device_inline int triangle_intersect8(KernelGlobals *kg, + Intersection **isect, + float3 P, + float3 dir, + uint visibility, + int object, + int prim_addr, + int prim_num, + uint *num_hits, + uint max_hits, + int *num_hits_in_instance, + float isect_t) +{ + __m128 tri_a[8], tri_b[8], tri_c[8]; + __m256 tritmp[12], tri[12]; + __m256 triA[3], triB[3], triC[3]; + + int i, r; + + uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + for (i = 0; i < prim_num; i++) { + tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + } + //create 9 or 12 placeholders + tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256 + tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256 + tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256 + + tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256 + tri[4] = _mm256_castps128_ps256(tri_b[1]); //_mm256_zextps128_ps256 + tri[5] = _mm256_castps128_ps256(tri_c[1]); //_mm256_zextps128_ps256 + + tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256 + tri[7] = _mm256_castps128_ps256(tri_b[2]); //_mm256_zextps128_ps256 + tri[8] = _mm256_castps128_ps256(tri_c[2]); //_mm256_zextps128_ps256 + + if (prim_num > 3) { + tri[9] = _mm256_castps128_ps256(tri_a[3]); 
//_mm256_zextps128_ps256 + tri[10] = _mm256_castps128_ps256(tri_b[3]); //_mm256_zextps128_ps256 + tri[11] = _mm256_castps128_ps256(tri_c[3]); //_mm256_zextps128_ps256 + } + + for (i = 4, r = 0; i < prim_num; i++, r += 3) { + tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1); + tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1); + tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1); + } + + //------------------------------------------------ + //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 + //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 + //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 + + //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 + //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 + //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 + + //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 + //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 + //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 + + //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 + //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 + //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 + + //"transpose" + tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 + tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1 + + tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 + tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1 + + tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 + tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 + + tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 + tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 + + tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 + tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 + + tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 + tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! 
Zc2 Zc3 1 1 Zc6 Zc7 1 1 + + /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ + triA[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), + _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7 + triA[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), + _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7 + triA[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), + _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7 + + triB[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), + _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7 + triB[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), + _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7 + triB[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), + _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7 + + triC[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), + _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 + triC[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), + _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 + triC[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), + _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 + + /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ + + int result = ray_triangle_intersect8(kg, + P, + dir, + isect, + visibility, + object, + triA, + triB, + triC, + prim_addr, + prim_num, + num_hits, + max_hits, + num_hits_in_instance, + isect_t); + return result; } -#endif /* __KERNEL_AVX2__ */ +#endif /* __KERNEL_AVX2__ */ /* Special ray intersection routines for subsurface scattering. 
In that case we * only want to intersect with primitives in the same object, and if case of @@ -479,106 +496,108 @@ ccl_device_inline int triangle_intersect8( */ #ifdef __BVH_LOCAL__ -ccl_device_inline bool triangle_intersect_local( - KernelGlobals *kg, - LocalIntersection *local_isect, - float3 P, - float3 dir, - int object, - int local_object, - int prim_addr, - float tmax, - uint *lcg_state, - int max_hits) +ccl_device_inline bool triangle_intersect_local(KernelGlobals *kg, + LocalIntersection *local_isect, + float3 P, + float3 dir, + int object, + int local_object, + int prim_addr, + float tmax, + uint *lcg_state, + int max_hits) { - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if(object == OBJECT_NONE) { - if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; -#else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); -#endif - float t, u, v; - if(!ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, -#else - tri_a, tri_b, tri_c, -#endif - &u, &v, &t)) - { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if(max_hits == 0) { - return true; - } - - int hit; - if(lcg_state) { - /* Record up to max_hits intersections. 
*/ - for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if(local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if(local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if(hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. */ - if(local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. */ - Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - - /* Record geometric normal. */ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); -#endif - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. 
*/ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; +# else + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); +# endif + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + ssef_verts, +# else + tri_a, + tri_b, + tri_c, +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. */ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. */ + Intersection *isect = &local_isect->hits[hit]; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + + /* Record geometric normal. 
*/ +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); +# endif + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - return false; + return false; } -#endif /* __BVH_LOCAL__ */ +#endif /* __BVH_LOCAL__ */ /* Refine triangle intersection to more precise hit point. For rays that travel * far the precision is often not so good, this reintersects the primitive from @@ -596,61 +615,61 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; #ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { - if(UNLIKELY(t == 0.0f)) { - return P; - } + if (isect->object != OBJECT_NONE) { + if (UNLIKELY(t == 0.0f)) { + return P; + } # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - 
tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if(det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D*rt; - } - - if(isect->object != OBJECT_NONE) { + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + P = P + D * t; + + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. 
+ */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } + + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; #else - return P + D*t; + return P + D * t; #endif } @@ -662,61 +681,57 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; #else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } - P = P + D*t; + P = P + D * t; #ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if(det 
!= 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D*rt; - } -#endif /* __INTERSECTION_REFINE__ */ - - if(isect->object != OBJECT_NONE) { + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. 
+ */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } +#endif /* __INTERSECTION_REFINE__ */ + + if (isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; #else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 1977d263ece..96cf35a40dc 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -33,41 +33,47 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) { - /* todo: optimize this so it's just a single matrix multiplication when - * possible (not motion blur), or perhaps even just translation + scale */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); + /* todo: optimize this so it's just a single matrix multiplication when + * possible (not motion blur), or perhaps even just translation + scale */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); - object_inverse_position_transform(kg, sd, &P); + object_inverse_position_transform(kg, sd, &P); - if(desc.offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, desc); - P = transform_point(&tfm, P); - } + if (desc.offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, desc); + P = transform_point(&tfm, P); + } - return P; + return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device float volume_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - float3 P = 
volume_normalized_position(kg, sd, sd->P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - return average(float4_to_float3(r)); + float3 P = volume_normalized_position(kg, sd, sd->P); + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : + INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + return average(float4_to_float3(r)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device float3 volume_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - float3 P = volume_normalized_position(kg, sd, sd->P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + float3 P = volume_normalized_position(kg, sd, sd->P); + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : + INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - if(r.w > 1e-6f && r.w != 1.0f) { - /* For RGBA colors, unpremultiply after interpolation. */ - return float4_to_float3(r) / r.w; - } - else { - return float4_to_float3(r); - } + if (r.w > 1e-6f && r.w != 1.0f) { + /* For RGBA colors, unpremultiply after interpolation. */ + return float4_to_float3(r) / r.w; + } + else { + return float4_to_float3(r); + } } #endif -- cgit v1.2.3