diff options
Diffstat (limited to 'intern/cycles/kernel/geom/geom_curve.h')
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve.h | 136 |
1 files changed, 80 insertions, 56 deletions
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 9de335403ce..bb33b91847e 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -22,6 +22,12 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ +#if defined(__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300) +# define ccl_device_curveintersect ccl_device +#else +# define ccl_device_curveintersect ccl_device_forceinline +#endif + /* Reading attributes on various curve elements */ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) @@ -32,22 +38,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); } else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); + if(dx) *dx = sd->du.dx*(f1 - f0); if(dy) *dy = 0.0f; #endif - return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; + return (1.0f - sd->u)*f0 + sd->u*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -71,22 +77,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); } else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); + if(dx) *dx = sd->du.dx*(f1 - f0); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; + return (1.0f - sd->u)*f0 + sd->u*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -104,22 +110,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { float r = 0.0f; - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + if(sd->type & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); } - r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w; + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; } return r*2.0f; @@ -130,8 +136,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; @@ -139,7 +145,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u)); + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); } /* Curve tangent normal */ @@ -148,14 +154,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { float3 tgN = make_float3(0.0f,0.0f,0.0f); - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + if(sd->type & PRIMITIVE_ALL_CURVE) { - tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu)))); + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); tgN = normalize(tgN); /* need to find suitable scaled gd for corrected normal */ #if 0 - tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu)); + tgN = normalize(tgN - gd * sd->dPdu); #endif } @@ -222,13 +228,22 @@ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) #ifdef __KERNEL_SSE2__ /* Pass P and dir by reference to aligned vector */ -ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_curveintersect bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, const float3 &P, const float3 &dir, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) #else -ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_curveintersect bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax) #endif { + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if(time < prim_time.x || time > prim_time.y) { + return false; + } + } + int segment = PRIMITIVE_UNPACK_SEGMENT(type); float epsilon = 0.0f; float r_st, r_en; @@ -255,9 +270,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte int ka = max(k0 - 1, v00.x); int kb = min(k1 + 1, v00.x + v00.y - 1); -#ifdef __KERNEL_AVX2__ +#if defined(__KERNEL_AVX2__) && (!defined(_MSC_VER) || _MSC_VER > 1800) avxf P_curve_0_1, P_curve_2_3; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); } @@ -268,7 +283,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte #else /* __KERNEL_AVX2__ */ ssef P_curve[4]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); @@ -290,7 +305,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); -#ifdef __KERNEL_AVX2__ +#if defined(__KERNEL_AVX2__) && (!defined(_MSC_VER) || _MSC_VER > 1800) const avxf vPP = _mm256_broadcast_ps(&P.m128); const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); @@ -363,7 +378,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte float4 P_curve[4]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); @@ -679,7 +694,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte return hit; } -ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, +ccl_device_curveintersect bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, float3 P, float3 direction, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) { /* define few macros to minimize code duplication for SSE */ @@ -689,6 +704,15 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection # define dot3(x, y) dot(x, y) #endif + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if(time < prim_time.x || time > prim_time.y) { + return false; + } + } + int segment = PRIMITIVE_UNPACK_SEGMENT(type); /* curve Intersection check */ int flags = kernel_data.curve.curveflags; @@ -703,7 +727,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection #ifndef __KERNEL_SSE2__ float4 P_curve[2]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = kernel_tex_fetch(__curve_keys, k0); P_curve[1] = kernel_tex_fetch(__curve_keys, k1); } @@ -738,7 +762,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection #else ssef P_curve[2]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); } @@ -948,7 +972,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -961,7 +985,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con int prim = kernel_tex_fetch(__prim_index, isect->prim); float4 v00 = kernel_tex_fetch(__curves, prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 tg; @@ -972,14 +996,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float4 P_curve[4]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); P_curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve); + motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); } float3 p[4]; @@ -991,43 +1015,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con P = P + D*t; #ifdef __UV__ - ccl_fetch(sd, u) = isect->u; - ccl_fetch(sd, v) = 0.0f; + sd->u = isect->u; + sd->v = 0.0f; #endif tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D)))); + sd->Ng = normalize(-(D - tg * (dot(tg, D)))); } else { /* direction from inside to surface of curve */ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - ccl_fetch(sd, Ng) = normalize(P - p_curr); + sd->Ng = normalize(P - p_curr); /* adjustment for changing radius */ float gd = isect->v; if(gd != 0.0f) { - ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); } } /* todo: sometimes the normal is still so that this is detected as * backfacing even if cull backfaces is enabled */ - ccl_fetch(sd, N) = ccl_fetch(sd, Ng); + sd->N = sd->Ng; } else { float4 P_curve[2]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); } float l = 1.0f; @@ -1038,39 +1062,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float3 dif = P - float4_to_float3(P_curve[0]); #ifdef __UV__ - ccl_fetch(sd, u) = dot(dif,tg)/l; - ccl_fetch(sd, v) = 0.0f; + sd->u = dot(dif,tg)/l; + sd->v = 0.0f; #endif if(flag & CURVE_KN_TRUETANGENTGNORMAL) { - ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D)); - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); } else { float gd = isect->v; /* direction from inside to surface of curve */ - ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd); + sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); /* adjustment for changing radius */ if(gd != 0.0f) { - ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); } } - ccl_fetch(sd, N) = ccl_fetch(sd, Ng); + sd->N = sd->Ng; } #ifdef __DPDU__ /* dPdu/dPdv */ - ccl_fetch(sd, dPdu) = tg; - ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng)); + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); #endif if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif |