Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h66
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h66
2 files changed, 127 insertions, 5 deletions
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 84aaaab7453..636dbcc71e0 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -255,6 +255,17 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
+#ifdef __KERNEL_AVX2__
+ avxf P_curve_0_1, P_curve_2_3;
+ if(type & PRIMITIVE_CURVE) {
+ P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+ P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
+ }
+#else /* __KERNEL_AVX2__ */
ssef P_curve[4];
if(type & PRIMITIVE_CURVE) {
@@ -267,6 +278,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
}
+#endif /* __KERNEL_AVX2__ */
ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -278,6 +290,33 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+#ifdef __KERNEL_AVX2__
+ const avxf vPP = _mm256_broadcast_ps(&P.m128);
+ const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+ const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+ const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+ const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_0_1 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+ const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_2_3 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_2_3 - vPP)*htfm22));
+
+ const ssef p0 = _mm256_castps256_ps128(p01);
+ const ssef p1 = _mm256_extractf128_ps(p01, 1);
+ const ssef p2 = _mm256_castps256_ps128(p23);
+ const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+ const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+ r_st = ((float4 &)P_curve_1).w;
+ const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+ r_en = ((float4 &)P_curve_2).w;
+#else /* __KERNEL_AVX2__ */
ssef htfm[] = { htfm0, htfm1, htfm2 };
ssef vP = load4f(P);
ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -285,6 +324,10 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
+ r_st = ((float4 &)P_curve[1]).w;
+ r_en = ((float4 &)P_curve[2]).w;
+#endif /* __KERNEL_AVX2__ */
+
float fc = 0.71f;
ssef vfc = ssef(fc);
ssef vfcxp3 = vfc * p3;
@@ -294,8 +337,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
}
#else
float3 curve_coef[4];
@@ -383,8 +424,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
/* begin loop */
while(!(tree >> (depth))) {
- float i_st = tree * resol;
- float i_en = i_st + (level * resol);
+ const float i_st = tree * resol;
+ const float i_en = i_st + (level * resol);
+
#ifdef __KERNEL_SSE2__
ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -458,13 +500,23 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
+#ifdef __KERNEL_SSE__
+ const float3 tg_sq = tg * tg;
+ float w = tg_sq.x + tg_sq.y;
+#else
float w = tg.x * tg.x + tg.y * tg.y;
+#endif
if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
+#ifdef __KERNEL_SSE__
+ const float3 p_sttg = p_st * tg;
+ w = -(p_sttg.x + p_sttg.y) / w;
+#else
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+#endif
w = saturate(w);
/* compute u on the curve segment */
@@ -496,7 +548,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(difl != 0.0f) {
mw_extension = min(difl * fabsf(bmaxz), extmax);
r_ext = mw_extension + r_curr;
+#ifdef __KERNEL_SSE__
+ const float3 p_curr_sq = p_curr * p_curr;
+ const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
+ float d = dxxx.x;
+#else
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+#endif
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 6de5aa7ea99..80b33fad68b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -118,7 +118,12 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
}
/* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1, int k2, int k3,
+ float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -147,6 +152,65 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object,
keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
}
+#ifdef __KERNEL_AVX2__
+/* Similar to above, but returns keys as pair of two AVX registers with each
+ * holding two float4.
+ */
+ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1,
+ int k2, int k3,
+ avxf *out_keys_0_1,
+ avxf *out_keys_2_3)
+{
+ /* Get motion info. */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time*maxstep), maxstep - 1);
+ float t = time*maxstep - step;
+
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg,
+ object,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch key coordinates. */
+ float4 next_keys[4];
+ float4 keys[4];
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step,
+ k0, k1, k2, k3,
+ keys);
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step + 1,
+ k0, k1, k2, k3,
+ next_keys);
+
+ const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+ const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+ const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+ const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+ /* Interpolate between steps. */
+ *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
+ *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+}
+#endif
+
#endif
CCL_NAMESPACE_END