diff options
Diffstat (limited to 'source/blender/blenlib/intern/math_geom.c')
-rw-r--r-- | source/blender/blenlib/intern/math_geom.c | 650 |
1 files changed, 544 insertions, 106 deletions
diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c index 7e12cec5023..e8fb922ce4d 100644 --- a/source/blender/blenlib/intern/math_geom.c +++ b/source/blender/blenlib/intern/math_geom.c @@ -25,14 +25,13 @@ * ***** END GPL LICENSE BLOCK ***** * */ -#include <float.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> + +#include "MEM_guardedalloc.h" #include "BLI_math.h" #include "BLI_memarena.h" -#include "MEM_guardedalloc.h" + +#include "BKE_utildefines.h" /********************************** Polygons *********************************/ @@ -63,6 +62,7 @@ float normal_tri_v3(float n[3], const float v1[3], const float v2[3], const floa n[0]= n1[1]*n2[2]-n1[2]*n2[1]; n[1]= n1[2]*n2[0]-n1[0]*n2[2]; n[2]= n1[0]*n2[1]-n1[1]*n2[0]; + return normalize_v3(n); } @@ -125,10 +125,6 @@ float area_tri_v3(const float v1[3], const float v2[3], const float v3[3]) /* T return (len/2.0f); } -#define MAX2(x,y) ((x)>(y) ? (x) : (y)) -#define MAX3(x,y,z) MAX2(MAX2((x),(y)) , (z)) - - float area_poly_v3(int nr, float verts[][3], float *normal) { float x, y, z, area, max; @@ -406,16 +402,17 @@ int isect_line_tri_v3(float p1[3], float p2[3], float v0[3], float v1[3], float sub_v3_v3v3(s, p1, v0); - cross_v3_v3v3(q, s, e1); - *lambda = f * dot_v3v3(e2, q); - if ((*lambda < 0.0)||(*lambda > 1.0)) return 0; - u = f * dot_v3v3(s, p); if ((u < 0.0)||(u > 1.0)) return 0; - v = f * dot_v3v3(d, q); + cross_v3_v3v3(q, s, e1); + + v = f * dot_v3v3(d, q); if ((v < 0.0)||((u + v) > 1.0)) return 0; + *lambda = f * dot_v3v3(e2, q); + if ((*lambda < 0.0)||(*lambda > 1.0)) return 0; + if(uv) { uv[0]= u; uv[1]= v; @@ -445,17 +442,18 @@ int isect_ray_tri_v3(float p1[3], float d[3], float v0[3], float v1[3], float v2 sub_v3_v3v3(s, p1, v0); - cross_v3_v3v3(q, s, e1); - *lambda = f * dot_v3v3(e2, q); - if ((*lambda < 0.0)) return 0; - u = f * dot_v3v3(s, p); if ((u < 0.0)||(u > 1.0)) return 0; + cross_v3_v3v3(q, s, e1); + v = f * dot_v3v3(d, q); if ((v < 0.0)||((u + v) > 1.0)) return 0; - if(uv) { + *lambda = f * dot_v3v3(e2, q); + if ((*lambda < 0.0)) return 0; + + if(uv) { uv[0]= u; uv[1]= v; } @@ -465,36 +463,36 @@ int isect_ray_tri_v3(float p1[3], float d[3], float v0[3], float v1[3], float v2 int isect_ray_tri_epsilon_v3(float p1[3], float d[3], float v0[3], float v1[3], float v2[3], float *lambda, float *uv, float epsilon) { - float p[3], s[3], e1[3], e2[3], q[3]; - float a, f, u, v; - - sub_v3_v3v3(e1, v1, v0); - sub_v3_v3v3(e2, v2, v0); - - cross_v3_v3v3(p, d, e2); - a = dot_v3v3(e1, p); - if (a == 0.0f) return 0; - f = 1.0f/a; - - sub_v3_v3v3(s, p1, v0); - - cross_v3_v3v3(q, s, e1); + float p[3], s[3], e1[3], e2[3], q[3]; + float a, f, u, v; - u = f * dot_v3v3(s, p); - if ((u < -epsilon)||(u > 1.0f+epsilon)) return 0; - - v = f * dot_v3v3(d, q); - if ((v < -epsilon)||((u + v) > 1.0f+epsilon)) return 0; + sub_v3_v3v3(e1, v1, v0); + sub_v3_v3v3(e2, v2, v0); - *lambda = f * dot_v3v3(e2, q); - if ((*lambda < 0.0f)) return 0; + cross_v3_v3v3(p, d, e2); + a = dot_v3v3(e1, p); + if (a == 0.0f) return 0; + f = 1.0f/a; - if(uv) { - uv[0]= u; - uv[1]= v; - } - - return 1; + sub_v3_v3v3(s, p1, v0); + + u = f * dot_v3v3(s, p); + if ((u < -epsilon)||(u > 1.0f+epsilon)) return 0; + + cross_v3_v3v3(q, s, e1); + + v = f * dot_v3v3(d, q); + if ((v < -epsilon)||((u + v) > 1.0f+epsilon)) return 0; + + *lambda = f * dot_v3v3(e2, q); + if ((*lambda < 0.0f)) return 0; + + if(uv) { + uv[0]= u; + uv[1]= v; + } + + return 1; } int isect_ray_tri_threshold_v3(float p1[3], float d[3], float v0[3], float v1[3], float v2[3], float *lambda, float *uv, float threshold) @@ -724,7 +722,7 @@ int isect_sweeping_sphere_tri_v3(float p1[3], float p2[3], float radius, float v *lambda = newLambda; copy_v3_v3(ipoint,e1); mul_v3_fl(ipoint,e); - add_v3_v3v3(ipoint,ipoint,v0); + add_v3_v3(ipoint, v0); found_by_sweep=1; } } @@ -748,7 +746,7 @@ int isect_sweeping_sphere_tri_v3(float p1[3], float p2[3], float radius, float v *lambda = newLambda; copy_v3_v3(ipoint,e2); mul_v3_fl(ipoint,e); - add_v3_v3v3(ipoint,ipoint,v0); + add_v3_v3(ipoint, v0); found_by_sweep=1; } } @@ -777,7 +775,7 @@ int isect_sweeping_sphere_tri_v3(float p1[3], float p2[3], float radius, float v *lambda = newLambda; copy_v3_v3(ipoint,e3); mul_v3_fl(ipoint,e); - add_v3_v3v3(ipoint,ipoint,v1); + add_v3_v3(ipoint, v1); found_by_sweep=1; } } @@ -961,7 +959,7 @@ int isect_line_line_strict_v3(float v1[3], float v2[3], float v3[3], float v4[3] int isect_aabb_aabb_v3(float min1[3], float max1[3], float min2[3], float max2[3]) { return (min1[0]<max2[0] && min1[1]<max2[1] && min1[2]<max2[2] && - min2[0]<max1[0] && min2[1]<max1[1] && min2[2]<max1[2]); + min2[0]<max1[0] && min2[1]<max1[1] && min2[2]<max1[2]); } /* find closest point to p on line through l1,l2 and return lambda, @@ -1400,14 +1398,14 @@ void barycentric_weights_v2(const float v1[2], const float v2[2], const float v3 wtot = w[0]+w[1]+w[2]; if (wtot != 0.0f) { - wtot_inv = 1.0f/wtot; + wtot_inv = 1.0f/wtot; - w[0] = w[0]*wtot_inv; - w[1] = w[1]*wtot_inv; - w[2] = w[2]*wtot_inv; + w[0] = w[0]*wtot_inv; + w[1] = w[1]*wtot_inv; + w[2] = w[2]*wtot_inv; } else /* dummy values for zero area face */ - w[0] = w[1] = w[2] = 1.0f/3.0f; + w[0] = w[1] = w[2] = 1.0f/3.0f; } /* given 2 triangles in 3D space, and a point in relation to the first triangle. @@ -1612,21 +1610,21 @@ void interp_cubic_v3(float *x, float *v,float *x1, float *v1, float *x2, float * void orthographic_m4(float matrix[][4],float left, float right, float bottom, float top, float nearClip, float farClip) { - float Xdelta, Ydelta, Zdelta; + float Xdelta, Ydelta, Zdelta; - Xdelta = right - left; - Ydelta = top - bottom; - Zdelta = farClip - nearClip; - if (Xdelta == 0.0 || Ydelta == 0.0 || Zdelta == 0.0) { + Xdelta = right - left; + Ydelta = top - bottom; + Zdelta = farClip - nearClip; + if (Xdelta == 0.0 || Ydelta == 0.0 || Zdelta == 0.0) { return; - } - unit_m4(matrix); - matrix[0][0] = 2.0f/Xdelta; - matrix[3][0] = -(right + left)/Xdelta; - matrix[1][1] = 2.0f/Ydelta; - matrix[3][1] = -(top + bottom)/Ydelta; - matrix[2][2] = -2.0f/Zdelta; /* note: negate Z */ - matrix[3][2] = -(farClip + nearClip)/Zdelta; + } + unit_m4(matrix); + matrix[0][0] = 2.0f/Xdelta; + matrix[3][0] = -(right + left)/Xdelta; + matrix[1][1] = 2.0f/Ydelta; + matrix[3][1] = -(top + bottom)/Ydelta; + matrix[2][2] = -2.0f/Zdelta; /* note: negate Z */ + matrix[3][2] = -(farClip + nearClip)/Zdelta; } void perspective_m4(float mat[][4],float left, float right, float bottom, float top, float nearClip, float farClip) @@ -1648,22 +1646,22 @@ void perspective_m4(float mat[][4],float left, float right, float bottom, float mat[2][3] = -1.0f; mat[3][2] = (-2.0f * nearClip * farClip)/Zdelta; mat[0][1] = mat[0][2] = mat[0][3] = - mat[1][0] = mat[1][2] = mat[1][3] = - mat[3][0] = mat[3][1] = mat[3][3] = 0.0; + mat[1][0] = mat[1][2] = mat[1][3] = + mat[3][0] = mat[3][1] = mat[3][3] = 0.0; } static void i_multmatrix(float icand[][4], float Vm[][4]) { - int row, col; - float temp[4][4]; - - for(row=0 ; row<4 ; row++) - for(col=0 ; col<4 ; col++) - temp[row][col] = icand[row][0] * Vm[0][col] - + icand[row][1] * Vm[1][col] - + icand[row][2] * Vm[2][col] - + icand[row][3] * Vm[3][col]; + int row, col; + float temp[4][4]; + + for(row=0 ; row<4 ; row++) + for(col=0 ; col<4 ; col++) + temp[row][col] = icand[row][0] * Vm[0][col] + + icand[row][1] * Vm[1][col] + + icand[row][2] * Vm[2][col] + + icand[row][3] * Vm[3][col]; copy_m4_m4(Vm, temp); } @@ -1673,10 +1671,10 @@ void polarview_m4(float Vm[][4],float dist, float azimuth, float incidence, floa unit_m4(Vm); - translate_m4(Vm,0.0, 0.0, -dist); - rotate_m4(Vm,'z',-twist); - rotate_m4(Vm,'x',-incidence); - rotate_m4(Vm,'z',-azimuth); + translate_m4(Vm,0.0, 0.0, -dist); + rotate_m4(Vm,'Z',-twist); + rotate_m4(Vm,'X',-incidence); + rotate_m4(Vm,'Z',-azimuth); } void lookat_m4(float mat[][4],float vx, float vy, float vz, float px, float py, float pz, float twist) @@ -1687,7 +1685,7 @@ void lookat_m4(float mat[][4],float vx, float vy, float vz, float px, float py, unit_m4(mat); unit_m4(mat1); - rotate_m4(mat,'z',-twist); + rotate_m4(mat, 'Z', -twist); dx = px - vx; dy = py - vy; @@ -1710,8 +1708,8 @@ void lookat_m4(float mat[][4],float vx, float vy, float vz, float px, float py, i_multmatrix(mat1, mat); - mat1[1][1] = mat1[2][2] = 1.0f; /* be careful here to reinit */ - mat1[1][2] = mat1[2][1] = 0.0; /* those modified by the last */ + mat1[1][1] = mat1[2][2] = 1.0f; /* be careful here to reinit */ + mat1[1][2] = mat1[2][1] = 0.0; /* those modified by the last */ /* paragraph */ if (hyp != 0.0f) { /* rotate Y */ @@ -1767,6 +1765,27 @@ int box_clip_bounds_m4(float boundbox[2][3], float bounds[4], float winmat[4][4] return flag; } +void box_minmax_bounds_m4(float min[3], float max[3], float boundbox[2][3], float mat[4][4]) +{ + float mn[3], mx[3], vec[3]; + int a; + + copy_v3_v3(mn, min); + copy_v3_v3(mx, max); + + for(a=0; a<8; a++) { + vec[0]= (a & 1)? boundbox[0][0]: boundbox[1][0]; + vec[1]= (a & 2)? boundbox[0][1]: boundbox[1][1]; + vec[2]= (a & 4)? boundbox[0][2]: boundbox[1][2]; + + mul_m4_v3(mat, vec); + DO_MINMAX(vec, mn, mx); + } + + copy_v3_v3(min, mn); + copy_v3_v3(max, mx); +} + /********************************** Mapping **********************************/ void map_to_tube(float *u, float *v,float x, float y, float z) @@ -1798,9 +1817,7 @@ void map_to_sphere(float *u, float *v,float x, float y, float z) } } -/********************************************************/ - -/* Tangents */ +/********************************* Tangents **********************************/ /* For normal map tangents we need to detect uv boundaries, and only average * tangents in case the uvs are connected. Alternative would be to store 1 @@ -1818,7 +1835,7 @@ void sum_or_add_vertex_tangent(void *arena, VertexTangent **vtang, float *tang, /* find a tangent with connected uvs */ for(vt= *vtang; vt; vt=vt->next) { if(fabs(uv[0]-vt->uv[0]) < STD_UV_CONNECT_LIMIT && fabs(uv[1]-vt->uv[1]) < STD_UV_CONNECT_LIMIT) { - add_v3_v3v3(vt->tang, vt->tang, tang); + add_v3_v3(vt->tang, tang); return; } } @@ -1852,10 +1869,12 @@ void tangent_from_uv(float *uv1, float *uv2, float *uv3, float *co1, float *co2, float s2= uv3[0] - uv1[0]; float t1= uv2[1] - uv1[1]; float t2= uv3[1] - uv1[1]; + float det= (s1 * t2 - s2 * t1); + + if(det != 0.0f) { /* otherwise 'tang' becomes nan */ + float tangv[3], ct[3], e1[3], e2[3]; - if(s1 && s2 && t1 && t2) { /* otherwise 'tang' becomes nan */ - float tangv[3], ct[3], e1[3], e2[3], det; - det= 1.0f / (s1 * t2 - s2 * t1); + det= 1.0f/det; /* normals in render are inversed... */ sub_v3_v3v3(e1, co1, co2); @@ -1877,11 +1896,11 @@ void tangent_from_uv(float *uv1, float *uv2, float *uv3, float *co1, float *co2, } } -/********************************************************/ +/****************************** Vector Clouds ********************************/ /* vector clouds */ /* void vcloud_estimate_transform(int list_size, float (*pos)[3], float *weight,float (*rpos)[3], float *rweight, - float lloc[3],float rloc[3],float lrot[3][3],float lscale[3][3]) + float lloc[3],float rloc[3],float lrot[3][3],float lscale[3][3]) input ( @@ -1905,13 +1924,13 @@ pointers may be NULL if not needed /* can't believe there is none in math utils */ float _det_m3(float m2[3][3]) { - float det = 0.f; - if (m2){ - det= m2[0][0]* (m2[1][1]*m2[2][2] - m2[1][2]*m2[2][1]) - -m2[1][0]* (m2[0][1]*m2[2][2] - m2[0][2]*m2[2][1]) - +m2[2][0]* (m2[0][1]*m2[1][2] - m2[0][2]*m2[1][1]); - } - return det; + float det = 0.f; + if (m2){ + det= m2[0][0]* (m2[1][1]*m2[2][2] - m2[1][2]*m2[2][1]) + -m2[1][0]* (m2[0][1]*m2[2][2] - m2[0][2]*m2[2][1]) + +m2[2][0]* (m2[0][1]*m2[1][2] - m2[0][2]*m2[1][1]); + } + return det; } @@ -1936,19 +1955,19 @@ void vcloud_estimate_transform(int list_size, float (*pos)[3], float *weight,flo float v[3]; copy_v3_v3(v,pos[a]); mul_v3_fl(v,weight[a]); - add_v3_v3v3(accu_com,accu_com,v); + add_v3_v3(accu_com, v); accu_weight +=weight[a]; } - else add_v3_v3v3(accu_com,accu_com,pos[a]); + else add_v3_v3(accu_com, pos[a]); if (rweight){ float v[3]; copy_v3_v3(v,rpos[a]); mul_v3_fl(v,rweight[a]); - add_v3_v3v3(accu_rcom,accu_rcom,v); + add_v3_v3(accu_rcom, v); accu_rweight +=rweight[a]; } - else add_v3_v3v3(accu_rcom,accu_rcom,rpos[a]); + else add_v3_v3(accu_rcom, rpos[a]); } if (!weight || !rweight){ @@ -2036,3 +2055,422 @@ void vcloud_estimate_transform(int list_size, float (*pos)[3], float *weight,flo } } +/******************************* Form Factor *********************************/ + +static void vec_add_dir(float r[3], float v1[3], float v2[3], float fac) +{ + r[0]= v1[0] + fac*(v2[0] - v1[0]); + r[1]= v1[1] + fac*(v2[1] - v1[1]); + r[2]= v1[2] + fac*(v2[2] - v1[2]); +} + +static int ff_visible_quad(float p[3], float n[3], float v0[3], float v1[3], float v2[3], float q0[3], float q1[3], float q2[3], float q3[3]) +{ + static const float epsilon = 1e-6f; + float c, sd[3]; + + c= dot_v3v3(n, p); + + /* signed distances from the vertices to the plane. */ + sd[0]= dot_v3v3(n, v0) - c; + sd[1]= dot_v3v3(n, v1) - c; + sd[2]= dot_v3v3(n, v2) - c; + + if(fabsf(sd[0]) < epsilon) sd[0] = 0.0f; + if(fabsf(sd[1]) < epsilon) sd[1] = 0.0f; + if(fabsf(sd[2]) < epsilon) sd[2] = 0.0f; + + if(sd[0] > 0) { + if(sd[1] > 0) { + if(sd[2] > 0) { + // +++ + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // ++- + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + vec_add_dir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + vec_add_dir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); + } + else { + // ++0 + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // +-+ + copy_v3_v3(q0, v0); + vec_add_dir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + vec_add_dir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + copy_v3_v3(q3, v2); + } + else if(sd[2] < 0) { + // +-- + copy_v3_v3(q0, v0); + vec_add_dir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + vec_add_dir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); + copy_v3_v3(q3, q2); + } + else { + // +-0 + copy_v3_v3(q0, v0); + vec_add_dir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + } + else { + if(sd[2] > 0) { + // +0+ + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // +0- + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + vec_add_dir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); + copy_v3_v3(q3, q2); + } + else { + // +00 + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + } + } + else if(sd[0] < 0) { + if(sd[1] > 0) { + if(sd[2] > 0) { + // -++ + vec_add_dir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + vec_add_dir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); + } + else if(sd[2] < 0) { + // -+- + vec_add_dir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + copy_v3_v3(q1, v1); + vec_add_dir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + copy_v3_v3(q3, q2); + } + else { + // -+0 + vec_add_dir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // --+ + vec_add_dir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); + vec_add_dir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // --- + return 0; + } + else { + // --0 + return 0; + } + } + else { + if(sd[2] > 0) { + // -0+ + vec_add_dir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // -0- + return 0; + } + else { + // -00 + return 0; + } + } + } + else { + if(sd[1] > 0) { + if(sd[2] > 0) { + // 0++ + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // 0+- + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + vec_add_dir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); + copy_v3_v3(q3, q2); + } + else { + // 0+0 + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + } + else if(sd[1] < 0) { + if(sd[2] > 0) { + // 0-+ + copy_v3_v3(q0, v0); + vec_add_dir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // 0-- + return 0; + } + else { + // 0-0 + return 0; + } + } + else { + if(sd[2] > 0) { + // 00+ + copy_v3_v3(q0, v0); + copy_v3_v3(q1, v1); + copy_v3_v3(q2, v2); + copy_v3_v3(q3, q2); + } + else if(sd[2] < 0) { + // 00- + return 0; + } + else { + // 000 + return 0; + } + } + } + + return 1; +} + +/* altivec optimization, this works, but is unused */ + +#if 0 +#include <Accelerate/Accelerate.h> + +typedef union { + vFloat v; + float f[4]; +} vFloatResult; + +static vFloat vec_splat_float(float val) +{ + return (vFloat){val, val, val, val}; +} + +static float ff_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle; + vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3}; + vFloatResult vresult; + float result; + + /* compute r* */ + vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]); + vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]); + vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]); + + /* normalize r* */ + rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f)); + vrx = vrx*rlen; + vry = vry*rlen; + vrz = vrz*rlen; + + /* rotate r* for cross and dot */ + vsrx= vec_perm(vrx, vrx, rotate); + vsry= vec_perm(vry, vry, rotate); + vsrz= vec_perm(vrz, vrz, rotate); + + /* cross product */ + gx = vsry*vrz - vsrz*vry; + gy = vsrz*vrx - vsrx*vrz; + gz = vsrx*vry - vsry*vrx; + + /* normalize */ + rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f)); + gx = gx*rlen; + gy = gy*rlen; + gz = gz*rlen; + + /* angle */ + vcos = vrx*vsrx + vry*vsry + vrz*vsrz; + vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f)); + vangle= vacosf(vcos); + + /* dot */ + vresult.v = (vec_splat_float(n[0])*gx + + vec_splat_float(n[1])*gy + + vec_splat_float(n[2])*gz)*vangle; + + result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI); + result= MAX2(result, 0.0f); + + return result; +} + +#endif + +/* SSE optimization, acos code doesn't work */ + +#if 0 + +#include <xmmintrin.h> + +static __m128 sse_approx_acos(__m128 x) +{ + /* needs a better approximation than taylor expansion of acos, since that + * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work + * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */ + + return _mm_set_ps1(1.0f); +} + +static float ff_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; + float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; + float fresult[4] __attribute__((aligned(16))); + __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult; + + /* compute r */ + qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]); + qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]); + qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]); + + rx = qx - _mm_set_ps1(p[0]); + ry = qy - _mm_set_ps1(p[1]); + rz = qz - _mm_set_ps1(p[2]); + + /* normalize r */ + rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f)); + rx = rx*rlen; + ry = ry*rlen; + rz = rz*rlen; + + /* cross product */ + srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1)); + sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1)); + srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1)); + + gx = sry*rz - srz*ry; + gy = srz*rx - srx*rz; + gz = srx*ry - sry*rx; + + /* normalize g */ + glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f)); + gx = gx*glen; + gy = gy*glen; + gz = gz*glen; + + /* compute angle */ + rcos = rx*srx + ry*sry + rz*srz; + rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f)); + + angle = sse_approx_cos(rcos); + aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle; + + /* sum together */ + result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI); + result= MAX2(result, 0.0f); + + return result; +} + +#endif + +static void ff_normalize(float n[3]) +{ + float d; + + d= dot_v3v3(n, n); + + if(d > 1.0e-35F) { + d= 1.0f/sqrtf(d); + + n[0] *= d; + n[1] *= d; + n[2] *= d; + } +} + +static float ff_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) +{ + float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; + float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; + + sub_v3_v3v3(r0, q0, p); + sub_v3_v3v3(r1, q1, p); + sub_v3_v3v3(r2, q2, p); + sub_v3_v3v3(r3, q3, p); + + ff_normalize(r0); + ff_normalize(r1); + ff_normalize(r2); + ff_normalize(r3); + + cross_v3_v3v3(g0, r1, r0); ff_normalize(g0); + cross_v3_v3v3(g1, r2, r1); ff_normalize(g1); + cross_v3_v3v3(g2, r3, r2); ff_normalize(g2); + cross_v3_v3v3(g3, r0, r3); ff_normalize(g3); + + a1= saacosf(dot_v3v3(r0, r1)); + a2= saacosf(dot_v3v3(r1, r2)); + a3= saacosf(dot_v3v3(r2, r3)); + a4= saacosf(dot_v3v3(r3, r0)); + + dot1= dot_v3v3(n, g0); + dot2= dot_v3v3(n, g1); + dot3= dot_v3v3(n, g2); + dot4= dot_v3v3(n, g3); + + result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI; + result= MAX2(result, 0.0f); + + return result; +} + +float form_factor_hemi_poly(float p[3], float n[3], float v1[3], float v2[3], float v3[3], float v4[3]) +{ + /* computes how much hemisphere defined by point and normal is + covered by a quad or triangle, cosine weighted */ + float q0[3], q1[3], q2[3], q3[3], contrib= 0.0f; + + if(ff_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) + contrib += ff_quad_form_factor(p, n, q0, q1, q2, q3); + + if(v4 && ff_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3)) + contrib += ff_quad_form_factor(p, n, q0, q1, q2, q3); + + return contrib; +} + |