diff options
Diffstat (limited to 'extern/bullet/BulletDynamics/ConstraintSolver/SorLcp.cpp')
-rw-r--r-- | extern/bullet/BulletDynamics/ConstraintSolver/SorLcp.cpp | 821 |
1 files changed, 821 insertions, 0 deletions
diff --git a/extern/bullet/BulletDynamics/ConstraintSolver/SorLcp.cpp b/extern/bullet/BulletDynamics/ConstraintSolver/SorLcp.cpp new file mode 100644 index 00000000000..d5c6d80552d --- /dev/null +++ b/extern/bullet/BulletDynamics/ConstraintSolver/SorLcp.cpp @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2005 Erwin Coumans http://www.erwincoumans.com + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies. + * Erwin Coumans makes no representations about the suitability + * of this software for any purpose. + * It is provided "as is" without express or implied warranty. +*/ +#include "SorLcp.h" + +#ifdef USE_SOR_SOLVER + +// SOR LCP taken from ode quickstep, +// todo: write own successive overrelaxation gauss-seidel, or jacobi iterative solver + + +#include "SimdScalar.h" + +#include "Dynamics/RigidBody.h" +#include <math.h> +#include <float.h>//FLT_MAX +#ifdef WIN32 +#include <memory.h> +#endif +#include <string.h> +#include <stdio.h> + +#ifdef WIN32 +#include <malloc.h> +#else +#include <alloca.h> +#endif + +#include "Dynamics/BU_Joint.h" +#include "ContactSolverInfo.h" + +//////////////////////////////////////////////////////////////////// +//math stuff + +typedef SimdScalar dVector4[4]; +typedef SimdScalar dMatrix3[4*3]; +#define dInfinity FLT_MAX + + + +#define dRecip(x) ((float)(1.0f/(x))) /* reciprocal */ + + + +#define dMULTIPLY0_331NEW(A,op,B,C) \ +{\ + float tmp[3];\ + tmp[0] = C.getX();\ + tmp[1] = C.getY();\ + tmp[2] = C.getZ();\ + dMULTIPLYOP0_331(A,op,B,tmp);\ +} + +#define dMULTIPLY0_331(A,B,C) dMULTIPLYOP0_331(A,=,B,C) +#define dMULTIPLYOP0_331(A,op,B,C) \ + (A)[0] op dDOT1((B),(C)); \ + (A)[1] op dDOT1((B+4),(C)); \ + (A)[2] op dDOT1((B+8),(C)); + +#define dAASSERT ASSERT +#define dIASSERT ASSERT + +#define REAL float +#define dDOTpq(a,b,p,q) ((a)[0]*(b)[0] + (a)[p]*(b)[q] + (a)[2*(p)]*(b)[2*(q)]) +SimdScalar dDOT1 (const SimdScalar *a, const SimdScalar *b) { return dDOTpq(a,b,1,1); } +#define dDOT14(a,b) dDOTpq(a,b,1,4) + +#define dCROSS(a,op,b,c) \ + (a)[0] op ((b)[1]*(c)[2] - (b)[2]*(c)[1]); \ + (a)[1] op ((b)[2]*(c)[0] - (b)[0]*(c)[2]); \ + (a)[2] op ((b)[0]*(c)[1] - (b)[1]*(c)[0]); + + +#define dMULTIPLYOP2_333(A,op,B,C) \ + (A)[0] op dDOT1((B),(C)); \ + (A)[1] op dDOT1((B),(C+4)); \ + (A)[2] op dDOT1((B),(C+8)); \ + (A)[4] op dDOT1((B+4),(C)); \ + (A)[5] op dDOT1((B+4),(C+4)); \ + (A)[6] op dDOT1((B+4),(C+8)); \ + (A)[8] op dDOT1((B+8),(C)); \ + (A)[9] op dDOT1((B+8),(C+4)); \ + (A)[10] op dDOT1((B+8),(C+8)); +#define dMULTIPLYOP0_333(A,op,B,C) \ + (A)[0] op dDOT14((B),(C)); \ + (A)[1] op dDOT14((B),(C+1)); \ + (A)[2] op dDOT14((B),(C+2)); \ + (A)[4] op dDOT14((B+4),(C)); \ + (A)[5] op dDOT14((B+4),(C+1)); \ + (A)[6] op dDOT14((B+4),(C+2)); \ + (A)[8] op dDOT14((B+8),(C)); \ + (A)[9] op dDOT14((B+8),(C+1)); \ + (A)[10] op dDOT14((B+8),(C+2)); + +#define dMULTIPLY2_333(A,B,C) dMULTIPLYOP2_333(A,=,B,C) +#define dMULTIPLY0_333(A,B,C) dMULTIPLYOP0_333(A,=,B,C) +#define dMULTIPLYADD0_331(A,B,C) dMULTIPLYOP0_331(A,+=,B,C) + + +//////////////////////////////////////////////////////////////////// +#define EFFICIENT_ALIGNMENT 16 +#define dEFFICIENT_SIZE(x) ((((x)-1)|(EFFICIENT_ALIGNMENT-1))+1) +/* alloca aligned to the EFFICIENT_ALIGNMENT. note that this can waste + * up to 15 bytes per allocation, depending on what alloca() returns. + */ + +#define dALLOCA16(n) \ + ((char*)dEFFICIENT_SIZE(((size_t)(alloca((n)+(EFFICIENT_ALIGNMENT-1)))))) + + + +///////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////// + +#ifdef DEBUG +#define ANSI_FTOL 1 + +extern "C" { + __declspec(naked) void _ftol2() { + __asm { +#if ANSI_FTOL + fnstcw WORD PTR [esp-2] + mov ax, WORD PTR [esp-2] + + OR AX, 0C00h + + mov WORD PTR [esp-4], ax + fldcw WORD PTR [esp-4] + fistp QWORD PTR [esp-12] + fldcw WORD PTR [esp-2] + mov eax, DWORD PTR [esp-12] + mov edx, DWORD PTR [esp-8] +#else + fistp DWORD PTR [esp-12] + mov eax, DWORD PTR [esp-12] + mov ecx, DWORD PTR [esp-8] +#endif + ret + } + } +} +#endif //DEBUG + + + + + + +#define ALLOCA dALLOCA16 + +typedef const SimdScalar *dRealPtr; +typedef SimdScalar *dRealMutablePtr; +#define dRealArray(name,n) SimdScalar name[n]; +#define dRealAllocaArray(name,n) SimdScalar *name = (SimdScalar*) ALLOCA ((n)*sizeof(SimdScalar)); + +void dSetZero1 (SimdScalar *a, int n) +{ + dAASSERT (a && n >= 0); + while (n > 0) { + *(a++) = 0; + n--; + } +} + +void dSetValue1 (SimdScalar *a, int n, SimdScalar value) +{ + dAASSERT (a && n >= 0); + while (n > 0) { + *(a++) = value; + n--; + } +} + + +//*************************************************************************** +// configuration + +// for the SOR and CG methods: +// uncomment the following line to use warm starting. this definitely +// help for motor-driven joints. unfortunately it appears to hurt +// with high-friction contacts using the SOR method. use with care + +//#define WARM_STARTING 1 + +// for the SOR method: +// uncomment the following line to randomly reorder constraint rows +// during the solution. depending on the situation, this can help a lot +// or hardly at all, but it doesn't seem to hurt. + +//#define RANDOMLY_REORDER_CONSTRAINTS 1 + + + +//*************************************************************************** +// various common computations involving the matrix J + +// compute iMJ = inv(M)*J' + +static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, + RigidBody * const *body, dRealPtr invI) +{ + int i,j; + dRealMutablePtr iMJ_ptr = iMJ; + dRealMutablePtr J_ptr = J; + for (i=0; i<m; i++) { + int b1 = jb[i*2]; + int b2 = jb[i*2+1]; + SimdScalar k = body[b1]->getInvMass(); + for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j]; + dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3); + if (b2 >= 0) { + k = body[b2]->getInvMass(); + for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6]; + dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9); + } + J_ptr += 12; + iMJ_ptr += 12; + } +} + +static void multiply_invM_JTSpecial (int m, int nb, dRealMutablePtr iMJ, int *jb, + dRealMutablePtr in, dRealMutablePtr out,int onlyBody1,int onlyBody2) +{ + int i,j; + + + + dRealMutablePtr out_ptr1 = out + onlyBody1*6; + + for (j=0; j<6; j++) + out_ptr1[j] = 0; + + if (onlyBody2 >= 0) + { + out_ptr1 = out + onlyBody2*6; + + for (j=0; j<6; j++) + out_ptr1[j] = 0; + } + + dRealPtr iMJ_ptr = iMJ; + for (i=0; i<m; i++) { + + int b1 = jb[i*2]; + + dRealMutablePtr out_ptr = out + b1*6; + if ((b1 == onlyBody1) || (b1 == onlyBody2)) + { + for (j=0; j<6; j++) + out_ptr[j] += iMJ_ptr[j] * in[i] ; + } + + iMJ_ptr += 6; + + int b2 = jb[i*2+1]; + if ((b2 == onlyBody1) || (b2 == onlyBody2)) + { + if (b2 >= 0) + { + out_ptr = out + b2*6; + for (j=0; j<6; j++) + out_ptr[j] += iMJ_ptr[j] * in[i]; + } + } + + iMJ_ptr += 6; + + } +} + + +// compute out = inv(M)*J'*in. + +static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb, + dRealMutablePtr in, dRealMutablePtr out) +{ + int i,j; + dSetZero1 (out,6*nb); + dRealPtr iMJ_ptr = iMJ; + for (i=0; i<m; i++) { + int b1 = jb[i*2]; + int b2 = jb[i*2+1]; + dRealMutablePtr out_ptr = out + b1*6; + for (j=0; j<6; j++) + out_ptr[j] += iMJ_ptr[j] * in[i]; + iMJ_ptr += 6; + if (b2 >= 0) { + out_ptr = out + b2*6; + for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; + } + iMJ_ptr += 6; + } +} + + +// compute out = J*in. + +static void multiply_J (int m, dRealMutablePtr J, int *jb, + dRealMutablePtr in, dRealMutablePtr out) +{ + int i,j; + dRealPtr J_ptr = J; + for (i=0; i<m; i++) { + int b1 = jb[i*2]; + int b2 = jb[i*2+1]; + SimdScalar sum = 0; + dRealMutablePtr in_ptr = in + b1*6; + for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; + J_ptr += 6; + if (b2 >= 0) { + in_ptr = in + b2*6; + for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; + } + J_ptr += 6; + out[i] = sum; + } +} + +//*************************************************************************** +// SOR-LCP method + +// nb is the number of bodies in the body array. +// J is an m*12 matrix of constraint rows +// jb is an array of first and second body numbers for each constraint row +// invI is the global frame inverse inertia for each body (stacked 3x3 matrices) +// +// this returns lambda and fc (the constraint force). +// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda +// +// b, lo and hi are modified on exit + + +struct IndexError { + SimdScalar error; // error to sort on + int findex; + int index; // row index +}; + +static unsigned long seed2 = 0; + +unsigned long dRand2() +{ + seed2 = (1664525L*seed2 + 1013904223L) & 0xffffffff; + return seed2; +} + +int dRandInt2 (int n) +{ + float a = float(n) / 4294967296.0f; + return (int) (float(dRand2()) * a); +} + + +static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, RigidBody * const *body, + dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr invMforce, dRealMutablePtr rhs, + dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, + int numiter,float overRelax) +{ + const int num_iterations = numiter; + const float sor_w = overRelax; // SOR over-relaxation parameter + + int i,j; + +#ifdef WARM_STARTING + // for warm starting, this seems to be necessary to prevent + // jerkiness in motor-driven joints. i have no idea why this works. + for (i=0; i<m; i++) lambda[i] *= 0.9; +#else + dSetZero1 (lambda,m); +#endif + + // the lambda computed at the previous iteration. + // this is used to measure error for when we are reordering the indexes. + dRealAllocaArray (last_lambda,m); + + // a copy of the 'hi' vector in case findex[] is being used + dRealAllocaArray (hicopy,m); + memcpy (hicopy,hi,m*sizeof(float)); + + // precompute iMJ = inv(M)*J' + dRealAllocaArray (iMJ,m*12); + compute_invM_JT (m,J,iMJ,jb,body,invI); + + // compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc + // as we change lambda. +#ifdef WARM_STARTING + multiply_invM_JT (m,nb,iMJ,jb,lambda,fc); +#else + dSetZero1 (invMforce,nb*6); +#endif + + // precompute 1 / diagonals of A + dRealAllocaArray (Ad,m); + dRealPtr iMJ_ptr = iMJ; + dRealMutablePtr J_ptr = J; + for (i=0; i<m; i++) { + float sum = 0; + for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j]; + if (jb[i*2+1] >= 0) { + for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; + } + iMJ_ptr += 12; + J_ptr += 12; + Ad[i] = sor_w / (sum + cfm[i]); + } + + // scale J and b by Ad + J_ptr = J; + for (i=0; i<m; i++) { + for (j=0; j<12; j++) { + J_ptr[0] *= Ad[i]; + J_ptr++; + } + rhs[i] *= Ad[i]; + } + + // scale Ad by CFM + for (i=0; i<m; i++) Ad[i] *= cfm[i]; + + // order to solve constraint rows in + IndexError *order = (IndexError*) alloca (m*sizeof(IndexError)); + +#ifndef REORDER_CONSTRAINTS + // make sure constraints with findex < 0 come first. + j=0; + for (i=0; i<m; i++) if (findex[i] < 0) order[j++].index = i; + for (i=0; i<m; i++) if (findex[i] >= 0) order[j++].index = i; + dIASSERT (j==m); +#endif + + for (int iteration=0; iteration < num_iterations; iteration++) { + +#ifdef REORDER_CONSTRAINTS + // constraints with findex < 0 always come first. + if (iteration < 2) { + // for the first two iterations, solve the constraints in + // the given order + for (i=0; i<m; i++) { + order[i].error = i; + order[i].findex = findex[i]; + order[i].index = i; + } + } + else { + // sort the constraints so that the ones converging slowest + // get solved last. use the absolute (not relative) error. + for (i=0; i<m; i++) { + float v1 = dFabs (lambda[i]); + float v2 = dFabs (last_lambda[i]); + float max = (v1 > v2) ? v1 : v2; + if (max > 0) { + //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max; + order[i].error = dFabs(lambda[i]-last_lambda[i]); + } + else { + order[i].error = dInfinity; + } + order[i].findex = findex[i]; + order[i].index = i; + } + } + qsort (order,m,sizeof(IndexError),&compare_index_error); +#endif +#ifdef RANDOMLY_REORDER_CONSTRAINTS + if ((iteration & 7) == 0) { + for (i=1; i<m; ++i) { + IndexError tmp = order[i]; + int swapi = dRandInt2(i+1); + order[i] = order[swapi]; + order[swapi] = tmp; + } + } +#endif + + //@@@ potential optimization: swap lambda and last_lambda pointers rather + // than copying the data. we must make sure lambda is properly + // returned to the caller + memcpy (last_lambda,lambda,m*sizeof(float)); + + for (int i=0; i<m; i++) { + // @@@ potential optimization: we could pre-sort J and iMJ, thereby + // linearizing access to those arrays. hmmm, this does not seem + // like a win, but we should think carefully about our memory + // access pattern. + + int index = order[i].index; + J_ptr = J + index*12; + iMJ_ptr = iMJ + index*12; + + // set the limits for this constraint. note that 'hicopy' is used. + // this is the place where the QuickStep method differs from the + // direct LCP solving method, since that method only performs this + // limit adjustment once per time step, whereas this method performs + // once per iteration per constraint row. + // the constraints are ordered so that all lambda[] values needed have + // already been computed. + if (findex[index] >= 0) { + hi[index] = fabsf (hicopy[index] * lambda[findex[index]]); + lo[index] = -hi[index]; + } + + int b1 = jb[index*2]; + int b2 = jb[index*2+1]; + float delta = rhs[index] - lambda[index]*Ad[index]; + dRealMutablePtr fc_ptr = invMforce + 6*b1; + + // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case + delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] + + fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] + + fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5]; + // @@@ potential optimization: handle 1-body constraints in a separate + // loop to avoid the cost of test & jump? + if (b2 >= 0) { + fc_ptr = invMforce + 6*b2; + delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] + + fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] + + fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11]; + } + + // compute lambda and clamp it to [lo,hi]. + // @@@ potential optimization: does SSE have clamping instructions + // to save test+jump penalties here? + float new_lambda = lambda[index] + delta; + if (new_lambda < lo[index]) { + delta = lo[index]-lambda[index]; + lambda[index] = lo[index]; + } + else if (new_lambda > hi[index]) { + delta = hi[index]-lambda[index]; + lambda[index] = hi[index]; + } + else { + lambda[index] = new_lambda; + } + + //@@@ a trick that may or may not help + //float ramp = (1-((float)(iteration+1)/(float)num_iterations)); + //delta *= ramp; + + // update invMforce. + // @@@ potential optimization: SIMD for this and the b2 >= 0 case + fc_ptr = invMforce + 6*b1; + fc_ptr[0] += delta * iMJ_ptr[0]; + fc_ptr[1] += delta * iMJ_ptr[1]; + fc_ptr[2] += delta * iMJ_ptr[2]; + fc_ptr[3] += delta * iMJ_ptr[3]; + fc_ptr[4] += delta * iMJ_ptr[4]; + fc_ptr[5] += delta * iMJ_ptr[5]; + // @@@ potential optimization: handle 1-body constraints in a separate + // loop to avoid the cost of test & jump? + if (b2 >= 0) { + fc_ptr = invMforce + 6*b2; + fc_ptr[0] += delta * iMJ_ptr[6]; + fc_ptr[1] += delta * iMJ_ptr[7]; + fc_ptr[2] += delta * iMJ_ptr[8]; + fc_ptr[3] += delta * iMJ_ptr[9]; + fc_ptr[4] += delta * iMJ_ptr[10]; + fc_ptr[5] += delta * iMJ_ptr[11]; + } + } + } +} + + + +void SolveInternal1 (float global_cfm, + float global_erp, + RigidBody * const *body, int nb, + BU_Joint * const *_joint, + int nj, + const ContactSolverInfo& solverInfo) +{ + + int numIter = 30; + float sOr = 1.3f; + + int i,j; + + SimdScalar stepsize1 = dRecip(solverInfo.m_timeStep); + + // number all bodies in the body list - set their tag values + for (i=0; i<nb; i++) + body[i]->m_odeTag = i; + + // make a local copy of the joint array, because we might want to modify it. + // (the "BU_Joint *const*" declaration says we're allowed to modify the joints + // but not the joint array, because the caller might need it unchanged). + //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints + BU_Joint **joint = (BU_Joint**) alloca (nj * sizeof(BU_Joint*)); + memcpy (joint,_joint,nj * sizeof(BU_Joint*)); + + // for all bodies, compute the inertia tensor and its inverse in the global + // frame, and compute the rotational force and add it to the torque + // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body. + dRealAllocaArray (I,3*4*nb); + dRealAllocaArray (invI,3*4*nb); +/* for (i=0; i<nb; i++) { + dMatrix3 tmp; + // compute inertia tensor in global frame + dMULTIPLY2_333 (tmp,body[i]->m_I,body[i]->m_R); + // compute inverse inertia tensor in global frame + dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R); + dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp); + // compute rotational force + dCROSS (body[i]->m_tacc,-=,body[i]->getAngularVelocity(),tmp); + } +*/ + for (i=0; i<nb; i++) { + dMatrix3 tmp; + // compute inertia tensor in global frame + dMULTIPLY2_333 (tmp,body[i]->m_I,body[i]->m_R); + dMULTIPLY0_333 (I+i*12,body[i]->m_R,tmp); + // compute inverse inertia tensor in global frame + dMULTIPLY2_333 (tmp,body[i]->m_invI,body[i]->m_R); + dMULTIPLY0_333 (invI+i*12,body[i]->m_R,tmp); + // compute rotational force + dMULTIPLY0_331 (tmp,I+i*12,body[i]->getAngularVelocity()); + //dCROSS (body[i]->tacc,-=,body[i]->avel,tmp); + dCROSS (body[i]->m_tacc,-=,body[i]->getAngularVelocity(),tmp); + + } + + + // get joint information (m = total constraint dimension, nub = number of unbounded variables). + // joints with m=0 are inactive and are removed from the joints array + // entirely, so that the code that follows does not consider them. + //@@@ do we really need to save all the info1's + BU_Joint::Info1 *info = (BU_Joint::Info1*) alloca (nj*sizeof(BU_Joint::Info1)); + for (i=0, j=0; j<nj; j++) { // i=dest, j=src + joint[j]->GetInfo1 (info+i); + dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m); + if (info[i].m > 0) { + joint[i] = joint[j]; + i++; + } + } + nj = i; + + // create the row offset array + int m = 0; + int *ofs = (int*) alloca (nj*sizeof(int)); + for (i=0; i<nj; i++) { + ofs[i] = m; + m += info[i].m; + } + + // if there are constraints, compute the constraint force + dRealAllocaArray (J,m*12); + int *jb = (int*) alloca (m*2*sizeof(int)); + if (m > 0) { + // create a constraint equation right hand side vector `c', a constraint + // force mixing vector `cfm', and LCP low and high bound vectors, and an + // 'findex' vector. + dRealAllocaArray (c,m); + dRealAllocaArray (cfm,m); + dRealAllocaArray (lo,m); + dRealAllocaArray (hi,m); + int *findex = (int*) alloca (m*sizeof(int)); + dSetZero1 (c,m); + dSetValue1 (cfm,m,global_cfm); + dSetValue1 (lo,m,-dInfinity); + dSetValue1 (hi,m, dInfinity); + for (i=0; i<m; i++) findex[i] = -1; + + // get jacobian data from constraints. an m*12 matrix will be created + // to store the two jacobian blocks from each constraint. it has this + // format: + // + // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 \ . + // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }-- jacobian for joint 0, body 1 and body 2 (3 rows) + // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 / + // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }--- jacobian for joint 1, body 1 and body 2 (3 rows) + // etc... + // + // (lll) = linear jacobian data + // (aaa) = angular jacobian data + // + dSetZero1 (J,m*12); + BU_Joint::Info2 Jinfo; + Jinfo.rowskip = 12; + Jinfo.fps = stepsize1; + Jinfo.erp = global_erp; + for (i=0; i<nj; i++) { + Jinfo.J1l = J + ofs[i]*12; + Jinfo.J1a = Jinfo.J1l + 3; + Jinfo.J2l = Jinfo.J1l + 6; + Jinfo.J2a = Jinfo.J1l + 9; + Jinfo.c = c + ofs[i]; + Jinfo.cfm = cfm + ofs[i]; + Jinfo.lo = lo + ofs[i]; + Jinfo.hi = hi + ofs[i]; + Jinfo.findex = findex + ofs[i]; + joint[i]->GetInfo2 (&Jinfo); + + if (Jinfo.c[0] > solverInfo.m_maxErrorReduction) + Jinfo.c[0] = solverInfo.m_maxErrorReduction; + + + + + // adjust returned findex values for global index numbering + for (j=0; j<info[i].m; j++) { + if (findex[ofs[i] + j] >= 0) + findex[ofs[i] + j] += ofs[i]; + } + } + + // create an array of body numbers for each joint row + int *jb_ptr = jb; + for (i=0; i<nj; i++) { + int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->m_odeTag) : -1; + int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->m_odeTag) : -1; + for (j=0; j<info[i].m; j++) { + jb_ptr[0] = b1; + jb_ptr[1] = b2; + jb_ptr += 2; + } + } + dIASSERT (jb_ptr == jb+2*m); + + // compute the right hand side `rhs' + dRealAllocaArray (tmp1,nb*6); + // put v/h + invM*fe into tmp1 + for (i=0; i<nb; i++) { + SimdScalar body_invMass = body[i]->getInvMass(); + for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->m_facc[j] * body_invMass + body[i]->getLinearVelocity()[j] * stepsize1; + dMULTIPLY0_331NEW (tmp1 + i*6 + 3,=,invI + i*12,body[i]->m_tacc); + for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->getAngularVelocity()[j] * stepsize1; + } + + // put J*tmp1 into rhs + dRealAllocaArray (rhs,m); + multiply_J (m,J,jb,tmp1,rhs); + + // complete rhs + for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i]; + + // scale CFM + for (i=0; i<m; i++) + cfm[i] =0;//*= stepsize1; + + // load lambda from the value saved on the previous iteration + dRealAllocaArray (lambda,m); +#ifdef WARM_STARTING + dSetZero1 (lambda,m); //@@@ shouldn't be necessary + for (i=0; i<nj; i++) { + memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(SimdScalar)); + } +#endif + + // solve the LCP problem and get lambda and invM*constraint_force + dRealAllocaArray (cforce,nb*6); + + SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,numIter,sOr); + +#ifdef WARM_STARTING + // save lambda for the next iteration + //@@@ note that this doesn't work for contact joints yet, as they are + // recreated every iteration + for (i=0; i<nj; i++) { + memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(SimdScalar)); + } +#endif + + + // note that the SOR method overwrites rhs and J at this point, so + // they should not be used again. + + // add stepsize * cforce to the body velocity + for (i=0; i<nb; i++) { + SimdVector3 linvel = body[i]->getLinearVelocity(); + SimdVector3 angvel = body[i]->getAngularVelocity(); + + for (j=0; j<3; j++) + linvel[j] += solverInfo.m_timeStep* cforce[i*6+j]; + for (j=0; j<3; j++) + angvel[j] += solverInfo.m_timeStep* cforce[i*6+3+j]; + + body[i]->setLinearVelocity(linvel); + body[i]->setAngularVelocity(angvel); + + } + + } + + + + // compute the velocity update: + // add stepsize * invM * fe to the body velocity + + for (i=0; i<nb; i++) { + SimdScalar body_invMass = body[i]->getInvMass(); + SimdVector3 linvel = body[i]->getLinearVelocity(); + SimdVector3 angvel = body[i]->getAngularVelocity(); + + for (j=0; j<3; j++) + { + linvel[j] += solverInfo.m_timeStep * body_invMass * body[i]->m_facc[j]; + } + for (j=0; j<3; j++) + { + body[i]->m_tacc[j] *= solverInfo.m_timeStep; + } + dMULTIPLY0_331NEW(angvel,+=,invI + i*12,body[i]->m_tacc); + body[i]->setAngularVelocity(angvel); + + } + + + +} + + +#endif //USE_SOR_SOLVER
\ No newline at end of file |