/************************************************************************* * * * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * * All rights reserved. Email: russ@q12.org Web: www.q12.org * * * * This library is free software; you can redistribute it and/or * * modify it under the terms of EITHER: * * (1) The GNU Lesser General Public License as published by the Free * * Software Foundation; either version 2.1 of the License, or (at * * your option) any later version. The text of the GNU Lesser * * General Public License is included with this library in the * * file LICENSE.TXT. * * (2) The BSD-style license that is included with this library in * * the file LICENSE-BSD.TXT. * * * * This library is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * * LICENSE.TXT and LICENSE-BSD.TXT for more details. * * * *************************************************************************/ /* THE ALGORITHM ------------- solve A*x = b+w, with x and w subject to certain LCP conditions. each x(i),w(i) must lie on one of the three line segments in the following diagram. each line segment corresponds to one index set : w(i) /|\ | : | | : | |i in N : w>0 | |state[i]=0 : | | : | | : i in C w=0 + +-----------------------+ | : | | : | w<0 | : |i in N | : |state[i]=1 | : | | : | +-------|-----------|-----------|----------> x(i) lo 0 hi the Dantzig algorithm proceeds as follows: for i=1:n * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or negative towards the line. as this is done, the other (x(j),w(j)) for j= 0. this makes the algorithm a bit simpler, because the starting point for x(i),w(i) is always on the dotted line x=0 and x will only ever increase in one direction, so it can only hit two out of the three line segments. NOTES ----- this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m". the implementation is split into an LCP problem object (dLCP) and an LCP driver function. most optimization occurs in the dLCP object. a naive implementation of the algorithm requires either a lot of data motion or a lot of permutation-array lookup, because we are constantly re-ordering rows and columns. to avoid this and make a more optimized algorithm, a non-trivial data structure is used to represent the matrix A (this is implemented in the fast version of the dLCP object). during execution of this algorithm, some indexes in A are clamped (set C), some are non-clamped (set N), and some are "don't care" (where x=0). A,x,b,w (and other problem vectors) are permuted such that the clamped indexes are first, the unclamped indexes are next, and the don't-care indexes are last. this permutation is recorded in the array `p'. initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped, the corresponding elements of p are swapped. because the C and N elements are grouped together in the rows of A, we can do lots of work with a fast dot product function. if A,x,etc were not permuted and we only had a permutation array, then those dot products would be much slower as we would have a permutation array lookup in some inner loops. A is accessed through an array of row pointers, so that element (i,j) of the permuted matrix is A[i][j]. this makes row swapping fast. for column swapping we still have to actually move the data. during execution of this algorithm we maintain an L*D*L' factorization of the clamped submatrix of A (call it `AC') which is the top left nC*nC submatrix of A. there are two ways we could arrange the rows/columns in AC. (1) AC is always permuted such that L*D*L' = AC. this causes a problem when a row/column is removed from C, because then all the rows/columns of A between the deleted index and the end of C need to be rotated downward. this results in a lot of data motion and slows things down. (2) L*D*L' is actually a factorization of a *permutation* of AC (which is itself a permutation of the underlying A). this is what we do - the permutation is recorded in the vector C. call this permutation A[C,C]. when a row/column is removed from C, all we have to do is swap two rows/columns and manipulate C. */ #include "dart/external/odelcpsolver/odeconfig.h" #include "dart/external/odelcpsolver/lcp.h" #include "dart/external/odelcpsolver/matrix.h" #include "dart/external/odelcpsolver/misc.h" //*************************************************************************** // code generation parameters // LCP debugging (mostly for fast dLCP) - this slows things down a lot //#define DEBUG_LCP #define dLCP_FAST // use fast dLCP object // option 1 : matrix row pointers (less data copying) #define ROWPTRS #define ATYPE dReal ** #define AROW(i) (m_A[i]) // option 2 : no matrix row pointers (slightly faster inner loops) //#define NOROWPTRS //#define ATYPE dReal * //#define AROW(i) (m_A+(i)*m_nskip) #define NUB_OPTIMIZATIONS //*************************************************************************** // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of // A is nskip. this only references and swaps the lower triangle. // if `do_fast_row_swaps' is nonzero and row pointers are being used, then // rows will be swapped by exchanging row pointers. otherwise the data will // be copied. static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int nskip, int do_fast_row_swaps) { dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 < i2); # ifdef ROWPTRS dReal *A_i1 = A[i1]; dReal *A_i2 = A[i2]; for (int i=i1+1; i0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2); if (i1==i2) return; swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps); tmpr = x[i1]; x[i1] = x[i2]; x[i2] = tmpr; tmpr = b[i1]; b[i1] = b[i2]; b[i2] = tmpr; tmpr = w[i1]; w[i1] = w[i2]; w[i2] = tmpr; tmpr = lo[i1]; lo[i1] = lo[i2]; lo[i2] = tmpr; tmpr = hi[i1]; hi[i1] = hi[i2]; hi[i2] = tmpr; tmpi = p[i1]; p[i1] = p[i2]; p[i2] = tmpi; tmpb = state[i1]; state[i1] = state[i2]; state[i2] = tmpb; if (findex) { tmpi = findex[i1]; findex[i1] = findex[i2]; findex[i2] = tmpi; } } // for debugging - check that L,d is the factorization of A[C,C]. // A[C,C] has size nC*nC and leading dimension nskip. // L has size nC*nC and leading dimension nskip. // d has size nC. #ifdef DEBUG_LCP static void checkFactorization (ATYPE A, dReal *_L, dReal *_d, int nC, int *C, int nskip) { int i,j; if (nC==0) return; // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C] dMatrix A1 (nC,nC); for (i=0; i 1e-8) dDebug (0,"L*D*L' check, maximum difference = %.6e\n",diff); } #endif // for debugging #ifdef DEBUG_LCP static void checkPermutations (int i, int n, int nC, int nN, int *p, int *C) { int j,k; dIASSERT (nC>=0 && nN>=0 && (nC+nN)==i && i < n); for (k=0; k= 0 && p[k] < i); for (k=i; k nub { const int n = m_n; const int nub = m_nub; if (nub < n) { for (int k=0; k<100; k++) { int i1,i2; do { i1 = dRandInt(n-nub)+nub; i2 = dRandInt(n-nub)+nub; } while (i1 > i2); //printf ("--> %d %d\n",i1,i2); swapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0); } } */ // permute the problem so that *all* the unbounded variables are at the // start, i.e. look for unbounded variables not included in `nub'. we can // potentially push up `nub' this way and get a bigger initial factorization. // note that when we swap rows/cols here we must not just swap row pointers, // as the initial factorization relies on the data being all in one chunk. // variables that have findex >= 0 are *not* considered to be unbounded even // if lo=-inf and hi=inf - this is because these limits may change during the // solution process. { int *findex = m_findex; dReal *lo = m_lo, *hi = m_hi; const int n = m_n; for (int k = m_nub; k= 0) continue; if (lo[k]==-dInfinity && hi[k]==dInfinity) { swapProblem (m_A,m_x,m_b,m_w,lo,hi,m_p,m_state,findex,n,m_nub,k,m_nskip,0); m_nub++; } } } // if there are unbounded variables at the start, factorize A up to that // point and solve for x. this puts all indexes 0..nub-1 into C. if (m_nub > 0) { const int nub = m_nub; { dReal *Lrow = m_L; const int nskip = m_nskip; for (int j=0; j nub such that all findex variables are at the end if (m_findex) { const int nub = m_nub; int *findex = m_findex; int num_at_end = 0; for (int k=m_n-1; k >= nub; k--) { if (findex[k] >= 0) { swapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,findex,m_n,k,m_n-1-num_at_end,m_nskip,1); num_at_end++; } } } // print info about indexes /* { const int n = m_n; const int nub = m_nub; for (int k=0; k 0) { // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C)) { const int nC = m_nC; dReal *const Ltgt = m_L + nC*m_nskip, *ell = m_ell; for (int j=0; j 0) { { dReal *const aptr = AROW(i); dReal *Dell = m_Dell; const int *C = m_C; # ifdef NUB_OPTIMIZATIONS // if nub>0, initial part of aptr unpermuted const int nub = m_nub; int j=0; for ( ; j 0) { const int nN = m_nN; for (int j=0; j 0) { { dReal *Dell = m_Dell; int *C = m_C; dReal *aptr = AROW(i); # ifdef NUB_OPTIMIZATIONS // if nub>0, initial part of aptr[] is guaranteed unpermuted const int nub = m_nub; int j=0; for ( ; j 0) { int *C = m_C; dReal *tmp = m_tmp; const int nC = m_nC; for (int j=0; j0 && A && x && b && lo && hi && nub >= 0 && nub <= n); # ifndef dNODEBUG { // check restrictions on lo and hi for (int k=0; k= 0); } # endif // if all the variables are unbounded then we can just factor, solve, // and return if (nub >= n) { dReal *d = new dReal[n]; dSetZero (d, n); int nskip = dPAD(n); dFactorLDLT (A, d, n, nskip); dSolveLDLT (A, d, b, n, nskip); memcpy (x, b, n*sizeof(dReal)); return; } const int nskip = dPAD(n); dReal *L = new dReal[ (n*nskip)]; dReal *d = new dReal[ (n)]; dReal *w = outer_w ? outer_w : (new dReal[n]); dReal *delta_w = new dReal[ (n)]; dReal *delta_x = new dReal[ (n)]; dReal *Dell = new dReal[ (n)]; dReal *ell = new dReal[ (n)]; #ifdef ROWPTRS dReal **Arows = new dReal* [n]; #else dReal **Arows = nullptr; #endif int *p = new int[n]; int *C = new int[n]; // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i) bool *state = new bool[n]; // create LCP object. note that tmp is set to delta_w to save space, this // optimization relies on knowledge of how tmp is used, so be careful! dLCP lcp(n,nskip,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows); int adj_nub = lcp.getNub(); // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the // LCP conditions then i is added to the appropriate index set. otherwise // x(i),w(i) is driven either +ve or -ve to force it to the valid region. // as we drive x(i), x(C) is also adjusted to keep w(C) at zero. // while driving x(i) we maintain the LCP conditions on the other variables // 0..i-1. we do this by watching out for other x(i),w(i) values going // outside the valid region, and then switching them between index sets // when that happens. bool hit_first_friction_index = false; for (int i=adj_nub; i= 0) { // un-permute x into delta_w, which is not being used at the moment for (int j=0; j= 0) { lcp.transfer_i_to_N2 (i); state[i] = false; } else if (hi[i]==0 && w[i] <= 0) { lcp.transfer_i_to_N2 (i); state[i] = true; } else if (w[i]==0) { // this is a degenerate case. by the time we get to this test we know // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve, // and similarly that hi > 0. this means that the line segment // corresponding to set C is at least finite in extent, and we are on it. // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C() lcp.solve1 (delta_x,i,0,1); lcp.transfer_i_to_C2 (i); } else { // we must push x(i) and w(i) for (;;) { int dir; dReal dirf; // find direction to push on x(i) if (w[i] <= 0) { dir = 1; dirf = REAL(1.0); } else { dir = -1; dirf = REAL(-1.0); } // compute: delta_x(C) = -dir*A(C,C)\A(C,i) lcp.solve1 (delta_x,i,dir); // note that delta_x[i] = dirf, but we wont bother to set it // compute: delta_w = A*delta_x ... note we only care about // delta_w(N) and delta_w(i), the rest is ignored lcp.pN_equals_ANC_times_qC (delta_w,delta_x); lcp.pN_plusequals_ANi (delta_w,i,dir); delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i)*dirf; // find largest step we can take (size=s), either to drive x(i),w(i) // to the valid LCP region or to drive an already-valid variable // outside the valid region. int cmd = 1; // index switching command int si = 0; // si = index to switch if cmd>3 dReal s = -w[i]/delta_w[i]; if (dir > 0) { if (hi[i] < dInfinity) { dReal s2 = (hi[i]-x[i])*dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i) if (s2 < s) { s = s2; cmd = 3; } } } else { if (lo[i] > -dInfinity) { dReal s2 = (lo[i]-x[i])*dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i) if (s2 < s) { s = s2; cmd = 2; } } } { const int numN = lcp.numN(); for (int k=0; k < numN; ++k) { const int indexN_k = lcp.indexN(k); if (!state[indexN_k] ? delta_w[indexN_k] < 0 : delta_w[indexN_k] > 0) { // don't bother checking if lo=hi=0 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue; dReal s2 = -w[indexN_k] / delta_w[indexN_k]; if (s2 < s) { s = s2; cmd = 4; si = indexN_k; } } } } { const int numC = lcp.numC(); for (int k=adj_nub; k < numC; ++k) { const int indexC_k = lcp.indexC(k); if (delta_x[indexC_k] < 0 && lo[indexC_k] > -dInfinity) { dReal s2 = (lo[indexC_k]-x[indexC_k]) / delta_x[indexC_k]; if (s2 < s) { s = s2; cmd = 5; si = indexC_k; } } if (delta_x[indexC_k] > 0 && hi[indexC_k] < dInfinity) { dReal s2 = (hi[indexC_k]-x[indexC_k]) / delta_x[indexC_k]; if (s2 < s) { s = s2; cmd = 6; si = indexC_k; } } } } //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C", // "C->NL","C->NH"}; //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i); // if s <= 0 then we've got a problem. if we just keep going then // we're going to get stuck in an infinite loop. instead, just cross // our fingers and exit with the current solution. if (s <= REAL(0.0)) { //dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",(double)s); if (i < n) { dSetZero (x+i,n-i); dSetZero (w+i,n-i); } s_error = true; break; } // apply x = x + s * delta_x lcp.pC_plusequals_s_times_qC (x, s, delta_x); x[i] += s * dirf; // apply w = w + s * delta_w lcp.pN_plusequals_s_times_qN (w, s, delta_w); w[i] += s * delta_w[i]; // void *tmpbuf; // switch indexes between sets if necessary switch (cmd) { case 1: // done w[i] = 0; lcp.transfer_i_to_C2 (i); break; case 2: // done x[i] = lo[i]; state[i] = false; lcp.transfer_i_to_N2 (i); break; case 3: // done x[i] = hi[i]; state[i] = true; lcp.transfer_i_to_N2 (i); break; case 4: // keep going w[si] = 0; lcp.transfer_i_from_N_to_C2 (si); break; case 5: // keep going x[si] = lo[si]; state[si] = false; lcp.transfer_i_from_C_to_N2 (si, nullptr); break; case 6: // keep going x[si] = hi[si]; state[si] = true; lcp.transfer_i_from_C_to_N2 (si, nullptr); break; } if (cmd <= 3) break; } // for (;;) } // else if (s_error) { break; } } // for (int i=adj_nub; i tol ? "FAILED" : "passed"); if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff); int n1=0,n2=0,n3=0; for (i=0; i= 0) { n1++; // ok } else if (x[i]==hi[i] && w[i] <= 0) { n2++; // ok } else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) { n3++; // ok } else { dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i, x[i],w[i],lo[i],hi[i]); } } // pacifier printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3); } delete[] A; delete[] x; delete[] b; delete[] w; delete[] lo ; delete[] hi ; delete[] A2 ; delete[] b2 ; delete[] lo2; delete[] hi2; delete[] tmp1; delete[] tmp2; return 1; }