1 /************************************************************************* 2 * * 3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * 4 * All rights reserved. Email: russ (at) q12.org Web: www.q12.org * 5 * * 6 * This library is free software; you can redistribute it and/or * 7 * modify it under the terms of EITHER: * 8 * (1) The GNU Lesser General Public License as published by the Free * 9 * Software Foundation; either version 2.1 of the License, or (at * 10 * your option) any later version. The text of the GNU Lesser * 11 * General Public License is included with this library in the * 12 * file LICENSE.TXT. * 13 * (2) The BSD-style license that is included with this library in * 14 * the file LICENSE-BSD.TXT. * 15 * * 16 * This library is distributed in the hope that it will be useful, * 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * 19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. * 20 * * 21 *************************************************************************/ 22 23 /* 24 25 26 THE ALGORITHM 27 ------------- 28 29 solve A*x = b+w, with x and w subject to certain LCP conditions. 30 each x(i),w(i) must lie on one of the three line segments in the following 31 diagram. each line segment corresponds to one index set : 32 33 w(i) 34 /|\ | : 35 | | : 36 | |i in N : 37 w>0 | |state[i]=0 : 38 | | : 39 | | : i in C 40 w=0 + +-----------------------+ 41 | : | 42 | : | 43 w<0 | : |i in N 44 | : |state[i]=1 45 | : | 46 | : | 47 +-------|-----------|-----------|----------> x(i) 48 lo 0 hi 49 50 the Dantzig algorithm proceeds as follows: 51 for i=1:n 52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or 53 negative towards the line. as this is done, the other (x(j),w(j)) 54 for j<i are constrained to be on the line. if any (x,w) reaches the 55 end of a line segment then it is switched between index sets. 56 * i is added to the appropriate index set depending on what line segment 57 it hits. 58 59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit 60 simpler, because the starting point for x(i),w(i) is always on the dotted 61 line x=0 and x will only ever increase in one direction, so it can only hit 62 two out of the three line segments. 63 64 65 NOTES 66 ----- 67 68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m". 69 the implementation is split into an LCP problem object (btLCP) and an LCP 70 driver function. most optimization occurs in the btLCP object. 71 72 a naive implementation of the algorithm requires either a lot of data motion 73 or a lot of permutation-array lookup, because we are constantly re-ordering 74 rows and columns. to avoid this and make a more optimized algorithm, a 75 non-trivial data structure is used to represent the matrix A (this is 76 implemented in the fast version of the btLCP object). 77 78 during execution of this algorithm, some indexes in A are clamped (set C), 79 some are non-clamped (set N), and some are "don't care" (where x=0). 80 A,x,b,w (and other problem vectors) are permuted such that the clamped 81 indexes are first, the unclamped indexes are next, and the don't-care 82 indexes are last. this permutation is recorded in the array `p'. 83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped, 84 the corresponding elements of p are swapped. 85 86 because the C and N elements are grouped together in the rows of A, we can do 87 lots of work with a fast dot product function. if A,x,etc were not permuted 88 and we only had a permutation array, then those dot products would be much 89 slower as we would have a permutation array lookup in some inner loops. 90 91 A is accessed through an array of row pointers, so that element (i,j) of the 92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping 93 we still have to actually move the data. 94 95 during execution of this algorithm we maintain an L*D*L' factorization of 96 the clamped submatrix of A (call it `AC') which is the top left nC*nC 97 submatrix of A. there are two ways we could arrange the rows/columns in AC. 98 99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem 100 when a row/column is removed from C, because then all the rows/columns of A 101 between the deleted index and the end of C need to be rotated downward. 102 this results in a lot of data motion and slows things down. 103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is 104 itself a permutation of the underlying A). this is what we do - the 105 permutation is recorded in the vector C. call this permutation A[C,C]. 106 when a row/column is removed from C, all we have to do is swap two 107 rows/columns and manipulate C. 108 109 */ 110 111 112 #include "btDantzigLCP.h" 113 114 #include <string.h>//memcpy 115 116 bool s_error = false; 117 118 //*************************************************************************** 119 // code generation parameters 120 121 122 #define btLCP_FAST // use fast btLCP object 123 124 // option 1 : matrix row pointers (less data copying) 125 #define BTROWPTRS 126 #define BTATYPE btScalar ** 127 #define BTAROW(i) (m_A[i]) 128 129 // option 2 : no matrix row pointers (slightly faster inner loops) 130 //#define NOROWPTRS 131 //#define BTATYPE btScalar * 132 //#define BTAROW(i) (m_A+(i)*m_nskip) 133 134 #define BTNUB_OPTIMIZATIONS 135 136 137 138 /* solve L*X=B, with B containing 1 right hand sides. 139 * L is an n*n lower triangular matrix with ones on the diagonal. 140 * L is stored by rows and its leading dimension is lskip. 141 * B is an n*1 matrix that contains the right hand sides. 142 * B is stored by columns and its leading dimension is also lskip. 143 * B is overwritten with X. 144 * this processes blocks of 2*2. 145 * if this is in the factorizer source file, n must be a multiple of 2. 146 */ 147 148 static void btSolveL1_1 (const btScalar *L, btScalar *B, int n, int lskip1) 149 { 150 /* declare variables - Z matrix, p and q vectors, etc */ 151 btScalar Z11,m11,Z21,m21,p1,q1,p2,*ex; 152 const btScalar *ell; 153 int i,j; 154 /* compute all 2 x 1 blocks of X */ 155 for (i=0; i < n; i+=2) { 156 /* compute all 2 x 1 block of X, from rows i..i+2-1 */ 157 /* set the Z matrix to 0 */ 158 Z11=0; 159 Z21=0; 160 ell = L + i*lskip1; 161 ex = B; 162 /* the inner loop that computes outer products and adds them to Z */ 163 for (j=i-2; j >= 0; j -= 2) { 164 /* compute outer product and add it to the Z matrix */ 165 p1=ell[0]; 166 q1=ex[0]; 167 m11 = p1 * q1; 168 p2=ell[lskip1]; 169 m21 = p2 * q1; 170 Z11 += m11; 171 Z21 += m21; 172 /* compute outer product and add it to the Z matrix */ 173 p1=ell[1]; 174 q1=ex[1]; 175 m11 = p1 * q1; 176 p2=ell[1+lskip1]; 177 m21 = p2 * q1; 178 /* advance pointers */ 179 ell += 2; 180 ex += 2; 181 Z11 += m11; 182 Z21 += m21; 183 /* end of inner loop */ 184 } 185 /* compute left-over iterations */ 186 j += 2; 187 for (; j > 0; j--) { 188 /* compute outer product and add it to the Z matrix */ 189 p1=ell[0]; 190 q1=ex[0]; 191 m11 = p1 * q1; 192 p2=ell[lskip1]; 193 m21 = p2 * q1; 194 /* advance pointers */ 195 ell += 1; 196 ex += 1; 197 Z11 += m11; 198 Z21 += m21; 199 } 200 /* finish computing the X(i) block */ 201 Z11 = ex[0] - Z11; 202 ex[0] = Z11; 203 p1 = ell[lskip1]; 204 Z21 = ex[1] - Z21 - p1*Z11; 205 ex[1] = Z21; 206 /* end of outer loop */ 207 } 208 } 209 210 /* solve L*X=B, with B containing 2 right hand sides. 211 * L is an n*n lower triangular matrix with ones on the diagonal. 212 * L is stored by rows and its leading dimension is lskip. 213 * B is an n*2 matrix that contains the right hand sides. 214 * B is stored by columns and its leading dimension is also lskip. 215 * B is overwritten with X. 216 * this processes blocks of 2*2. 217 * if this is in the factorizer source file, n must be a multiple of 2. 218 */ 219 220 static void btSolveL1_2 (const btScalar *L, btScalar *B, int n, int lskip1) 221 { 222 /* declare variables - Z matrix, p and q vectors, etc */ 223 btScalar Z11,m11,Z12,m12,Z21,m21,Z22,m22,p1,q1,p2,q2,*ex; 224 const btScalar *ell; 225 int i,j; 226 /* compute all 2 x 2 blocks of X */ 227 for (i=0; i < n; i+=2) { 228 /* compute all 2 x 2 block of X, from rows i..i+2-1 */ 229 /* set the Z matrix to 0 */ 230 Z11=0; 231 Z12=0; 232 Z21=0; 233 Z22=0; 234 ell = L + i*lskip1; 235 ex = B; 236 /* the inner loop that computes outer products and adds them to Z */ 237 for (j=i-2; j >= 0; j -= 2) { 238 /* compute outer product and add it to the Z matrix */ 239 p1=ell[0]; 240 q1=ex[0]; 241 m11 = p1 * q1; 242 q2=ex[lskip1]; 243 m12 = p1 * q2; 244 p2=ell[lskip1]; 245 m21 = p2 * q1; 246 m22 = p2 * q2; 247 Z11 += m11; 248 Z12 += m12; 249 Z21 += m21; 250 Z22 += m22; 251 /* compute outer product and add it to the Z matrix */ 252 p1=ell[1]; 253 q1=ex[1]; 254 m11 = p1 * q1; 255 q2=ex[1+lskip1]; 256 m12 = p1 * q2; 257 p2=ell[1+lskip1]; 258 m21 = p2 * q1; 259 m22 = p2 * q2; 260 /* advance pointers */ 261 ell += 2; 262 ex += 2; 263 Z11 += m11; 264 Z12 += m12; 265 Z21 += m21; 266 Z22 += m22; 267 /* end of inner loop */ 268 } 269 /* compute left-over iterations */ 270 j += 2; 271 for (; j > 0; j--) { 272 /* compute outer product and add it to the Z matrix */ 273 p1=ell[0]; 274 q1=ex[0]; 275 m11 = p1 * q1; 276 q2=ex[lskip1]; 277 m12 = p1 * q2; 278 p2=ell[lskip1]; 279 m21 = p2 * q1; 280 m22 = p2 * q2; 281 /* advance pointers */ 282 ell += 1; 283 ex += 1; 284 Z11 += m11; 285 Z12 += m12; 286 Z21 += m21; 287 Z22 += m22; 288 } 289 /* finish computing the X(i) block */ 290 Z11 = ex[0] - Z11; 291 ex[0] = Z11; 292 Z12 = ex[lskip1] - Z12; 293 ex[lskip1] = Z12; 294 p1 = ell[lskip1]; 295 Z21 = ex[1] - Z21 - p1*Z11; 296 ex[1] = Z21; 297 Z22 = ex[1+lskip1] - Z22 - p1*Z12; 298 ex[1+lskip1] = Z22; 299 /* end of outer loop */ 300 } 301 } 302 303 304 void btFactorLDLT (btScalar *A, btScalar *d, int n, int nskip1) 305 { 306 int i,j; 307 btScalar sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22; 308 if (n < 1) return; 309 310 for (i=0; i<=n-2; i += 2) { 311 /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */ 312 btSolveL1_2 (A,A+i*nskip1,i,nskip1); 313 /* scale the elements in a 2 x i block at A(i,0), and also */ 314 /* compute Z = the outer product matrix that we'll need. */ 315 Z11 = 0; 316 Z21 = 0; 317 Z22 = 0; 318 ell = A+i*nskip1; 319 dee = d; 320 for (j=i-6; j >= 0; j -= 6) { 321 p1 = ell[0]; 322 p2 = ell[nskip1]; 323 dd = dee[0]; 324 q1 = p1*dd; 325 q2 = p2*dd; 326 ell[0] = q1; 327 ell[nskip1] = q2; 328 m11 = p1*q1; 329 m21 = p2*q1; 330 m22 = p2*q2; 331 Z11 += m11; 332 Z21 += m21; 333 Z22 += m22; 334 p1 = ell[1]; 335 p2 = ell[1+nskip1]; 336 dd = dee[1]; 337 q1 = p1*dd; 338 q2 = p2*dd; 339 ell[1] = q1; 340 ell[1+nskip1] = q2; 341 m11 = p1*q1; 342 m21 = p2*q1; 343 m22 = p2*q2; 344 Z11 += m11; 345 Z21 += m21; 346 Z22 += m22; 347 p1 = ell[2]; 348 p2 = ell[2+nskip1]; 349 dd = dee[2]; 350 q1 = p1*dd; 351 q2 = p2*dd; 352 ell[2] = q1; 353 ell[2+nskip1] = q2; 354 m11 = p1*q1; 355 m21 = p2*q1; 356 m22 = p2*q2; 357 Z11 += m11; 358 Z21 += m21; 359 Z22 += m22; 360 p1 = ell[3]; 361 p2 = ell[3+nskip1]; 362 dd = dee[3]; 363 q1 = p1*dd; 364 q2 = p2*dd; 365 ell[3] = q1; 366 ell[3+nskip1] = q2; 367 m11 = p1*q1; 368 m21 = p2*q1; 369 m22 = p2*q2; 370 Z11 += m11; 371 Z21 += m21; 372 Z22 += m22; 373 p1 = ell[4]; 374 p2 = ell[4+nskip1]; 375 dd = dee[4]; 376 q1 = p1*dd; 377 q2 = p2*dd; 378 ell[4] = q1; 379 ell[4+nskip1] = q2; 380 m11 = p1*q1; 381 m21 = p2*q1; 382 m22 = p2*q2; 383 Z11 += m11; 384 Z21 += m21; 385 Z22 += m22; 386 p1 = ell[5]; 387 p2 = ell[5+nskip1]; 388 dd = dee[5]; 389 q1 = p1*dd; 390 q2 = p2*dd; 391 ell[5] = q1; 392 ell[5+nskip1] = q2; 393 m11 = p1*q1; 394 m21 = p2*q1; 395 m22 = p2*q2; 396 Z11 += m11; 397 Z21 += m21; 398 Z22 += m22; 399 ell += 6; 400 dee += 6; 401 } 402 /* compute left-over iterations */ 403 j += 6; 404 for (; j > 0; j--) { 405 p1 = ell[0]; 406 p2 = ell[nskip1]; 407 dd = dee[0]; 408 q1 = p1*dd; 409 q2 = p2*dd; 410 ell[0] = q1; 411 ell[nskip1] = q2; 412 m11 = p1*q1; 413 m21 = p2*q1; 414 m22 = p2*q2; 415 Z11 += m11; 416 Z21 += m21; 417 Z22 += m22; 418 ell++; 419 dee++; 420 } 421 /* solve for diagonal 2 x 2 block at A(i,i) */ 422 Z11 = ell[0] - Z11; 423 Z21 = ell[nskip1] - Z21; 424 Z22 = ell[1+nskip1] - Z22; 425 dee = d + i; 426 /* factorize 2 x 2 block Z,dee */ 427 /* factorize row 1 */ 428 dee[0] = btRecip(Z11); 429 /* factorize row 2 */ 430 sum = 0; 431 q1 = Z21; 432 q2 = q1 * dee[0]; 433 Z21 = q2; 434 sum += q1*q2; 435 dee[1] = btRecip(Z22 - sum); 436 /* done factorizing 2 x 2 block */ 437 ell[nskip1] = Z21; 438 } 439 /* compute the (less than 2) rows at the bottom */ 440 switch (n-i) { 441 case 0: 442 break; 443 444 case 1: 445 btSolveL1_1 (A,A+i*nskip1,i,nskip1); 446 /* scale the elements in a 1 x i block at A(i,0), and also */ 447 /* compute Z = the outer product matrix that we'll need. */ 448 Z11 = 0; 449 ell = A+i*nskip1; 450 dee = d; 451 for (j=i-6; j >= 0; j -= 6) { 452 p1 = ell[0]; 453 dd = dee[0]; 454 q1 = p1*dd; 455 ell[0] = q1; 456 m11 = p1*q1; 457 Z11 += m11; 458 p1 = ell[1]; 459 dd = dee[1]; 460 q1 = p1*dd; 461 ell[1] = q1; 462 m11 = p1*q1; 463 Z11 += m11; 464 p1 = ell[2]; 465 dd = dee[2]; 466 q1 = p1*dd; 467 ell[2] = q1; 468 m11 = p1*q1; 469 Z11 += m11; 470 p1 = ell[3]; 471 dd = dee[3]; 472 q1 = p1*dd; 473 ell[3] = q1; 474 m11 = p1*q1; 475 Z11 += m11; 476 p1 = ell[4]; 477 dd = dee[4]; 478 q1 = p1*dd; 479 ell[4] = q1; 480 m11 = p1*q1; 481 Z11 += m11; 482 p1 = ell[5]; 483 dd = dee[5]; 484 q1 = p1*dd; 485 ell[5] = q1; 486 m11 = p1*q1; 487 Z11 += m11; 488 ell += 6; 489 dee += 6; 490 } 491 /* compute left-over iterations */ 492 j += 6; 493 for (; j > 0; j--) { 494 p1 = ell[0]; 495 dd = dee[0]; 496 q1 = p1*dd; 497 ell[0] = q1; 498 m11 = p1*q1; 499 Z11 += m11; 500 ell++; 501 dee++; 502 } 503 /* solve for diagonal 1 x 1 block at A(i,i) */ 504 Z11 = ell[0] - Z11; 505 dee = d + i; 506 /* factorize 1 x 1 block Z,dee */ 507 /* factorize row 1 */ 508 dee[0] = btRecip(Z11); 509 /* done factorizing 1 x 1 block */ 510 break; 511 512 //default: *((char*)0)=0; /* this should never happen! */ 513 } 514 } 515 516 /* solve L*X=B, with B containing 1 right hand sides. 517 * L is an n*n lower triangular matrix with ones on the diagonal. 518 * L is stored by rows and its leading dimension is lskip. 519 * B is an n*1 matrix that contains the right hand sides. 520 * B is stored by columns and its leading dimension is also lskip. 521 * B is overwritten with X. 522 * this processes blocks of 4*4. 523 * if this is in the factorizer source file, n must be a multiple of 4. 524 */ 525 526 void btSolveL1 (const btScalar *L, btScalar *B, int n, int lskip1) 527 { 528 /* declare variables - Z matrix, p and q vectors, etc */ 529 btScalar Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex; 530 const btScalar *ell; 531 int lskip2,lskip3,i,j; 532 /* compute lskip values */ 533 lskip2 = 2*lskip1; 534 lskip3 = 3*lskip1; 535 /* compute all 4 x 1 blocks of X */ 536 for (i=0; i <= n-4; i+=4) { 537 /* compute all 4 x 1 block of X, from rows i..i+4-1 */ 538 /* set the Z matrix to 0 */ 539 Z11=0; 540 Z21=0; 541 Z31=0; 542 Z41=0; 543 ell = L + i*lskip1; 544 ex = B; 545 /* the inner loop that computes outer products and adds them to Z */ 546 for (j=i-12; j >= 0; j -= 12) { 547 /* load p and q values */ 548 p1=ell[0]; 549 q1=ex[0]; 550 p2=ell[lskip1]; 551 p3=ell[lskip2]; 552 p4=ell[lskip3]; 553 /* compute outer product and add it to the Z matrix */ 554 Z11 += p1 * q1; 555 Z21 += p2 * q1; 556 Z31 += p3 * q1; 557 Z41 += p4 * q1; 558 /* load p and q values */ 559 p1=ell[1]; 560 q1=ex[1]; 561 p2=ell[1+lskip1]; 562 p3=ell[1+lskip2]; 563 p4=ell[1+lskip3]; 564 /* compute outer product and add it to the Z matrix */ 565 Z11 += p1 * q1; 566 Z21 += p2 * q1; 567 Z31 += p3 * q1; 568 Z41 += p4 * q1; 569 /* load p and q values */ 570 p1=ell[2]; 571 q1=ex[2]; 572 p2=ell[2+lskip1]; 573 p3=ell[2+lskip2]; 574 p4=ell[2+lskip3]; 575 /* compute outer product and add it to the Z matrix */ 576 Z11 += p1 * q1; 577 Z21 += p2 * q1; 578 Z31 += p3 * q1; 579 Z41 += p4 * q1; 580 /* load p and q values */ 581 p1=ell[3]; 582 q1=ex[3]; 583 p2=ell[3+lskip1]; 584 p3=ell[3+lskip2]; 585 p4=ell[3+lskip3]; 586 /* compute outer product and add it to the Z matrix */ 587 Z11 += p1 * q1; 588 Z21 += p2 * q1; 589 Z31 += p3 * q1; 590 Z41 += p4 * q1; 591 /* load p and q values */ 592 p1=ell[4]; 593 q1=ex[4]; 594 p2=ell[4+lskip1]; 595 p3=ell[4+lskip2]; 596 p4=ell[4+lskip3]; 597 /* compute outer product and add it to the Z matrix */ 598 Z11 += p1 * q1; 599 Z21 += p2 * q1; 600 Z31 += p3 * q1; 601 Z41 += p4 * q1; 602 /* load p and q values */ 603 p1=ell[5]; 604 q1=ex[5]; 605 p2=ell[5+lskip1]; 606 p3=ell[5+lskip2]; 607 p4=ell[5+lskip3]; 608 /* compute outer product and add it to the Z matrix */ 609 Z11 += p1 * q1; 610 Z21 += p2 * q1; 611 Z31 += p3 * q1; 612 Z41 += p4 * q1; 613 /* load p and q values */ 614 p1=ell[6]; 615 q1=ex[6]; 616 p2=ell[6+lskip1]; 617 p3=ell[6+lskip2]; 618 p4=ell[6+lskip3]; 619 /* compute outer product and add it to the Z matrix */ 620 Z11 += p1 * q1; 621 Z21 += p2 * q1; 622 Z31 += p3 * q1; 623 Z41 += p4 * q1; 624 /* load p and q values */ 625 p1=ell[7]; 626 q1=ex[7]; 627 p2=ell[7+lskip1]; 628 p3=ell[7+lskip2]; 629 p4=ell[7+lskip3]; 630 /* compute outer product and add it to the Z matrix */ 631 Z11 += p1 * q1; 632 Z21 += p2 * q1; 633 Z31 += p3 * q1; 634 Z41 += p4 * q1; 635 /* load p and q values */ 636 p1=ell[8]; 637 q1=ex[8]; 638 p2=ell[8+lskip1]; 639 p3=ell[8+lskip2]; 640 p4=ell[8+lskip3]; 641 /* compute outer product and add it to the Z matrix */ 642 Z11 += p1 * q1; 643 Z21 += p2 * q1; 644 Z31 += p3 * q1; 645 Z41 += p4 * q1; 646 /* load p and q values */ 647 p1=ell[9]; 648 q1=ex[9]; 649 p2=ell[9+lskip1]; 650 p3=ell[9+lskip2]; 651 p4=ell[9+lskip3]; 652 /* compute outer product and add it to the Z matrix */ 653 Z11 += p1 * q1; 654 Z21 += p2 * q1; 655 Z31 += p3 * q1; 656 Z41 += p4 * q1; 657 /* load p and q values */ 658 p1=ell[10]; 659 q1=ex[10]; 660 p2=ell[10+lskip1]; 661 p3=ell[10+lskip2]; 662 p4=ell[10+lskip3]; 663 /* compute outer product and add it to the Z matrix */ 664 Z11 += p1 * q1; 665 Z21 += p2 * q1; 666 Z31 += p3 * q1; 667 Z41 += p4 * q1; 668 /* load p and q values */ 669 p1=ell[11]; 670 q1=ex[11]; 671 p2=ell[11+lskip1]; 672 p3=ell[11+lskip2]; 673 p4=ell[11+lskip3]; 674 /* compute outer product and add it to the Z matrix */ 675 Z11 += p1 * q1; 676 Z21 += p2 * q1; 677 Z31 += p3 * q1; 678 Z41 += p4 * q1; 679 /* advance pointers */ 680 ell += 12; 681 ex += 12; 682 /* end of inner loop */ 683 } 684 /* compute left-over iterations */ 685 j += 12; 686 for (; j > 0; j--) { 687 /* load p and q values */ 688 p1=ell[0]; 689 q1=ex[0]; 690 p2=ell[lskip1]; 691 p3=ell[lskip2]; 692 p4=ell[lskip3]; 693 /* compute outer product and add it to the Z matrix */ 694 Z11 += p1 * q1; 695 Z21 += p2 * q1; 696 Z31 += p3 * q1; 697 Z41 += p4 * q1; 698 /* advance pointers */ 699 ell += 1; 700 ex += 1; 701 } 702 /* finish computing the X(i) block */ 703 Z11 = ex[0] - Z11; 704 ex[0] = Z11; 705 p1 = ell[lskip1]; 706 Z21 = ex[1] - Z21 - p1*Z11; 707 ex[1] = Z21; 708 p1 = ell[lskip2]; 709 p2 = ell[1+lskip2]; 710 Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21; 711 ex[2] = Z31; 712 p1 = ell[lskip3]; 713 p2 = ell[1+lskip3]; 714 p3 = ell[2+lskip3]; 715 Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; 716 ex[3] = Z41; 717 /* end of outer loop */ 718 } 719 /* compute rows at end that are not a multiple of block size */ 720 for (; i < n; i++) { 721 /* compute all 1 x 1 block of X, from rows i..i+1-1 */ 722 /* set the Z matrix to 0 */ 723 Z11=0; 724 ell = L + i*lskip1; 725 ex = B; 726 /* the inner loop that computes outer products and adds them to Z */ 727 for (j=i-12; j >= 0; j -= 12) { 728 /* load p and q values */ 729 p1=ell[0]; 730 q1=ex[0]; 731 /* compute outer product and add it to the Z matrix */ 732 Z11 += p1 * q1; 733 /* load p and q values */ 734 p1=ell[1]; 735 q1=ex[1]; 736 /* compute outer product and add it to the Z matrix */ 737 Z11 += p1 * q1; 738 /* load p and q values */ 739 p1=ell[2]; 740 q1=ex[2]; 741 /* compute outer product and add it to the Z matrix */ 742 Z11 += p1 * q1; 743 /* load p and q values */ 744 p1=ell[3]; 745 q1=ex[3]; 746 /* compute outer product and add it to the Z matrix */ 747 Z11 += p1 * q1; 748 /* load p and q values */ 749 p1=ell[4]; 750 q1=ex[4]; 751 /* compute outer product and add it to the Z matrix */ 752 Z11 += p1 * q1; 753 /* load p and q values */ 754 p1=ell[5]; 755 q1=ex[5]; 756 /* compute outer product and add it to the Z matrix */ 757 Z11 += p1 * q1; 758 /* load p and q values */ 759 p1=ell[6]; 760 q1=ex[6]; 761 /* compute outer product and add it to the Z matrix */ 762 Z11 += p1 * q1; 763 /* load p and q values */ 764 p1=ell[7]; 765 q1=ex[7]; 766 /* compute outer product and add it to the Z matrix */ 767 Z11 += p1 * q1; 768 /* load p and q values */ 769 p1=ell[8]; 770 q1=ex[8]; 771 /* compute outer product and add it to the Z matrix */ 772 Z11 += p1 * q1; 773 /* load p and q values */ 774 p1=ell[9]; 775 q1=ex[9]; 776 /* compute outer product and add it to the Z matrix */ 777 Z11 += p1 * q1; 778 /* load p and q values */ 779 p1=ell[10]; 780 q1=ex[10]; 781 /* compute outer product and add it to the Z matrix */ 782 Z11 += p1 * q1; 783 /* load p and q values */ 784 p1=ell[11]; 785 q1=ex[11]; 786 /* compute outer product and add it to the Z matrix */ 787 Z11 += p1 * q1; 788 /* advance pointers */ 789 ell += 12; 790 ex += 12; 791 /* end of inner loop */ 792 } 793 /* compute left-over iterations */ 794 j += 12; 795 for (; j > 0; j--) { 796 /* load p and q values */ 797 p1=ell[0]; 798 q1=ex[0]; 799 /* compute outer product and add it to the Z matrix */ 800 Z11 += p1 * q1; 801 /* advance pointers */ 802 ell += 1; 803 ex += 1; 804 } 805 /* finish computing the X(i) block */ 806 Z11 = ex[0] - Z11; 807 ex[0] = Z11; 808 } 809 } 810 811 /* solve L^T * x=b, with b containing 1 right hand side. 812 * L is an n*n lower triangular matrix with ones on the diagonal. 813 * L is stored by rows and its leading dimension is lskip. 814 * b is an n*1 matrix that contains the right hand side. 815 * b is overwritten with x. 816 * this processes blocks of 4. 817 */ 818 819 void btSolveL1T (const btScalar *L, btScalar *B, int n, int lskip1) 820 { 821 /* declare variables - Z matrix, p and q vectors, etc */ 822 btScalar Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; 823 const btScalar *ell; 824 int lskip2,i,j; 825 // int lskip3; 826 /* special handling for L and B because we're solving L1 *transpose* */ 827 L = L + (n-1)*(lskip1+1); 828 B = B + n-1; 829 lskip1 = -lskip1; 830 /* compute lskip values */ 831 lskip2 = 2*lskip1; 832 //lskip3 = 3*lskip1; 833 /* compute all 4 x 1 blocks of X */ 834 for (i=0; i <= n-4; i+=4) { 835 /* compute all 4 x 1 block of X, from rows i..i+4-1 */ 836 /* set the Z matrix to 0 */ 837 Z11=0; 838 Z21=0; 839 Z31=0; 840 Z41=0; 841 ell = L - i; 842 ex = B; 843 /* the inner loop that computes outer products and adds them to Z */ 844 for (j=i-4; j >= 0; j -= 4) { 845 /* load p and q values */ 846 p1=ell[0]; 847 q1=ex[0]; 848 p2=ell[-1]; 849 p3=ell[-2]; 850 p4=ell[-3]; 851 /* compute outer product and add it to the Z matrix */ 852 m11 = p1 * q1; 853 m21 = p2 * q1; 854 m31 = p3 * q1; 855 m41 = p4 * q1; 856 ell += lskip1; 857 Z11 += m11; 858 Z21 += m21; 859 Z31 += m31; 860 Z41 += m41; 861 /* load p and q values */ 862 p1=ell[0]; 863 q1=ex[-1]; 864 p2=ell[-1]; 865 p3=ell[-2]; 866 p4=ell[-3]; 867 /* compute outer product and add it to the Z matrix */ 868 m11 = p1 * q1; 869 m21 = p2 * q1; 870 m31 = p3 * q1; 871 m41 = p4 * q1; 872 ell += lskip1; 873 Z11 += m11; 874 Z21 += m21; 875 Z31 += m31; 876 Z41 += m41; 877 /* load p and q values */ 878 p1=ell[0]; 879 q1=ex[-2]; 880 p2=ell[-1]; 881 p3=ell[-2]; 882 p4=ell[-3]; 883 /* compute outer product and add it to the Z matrix */ 884 m11 = p1 * q1; 885 m21 = p2 * q1; 886 m31 = p3 * q1; 887 m41 = p4 * q1; 888 ell += lskip1; 889 Z11 += m11; 890 Z21 += m21; 891 Z31 += m31; 892 Z41 += m41; 893 /* load p and q values */ 894 p1=ell[0]; 895 q1=ex[-3]; 896 p2=ell[-1]; 897 p3=ell[-2]; 898 p4=ell[-3]; 899 /* compute outer product and add it to the Z matrix */ 900 m11 = p1 * q1; 901 m21 = p2 * q1; 902 m31 = p3 * q1; 903 m41 = p4 * q1; 904 ell += lskip1; 905 ex -= 4; 906 Z11 += m11; 907 Z21 += m21; 908 Z31 += m31; 909 Z41 += m41; 910 /* end of inner loop */ 911 } 912 /* compute left-over iterations */ 913 j += 4; 914 for (; j > 0; j--) { 915 /* load p and q values */ 916 p1=ell[0]; 917 q1=ex[0]; 918 p2=ell[-1]; 919 p3=ell[-2]; 920 p4=ell[-3]; 921 /* compute outer product and add it to the Z matrix */ 922 m11 = p1 * q1; 923 m21 = p2 * q1; 924 m31 = p3 * q1; 925 m41 = p4 * q1; 926 ell += lskip1; 927 ex -= 1; 928 Z11 += m11; 929 Z21 += m21; 930 Z31 += m31; 931 Z41 += m41; 932 } 933 /* finish computing the X(i) block */ 934 Z11 = ex[0] - Z11; 935 ex[0] = Z11; 936 p1 = ell[-1]; 937 Z21 = ex[-1] - Z21 - p1*Z11; 938 ex[-1] = Z21; 939 p1 = ell[-2]; 940 p2 = ell[-2+lskip1]; 941 Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21; 942 ex[-2] = Z31; 943 p1 = ell[-3]; 944 p2 = ell[-3+lskip1]; 945 p3 = ell[-3+lskip2]; 946 Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; 947 ex[-3] = Z41; 948 /* end of outer loop */ 949 } 950 /* compute rows at end that are not a multiple of block size */ 951 for (; i < n; i++) { 952 /* compute all 1 x 1 block of X, from rows i..i+1-1 */ 953 /* set the Z matrix to 0 */ 954 Z11=0; 955 ell = L - i; 956 ex = B; 957 /* the inner loop that computes outer products and adds them to Z */ 958 for (j=i-4; j >= 0; j -= 4) { 959 /* load p and q values */ 960 p1=ell[0]; 961 q1=ex[0]; 962 /* compute outer product and add it to the Z matrix */ 963 m11 = p1 * q1; 964 ell += lskip1; 965 Z11 += m11; 966 /* load p and q values */ 967 p1=ell[0]; 968 q1=ex[-1]; 969 /* compute outer product and add it to the Z matrix */ 970 m11 = p1 * q1; 971 ell += lskip1; 972 Z11 += m11; 973 /* load p and q values */ 974 p1=ell[0]; 975 q1=ex[-2]; 976 /* compute outer product and add it to the Z matrix */ 977 m11 = p1 * q1; 978 ell += lskip1; 979 Z11 += m11; 980 /* load p and q values */ 981 p1=ell[0]; 982 q1=ex[-3]; 983 /* compute outer product and add it to the Z matrix */ 984 m11 = p1 * q1; 985 ell += lskip1; 986 ex -= 4; 987 Z11 += m11; 988 /* end of inner loop */ 989 } 990 /* compute left-over iterations */ 991 j += 4; 992 for (; j > 0; j--) { 993 /* load p and q values */ 994 p1=ell[0]; 995 q1=ex[0]; 996 /* compute outer product and add it to the Z matrix */ 997 m11 = p1 * q1; 998 ell += lskip1; 999 ex -= 1; 1000 Z11 += m11; 1001 } 1002 /* finish computing the X(i) block */ 1003 Z11 = ex[0] - Z11; 1004 ex[0] = Z11; 1005 } 1006 } 1007 1008 1009 1010 void btVectorScale (btScalar *a, const btScalar *d, int n) 1011 { 1012 btAssert (a && d && n >= 0); 1013 for (int i=0; i<n; i++) { 1014 a[i] *= d[i]; 1015 } 1016 } 1017 1018 void btSolveLDLT (const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip) 1019 { 1020 btAssert (L && d && b && n > 0 && nskip >= n); 1021 btSolveL1 (L,b,n,nskip); 1022 btVectorScale (b,d,n); 1023 btSolveL1T (L,b,n,nskip); 1024 } 1025 1026 1027 1028 //*************************************************************************** 1029 1030 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of 1031 // A is nskip. this only references and swaps the lower triangle. 1032 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then 1033 // rows will be swapped by exchanging row pointers. otherwise the data will 1034 // be copied. 1035 1036 static void btSwapRowsAndCols (BTATYPE A, int n, int i1, int i2, int nskip, 1037 int do_fast_row_swaps) 1038 { 1039 btAssert (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && 1040 nskip >= n && i1 < i2); 1041 1042 # ifdef BTROWPTRS 1043 btScalar *A_i1 = A[i1]; 1044 btScalar *A_i2 = A[i2]; 1045 for (int i=i1+1; i<i2; ++i) { 1046 btScalar *A_i_i1 = A[i] + i1; 1047 A_i1[i] = *A_i_i1; 1048 *A_i_i1 = A_i2[i]; 1049 } 1050 A_i1[i2] = A_i1[i1]; 1051 A_i1[i1] = A_i2[i1]; 1052 A_i2[i1] = A_i2[i2]; 1053 // swap rows, by swapping row pointers 1054 if (do_fast_row_swaps) { 1055 A[i1] = A_i2; 1056 A[i2] = A_i1; 1057 } 1058 else { 1059 // Only swap till i2 column to match A plain storage variant. 1060 for (int k = 0; k <= i2; ++k) { 1061 btScalar tmp = A_i1[k]; 1062 A_i1[k] = A_i2[k]; 1063 A_i2[k] = tmp; 1064 } 1065 } 1066 // swap columns the hard way 1067 for (int j=i2+1; j<n; ++j) { 1068 btScalar *A_j = A[j]; 1069 btScalar tmp = A_j[i1]; 1070 A_j[i1] = A_j[i2]; 1071 A_j[i2] = tmp; 1072 } 1073 # else 1074 btScalar *A_i1 = A+i1*nskip; 1075 btScalar *A_i2 = A+i2*nskip; 1076 for (int k = 0; k < i1; ++k) { 1077 btScalar tmp = A_i1[k]; 1078 A_i1[k] = A_i2[k]; 1079 A_i2[k] = tmp; 1080 } 1081 btScalar *A_i = A_i1 + nskip; 1082 for (int i=i1+1; i<i2; A_i+=nskip, ++i) { 1083 btScalar tmp = A_i2[i]; 1084 A_i2[i] = A_i[i1]; 1085 A_i[i1] = tmp; 1086 } 1087 { 1088 btScalar tmp = A_i1[i1]; 1089 A_i1[i1] = A_i2[i2]; 1090 A_i2[i2] = tmp; 1091 } 1092 btScalar *A_j = A_i2 + nskip; 1093 for (int j=i2+1; j<n; A_j+=nskip, ++j) { 1094 btScalar tmp = A_j[i1]; 1095 A_j[i1] = A_j[i2]; 1096 A_j[i2] = tmp; 1097 } 1098 # endif 1099 } 1100 1101 1102 // swap two indexes in the n*n LCP problem. i1 must be <= i2. 1103 1104 static void btSwapProblem (BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo, 1105 btScalar *hi, int *p, bool *state, int *findex, 1106 int n, int i1, int i2, int nskip, 1107 int do_fast_row_swaps) 1108 { 1109 btScalar tmpr; 1110 int tmpi; 1111 bool tmpb; 1112 btAssert (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2); 1113 if (i1==i2) return; 1114 1115 btSwapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps); 1116 1117 tmpr = x[i1]; 1118 x[i1] = x[i2]; 1119 x[i2] = tmpr; 1120 1121 tmpr = b[i1]; 1122 b[i1] = b[i2]; 1123 b[i2] = tmpr; 1124 1125 tmpr = w[i1]; 1126 w[i1] = w[i2]; 1127 w[i2] = tmpr; 1128 1129 tmpr = lo[i1]; 1130 lo[i1] = lo[i2]; 1131 lo[i2] = tmpr; 1132 1133 tmpr = hi[i1]; 1134 hi[i1] = hi[i2]; 1135 hi[i2] = tmpr; 1136 1137 tmpi = p[i1]; 1138 p[i1] = p[i2]; 1139 p[i2] = tmpi; 1140 1141 tmpb = state[i1]; 1142 state[i1] = state[i2]; 1143 state[i2] = tmpb; 1144 1145 if (findex) { 1146 tmpi = findex[i1]; 1147 findex[i1] = findex[i2]; 1148 findex[i2] = tmpi; 1149 } 1150 } 1151 1152 1153 1154 1155 //*************************************************************************** 1156 // btLCP manipulator object. this represents an n*n LCP problem. 1157 // 1158 // two index sets C and N are kept. each set holds a subset of 1159 // the variable indexes 0..n-1. an index can only be in one set. 1160 // initially both sets are empty. 1161 // 1162 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated. 1163 1164 //*************************************************************************** 1165 // fast implementation of btLCP. see the above definition of btLCP for 1166 // interface comments. 1167 // 1168 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is 1169 // permuted as the other vectors/matrices are permuted. 1170 // 1171 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have 1172 // contiguous indexes. the don't-care indexes follow N. 1173 // 1174 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are 1175 // added or removed from the set C the factorization is updated. 1176 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A. 1177 // the leading dimension of the matrix L is always `nskip'. 1178 // 1179 // at the start there may be other indexes that are unbounded but are not 1180 // included in `nub'. btLCP will permute the matrix so that absolutely all 1181 // unbounded vectors are at the start. thus there may be some initial 1182 // permutation. 1183 // 1184 // the algorithms here assume certain patterns, particularly with respect to 1185 // index transfer. 1186 1187 #ifdef btLCP_FAST 1188 1189 struct btLCP 1190 { 1191 const int m_n; 1192 const int m_nskip; 1193 int m_nub; 1194 int m_nC, m_nN; // size of each index set 1195 BTATYPE const m_A; // A rows 1196 btScalar *const m_x, * const m_b, *const m_w, *const m_lo,* const m_hi; // permuted LCP problem data 1197 btScalar *const m_L, *const m_d; // L*D*L' factorization of set C 1198 btScalar *const m_Dell, *const m_ell, *const m_tmp; 1199 bool *const m_state; 1200 int *const m_findex, *const m_p, *const m_C; 1201 1202 btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w, 1203 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d, 1204 btScalar *_Dell, btScalar *_ell, btScalar *_tmp, 1205 bool *_state, int *_findex, int *p, int *c, btScalar **Arows); 1206 int getNub() const { return m_nub; } 1207 void transfer_i_to_C (int i); 1208 void transfer_i_to_N (int i) { m_nN++; } // because we can assume C and N span 1:i-1 1209 void transfer_i_from_N_to_C (int i); 1210 void transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch); 1211 int numC() const { return m_nC; } 1212 int numN() const { return m_nN; } 1213 int indexC (int i) const { return i; } 1214 int indexN (int i) const { return i+m_nC; } 1215 btScalar Aii (int i) const { return BTAROW(i)[i]; } 1216 btScalar AiC_times_qC (int i, btScalar *q) const { return btLargeDot (BTAROW(i), q, m_nC); } 1217 btScalar AiN_times_qN (int i, btScalar *q) const { return btLargeDot (BTAROW(i)+m_nC, q+m_nC, m_nN); } 1218 void pN_equals_ANC_times_qC (btScalar *p, btScalar *q); 1219 void pN_plusequals_ANi (btScalar *p, int i, int sign=1); 1220 void pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q); 1221 void pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q); 1222 void solve1 (btScalar *a, int i, int dir=1, int only_transfer=0); 1223 void unpermute(); 1224 }; 1225 1226 1227 btLCP::btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w, 1228 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d, 1229 btScalar *_Dell, btScalar *_ell, btScalar *_tmp, 1230 bool *_state, int *_findex, int *p, int *c, btScalar **Arows): 1231 m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0), 1232 # ifdef BTROWPTRS 1233 m_A(Arows), 1234 #else 1235 m_A(_Adata), 1236 #endif 1237 m_x(_x), m_b(_b), m_w(_w), m_lo(_lo), m_hi(_hi), 1238 m_L(l), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp), 1239 m_state(_state), m_findex(_findex), m_p(p), m_C(c) 1240 { 1241 { 1242 btSetZero (m_x,m_n); 1243 } 1244 1245 { 1246 # ifdef BTROWPTRS 1247 // make matrix row pointers 1248 btScalar *aptr = _Adata; 1249 BTATYPE A = m_A; 1250 const int n = m_n, nskip = m_nskip; 1251 for (int k=0; k<n; aptr+=nskip, ++k) A[k] = aptr; 1252 # endif 1253 } 1254 1255 { 1256 int *p = m_p; 1257 const int n = m_n; 1258 for (int k=0; k<n; ++k) p[k]=k; // initially unpermuted 1259 } 1260 1261 /* 1262 // for testing, we can do some random swaps in the area i > nub 1263 { 1264 const int n = m_n; 1265 const int nub = m_nub; 1266 if (nub < n) { 1267 for (int k=0; k<100; k++) { 1268 int i1,i2; 1269 do { 1270 i1 = dRandInt(n-nub)+nub; 1271 i2 = dRandInt(n-nub)+nub; 1272 } 1273 while (i1 > i2); 1274 //printf ("--> %d %d\n",i1,i2); 1275 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0); 1276 } 1277 } 1278 */ 1279 1280 // permute the problem so that *all* the unbounded variables are at the 1281 // start, i.e. look for unbounded variables not included in `nub'. we can 1282 // potentially push up `nub' this way and get a bigger initial factorization. 1283 // note that when we swap rows/cols here we must not just swap row pointers, 1284 // as the initial factorization relies on the data being all in one chunk. 1285 // variables that have findex >= 0 are *not* considered to be unbounded even 1286 // if lo=-inf and hi=inf - this is because these limits may change during the 1287 // solution process. 1288 1289 { 1290 int *findex = m_findex; 1291 btScalar *lo = m_lo, *hi = m_hi; 1292 const int n = m_n; 1293 for (int k = m_nub; k<n; ++k) { 1294 if (findex && findex[k] >= 0) continue; 1295 if (lo[k]==-BT_INFINITY && hi[k]==BT_INFINITY) { 1296 btSwapProblem (m_A,m_x,m_b,m_w,lo,hi,m_p,m_state,findex,n,m_nub,k,m_nskip,0); 1297 m_nub++; 1298 } 1299 } 1300 } 1301 1302 // if there are unbounded variables at the start, factorize A up to that 1303 // point and solve for x. this puts all indexes 0..nub-1 into C. 1304 if (m_nub > 0) { 1305 const int nub = m_nub; 1306 { 1307 btScalar *Lrow = m_L; 1308 const int nskip = m_nskip; 1309 for (int j=0; j<nub; Lrow+=nskip, ++j) memcpy(Lrow,BTAROW(j),(j+1)*sizeof(btScalar)); 1310 } 1311 btFactorLDLT (m_L,m_d,nub,m_nskip); 1312 memcpy (m_x,m_b,nub*sizeof(btScalar)); 1313 btSolveLDLT (m_L,m_d,m_x,nub,m_nskip); 1314 btSetZero (m_w,nub); 1315 { 1316 int *C = m_C; 1317 for (int k=0; k<nub; ++k) C[k] = k; 1318 } 1319 m_nC = nub; 1320 } 1321 1322 // permute the indexes > nub such that all findex variables are at the end 1323 if (m_findex) { 1324 const int nub = m_nub; 1325 int *findex = m_findex; 1326 int num_at_end = 0; 1327 for (int k=m_n-1; k >= nub; k--) { 1328 if (findex[k] >= 0) { 1329 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,findex,m_n,k,m_n-1-num_at_end,m_nskip,1); 1330 num_at_end++; 1331 } 1332 } 1333 } 1334 1335 // print info about indexes 1336 /* 1337 { 1338 const int n = m_n; 1339 const int nub = m_nub; 1340 for (int k=0; k<n; k++) { 1341 if (k<nub) printf ("C"); 1342 else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c"); 1343 else printf ("."); 1344 } 1345 printf ("\n"); 1346 } 1347 */ 1348 } 1349 1350 1351 void btLCP::transfer_i_to_C (int i) 1352 { 1353 { 1354 if (m_nC > 0) { 1355 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C)) 1356 { 1357 const int nC = m_nC; 1358 btScalar *const Ltgt = m_L + nC*m_nskip, *ell = m_ell; 1359 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j]; 1360 } 1361 const int nC = m_nC; 1362 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC)); 1363 } 1364 else { 1365 m_d[0] = btRecip (BTAROW(i)[i]); 1366 } 1367 1368 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1); 1369 1370 const int nC = m_nC; 1371 m_C[nC] = nC; 1372 m_nC = nC + 1; // nC value is outdated after this line 1373 } 1374 1375 } 1376 1377 1378 void btLCP::transfer_i_from_N_to_C (int i) 1379 { 1380 { 1381 if (m_nC > 0) { 1382 { 1383 btScalar *const aptr = BTAROW(i); 1384 btScalar *Dell = m_Dell; 1385 const int *C = m_C; 1386 # ifdef BTNUB_OPTIMIZATIONS 1387 // if nub>0, initial part of aptr unpermuted 1388 const int nub = m_nub; 1389 int j=0; 1390 for ( ; j<nub; ++j) Dell[j] = aptr[j]; 1391 const int nC = m_nC; 1392 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]]; 1393 # else 1394 const int nC = m_nC; 1395 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]]; 1396 # endif 1397 } 1398 btSolveL1 (m_L,m_Dell,m_nC,m_nskip); 1399 { 1400 const int nC = m_nC; 1401 btScalar *const Ltgt = m_L + nC*m_nskip; 1402 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d; 1403 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j]; 1404 } 1405 const int nC = m_nC; 1406 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC)); 1407 } 1408 else { 1409 m_d[0] = btRecip (BTAROW(i)[i]); 1410 } 1411 1412 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1); 1413 1414 const int nC = m_nC; 1415 m_C[nC] = nC; 1416 m_nN--; 1417 m_nC = nC + 1; // nC value is outdated after this line 1418 } 1419 1420 // @@@ TO DO LATER 1421 // if we just finish here then we'll go back and re-solve for 1422 // delta_x. but actually we can be more efficient and incrementally 1423 // update delta_x here. but if we do this, we wont have ell and Dell 1424 // to use in updating the factorization later. 1425 1426 } 1427 1428 void btRemoveRowCol (btScalar *A, int n, int nskip, int r) 1429 { 1430 btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n); 1431 if (r >= n-1) return; 1432 if (r > 0) { 1433 { 1434 const size_t move_size = (n-r-1)*sizeof(btScalar); 1435 btScalar *Adst = A + r; 1436 for (int i=0; i<r; Adst+=nskip,++i) { 1437 btScalar *Asrc = Adst + 1; 1438 memmove (Adst,Asrc,move_size); 1439 } 1440 } 1441 { 1442 const size_t cpy_size = r*sizeof(btScalar); 1443 btScalar *Adst = A + r * nskip; 1444 for (int i=r; i<(n-1); ++i) { 1445 btScalar *Asrc = Adst + nskip; 1446 memcpy (Adst,Asrc,cpy_size); 1447 Adst = Asrc; 1448 } 1449 } 1450 } 1451 { 1452 const size_t cpy_size = (n-r-1)*sizeof(btScalar); 1453 btScalar *Adst = A + r * (nskip + 1); 1454 for (int i=r; i<(n-1); ++i) { 1455 btScalar *Asrc = Adst + (nskip + 1); 1456 memcpy (Adst,Asrc,cpy_size); 1457 Adst = Asrc - 1; 1458 } 1459 } 1460 } 1461 1462 1463 1464 1465 void btLDLTAddTL (btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar>& scratch) 1466 { 1467 btAssert (L && d && a && n > 0 && nskip >= n); 1468 1469 if (n < 2) return; 1470 scratch.resize(2*nskip); 1471 btScalar *W1 = &scratch[0]; 1472 1473 btScalar *W2 = W1 + nskip; 1474 1475 W1[0] = btScalar(0.0); 1476 W2[0] = btScalar(0.0); 1477 for (int j=1; j<n; ++j) { 1478 W1[j] = W2[j] = (btScalar) (a[j] * SIMDSQRT12); 1479 } 1480 btScalar W11 = (btScalar) ((btScalar(0.5)*a[0]+1)*SIMDSQRT12); 1481 btScalar W21 = (btScalar) ((btScalar(0.5)*a[0]-1)*SIMDSQRT12); 1482 1483 btScalar alpha1 = btScalar(1.0); 1484 btScalar alpha2 = btScalar(1.0); 1485 1486 { 1487 btScalar dee = d[0]; 1488 btScalar alphanew = alpha1 + (W11*W11)*dee; 1489 btAssert(alphanew != btScalar(0.0)); 1490 dee /= alphanew; 1491 btScalar gamma1 = W11 * dee; 1492 dee *= alpha1; 1493 alpha1 = alphanew; 1494 alphanew = alpha2 - (W21*W21)*dee; 1495 dee /= alphanew; 1496 //btScalar gamma2 = W21 * dee; 1497 alpha2 = alphanew; 1498 btScalar k1 = btScalar(1.0) - W21*gamma1; 1499 btScalar k2 = W21*gamma1*W11 - W21; 1500 btScalar *ll = L + nskip; 1501 for (int p=1; p<n; ll+=nskip, ++p) { 1502 btScalar Wp = W1[p]; 1503 btScalar ell = *ll; 1504 W1[p] = Wp - W11*ell; 1505 W2[p] = k1*Wp + k2*ell; 1506 } 1507 } 1508 1509 btScalar *ll = L + (nskip + 1); 1510 for (int j=1; j<n; ll+=nskip+1, ++j) { 1511 btScalar k1 = W1[j]; 1512 btScalar k2 = W2[j]; 1513 1514 btScalar dee = d[j]; 1515 btScalar alphanew = alpha1 + (k1*k1)*dee; 1516 btAssert(alphanew != btScalar(0.0)); 1517 dee /= alphanew; 1518 btScalar gamma1 = k1 * dee; 1519 dee *= alpha1; 1520 alpha1 = alphanew; 1521 alphanew = alpha2 - (k2*k2)*dee; 1522 dee /= alphanew; 1523 btScalar gamma2 = k2 * dee; 1524 dee *= alpha2; 1525 d[j] = dee; 1526 alpha2 = alphanew; 1527 1528 btScalar *l = ll + nskip; 1529 for (int p=j+1; p<n; l+=nskip, ++p) { 1530 btScalar ell = *l; 1531 btScalar Wp = W1[p] - k1 * ell; 1532 ell += gamma1 * Wp; 1533 W1[p] = Wp; 1534 Wp = W2[p] - k2 * ell; 1535 ell -= gamma2 * Wp; 1536 W2[p] = Wp; 1537 *l = ell; 1538 } 1539 } 1540 } 1541 1542 1543 #define _BTGETA(i,j) (A[i][j]) 1544 //#define _GETA(i,j) (A[(i)*nskip+(j)]) 1545 #define BTGETA(i,j) ((i > j) ? _BTGETA(i,j) : _BTGETA(j,i)) 1546 1547 inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip) 1548 { 1549 return nskip * 2 * sizeof(btScalar); 1550 } 1551 1552 1553 void btLDLTRemove (btScalar **A, const int *p, btScalar *L, btScalar *d, 1554 int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar>& scratch) 1555 { 1556 btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 && 1557 n1 >= n2 && nskip >= n1); 1558 #ifdef BT_DEBUG 1559 for (int i=0; i<n2; ++i) 1560 btAssert(p[i] >= 0 && p[i] < n1); 1561 #endif 1562 1563 if (r==n2-1) { 1564 return; // deleting last row/col is easy 1565 } 1566 else { 1567 size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip); 1568 btAssert(LDLTAddTL_size % sizeof(btScalar) == 0); 1569 scratch.resize(nskip * 2+n2); 1570 btScalar *tmp = &scratch[0]; 1571 if (r==0) { 1572 btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size); 1573 const int p_0 = p[0]; 1574 for (int i=0; i<n2; ++i) { 1575 a[i] = -BTGETA(p[i],p_0); 1576 } 1577 a[0] += btScalar(1.0); 1578 btLDLTAddTL (L,d,a,n2,nskip,scratch); 1579 } 1580 else { 1581 btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size); 1582 { 1583 btScalar *Lcurr = L + r*nskip; 1584 for (int i=0; i<r; ++Lcurr, ++i) { 1585 btAssert(d[i] != btScalar(0.0)); 1586 t[i] = *Lcurr / d[i]; 1587 } 1588 } 1589 btScalar *a = t + r; 1590 { 1591 btScalar *Lcurr = L + r*nskip; 1592 const int *pp_r = p + r, p_r = *pp_r; 1593 const int n2_minus_r = n2-r; 1594 for (int i=0; i<n2_minus_r; Lcurr+=nskip,++i) { 1595 a[i] = btLargeDot(Lcurr,t,r) - BTGETA(pp_r[i],p_r); 1596 } 1597 } 1598 a[0] += btScalar(1.0); 1599 btLDLTAddTL (L + r*nskip+r, d+r, a, n2-r, nskip, scratch); 1600 } 1601 } 1602 1603 // snip out row/column r from L and d 1604 btRemoveRowCol (L,n2,nskip,r); 1605 if (r < (n2-1)) memmove (d+r,d+r+1,(n2-r-1)*sizeof(btScalar)); 1606 } 1607 1608 1609 void btLCP::transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch) 1610 { 1611 { 1612 int *C = m_C; 1613 // remove a row/column from the factorization, and adjust the 1614 // indexes (black magic!) 1615 int last_idx = -1; 1616 const int nC = m_nC; 1617 int j = 0; 1618 for ( ; j<nC; ++j) { 1619 if (C[j]==nC-1) { 1620 last_idx = j; 1621 } 1622 if (C[j]==i) { 1623 btLDLTRemove (m_A,C,m_L,m_d,m_n,nC,j,m_nskip,scratch); 1624 int k; 1625 if (last_idx == -1) { 1626 for (k=j+1 ; k<nC; ++k) { 1627 if (C[k]==nC-1) { 1628 break; 1629 } 1630 } 1631 btAssert (k < nC); 1632 } 1633 else { 1634 k = last_idx; 1635 } 1636 C[k] = C[j]; 1637 if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int)); 1638 break; 1639 } 1640 } 1641 btAssert (j < nC); 1642 1643 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,i,nC-1,m_nskip,1); 1644 1645 m_nN++; 1646 m_nC = nC - 1; // nC value is outdated after this line 1647 } 1648 1649 } 1650 1651 1652 void btLCP::pN_equals_ANC_times_qC (btScalar *p, btScalar *q) 1653 { 1654 // we could try to make this matrix-vector multiplication faster using 1655 // outer product matrix tricks, e.g. with the dMultidotX() functions. 1656 // but i tried it and it actually made things slower on random 100x100 1657 // problems because of the overhead involved. so we'll stick with the 1658 // simple method for now. 1659 const int nC = m_nC; 1660 btScalar *ptgt = p + nC; 1661 const int nN = m_nN; 1662 for (int i=0; i<nN; ++i) { 1663 ptgt[i] = btLargeDot (BTAROW(i+nC),q,nC); 1664 } 1665 } 1666 1667 1668 void btLCP::pN_plusequals_ANi (btScalar *p, int i, int sign) 1669 { 1670 const int nC = m_nC; 1671 btScalar *aptr = BTAROW(i) + nC; 1672 btScalar *ptgt = p + nC; 1673 if (sign > 0) { 1674 const int nN = m_nN; 1675 for (int j=0; j<nN; ++j) ptgt[j] += aptr[j]; 1676 } 1677 else { 1678 const int nN = m_nN; 1679 for (int j=0; j<nN; ++j) ptgt[j] -= aptr[j]; 1680 } 1681 } 1682 1683 void btLCP::pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q) 1684 { 1685 const int nC = m_nC; 1686 for (int i=0; i<nC; ++i) { 1687 p[i] += s*q[i]; 1688 } 1689 } 1690 1691 void btLCP::pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q) 1692 { 1693 const int nC = m_nC; 1694 btScalar *ptgt = p + nC, *qsrc = q + nC; 1695 const int nN = m_nN; 1696 for (int i=0; i<nN; ++i) { 1697 ptgt[i] += s*qsrc[i]; 1698 } 1699 } 1700 1701 void btLCP::solve1 (btScalar *a, int i, int dir, int only_transfer) 1702 { 1703 // the `Dell' and `ell' that are computed here are saved. if index i is 1704 // later added to the factorization then they can be reused. 1705 // 1706 // @@@ question: do we need to solve for entire delta_x??? yes, but 1707 // only if an x goes below 0 during the step. 1708 1709 if (m_nC > 0) { 1710 { 1711 btScalar *Dell = m_Dell; 1712 int *C = m_C; 1713 btScalar *aptr = BTAROW(i); 1714 # ifdef BTNUB_OPTIMIZATIONS 1715 // if nub>0, initial part of aptr[] is guaranteed unpermuted 1716 const int nub = m_nub; 1717 int j=0; 1718 for ( ; j<nub; ++j) Dell[j] = aptr[j]; 1719 const int nC = m_nC; 1720 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]]; 1721 # else 1722 const int nC = m_nC; 1723 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]]; 1724 # endif 1725 } 1726 btSolveL1 (m_L,m_Dell,m_nC,m_nskip); 1727 { 1728 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d; 1729 const int nC = m_nC; 1730 for (int j=0; j<nC; ++j) ell[j] = Dell[j] * d[j]; 1731 } 1732 1733 if (!only_transfer) { 1734 btScalar *tmp = m_tmp, *ell = m_ell; 1735 { 1736 const int nC = m_nC; 1737 for (int j=0; j<nC; ++j) tmp[j] = ell[j]; 1738 } 1739 btSolveL1T (m_L,tmp,m_nC,m_nskip); 1740 if (dir > 0) { 1741 int *C = m_C; 1742 btScalar *tmp = m_tmp; 1743 const int nC = m_nC; 1744 for (int j=0; j<nC; ++j) a[C[j]] = -tmp[j]; 1745 } else { 1746 int *C = m_C; 1747 btScalar *tmp = m_tmp; 1748 const int nC = m_nC; 1749 for (int j=0; j<nC; ++j) a[C[j]] = tmp[j]; 1750 } 1751 } 1752 } 1753 } 1754 1755 1756 void btLCP::unpermute() 1757 { 1758 // now we have to un-permute x and w 1759 { 1760 memcpy (m_tmp,m_x,m_n*sizeof(btScalar)); 1761 btScalar *x = m_x, *tmp = m_tmp; 1762 const int *p = m_p; 1763 const int n = m_n; 1764 for (int j=0; j<n; ++j) x[p[j]] = tmp[j]; 1765 } 1766 { 1767 memcpy (m_tmp,m_w,m_n*sizeof(btScalar)); 1768 btScalar *w = m_w, *tmp = m_tmp; 1769 const int *p = m_p; 1770 const int n = m_n; 1771 for (int j=0; j<n; ++j) w[p[j]] = tmp[j]; 1772 } 1773 } 1774 1775 #endif // btLCP_FAST 1776 1777 1778 //*************************************************************************** 1779 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem. 1780 1781 bool btSolveDantzigLCP (int n, btScalar *A, btScalar *x, btScalar *b, 1782 btScalar* outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory& scratchMem) 1783 { 1784 s_error = false; 1785 1786 // printf("btSolveDantzigLCP n=%d\n",n); 1787 btAssert (n>0 && A && x && b && lo && hi && nub >= 0 && nub <= n); 1788 btAssert(outer_w); 1789 1790 #ifdef BT_DEBUG 1791 { 1792 // check restrictions on lo and hi 1793 for (int k=0; k<n; ++k) 1794 btAssert (lo[k] <= 0 && hi[k] >= 0); 1795 } 1796 # endif 1797 1798 1799 // if all the variables are unbounded then we can just factor, solve, 1800 // and return 1801 if (nub >= n) 1802 { 1803 1804 1805 int nskip = (n); 1806 btFactorLDLT (A, outer_w, n, nskip); 1807 btSolveLDLT (A, outer_w, b, n, nskip); 1808 memcpy (x, b, n*sizeof(btScalar)); 1809 1810 return !s_error; 1811 } 1812 1813 const int nskip = (n); 1814 scratchMem.L.resize(n*nskip); 1815 1816 scratchMem.d.resize(n); 1817 1818 btScalar *w = outer_w; 1819 scratchMem.delta_w.resize(n); 1820 scratchMem.delta_x.resize(n); 1821 scratchMem.Dell.resize(n); 1822 scratchMem.ell.resize(n); 1823 scratchMem.Arows.resize(n); 1824 scratchMem.p.resize(n); 1825 scratchMem.C.resize(n); 1826 1827 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i) 1828 scratchMem.state.resize(n); 1829 1830 1831 // create LCP object. note that tmp is set to delta_w to save space, this 1832 // optimization relies on knowledge of how tmp is used, so be careful! 1833 btLCP lcp(n,nskip,nub,A,x,b,w,lo,hi,&scratchMem.L[0],&scratchMem.d[0],&scratchMem.Dell[0],&scratchMem.ell[0],&scratchMem.delta_w[0],&scratchMem.state[0],findex,&scratchMem.p[0],&scratchMem.C[0],&scratchMem.Arows[0]); 1834 int adj_nub = lcp.getNub(); 1835 1836 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the 1837 // LCP conditions then i is added to the appropriate index set. otherwise 1838 // x(i),w(i) is driven either +ve or -ve to force it to the valid region. 1839 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero. 1840 // while driving x(i) we maintain the LCP conditions on the other variables 1841 // 0..i-1. we do this by watching out for other x(i),w(i) values going 1842 // outside the valid region, and then switching them between index sets 1843 // when that happens. 1844 1845 bool hit_first_friction_index = false; 1846 for (int i=adj_nub; i<n; ++i) 1847 { 1848 s_error = false; 1849 // the index i is the driving index and indexes i+1..n-1 are "dont care", 1850 // i.e. when we make changes to the system those x's will be zero and we 1851 // don't care what happens to those w's. in other words, we only consider 1852 // an (i+1)*(i+1) sub-problem of A*x=b+w. 1853 1854 // if we've hit the first friction index, we have to compute the lo and 1855 // hi values based on the values of x already computed. we have been 1856 // permuting the indexes, so the values stored in the findex vector are 1857 // no longer valid. thus we have to temporarily unpermute the x vector. 1858 // for the purposes of this computation, 0*infinity = 0 ... so if the 1859 // contact constraint's normal force is 0, there should be no tangential 1860 // force applied. 1861 1862 if (!hit_first_friction_index && findex && findex[i] >= 0) { 1863 // un-permute x into delta_w, which is not being used at the moment 1864 for (int j=0; j<n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j]; 1865 1866 // set lo and hi values 1867 for (int k=i; k<n; ++k) { 1868 btScalar wfk = scratchMem.delta_w[findex[k]]; 1869 if (wfk == 0) { 1870 hi[k] = 0; 1871 lo[k] = 0; 1872 } 1873 else { 1874 hi[k] = btFabs (hi[k] * wfk); 1875 lo[k] = -hi[k]; 1876 } 1877 } 1878 hit_first_friction_index = true; 1879 } 1880 1881 // thus far we have not even been computing the w values for indexes 1882 // greater than i, so compute w[i] now. 1883 w[i] = lcp.AiC_times_qC (i,x) + lcp.AiN_times_qN (i,x) - b[i]; 1884 1885 // if lo=hi=0 (which can happen for tangential friction when normals are 1886 // 0) then the index will be assigned to set N with some state. however, 1887 // set C's line has zero size, so the index will always remain in set N. 1888 // with the "normal" switching logic, if w changed sign then the index 1889 // would have to switch to set C and then back to set N with an inverted 1890 // state. this is pointless, and also computationally expensive. to 1891 // prevent this from happening, we use the rule that indexes with lo=hi=0 1892 // will never be checked for set changes. this means that the state for 1893 // these indexes may be incorrect, but that doesn't matter. 1894 1895 // see if x(i),w(i) is in a valid region 1896 if (lo[i]==0 && w[i] >= 0) { 1897 lcp.transfer_i_to_N (i); 1898 scratchMem.state[i] = false; 1899 } 1900 else if (hi[i]==0 && w[i] <= 0) { 1901 lcp.transfer_i_to_N (i); 1902 scratchMem.state[i] = true; 1903 } 1904 else if (w[i]==0) { 1905 // this is a degenerate case. by the time we get to this test we know 1906 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve, 1907 // and similarly that hi > 0. this means that the line segment 1908 // corresponding to set C is at least finite in extent, and we are on it. 1909 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C() 1910 lcp.solve1 (&scratchMem.delta_x[0],i,0,1); 1911 1912 lcp.transfer_i_to_C (i); 1913 } 1914 else { 1915 // we must push x(i) and w(i) 1916 for (;;) { 1917 int dir; 1918 btScalar dirf; 1919 // find direction to push on x(i) 1920 if (w[i] <= 0) { 1921 dir = 1; 1922 dirf = btScalar(1.0); 1923 } 1924 else { 1925 dir = -1; 1926 dirf = btScalar(-1.0); 1927 } 1928 1929 // compute: delta_x(C) = -dir*A(C,C)\A(C,i) 1930 lcp.solve1 (&scratchMem.delta_x[0],i,dir); 1931 1932 // note that delta_x[i] = dirf, but we wont bother to set it 1933 1934 // compute: delta_w = A*delta_x ... note we only care about 1935 // delta_w(N) and delta_w(i), the rest is ignored 1936 lcp.pN_equals_ANC_times_qC (&scratchMem.delta_w[0],&scratchMem.delta_x[0]); 1937 lcp.pN_plusequals_ANi (&scratchMem.delta_w[0],i,dir); 1938 scratchMem.delta_w[i] = lcp.AiC_times_qC (i,&scratchMem.delta_x[0]) + lcp.Aii(i)*dirf; 1939 1940 // find largest step we can take (size=s), either to drive x(i),w(i) 1941 // to the valid LCP region or to drive an already-valid variable 1942 // outside the valid region. 1943 1944 int cmd = 1; // index switching command 1945 int si = 0; // si = index to switch if cmd>3 1946 btScalar s = -w[i]/scratchMem.delta_w[i]; 1947 if (dir > 0) { 1948 if (hi[i] < BT_INFINITY) { 1949 btScalar s2 = (hi[i]-x[i])*dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i) 1950 if (s2 < s) { 1951 s = s2; 1952 cmd = 3; 1953 } 1954 } 1955 } 1956 else { 1957 if (lo[i] > -BT_INFINITY) { 1958 btScalar s2 = (lo[i]-x[i])*dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i) 1959 if (s2 < s) { 1960 s = s2; 1961 cmd = 2; 1962 } 1963 } 1964 } 1965 1966 { 1967 const int numN = lcp.numN(); 1968 for (int k=0; k < numN; ++k) { 1969 const int indexN_k = lcp.indexN(k); 1970 if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0) { 1971 // don't bother checking if lo=hi=0 1972 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue; 1973 btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k]; 1974 if (s2 < s) { 1975 s = s2; 1976 cmd = 4; 1977 si = indexN_k; 1978 } 1979 } 1980 } 1981 } 1982 1983 { 1984 const int numC = lcp.numC(); 1985 for (int k=adj_nub; k < numC; ++k) { 1986 const int indexC_k = lcp.indexC(k); 1987 if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY) { 1988 btScalar s2 = (lo[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k]; 1989 if (s2 < s) { 1990 s = s2; 1991 cmd = 5; 1992 si = indexC_k; 1993 } 1994 } 1995 if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY) { 1996 btScalar s2 = (hi[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k]; 1997 if (s2 < s) { 1998 s = s2; 1999 cmd = 6; 2000 si = indexC_k; 2001 } 2002 } 2003 } 2004 } 2005 2006 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C", 2007 // "C->NL","C->NH"}; 2008 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i); 2009 2010 // if s <= 0 then we've got a problem. if we just keep going then 2011 // we're going to get stuck in an infinite loop. instead, just cross 2012 // our fingers and exit with the current solution. 2013 if (s <= btScalar(0.0)) 2014 { 2015 // printf("LCP internal error, s <= 0 (s=%.4e)",(double)s); 2016 if (i < n) { 2017 btSetZero (x+i,n-i); 2018 btSetZero (w+i,n-i); 2019 } 2020 s_error = true; 2021 break; 2022 } 2023 2024 // apply x = x + s * delta_x 2025 lcp.pC_plusequals_s_times_qC (x, s, &scratchMem.delta_x[0]); 2026 x[i] += s * dirf; 2027 2028 // apply w = w + s * delta_w 2029 lcp.pN_plusequals_s_times_qN (w, s, &scratchMem.delta_w[0]); 2030 w[i] += s * scratchMem.delta_w[i]; 2031 2032 // void *tmpbuf; 2033 // switch indexes between sets if necessary 2034 switch (cmd) { 2035 case 1: // done 2036 w[i] = 0; 2037 lcp.transfer_i_to_C (i); 2038 break; 2039 case 2: // done 2040 x[i] = lo[i]; 2041 scratchMem.state[i] = false; 2042 lcp.transfer_i_to_N (i); 2043 break; 2044 case 3: // done 2045 x[i] = hi[i]; 2046 scratchMem.state[i] = true; 2047 lcp.transfer_i_to_N (i); 2048 break; 2049 case 4: // keep going 2050 w[si] = 0; 2051 lcp.transfer_i_from_N_to_C (si); 2052 break; 2053 case 5: // keep going 2054 x[si] = lo[si]; 2055 scratchMem.state[si] = false; 2056 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch); 2057 break; 2058 case 6: // keep going 2059 x[si] = hi[si]; 2060 scratchMem.state[si] = true; 2061 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch); 2062 break; 2063 } 2064 2065 if (cmd <= 3) break; 2066 } // for (;;) 2067 } // else 2068 2069 if (s_error) 2070 { 2071 break; 2072 } 2073 } // for (int i=adj_nub; i<n; ++i) 2074 2075 lcp.unpermute(); 2076 2077 2078 return !s_error; 2079 } 2080 2081