1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "common/math/mat.h" 18 19 #include <assert.h> 20 #include <float.h> 21 22 #ifdef _OS_BUILD_ 23 #include <nanohub_math.h> 24 #include <seos.h> 25 #else 26 #include <math.h> 27 #ifndef UNROLLED 28 #define UNROLLED 29 #endif 30 #endif // _OS_BUILD_ 31 32 #include <stddef.h> 33 #include <string.h> 34 35 #define EPSILON 1E-5 36 #define CHOLESKY_TOLERANCE 1E-6 37 38 // Forward declarations. 39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j); 40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k); 41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, 42 uint32_t l, uint32_t i, uint32_t j); 43 44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j); 45 46 void initZeroMatrix(struct Mat33 *A) { 47 ASSERT_NOT_NULL(A); 48 memset(A->elem, 0.0f, sizeof(A->elem)); 49 } 50 51 UNROLLED 52 void initDiagonalMatrix(struct Mat33 *A, float x) { 53 ASSERT_NOT_NULL(A); 54 initZeroMatrix(A); 55 56 uint32_t i; 57 for (i = 0; i < 3; ++i) { 58 A->elem[i][i] = x; 59 } 60 } 61 62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, 63 const struct Vec3 *v2, const struct Vec3 *v3) { 64 ASSERT_NOT_NULL(A); 65 ASSERT_NOT_NULL(v1); 66 ASSERT_NOT_NULL(v2); 67 ASSERT_NOT_NULL(v3); 68 A->elem[0][0] = v1->x; 69 A->elem[0][1] = v2->x; 70 A->elem[0][2] = v3->x; 71 72 A->elem[1][0] = v1->y; 73 A->elem[1][1] = v2->y; 74 A->elem[1][2] = v3->y; 75 76 A->elem[2][0] = v1->z; 77 A->elem[2][1] = v2->z; 78 A->elem[2][2] = v3->z; 79 } 80 81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) { 82 ASSERT_NOT_NULL(out); 83 ASSERT_NOT_NULL(A); 84 ASSERT_NOT_NULL(v); 85 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z; 86 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z; 87 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z; 88 } 89 90 UNROLLED 91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, 92 const struct Mat33 *B) { 93 ASSERT_NOT_NULL(out); 94 ASSERT_NOT_NULL(A); 95 ASSERT_NOT_NULL(B); 96 ASSERT(out != A); 97 ASSERT(out != B); 98 99 uint32_t i; 100 for (i = 0; i < 3; ++i) { 101 uint32_t j; 102 for (j = 0; j < 3; ++j) { 103 uint32_t k; 104 float sum = 0.0f; 105 for (k = 0; k < 3; ++k) { 106 sum += A->elem[i][k] * B->elem[k][j]; 107 } 108 109 out->elem[i][j] = sum; 110 } 111 } 112 } 113 114 UNROLLED 115 void mat33ScalarMul(struct Mat33 *A, float c) { 116 ASSERT_NOT_NULL(A); 117 uint32_t i; 118 for (i = 0; i < 3; ++i) { 119 uint32_t j; 120 for (j = 0; j < 3; ++j) { 121 A->elem[i][j] *= c; 122 } 123 } 124 } 125 126 UNROLLED 127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) { 128 ASSERT_NOT_NULL(out); 129 ASSERT_NOT_NULL(A); 130 uint32_t i; 131 for (i = 0; i < 3; ++i) { 132 uint32_t j; 133 for (j = 0; j < 3; ++j) { 134 out->elem[i][j] += A->elem[i][j]; 135 } 136 } 137 } 138 139 UNROLLED 140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) { 141 ASSERT_NOT_NULL(out); 142 ASSERT_NOT_NULL(A); 143 uint32_t i; 144 for (i = 0; i < 3; ++i) { 145 uint32_t j; 146 for (j = 0; j < 3; ++j) { 147 out->elem[i][j] -= A->elem[i][j]; 148 } 149 } 150 } 151 152 UNROLLED 153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) { 154 ASSERT_NOT_NULL(A); 155 uint32_t i; 156 for (i = 0; i < 3; ++i) { 157 if (A->elem[i][i] < 0.0f) { 158 return 0; 159 } 160 } 161 162 for (i = 0; i < 3; ++i) { 163 uint32_t j; 164 for (j = i + 1; j < 3; ++j) { 165 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) { 166 return 0; 167 } 168 } 169 } 170 171 return 1; 172 } 173 174 UNROLLED 175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, 176 const struct Mat33 *B) { 177 ASSERT(out != A); 178 ASSERT(out != B); 179 ASSERT_NOT_NULL(out); 180 ASSERT_NOT_NULL(A); 181 ASSERT_NOT_NULL(B); 182 uint32_t i; 183 for (i = 0; i < 3; ++i) { 184 uint32_t j; 185 for (j = 0; j < 3; ++j) { 186 uint32_t k; 187 float sum = 0.0f; 188 for (k = 0; k < 3; ++k) { 189 sum += A->elem[k][i] * B->elem[k][j]; 190 } 191 192 out->elem[i][j] = sum; 193 } 194 } 195 } 196 197 UNROLLED 198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, 199 const struct Mat33 *B) { 200 ASSERT(out != A); 201 ASSERT(out != B); 202 ASSERT_NOT_NULL(out); 203 ASSERT_NOT_NULL(A); 204 ASSERT_NOT_NULL(B); 205 uint32_t i; 206 for (i = 0; i < 3; ++i) { 207 uint32_t j; 208 for (j = 0; j < 3; ++j) { 209 uint32_t k; 210 float sum = 0.0f; 211 for (k = 0; k < 3; ++k) { 212 sum += A->elem[i][k] * B->elem[j][k]; 213 } 214 215 out->elem[i][j] = sum; 216 } 217 } 218 } 219 220 UNROLLED 221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) { 222 ASSERT_NOT_NULL(out); 223 ASSERT_NOT_NULL(A); 224 float t; 225 initDiagonalMatrix(out, 1.0f); 226 227 struct Mat33 tmp = *A; 228 229 uint32_t i, k; 230 for (i = 0; i < 3; ++i) { 231 uint32_t swap = i; 232 uint32_t j; 233 for (j = i + 1; j < 3; ++j) { 234 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) { 235 swap = j; 236 } 237 } 238 239 if (swap != i) { 240 for (k = 0; k < 3; ++k) { 241 t = tmp.elem[i][k]; 242 tmp.elem[i][k] = tmp.elem[swap][k]; 243 tmp.elem[swap][k] = t; 244 245 t = out->elem[i][k]; 246 out->elem[i][k] = out->elem[swap][k]; 247 out->elem[swap][k] = t; 248 } 249 } 250 // divide by zero guard. 251 ASSERT(fabs(tmp.elem[i][i]) > 0); 252 if(!(fabs(tmp.elem[i][i]) > 0)) { 253 return; 254 } 255 t = 1.0f / tmp.elem[i][i]; 256 for (k = 0; k < 3; ++k) { 257 tmp.elem[i][k] *= t; 258 out->elem[i][k] *= t; 259 } 260 261 for (j = 0; j < 3; ++j) { 262 if (j != i) { 263 t = tmp.elem[j][i]; 264 for (k = 0; k < 3; ++k) { 265 tmp.elem[j][k] -= tmp.elem[i][k] * t; 266 out->elem[j][k] -= out->elem[i][k] * t; 267 } 268 } 269 } 270 } 271 } 272 273 UNROLLED 274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) { 275 ASSERT_NOT_NULL(out); 276 ASSERT_NOT_NULL(A); 277 uint32_t i; 278 for (i = 0; i < 3; ++i) { 279 uint32_t j; 280 for (j = 0; j < 3; ++j) { 281 out->elem[i][j] = A->elem[j][i]; 282 } 283 } 284 } 285 286 UNROLLED 287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) { 288 ASSERT_NOT_NULL(A); 289 const uint32_t N = 3; 290 uint32_t k; 291 292 if (i == j) { 293 return; 294 } 295 296 for (k = 0; k < N; ++k) { 297 float tmp = A->elem[i][k]; 298 A->elem[i][k] = A->elem[j][k]; 299 A->elem[j][k] = tmp; 300 } 301 } 302 303 UNROLLED 304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, 305 struct Mat33 *eigenvecs) { 306 ASSERT_NOT_NULL(S); 307 ASSERT_NOT_NULL(eigenvals); 308 ASSERT_NOT_NULL(eigenvecs); 309 const uint32_t N = 3; 310 uint32_t i, j, k, l, m; 311 312 float _eigenvals[N]; 313 314 uint32_t ind[N]; 315 for (k = 0; k < N; ++k) { 316 ind[k] = mat33Maxind(S, k); 317 _eigenvals[k] = S->elem[k][k]; 318 } 319 320 initDiagonalMatrix(eigenvecs, 1.0f); 321 322 for (;;) { 323 m = 0; 324 for (k = 1; k + 1 < N; ++k) { 325 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) { 326 m = k; 327 } 328 } 329 330 k = m; 331 l = ind[m]; 332 float p = S->elem[k][l]; 333 334 if (fabsf(p) < EPSILON) { 335 break; 336 } 337 338 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f; 339 340 float t = fabsf(y) + sqrtf(p * p + y * y); 341 float s = sqrtf(p * p + t * t); 342 float c = t / s; 343 s = p / s; 344 t = p * p / t; 345 346 if (y < 0.0f) { 347 s = -s; 348 t = -t; 349 } 350 351 S->elem[k][l] = 0.0f; 352 353 _eigenvals[k] -= t; 354 _eigenvals[l] += t; 355 356 for (i = 0; i < k; ++i) { 357 mat33Rotate(S, c, s, i, k, i, l); 358 } 359 360 for (i = k + 1; i < l; ++i) { 361 mat33Rotate(S, c, s, k, i, i, l); 362 } 363 364 for (i = l + 1; i < N; ++i) { 365 mat33Rotate(S, c, s, k, i, l, i); 366 } 367 368 for (i = 0; i < N; ++i) { 369 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i]; 370 eigenvecs->elem[l][i] = 371 s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i]; 372 eigenvecs->elem[k][i] = tmp; 373 } 374 375 ind[k] = mat33Maxind(S, k); 376 ind[l] = mat33Maxind(S, l); 377 378 float sum = 0.0f; 379 for (i = 0; i < N; ++i) { 380 for (j = i + 1; j < N; ++j) { 381 sum += fabsf(S->elem[i][j]); 382 } 383 } 384 385 if (sum < EPSILON) { 386 break; 387 } 388 } 389 390 for (k = 0; k < N; ++k) { 391 m = k; 392 for (l = k + 1; l < N; ++l) { 393 if (_eigenvals[l] > _eigenvals[m]) { 394 m = l; 395 } 396 } 397 398 if (k != m) { 399 float tmp = _eigenvals[k]; 400 _eigenvals[k] = _eigenvals[m]; 401 _eigenvals[m] = tmp; 402 403 mat33SwapRows(eigenvecs, k, m); 404 } 405 } 406 407 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]); 408 } 409 410 // index of largest off-diagonal element in row k 411 UNROLLED 412 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) { 413 ASSERT_NOT_NULL(A); 414 const uint32_t N = 3; 415 416 uint32_t m = k + 1; 417 uint32_t i; 418 419 for (i = k + 2; i < N; ++i) { 420 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) { 421 m = i; 422 } 423 } 424 425 return m; 426 } 427 428 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, 429 uint32_t i, uint32_t j) { 430 ASSERT_NOT_NULL(A); 431 float tmp = c * A->elem[k][l] - s * A->elem[i][j]; 432 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j]; 433 A->elem[k][l] = tmp; 434 } 435 436 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) { 437 ASSERT_NOT_NULL(out); 438 ASSERT_NOT_NULL(A); 439 ASSERT_NOT_NULL(v); 440 441 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z + 442 A->elem[0][3] * v->w; 443 444 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z + 445 A->elem[1][3] * v->w; 446 447 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z + 448 A->elem[2][3] * v->w; 449 450 out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z + 451 A->elem[3][3] * v->w; 452 } 453 454 UNROLLED 455 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) { 456 ASSERT_NOT_NULL(LU); 457 ASSERT_NOT_NULL(pivot); 458 const uint32_t N = 4; 459 uint32_t i, j, k; 460 461 for (k = 0; k < N; ++k) { 462 pivot->elem[k] = k; 463 float max = fabsf(LU->elem[k][k]); 464 for (j = k + 1; j < N; ++j) { 465 if (max < fabsf(LU->elem[j][k])) { 466 max = fabsf(LU->elem[j][k]); 467 pivot->elem[k] = j; 468 } 469 } 470 471 if (pivot->elem[k] != k) { 472 mat44SwapRows(LU, k, pivot->elem[k]); 473 } 474 475 if (fabsf(LU->elem[k][k]) < EPSILON) { 476 continue; 477 } 478 479 for (j = k + 1; j < N; ++j) { 480 LU->elem[k][j] /= LU->elem[k][k]; 481 } 482 483 for (i = k + 1; i < N; ++i) { 484 for (j = k + 1; j < N; ++j) { 485 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; 486 } 487 } 488 } 489 } 490 491 UNROLLED 492 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) { 493 ASSERT_NOT_NULL(A); 494 const uint32_t N = 4; 495 uint32_t k; 496 497 if (i == j) { 498 return; 499 } 500 501 for (k = 0; k < N; ++k) { 502 float tmp = A->elem[i][k]; 503 A->elem[i][k] = A->elem[j][k]; 504 A->elem[j][k] = tmp; 505 } 506 } 507 508 UNROLLED 509 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, 510 const struct Size4 *pivot) { 511 ASSERT_NOT_NULL(A); 512 ASSERT_NOT_NULL(x); 513 ASSERT_NOT_NULL(b); 514 ASSERT_NOT_NULL(pivot); 515 const uint32_t N = 4; 516 uint32_t i, k; 517 518 float bCopy[N]; 519 bCopy[0] = b->x; 520 bCopy[1] = b->y; 521 bCopy[2] = b->z; 522 bCopy[3] = b->w; 523 524 float _x[N]; 525 for (k = 0; k < N; ++k) { 526 if (pivot->elem[k] != k) { 527 float tmp = bCopy[k]; 528 bCopy[k] = bCopy[pivot->elem[k]]; 529 bCopy[pivot->elem[k]] = tmp; 530 } 531 532 _x[k] = bCopy[k]; 533 for (i = 0; i < k; ++i) { 534 _x[k] -= _x[i] * A->elem[k][i]; 535 } 536 _x[k] /= A->elem[k][k]; 537 } 538 539 for (k = N; k-- > 0;) { 540 for (i = k + 1; i < N; ++i) { 541 _x[k] -= _x[i] * A->elem[k][i]; 542 } 543 } 544 545 initVec4(x, _x[0], _x[1], _x[2], _x[3]); 546 } 547 548 float matMaxDiagonalElement(const float *square_mat, size_t n) { 549 ASSERT_NOT_NULL(square_mat); 550 ASSERT(n > 0); 551 size_t i; 552 float max = square_mat[0]; 553 const size_t n_square = n * n; 554 const size_t offset = n + 1; 555 for (i = offset; i < n_square; i += offset) { 556 if (square_mat[i] > max) { 557 max = square_mat[i]; 558 } 559 } 560 return max; 561 } 562 563 void matAddConstantDiagonal(float *square_mat, float u, size_t n) { 564 ASSERT_NOT_NULL(square_mat); 565 size_t i; 566 const size_t n_square = n * n; 567 const size_t offset = n + 1; 568 for (i = 0; i < n_square; i += offset) { 569 square_mat[i] += u; 570 } 571 } 572 573 void matTransposeMultiplyMat(float *out, const float *A, 574 size_t nrows, size_t ncols) { 575 ASSERT_NOT_NULL(out); 576 ASSERT_NOT_NULL(A); 577 size_t i; 578 size_t j; 579 size_t k; 580 memset(out, 0, sizeof(float) * ncols * ncols); 581 for (i = 0; i < ncols; ++i) { 582 for (j = 0; j < ncols; ++j) { 583 // Since A' * A is symmetric, can use upper diagonal elements 584 // to fill in the lower diagonal without recomputing. 585 if (j < i) { 586 out[i * ncols + j] = out[j * ncols + i]; 587 } else { 588 // mat_out[i, j] = ai ' * aj 589 out[i * ncols + j] = 0; 590 for (k = 0; k < nrows; ++k) { 591 out[i * ncols + j] += A[k * ncols + i] * 592 A[k * ncols + j]; 593 } 594 } 595 } 596 } 597 } 598 599 void matMultiplyVec(float *out, const float *A, const float *v, 600 size_t nrows, size_t ncols) { 601 ASSERT_NOT_NULL(out); 602 ASSERT_NOT_NULL(A); 603 ASSERT_NOT_NULL(v); 604 size_t i; 605 for (i = 0; i < nrows; ++i) { 606 const float *row = &A[i * ncols]; 607 out[i] = vecDot(row, v, (int)ncols); 608 } 609 } 610 611 void matTransposeMultiplyVec(float *out, const float *A, const float *v, 612 size_t nrows, size_t ncols) { 613 ASSERT_NOT_NULL(out); 614 ASSERT_NOT_NULL(A); 615 ASSERT_NOT_NULL(v); 616 size_t i, j; 617 for (i = 0; i < ncols; ++i) { 618 out[i] = 0; 619 for (j = 0; j < nrows; ++j) { 620 out[i] += A[j * ncols + i] * v[j]; 621 } 622 } 623 } 624 625 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) { 626 ASSERT_NOT_NULL(x); 627 ASSERT_NOT_NULL(L); 628 ASSERT_NOT_NULL(b); 629 ASSERT(n <= INT32_MAX); 630 int32_t i, j; // Loops below require signed integers. 631 int32_t s_n = (int32_t)n; // Signed n. 632 float sum = 0.0f; 633 // 1. Solve Ly = b through forward substitution. Use x[] to store y. 634 for (i = 0; i < s_n; ++i) { 635 sum = 0.0f; 636 for (j = 0; j < i; ++j) { 637 sum += L[i * s_n + j] * x[j]; 638 } 639 // Check for non-zero diagonals (don't divide by zero). 640 if (L[i * s_n + i] < EPSILON) { 641 return false; 642 } 643 x[i] = (b[i] - sum) / L[i * s_n + i]; 644 } 645 646 // 2. Solve L'x = y through backwards substitution. Use x[] to store both 647 // y and x. 648 for (i = s_n - 1; i >= 0; --i) { 649 sum = 0.0f; 650 for (j = i + 1; j < s_n; ++j) { 651 sum += L[j * s_n + i] * x[j]; 652 } 653 x[i] = (x[i] - sum) / L[i * s_n + i]; 654 } 655 656 return true; 657 } 658 659 bool matCholeskyDecomposition(float *L, const float *A, size_t n) { 660 ASSERT_NOT_NULL(L); 661 ASSERT_NOT_NULL(A); 662 size_t i, j, k; 663 float sum = 0.0f; 664 // initialize L to zero. 665 memset(L, 0, sizeof(float) * n * n); 666 667 for (i = 0; i < n; ++i) { 668 // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k]) 669 sum = 0.0f; 670 for (k = 0; k < i; ++k) { 671 sum += L[i * n + k] * L[i * n + k]; 672 } 673 sum = A[i * n + i] - sum; 674 // If diagonal element of L is too small, cholesky fails. 675 if (sum < CHOLESKY_TOLERANCE) { 676 return false; 677 } 678 L[i * n + i] = sqrtf(sum); 679 680 // for j = i+1:N, compute L[j][i] = 681 // (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k]) 682 for (j = i + 1; j < n; ++j) { 683 sum = 0.0f; 684 for (k = 0; k < i; ++k) { 685 sum += L[i * n + k] * L[j * n + k]; 686 } 687 // division okay because magnitude of L[i][i] already checked above. 688 L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i]; 689 } 690 } 691 692 return true; 693 } 694