1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "common/math/mat.h" 18 19 #include <assert.h> 20 #include <float.h> 21 22 #ifdef _OS_BUILD_ 23 #include <nanohub_math.h> 24 #include <seos.h> 25 #else 26 #include <math.h> 27 #ifndef UNROLLED 28 #define UNROLLED 29 #endif 30 #endif // _OS_BUILD_ 31 32 #include <stddef.h> 33 #include <string.h> 34 35 #define EPSILON 1E-5f 36 #define CHOLESKY_TOLERANCE 1E-6f 37 38 // Forward declarations. 39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j); 40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k); 41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, 42 uint32_t l, uint32_t i, uint32_t j); 43 44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j); 45 46 void initZeroMatrix(struct Mat33 *A) { 47 ASSERT_NOT_NULL(A); 48 memset(A->elem, 0.0f, sizeof(A->elem)); 49 } 50 51 UNROLLED 52 void initDiagonalMatrix(struct Mat33 *A, float x) { 53 ASSERT_NOT_NULL(A); 54 initZeroMatrix(A); 55 56 uint32_t i; 57 for (i = 0; i < 3; ++i) { 58 A->elem[i][i] = x; 59 } 60 } 61 62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, 63 const struct Vec3 *v2, const struct Vec3 *v3) { 64 ASSERT_NOT_NULL(A); 65 ASSERT_NOT_NULL(v1); 66 ASSERT_NOT_NULL(v2); 67 ASSERT_NOT_NULL(v3); 68 A->elem[0][0] = v1->x; 69 A->elem[0][1] = v2->x; 70 A->elem[0][2] = v3->x; 71 72 A->elem[1][0] = v1->y; 73 A->elem[1][1] = v2->y; 74 A->elem[1][2] = v3->y; 75 76 A->elem[2][0] = v1->z; 77 A->elem[2][1] = v2->z; 78 A->elem[2][2] = v3->z; 79 } 80 81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) { 82 ASSERT_NOT_NULL(out); 83 ASSERT_NOT_NULL(A); 84 ASSERT_NOT_NULL(v); 85 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z; 86 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z; 87 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z; 88 } 89 90 UNROLLED 91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, 92 const struct Mat33 *B) { 93 ASSERT_NOT_NULL(out); 94 ASSERT_NOT_NULL(A); 95 ASSERT_NOT_NULL(B); 96 ASSERT(out != A); 97 ASSERT(out != B); 98 99 uint32_t i; 100 for (i = 0; i < 3; ++i) { 101 uint32_t j; 102 for (j = 0; j < 3; ++j) { 103 uint32_t k; 104 float sum = 0.0f; 105 for (k = 0; k < 3; ++k) { 106 sum += A->elem[i][k] * B->elem[k][j]; 107 } 108 109 out->elem[i][j] = sum; 110 } 111 } 112 } 113 114 UNROLLED 115 void mat33ScalarMul(struct Mat33 *A, float c) { 116 ASSERT_NOT_NULL(A); 117 uint32_t i; 118 for (i = 0; i < 3; ++i) { 119 uint32_t j; 120 for (j = 0; j < 3; ++j) { 121 A->elem[i][j] *= c; 122 } 123 } 124 } 125 126 UNROLLED 127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) { 128 ASSERT_NOT_NULL(out); 129 ASSERT_NOT_NULL(A); 130 uint32_t i; 131 for (i = 0; i < 3; ++i) { 132 uint32_t j; 133 for (j = 0; j < 3; ++j) { 134 out->elem[i][j] += A->elem[i][j]; 135 } 136 } 137 } 138 139 UNROLLED 140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) { 141 ASSERT_NOT_NULL(out); 142 ASSERT_NOT_NULL(A); 143 uint32_t i; 144 for (i = 0; i < 3; ++i) { 145 uint32_t j; 146 for (j = 0; j < 3; ++j) { 147 out->elem[i][j] -= A->elem[i][j]; 148 } 149 } 150 } 151 152 UNROLLED 153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) { 154 ASSERT_NOT_NULL(A); 155 uint32_t i; 156 for (i = 0; i < 3; ++i) { 157 if (A->elem[i][i] < 0.0f) { 158 return 0; 159 } 160 } 161 162 for (i = 0; i < 3; ++i) { 163 uint32_t j; 164 for (j = i + 1; j < 3; ++j) { 165 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) { 166 return 0; 167 } 168 } 169 } 170 171 return 1; 172 } 173 174 UNROLLED 175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, 176 const struct Mat33 *B) { 177 ASSERT(out != A); 178 ASSERT(out != B); 179 ASSERT_NOT_NULL(out); 180 ASSERT_NOT_NULL(A); 181 ASSERT_NOT_NULL(B); 182 uint32_t i; 183 for (i = 0; i < 3; ++i) { 184 uint32_t j; 185 for (j = 0; j < 3; ++j) { 186 uint32_t k; 187 float sum = 0.0f; 188 for (k = 0; k < 3; ++k) { 189 sum += A->elem[k][i] * B->elem[k][j]; 190 } 191 192 out->elem[i][j] = sum; 193 } 194 } 195 } 196 197 UNROLLED 198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, 199 const struct Mat33 *B) { 200 ASSERT(out != A); 201 ASSERT(out != B); 202 ASSERT_NOT_NULL(out); 203 ASSERT_NOT_NULL(A); 204 ASSERT_NOT_NULL(B); 205 uint32_t i; 206 for (i = 0; i < 3; ++i) { 207 uint32_t j; 208 for (j = 0; j < 3; ++j) { 209 uint32_t k; 210 float sum = 0.0f; 211 for (k = 0; k < 3; ++k) { 212 sum += A->elem[i][k] * B->elem[j][k]; 213 } 214 215 out->elem[i][j] = sum; 216 } 217 } 218 } 219 220 UNROLLED 221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) { 222 ASSERT_NOT_NULL(out); 223 ASSERT_NOT_NULL(A); 224 float t; 225 initDiagonalMatrix(out, 1.0f); 226 227 struct Mat33 tmp = *A; 228 229 uint32_t i, k; 230 for (i = 0; i < 3; ++i) { 231 uint32_t swap = i; 232 uint32_t j; 233 for (j = i + 1; j < 3; ++j) { 234 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) { 235 swap = j; 236 } 237 } 238 239 if (swap != i) { 240 for (k = 0; k < 3; ++k) { 241 t = tmp.elem[i][k]; 242 tmp.elem[i][k] = tmp.elem[swap][k]; 243 tmp.elem[swap][k] = t; 244 245 t = out->elem[i][k]; 246 out->elem[i][k] = out->elem[swap][k]; 247 out->elem[swap][k] = t; 248 } 249 } 250 // divide by zero guard. 251 ASSERT(fabsf(tmp.elem[i][i]) > 0); 252 if(!(fabsf(tmp.elem[i][i]) > 0)) { 253 return; 254 } 255 t = 1.0f / tmp.elem[i][i]; 256 for (k = 0; k < 3; ++k) { 257 tmp.elem[i][k] *= t; 258 out->elem[i][k] *= t; 259 } 260 261 for (j = 0; j < 3; ++j) { 262 if (j != i) { 263 t = tmp.elem[j][i]; 264 for (k = 0; k < 3; ++k) { 265 tmp.elem[j][k] -= tmp.elem[i][k] * t; 266 out->elem[j][k] -= out->elem[i][k] * t; 267 } 268 } 269 } 270 } 271 } 272 273 UNROLLED 274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) { 275 ASSERT_NOT_NULL(out); 276 ASSERT_NOT_NULL(A); 277 uint32_t i; 278 for (i = 0; i < 3; ++i) { 279 uint32_t j; 280 for (j = 0; j < 3; ++j) { 281 out->elem[i][j] = A->elem[j][i]; 282 } 283 } 284 } 285 286 UNROLLED 287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) { 288 ASSERT_NOT_NULL(A); 289 const uint32_t N = 3; 290 uint32_t k; 291 292 if (i == j) { 293 return; 294 } 295 296 for (k = 0; k < N; ++k) { 297 float tmp = A->elem[i][k]; 298 A->elem[i][k] = A->elem[j][k]; 299 A->elem[j][k] = tmp; 300 } 301 } 302 303 UNROLLED 304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, 305 struct Mat33 *eigenvecs) { 306 ASSERT_NOT_NULL(S); 307 ASSERT_NOT_NULL(eigenvals); 308 ASSERT_NOT_NULL(eigenvecs); 309 const uint32_t N = 3; 310 uint32_t i, j, k, l, m; 311 312 float _eigenvals[N]; 313 314 uint32_t ind[N]; 315 for (k = 0; k < N; ++k) { 316 ind[k] = mat33Maxind(S, k); 317 _eigenvals[k] = S->elem[k][k]; 318 } 319 320 initDiagonalMatrix(eigenvecs, 1.0f); 321 322 for (;;) { 323 m = 0; 324 for (k = 1; k + 1 < N; ++k) { 325 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) { 326 m = k; 327 } 328 } 329 330 k = m; 331 l = ind[m]; 332 float p = S->elem[k][l]; 333 334 if (fabsf(p) < EPSILON) { 335 break; 336 } 337 338 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f; 339 340 float t = fabsf(y) + sqrtf(p * p + y * y); 341 float s = sqrtf(p * p + t * t); 342 float c = t / s; 343 s = p / s; 344 t = p * p / t; 345 346 if (y < 0.0f) { 347 s = -s; 348 t = -t; 349 } 350 351 S->elem[k][l] = 0.0f; 352 353 _eigenvals[k] -= t; 354 _eigenvals[l] += t; 355 356 for (i = 0; i < k; ++i) { 357 mat33Rotate(S, c, s, i, k, i, l); 358 } 359 360 for (i = k + 1; i < l; ++i) { 361 mat33Rotate(S, c, s, k, i, i, l); 362 } 363 364 for (i = l + 1; i < N; ++i) { 365 mat33Rotate(S, c, s, k, i, l, i); 366 } 367 368 for (i = 0; i < N; ++i) { 369 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i]; 370 eigenvecs->elem[l][i] = 371 s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i]; 372 eigenvecs->elem[k][i] = tmp; 373 } 374 375 ind[k] = mat33Maxind(S, k); 376 ind[l] = mat33Maxind(S, l); 377 378 float sum = 0.0f; 379 for (i = 0; i < N; ++i) { 380 for (j = i + 1; j < N; ++j) { 381 sum += fabsf(S->elem[i][j]); 382 } 383 } 384 385 if (sum < EPSILON) { 386 break; 387 } 388 } 389 390 for (k = 0; k < N; ++k) { 391 m = k; 392 for (l = k + 1; l < N; ++l) { 393 if (_eigenvals[l] > _eigenvals[m]) { 394 m = l; 395 } 396 } 397 398 if (k != m) { 399 float tmp = _eigenvals[k]; 400 _eigenvals[k] = _eigenvals[m]; 401 _eigenvals[m] = tmp; 402 403 mat33SwapRows(eigenvecs, k, m); 404 } 405 } 406 407 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]); 408 } 409 410 float mat33Determinant(const struct Mat33 *A) { 411 ASSERT_NOT_NULL(A); 412 return A->elem[0][0] * 413 (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1]) 414 - A->elem[0][1] * 415 (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0]) 416 + A->elem[0][2] * 417 (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]); 418 } 419 420 // index of largest off-diagonal element in row k 421 UNROLLED 422 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) { 423 ASSERT_NOT_NULL(A); 424 const uint32_t N = 3; 425 426 uint32_t m = k + 1; 427 uint32_t i; 428 429 for (i = k + 2; i < N; ++i) { 430 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) { 431 m = i; 432 } 433 } 434 435 return m; 436 } 437 438 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, 439 uint32_t i, uint32_t j) { 440 ASSERT_NOT_NULL(A); 441 float tmp = c * A->elem[k][l] - s * A->elem[i][j]; 442 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j]; 443 A->elem[k][l] = tmp; 444 } 445 446 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) { 447 ASSERT_NOT_NULL(out); 448 ASSERT_NOT_NULL(A); 449 ASSERT_NOT_NULL(v); 450 451 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z + 452 A->elem[0][3] * v->w; 453 454 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z + 455 A->elem[1][3] * v->w; 456 457 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z + 458 A->elem[2][3] * v->w; 459 460 out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z + 461 A->elem[3][3] * v->w; 462 } 463 464 UNROLLED 465 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) { 466 ASSERT_NOT_NULL(LU); 467 ASSERT_NOT_NULL(pivot); 468 const uint32_t N = 4; 469 uint32_t i, j, k; 470 471 for (k = 0; k < N; ++k) { 472 pivot->elem[k] = k; 473 float max = fabsf(LU->elem[k][k]); 474 for (j = k + 1; j < N; ++j) { 475 if (max < fabsf(LU->elem[j][k])) { 476 max = fabsf(LU->elem[j][k]); 477 pivot->elem[k] = j; 478 } 479 } 480 481 if (pivot->elem[k] != k) { 482 mat44SwapRows(LU, k, pivot->elem[k]); 483 } 484 485 if (fabsf(LU->elem[k][k]) < EPSILON) { 486 continue; 487 } 488 489 for (j = k + 1; j < N; ++j) { 490 LU->elem[k][j] /= LU->elem[k][k]; 491 } 492 493 for (i = k + 1; i < N; ++i) { 494 for (j = k + 1; j < N; ++j) { 495 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; 496 } 497 } 498 } 499 } 500 501 UNROLLED 502 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) { 503 ASSERT_NOT_NULL(A); 504 const uint32_t N = 4; 505 uint32_t k; 506 507 if (i == j) { 508 return; 509 } 510 511 for (k = 0; k < N; ++k) { 512 float tmp = A->elem[i][k]; 513 A->elem[i][k] = A->elem[j][k]; 514 A->elem[j][k] = tmp; 515 } 516 } 517 518 UNROLLED 519 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, 520 const struct Size4 *pivot) { 521 ASSERT_NOT_NULL(A); 522 ASSERT_NOT_NULL(x); 523 ASSERT_NOT_NULL(b); 524 ASSERT_NOT_NULL(pivot); 525 const uint32_t N = 4; 526 uint32_t i, k; 527 528 float bCopy[N]; 529 bCopy[0] = b->x; 530 bCopy[1] = b->y; 531 bCopy[2] = b->z; 532 bCopy[3] = b->w; 533 534 float _x[N]; 535 for (k = 0; k < N; ++k) { 536 if (pivot->elem[k] != k) { 537 float tmp = bCopy[k]; 538 bCopy[k] = bCopy[pivot->elem[k]]; 539 bCopy[pivot->elem[k]] = tmp; 540 } 541 542 _x[k] = bCopy[k]; 543 for (i = 0; i < k; ++i) { 544 _x[k] -= _x[i] * A->elem[k][i]; 545 } 546 _x[k] /= A->elem[k][k]; 547 } 548 549 for (k = N; k-- > 0;) { 550 for (i = k + 1; i < N; ++i) { 551 _x[k] -= _x[i] * A->elem[k][i]; 552 } 553 } 554 555 initVec4(x, _x[0], _x[1], _x[2], _x[3]); 556 } 557 558 float matMaxDiagonalElement(const float *square_mat, size_t n) { 559 ASSERT_NOT_NULL(square_mat); 560 ASSERT(n > 0); 561 size_t i; 562 float max = square_mat[0]; 563 const size_t n_square = n * n; 564 const size_t offset = n + 1; 565 for (i = offset; i < n_square; i += offset) { 566 if (square_mat[i] > max) { 567 max = square_mat[i]; 568 } 569 } 570 return max; 571 } 572 573 void matAddConstantDiagonal(float *square_mat, float u, size_t n) { 574 ASSERT_NOT_NULL(square_mat); 575 size_t i; 576 const size_t n_square = n * n; 577 const size_t offset = n + 1; 578 for (i = 0; i < n_square; i += offset) { 579 square_mat[i] += u; 580 } 581 } 582 583 void matTransposeMultiplyMat(float *out, const float *A, 584 size_t nrows, size_t ncols) { 585 ASSERT_NOT_NULL(out); 586 ASSERT_NOT_NULL(A); 587 size_t i; 588 size_t j; 589 size_t k; 590 memset(out, 0, sizeof(float) * ncols * ncols); 591 for (i = 0; i < ncols; ++i) { 592 for (j = 0; j < ncols; ++j) { 593 // Since A' * A is symmetric, can use upper diagonal elements 594 // to fill in the lower diagonal without recomputing. 595 if (j < i) { 596 out[i * ncols + j] = out[j * ncols + i]; 597 } else { 598 // mat_out[i, j] = ai ' * aj 599 out[i * ncols + j] = 0; 600 for (k = 0; k < nrows; ++k) { 601 out[i * ncols + j] += A[k * ncols + i] * 602 A[k * ncols + j]; 603 } 604 } 605 } 606 } 607 } 608 609 void matMultiplyVec(float *out, const float *A, const float *v, 610 size_t nrows, size_t ncols) { 611 ASSERT_NOT_NULL(out); 612 ASSERT_NOT_NULL(A); 613 ASSERT_NOT_NULL(v); 614 size_t i; 615 for (i = 0; i < nrows; ++i) { 616 const float *row = &A[i * ncols]; 617 out[i] = vecDot(row, v, ncols); 618 } 619 } 620 621 void matTransposeMultiplyVec(float *out, const float *A, const float *v, 622 size_t nrows, size_t ncols) { 623 ASSERT_NOT_NULL(out); 624 ASSERT_NOT_NULL(A); 625 ASSERT_NOT_NULL(v); 626 size_t i, j; 627 for (i = 0; i < ncols; ++i) { 628 out[i] = 0; 629 for (j = 0; j < nrows; ++j) { 630 out[i] += A[j * ncols + i] * v[j]; 631 } 632 } 633 } 634 635 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) { 636 ASSERT_NOT_NULL(x); 637 ASSERT_NOT_NULL(L); 638 ASSERT_NOT_NULL(b); 639 ASSERT(n <= INT32_MAX); 640 int32_t i, j; // Loops below require signed integers. 641 int32_t s_n = (int32_t)n; // Signed n. 642 float sum = 0.0f; 643 // 1. Solve Ly = b through forward substitution. Use x[] to store y. 644 for (i = 0; i < s_n; ++i) { 645 sum = 0.0f; 646 for (j = 0; j < i; ++j) { 647 sum += L[i * s_n + j] * x[j]; 648 } 649 // Check for non-zero diagonals (don't divide by zero). 650 if (L[i * s_n + i] < EPSILON) { 651 return false; 652 } 653 x[i] = (b[i] - sum) / L[i * s_n + i]; 654 } 655 656 // 2. Solve L'x = y through backwards substitution. Use x[] to store both 657 // y and x. 658 for (i = s_n - 1; i >= 0; --i) { 659 sum = 0.0f; 660 for (j = i + 1; j < s_n; ++j) { 661 sum += L[j * s_n + i] * x[j]; 662 } 663 x[i] = (x[i] - sum) / L[i * s_n + i]; 664 } 665 666 return true; 667 } 668 669 bool matCholeskyDecomposition(float *L, const float *A, size_t n) { 670 ASSERT_NOT_NULL(L); 671 ASSERT_NOT_NULL(A); 672 size_t i, j, k; 673 float sum = 0.0f; 674 // initialize L to zero. 675 memset(L, 0, sizeof(float) * n * n); 676 677 for (i = 0; i < n; ++i) { 678 // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k]) 679 sum = 0.0f; 680 for (k = 0; k < i; ++k) { 681 sum += L[i * n + k] * L[i * n + k]; 682 } 683 sum = A[i * n + i] - sum; 684 // If diagonal element of L is too small, cholesky fails. 685 if (sum < CHOLESKY_TOLERANCE) { 686 return false; 687 } 688 L[i * n + i] = sqrtf(sum); 689 690 // for j = i+1:N, compute L[j][i] = 691 // (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k]) 692 for (j = i + 1; j < n; ++j) { 693 sum = 0.0f; 694 for (k = 0; k < i; ++k) { 695 sum += L[i * n + k] * L[j * n + k]; 696 } 697 // division okay because magnitude of L[i][i] already checked above. 698 L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i]; 699 } 700 } 701 702 return true; 703 } 704