1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <algos/mat.h> 18 19 #include <assert.h> 20 #include <nanohub_math.h> 21 #include <sys/types.h> 22 #include <float.h> 23 #include <string.h> 24 #include <seos.h> 25 26 27 #define kEps 1E-5 28 29 void initZeroMatrix(struct Mat33 *A) { 30 memset(A->elem, 0.0f, sizeof(A->elem)); 31 } 32 33 UNROLLED 34 void initDiagonalMatrix(struct Mat33 *A, float x) 35 { 36 initZeroMatrix(A); 37 38 uint32_t i; 39 for (i = 0; i < 3; ++i) { 40 A->elem[i][i] = x; 41 } 42 } 43 44 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, const struct Vec3 *v2, const struct Vec3 *v3) 45 { 46 A->elem[0][0] = v1->x; 47 A->elem[0][1] = v2->x; 48 A->elem[0][2] = v3->x; 49 50 A->elem[1][0] = v1->y; 51 A->elem[1][1] = v2->y; 52 A->elem[1][2] = v3->y; 53 54 A->elem[2][0] = v1->z; 55 A->elem[2][1] = v2->z; 56 A->elem[2][2] = v3->z; 57 } 58 59 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) 60 { 61 // assert(out != v); 62 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z; 63 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z; 64 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z; 65 } 66 67 UNROLLED 68 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) 69 { 70 // assert(out != A); 71 // assert(out != B); 72 73 uint32_t i; 74 for (i = 0; i < 3; ++i) { 75 uint32_t j; 76 for (j = 0; j < 3; ++j) { 77 uint32_t k; 78 float sum = 0.0f; 79 for (k = 0; k < 3; ++k) { 80 sum += A->elem[i][k] * B->elem[k][j]; 81 } 82 83 out->elem[i][j] = sum; 84 } 85 } 86 } 87 88 UNROLLED 89 void mat33ScalarMul(struct Mat33 *A, float c) 90 { 91 uint32_t i; 92 for (i = 0; i < 3; ++i) { 93 uint32_t j; 94 for (j = 0; j < 3; ++j) { 95 A->elem[i][j] *= c; 96 } 97 } 98 } 99 100 UNROLLED 101 void mat33Add(struct Mat33 *out, const struct Mat33 *A) 102 { 103 uint32_t i; 104 for (i = 0; i < 3; ++i) { 105 uint32_t j; 106 for (j = 0; j < 3; ++j) { 107 out->elem[i][j] += A->elem[i][j]; 108 } 109 } 110 } 111 112 UNROLLED 113 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) 114 { 115 uint32_t i; 116 for (i = 0; i < 3; ++i) { 117 uint32_t j; 118 for (j = 0; j < 3; ++j) { 119 out->elem[i][j] -= A->elem[i][j]; 120 } 121 } 122 } 123 124 UNROLLED 125 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) 126 { 127 uint32_t i; 128 for (i = 0; i < 3; ++i) { 129 if (A->elem[i][i] < 0.0f) { 130 return 0; 131 } 132 } 133 134 for (i = 0; i < 3; ++i) { 135 uint32_t j; 136 for (j = i + 1; j < 3; ++j) { 137 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) { 138 return 0; 139 } 140 } 141 } 142 143 return 1; 144 } 145 146 UNROLLED 147 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) 148 { 149 // assert(out != A); 150 // assert(out != B); 151 152 uint32_t i; 153 for (i = 0; i < 3; ++i) { 154 uint32_t j; 155 for (j = 0; j < 3; ++j) { 156 uint32_t k; 157 float sum = 0.0f; 158 for (k = 0; k < 3; ++k) { 159 sum += A->elem[k][i] * B->elem[k][j]; 160 } 161 162 out->elem[i][j] = sum; 163 } 164 } 165 } 166 167 UNROLLED 168 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) 169 { 170 // assert(out != A); 171 // assert(out != B); 172 173 uint32_t i; 174 for (i = 0; i < 3; ++i) { 175 uint32_t j; 176 for (j = 0; j < 3; ++j) { 177 uint32_t k; 178 float sum = 0.0f; 179 for (k = 0; k < 3; ++k) { 180 sum += A->elem[i][k] * B->elem[j][k]; 181 } 182 183 out->elem[i][j] = sum; 184 } 185 } 186 } 187 188 UNROLLED 189 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) 190 { 191 float t; 192 initDiagonalMatrix(out, 1.0f); 193 194 struct Mat33 tmp = *A; 195 196 uint32_t i, k; 197 for (i = 0; i < 3; ++i) { 198 uint32_t swap = i; 199 uint32_t j; 200 for (j = i + 1; j < 3; ++j) { 201 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) { 202 swap = j; 203 } 204 } 205 206 if (swap != i) { 207 for (k = 0; k < 3; ++k) { 208 t = tmp.elem[i][k]; 209 tmp.elem[i][k] = tmp.elem[swap][k]; 210 tmp.elem[swap][k] = t; 211 212 t = out->elem[i][k]; 213 out->elem[i][k] = out->elem[swap][k]; 214 out->elem[swap][k] = t; 215 } 216 } 217 218 t = 1.0f / tmp.elem[i][i]; 219 for (k = 0; k < 3; ++k) { 220 tmp.elem[i][k] *= t; 221 out->elem[i][k] *= t; 222 } 223 224 for (j = 0; j < 3; ++j) { 225 if (j != i) { 226 t = tmp.elem[j][i]; 227 for (k = 0; k < 3; ++k) { 228 tmp.elem[j][k] -= tmp.elem[i][k] * t; 229 out->elem[j][k] -= out->elem[i][k] * t; 230 } 231 } 232 } 233 } 234 } 235 236 UNROLLED 237 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) 238 { 239 uint32_t i; 240 for (i = 0; i < 3; ++i) { 241 uint32_t j; 242 for (j = 0; j < 3; ++j) { 243 out->elem[i][j] = A->elem[j][i]; 244 } 245 } 246 } 247 248 UNROLLED 249 void mat33DecomposeLup(struct Mat33 *LU, struct Size3 *pivot) 250 { 251 252 const uint32_t N = 3; 253 uint32_t i, j, k; 254 255 for (k = 0; k < N; ++k) { 256 pivot->elem[k] = k; 257 float max = fabsf(LU->elem[k][k]); 258 for (j = k + 1; j < N; ++j) { 259 if (max < fabsf(LU->elem[j][k])) { 260 max = fabsf(LU->elem[j][k]); 261 pivot->elem[k] = j; 262 } 263 } 264 265 if (pivot->elem[k] != k) { 266 mat33SwapRows(LU, k, pivot->elem[k]); 267 } 268 269 if (fabsf(LU->elem[k][k]) < kEps) { 270 continue; 271 } 272 273 for (j = k + 1; j < N; ++j) { 274 LU->elem[k][j] /= LU->elem[k][k]; 275 } 276 277 for (i = k + 1; i < N; ++i) { 278 for (j = k + 1; j < 3; ++j) { 279 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; 280 } 281 } 282 } 283 } 284 285 UNROLLED 286 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) 287 { 288 289 const uint32_t N = 3; 290 uint32_t k; 291 292 if (i == j) { 293 return; 294 } 295 296 for (k = 0; k < N; ++k) { 297 float tmp = A->elem[i][k]; 298 A->elem[i][k] = A->elem[j][k]; 299 A->elem[j][k] = tmp; 300 } 301 } 302 303 UNROLLED 304 void mat33Solve(const struct Mat33 *A, struct Vec3 *x, const struct Vec3 *b, const struct Size3 *pivot) 305 { 306 307 const uint32_t N = 3; 308 uint32_t i, k; 309 310 float bCopy[N]; 311 bCopy[0] = b->x; 312 bCopy[1] = b->y; 313 bCopy[2] = b->z; 314 315 float _x[N]; 316 for (k = 0; k < N; ++k) { 317 if (pivot->elem[k] != k) { 318 float tmp = bCopy[k]; 319 bCopy[k] = bCopy[pivot->elem[k]]; 320 bCopy[pivot->elem[k]] = tmp; 321 } 322 323 _x[k] = bCopy[k]; 324 for (i = 0; i < k; ++i) { 325 _x[k] -= _x[i] * A->elem[k][i]; 326 } 327 _x[k] /= A->elem[k][k]; 328 } 329 330 for (k = N; k-- > 0;) { 331 for (i = k + 1; i < N; ++i) { 332 _x[k] -= _x[i] * A->elem[k][i]; 333 } 334 } 335 336 initVec3(x, _x[0], _x[1], _x[2]); 337 } 338 339 /* Returns the eigenvalues and corresponding eigenvectors of the _symmetric_ matrix. 340 The i-th eigenvalue corresponds to the eigenvector in the i-th _row_ of "eigenvecs". 341 */ 342 343 UNROLLED 344 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, struct Mat33 *eigenvecs) 345 { 346 347 const uint32_t N = 3; 348 uint32_t i, j, k, l, m; 349 350 float _eigenvals[N]; 351 352 uint32_t ind[N]; 353 for (k = 0; k < N; ++k) { 354 ind[k] = mat33Maxind(S, k); 355 _eigenvals[k] = S->elem[k][k]; 356 } 357 358 initDiagonalMatrix(eigenvecs, 1.0f); 359 360 for (;;) { 361 m = 0; 362 for (k = 1; k + 1 < N; ++k) { 363 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) { 364 m = k; 365 } 366 } 367 368 k = m; 369 l = ind[m]; 370 float p = S->elem[k][l]; 371 372 if (fabsf(p) < kEps) { 373 break; 374 } 375 376 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f; 377 378 float t = fabsf(y) + sqrtf(p * p + y * y); 379 float s = sqrtf(p * p + t * t); 380 float c = t / s; 381 s = p / s; 382 t = p * p / t; 383 384 if (y < 0.0f) { 385 s = -s; 386 t = -t; 387 } 388 389 S->elem[k][l] = 0.0f; 390 391 _eigenvals[k] -= t; 392 _eigenvals[l] += t; 393 394 for (i = 0; i < k; ++i) { 395 mat33Rotate(S, c, s, i, k, i, l); 396 } 397 398 for (i = k + 1; i < l; ++i) { 399 mat33Rotate(S, c, s, k, i, i, l); 400 } 401 402 for (i = l + 1; i < N; ++i) { 403 mat33Rotate(S, c, s, k, i, l, i); 404 } 405 406 for (i = 0; i < N; ++i) { 407 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i]; 408 eigenvecs->elem[l][i] = s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i]; 409 eigenvecs->elem[k][i] = tmp; 410 } 411 412 ind[k] = mat33Maxind(S, k); 413 ind[l] = mat33Maxind(S, l); 414 415 float sum = 0.0f; 416 for (i = 0; i < N; ++i) { 417 for (j = i + 1; j < N; ++j) { 418 sum += fabsf(S->elem[i][j]); 419 } 420 } 421 422 if (sum < kEps) { 423 break; 424 } 425 } 426 427 for (k = 0; k < N; ++k) { 428 m = k; 429 for (l = k + 1; l < N; ++l) { 430 if (_eigenvals[l] > _eigenvals[m]) { 431 m = l; 432 } 433 } 434 435 if (k != m) { 436 float tmp = _eigenvals[k]; 437 _eigenvals[k] = _eigenvals[m]; 438 _eigenvals[m] = tmp; 439 440 mat33SwapRows(eigenvecs, k, m); 441 } 442 } 443 444 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]); 445 } 446 447 // index of largest off-diagonal element in row k 448 UNROLLED 449 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) 450 { 451 452 const uint32_t N = 3; 453 454 uint32_t m = k + 1; 455 uint32_t i; 456 457 for (i = k + 2; i < N; ++i) { 458 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) { 459 m = i; 460 } 461 } 462 463 return m; 464 } 465 466 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, uint32_t i, uint32_t j) 467 { 468 float tmp = c * A->elem[k][l] - s * A->elem[i][j]; 469 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j]; 470 A->elem[k][l] = tmp; 471 } 472 473 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) 474 { 475 // assert(out != v); 476 477 out->x = 478 A->elem[0][0] * v->x 479 + A->elem[0][1] * v->y 480 + A->elem[0][2] * v->z 481 + A->elem[0][3] * v->w; 482 483 out->y = 484 A->elem[1][0] * v->x 485 + A->elem[1][1] * v->y 486 + A->elem[1][2] * v->z 487 + A->elem[1][3] * v->w; 488 489 out->z = 490 A->elem[2][0] * v->x 491 + A->elem[2][1] * v->y 492 + A->elem[2][2] * v->z 493 + A->elem[2][3] * v->w; 494 495 out->w = 496 A->elem[3][0] * v->x 497 + A->elem[3][1] * v->y 498 + A->elem[3][2] * v->z 499 + A->elem[3][3] * v->w; 500 } 501 502 UNROLLED 503 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) 504 { 505 const uint32_t N = 4; 506 uint32_t i, j, k; 507 508 for (k = 0; k < N; ++k) { 509 pivot->elem[k] = k; 510 float max = fabsf(LU->elem[k][k]); 511 for (j = k + 1; j < N; ++j) { 512 if (max < fabsf(LU->elem[j][k])) { 513 max = fabsf(LU->elem[j][k]); 514 pivot->elem[k] = j; 515 } 516 } 517 518 if (pivot->elem[k] != k) { 519 mat44SwapRows(LU, k, pivot->elem[k]); 520 } 521 522 if (fabsf(LU->elem[k][k]) < kEps) { 523 continue; 524 } 525 526 for (j = k + 1; j < N; ++j) { 527 LU->elem[k][j] /= LU->elem[k][k]; 528 } 529 530 for (i = k + 1; i < N; ++i) { 531 for (j = k + 1; j < N; ++j) { 532 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; 533 } 534 } 535 } 536 } 537 538 UNROLLED 539 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) 540 { 541 542 const uint32_t N = 4; 543 uint32_t k; 544 545 if (i == j) { 546 return; 547 } 548 549 for (k = 0; k < N; ++k) { 550 float tmp = A->elem[i][k]; 551 A->elem[i][k] = A->elem[j][k]; 552 A->elem[j][k] = tmp; 553 } 554 } 555 556 UNROLLED 557 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, const struct Size4 *pivot) 558 { 559 560 const uint32_t N = 4; 561 uint32_t i, k; 562 563 float bCopy[N]; 564 bCopy[0] = b->x; 565 bCopy[1] = b->y; 566 bCopy[2] = b->z; 567 bCopy[3] = b->w; 568 569 float _x[N]; 570 for (k = 0; k < N; ++k) { 571 if (pivot->elem[k] != k) { 572 float tmp = bCopy[k]; 573 bCopy[k] = bCopy[pivot->elem[k]]; 574 bCopy[pivot->elem[k]] = tmp; 575 } 576 577 _x[k] = bCopy[k]; 578 for (i = 0; i < k; ++i) { 579 _x[k] -= _x[i] * A->elem[k][i]; 580 } 581 _x[k] /= A->elem[k][k]; 582 } 583 584 for (k = N; k-- > 0;) { 585 for (i = k + 1; i < N; ++i) { 586 _x[k] -= _x[i] * A->elem[k][i]; 587 } 588 } 589 590 initVec4(x, _x[0], _x[1], _x[2], _x[3]); 591 } 592 593 594