Home | History | Annotate | Download | only in math
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "common/math/mat.h"
     18 
     19 #include <assert.h>
     20 #include <float.h>
     21 
     22 #ifdef _OS_BUILD_
     23 #include <nanohub_math.h>
     24 #include <seos.h>
     25 #else
     26 #include <math.h>
     27 #ifndef UNROLLED
     28 #define UNROLLED
     29 #endif
     30 #endif  // _OS_BUILD_
     31 
     32 #include <stddef.h>
     33 #include <string.h>
     34 
     35 #define EPSILON 1E-5f
     36 #define CHOLESKY_TOLERANCE 1E-6f
     37 
     38 // Forward declarations.
     39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
     40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
     41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
     42                         uint32_t l, uint32_t i, uint32_t j);
     43 
     44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
     45 
     46 void initZeroMatrix(struct Mat33 *A) {
     47   ASSERT_NOT_NULL(A);
     48   memset(A->elem, 0.0f, sizeof(A->elem));
     49 }
     50 
     51 UNROLLED
     52 void initDiagonalMatrix(struct Mat33 *A, float x) {
     53   ASSERT_NOT_NULL(A);
     54   initZeroMatrix(A);
     55 
     56   uint32_t i;
     57   for (i = 0; i < 3; ++i) {
     58     A->elem[i][i] = x;
     59   }
     60 }
     61 
     62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
     63                        const struct Vec3 *v2, const struct Vec3 *v3) {
     64   ASSERT_NOT_NULL(A);
     65   ASSERT_NOT_NULL(v1);
     66   ASSERT_NOT_NULL(v2);
     67   ASSERT_NOT_NULL(v3);
     68   A->elem[0][0] = v1->x;
     69   A->elem[0][1] = v2->x;
     70   A->elem[0][2] = v3->x;
     71 
     72   A->elem[1][0] = v1->y;
     73   A->elem[1][1] = v2->y;
     74   A->elem[1][2] = v3->y;
     75 
     76   A->elem[2][0] = v1->z;
     77   A->elem[2][1] = v2->z;
     78   A->elem[2][2] = v3->z;
     79 }
     80 
     81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
     82   ASSERT_NOT_NULL(out);
     83   ASSERT_NOT_NULL(A);
     84   ASSERT_NOT_NULL(v);
     85   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
     86   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
     87   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
     88 }
     89 
     90 UNROLLED
     91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
     92                    const struct Mat33 *B) {
     93   ASSERT_NOT_NULL(out);
     94   ASSERT_NOT_NULL(A);
     95   ASSERT_NOT_NULL(B);
     96   ASSERT(out != A);
     97   ASSERT(out != B);
     98 
     99   uint32_t i;
    100   for (i = 0; i < 3; ++i) {
    101     uint32_t j;
    102     for (j = 0; j < 3; ++j) {
    103       uint32_t k;
    104       float sum = 0.0f;
    105       for (k = 0; k < 3; ++k) {
    106         sum += A->elem[i][k] * B->elem[k][j];
    107       }
    108 
    109       out->elem[i][j] = sum;
    110     }
    111   }
    112 }
    113 
    114 UNROLLED
    115 void mat33ScalarMul(struct Mat33 *A, float c) {
    116   ASSERT_NOT_NULL(A);
    117   uint32_t i;
    118   for (i = 0; i < 3; ++i) {
    119     uint32_t j;
    120     for (j = 0; j < 3; ++j) {
    121       A->elem[i][j] *= c;
    122     }
    123   }
    124 }
    125 
    126 UNROLLED
    127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
    128   ASSERT_NOT_NULL(out);
    129   ASSERT_NOT_NULL(A);
    130   uint32_t i;
    131   for (i = 0; i < 3; ++i) {
    132     uint32_t j;
    133     for (j = 0; j < 3; ++j) {
    134       out->elem[i][j] += A->elem[i][j];
    135     }
    136   }
    137 }
    138 
    139 UNROLLED
    140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
    141   ASSERT_NOT_NULL(out);
    142   ASSERT_NOT_NULL(A);
    143   uint32_t i;
    144   for (i = 0; i < 3; ++i) {
    145     uint32_t j;
    146     for (j = 0; j < 3; ++j) {
    147       out->elem[i][j] -= A->elem[i][j];
    148     }
    149   }
    150 }
    151 
    152 UNROLLED
    153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
    154   ASSERT_NOT_NULL(A);
    155   uint32_t i;
    156   for (i = 0; i < 3; ++i) {
    157     if (A->elem[i][i] < 0.0f) {
    158       return 0;
    159     }
    160   }
    161 
    162   for (i = 0; i < 3; ++i) {
    163     uint32_t j;
    164     for (j = i + 1; j < 3; ++j) {
    165       if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
    166         return 0;
    167       }
    168     }
    169   }
    170 
    171   return 1;
    172 }
    173 
    174 UNROLLED
    175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
    176                              const struct Mat33 *B) {
    177   ASSERT(out != A);
    178   ASSERT(out != B);
    179   ASSERT_NOT_NULL(out);
    180   ASSERT_NOT_NULL(A);
    181   ASSERT_NOT_NULL(B);
    182   uint32_t i;
    183   for (i = 0; i < 3; ++i) {
    184     uint32_t j;
    185     for (j = 0; j < 3; ++j) {
    186       uint32_t k;
    187       float sum = 0.0f;
    188       for (k = 0; k < 3; ++k) {
    189         sum += A->elem[k][i] * B->elem[k][j];
    190       }
    191 
    192       out->elem[i][j] = sum;
    193     }
    194   }
    195 }
    196 
    197 UNROLLED
    198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
    199                               const struct Mat33 *B) {
    200   ASSERT(out != A);
    201   ASSERT(out != B);
    202   ASSERT_NOT_NULL(out);
    203   ASSERT_NOT_NULL(A);
    204   ASSERT_NOT_NULL(B);
    205   uint32_t i;
    206   for (i = 0; i < 3; ++i) {
    207     uint32_t j;
    208     for (j = 0; j < 3; ++j) {
    209       uint32_t k;
    210       float sum = 0.0f;
    211       for (k = 0; k < 3; ++k) {
    212         sum += A->elem[i][k] * B->elem[j][k];
    213       }
    214 
    215       out->elem[i][j] = sum;
    216     }
    217   }
    218 }
    219 
    220 UNROLLED
    221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
    222   ASSERT_NOT_NULL(out);
    223   ASSERT_NOT_NULL(A);
    224   float t;
    225   initDiagonalMatrix(out, 1.0f);
    226 
    227   struct Mat33 tmp = *A;
    228 
    229   uint32_t i, k;
    230   for (i = 0; i < 3; ++i) {
    231     uint32_t swap = i;
    232     uint32_t j;
    233     for (j = i + 1; j < 3; ++j) {
    234       if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
    235         swap = j;
    236       }
    237     }
    238 
    239     if (swap != i) {
    240       for (k = 0; k < 3; ++k) {
    241         t = tmp.elem[i][k];
    242         tmp.elem[i][k] = tmp.elem[swap][k];
    243         tmp.elem[swap][k] = t;
    244 
    245         t = out->elem[i][k];
    246         out->elem[i][k] = out->elem[swap][k];
    247         out->elem[swap][k] = t;
    248       }
    249     }
    250     // divide by zero guard.
    251     ASSERT(fabsf(tmp.elem[i][i]) > 0);
    252     if(!(fabsf(tmp.elem[i][i]) > 0)) {
    253       return;
    254     }
    255     t = 1.0f / tmp.elem[i][i];
    256     for (k = 0; k < 3; ++k) {
    257       tmp.elem[i][k] *= t;
    258       out->elem[i][k] *= t;
    259     }
    260 
    261     for (j = 0; j < 3; ++j) {
    262       if (j != i) {
    263         t = tmp.elem[j][i];
    264         for (k = 0; k < 3; ++k) {
    265           tmp.elem[j][k] -= tmp.elem[i][k] * t;
    266           out->elem[j][k] -= out->elem[i][k] * t;
    267         }
    268       }
    269     }
    270   }
    271 }
    272 
    273 UNROLLED
    274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
    275   ASSERT_NOT_NULL(out);
    276   ASSERT_NOT_NULL(A);
    277   uint32_t i;
    278   for (i = 0; i < 3; ++i) {
    279     uint32_t j;
    280     for (j = 0; j < 3; ++j) {
    281       out->elem[i][j] = A->elem[j][i];
    282     }
    283   }
    284 }
    285 
    286 UNROLLED
    287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
    288   ASSERT_NOT_NULL(A);
    289   const uint32_t N = 3;
    290   uint32_t k;
    291 
    292   if (i == j) {
    293     return;
    294   }
    295 
    296   for (k = 0; k < N; ++k) {
    297     float tmp = A->elem[i][k];
    298     A->elem[i][k] = A->elem[j][k];
    299     A->elem[j][k] = tmp;
    300   }
    301 }
    302 
    303 UNROLLED
    304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
    305                         struct Mat33 *eigenvecs) {
    306   ASSERT_NOT_NULL(S);
    307   ASSERT_NOT_NULL(eigenvals);
    308   ASSERT_NOT_NULL(eigenvecs);
    309   const uint32_t N = 3;
    310   uint32_t i, j, k, l, m;
    311 
    312   float _eigenvals[N];
    313 
    314   uint32_t ind[N];
    315   for (k = 0; k < N; ++k) {
    316     ind[k] = mat33Maxind(S, k);
    317     _eigenvals[k] = S->elem[k][k];
    318   }
    319 
    320   initDiagonalMatrix(eigenvecs, 1.0f);
    321 
    322   for (;;) {
    323     m = 0;
    324     for (k = 1; k + 1 < N; ++k) {
    325       if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
    326         m = k;
    327       }
    328     }
    329 
    330     k = m;
    331     l = ind[m];
    332     float p = S->elem[k][l];
    333 
    334     if (fabsf(p) < EPSILON) {
    335       break;
    336     }
    337 
    338     float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
    339 
    340     float t = fabsf(y) + sqrtf(p * p + y * y);
    341     float s = sqrtf(p * p + t * t);
    342     float c = t / s;
    343     s = p / s;
    344     t = p * p / t;
    345 
    346     if (y < 0.0f) {
    347       s = -s;
    348       t = -t;
    349     }
    350 
    351     S->elem[k][l] = 0.0f;
    352 
    353     _eigenvals[k] -= t;
    354     _eigenvals[l] += t;
    355 
    356     for (i = 0; i < k; ++i) {
    357       mat33Rotate(S, c, s, i, k, i, l);
    358     }
    359 
    360     for (i = k + 1; i < l; ++i) {
    361       mat33Rotate(S, c, s, k, i, i, l);
    362     }
    363 
    364     for (i = l + 1; i < N; ++i) {
    365       mat33Rotate(S, c, s, k, i, l, i);
    366     }
    367 
    368     for (i = 0; i < N; ++i) {
    369       float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
    370       eigenvecs->elem[l][i] =
    371           s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
    372       eigenvecs->elem[k][i] = tmp;
    373     }
    374 
    375     ind[k] = mat33Maxind(S, k);
    376     ind[l] = mat33Maxind(S, l);
    377 
    378     float sum = 0.0f;
    379     for (i = 0; i < N; ++i) {
    380       for (j = i + 1; j < N; ++j) {
    381         sum += fabsf(S->elem[i][j]);
    382       }
    383     }
    384 
    385     if (sum < EPSILON) {
    386       break;
    387     }
    388   }
    389 
    390   for (k = 0; k < N; ++k) {
    391     m = k;
    392     for (l = k + 1; l < N; ++l) {
    393       if (_eigenvals[l] > _eigenvals[m]) {
    394         m = l;
    395       }
    396     }
    397 
    398     if (k != m) {
    399       float tmp = _eigenvals[k];
    400       _eigenvals[k] = _eigenvals[m];
    401       _eigenvals[m] = tmp;
    402 
    403       mat33SwapRows(eigenvecs, k, m);
    404     }
    405   }
    406 
    407   initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
    408 }
    409 
    410 float mat33Determinant(const struct Mat33 *A) {
    411   ASSERT_NOT_NULL(A);
    412   return A->elem[0][0] *
    413       (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
    414       - A->elem[0][1] *
    415       (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
    416       + A->elem[0][2] *
    417       (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
    418 }
    419 
    420 // index of largest off-diagonal element in row k
    421 UNROLLED
    422 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
    423   ASSERT_NOT_NULL(A);
    424   const uint32_t N = 3;
    425 
    426   uint32_t m = k + 1;
    427   uint32_t i;
    428 
    429   for (i = k + 2; i < N; ++i) {
    430     if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
    431       m = i;
    432     }
    433   }
    434 
    435   return m;
    436 }
    437 
    438 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
    439                  uint32_t i, uint32_t j) {
    440   ASSERT_NOT_NULL(A);
    441   float tmp = c * A->elem[k][l] - s * A->elem[i][j];
    442   A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
    443   A->elem[k][l] = tmp;
    444 }
    445 
    446 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
    447   ASSERT_NOT_NULL(out);
    448   ASSERT_NOT_NULL(A);
    449   ASSERT_NOT_NULL(v);
    450 
    451   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
    452            A->elem[0][3] * v->w;
    453 
    454   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
    455            A->elem[1][3] * v->w;
    456 
    457   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
    458            A->elem[2][3] * v->w;
    459 
    460   out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
    461            A->elem[3][3] * v->w;
    462 }
    463 
    464 UNROLLED
    465 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
    466   ASSERT_NOT_NULL(LU);
    467   ASSERT_NOT_NULL(pivot);
    468   const uint32_t N = 4;
    469   uint32_t i, j, k;
    470 
    471   for (k = 0; k < N; ++k) {
    472     pivot->elem[k] = k;
    473     float max = fabsf(LU->elem[k][k]);
    474     for (j = k + 1; j < N; ++j) {
    475       if (max < fabsf(LU->elem[j][k])) {
    476         max = fabsf(LU->elem[j][k]);
    477         pivot->elem[k] = j;
    478       }
    479     }
    480 
    481     if (pivot->elem[k] != k) {
    482       mat44SwapRows(LU, k, pivot->elem[k]);
    483     }
    484 
    485     if (fabsf(LU->elem[k][k]) < EPSILON) {
    486       continue;
    487     }
    488 
    489     for (j = k + 1; j < N; ++j) {
    490       LU->elem[k][j] /= LU->elem[k][k];
    491     }
    492 
    493     for (i = k + 1; i < N; ++i) {
    494       for (j = k + 1; j < N; ++j) {
    495         LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
    496       }
    497     }
    498   }
    499 }
    500 
    501 UNROLLED
    502 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
    503   ASSERT_NOT_NULL(A);
    504   const uint32_t N = 4;
    505   uint32_t k;
    506 
    507   if (i == j) {
    508     return;
    509   }
    510 
    511   for (k = 0; k < N; ++k) {
    512     float tmp = A->elem[i][k];
    513     A->elem[i][k] = A->elem[j][k];
    514     A->elem[j][k] = tmp;
    515   }
    516 }
    517 
    518 UNROLLED
    519 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
    520                 const struct Size4 *pivot) {
    521   ASSERT_NOT_NULL(A);
    522   ASSERT_NOT_NULL(x);
    523   ASSERT_NOT_NULL(b);
    524   ASSERT_NOT_NULL(pivot);
    525   const uint32_t N = 4;
    526   uint32_t i, k;
    527 
    528   float bCopy[N];
    529   bCopy[0] = b->x;
    530   bCopy[1] = b->y;
    531   bCopy[2] = b->z;
    532   bCopy[3] = b->w;
    533 
    534   float _x[N];
    535   for (k = 0; k < N; ++k) {
    536     if (pivot->elem[k] != k) {
    537       float tmp = bCopy[k];
    538       bCopy[k] = bCopy[pivot->elem[k]];
    539       bCopy[pivot->elem[k]] = tmp;
    540     }
    541 
    542     _x[k] = bCopy[k];
    543     for (i = 0; i < k; ++i) {
    544       _x[k] -= _x[i] * A->elem[k][i];
    545     }
    546     _x[k] /= A->elem[k][k];
    547   }
    548 
    549   for (k = N; k-- > 0;) {
    550     for (i = k + 1; i < N; ++i) {
    551       _x[k] -= _x[i] * A->elem[k][i];
    552     }
    553   }
    554 
    555   initVec4(x, _x[0], _x[1], _x[2], _x[3]);
    556 }
    557 
    558 float matMaxDiagonalElement(const float *square_mat, size_t n) {
    559   ASSERT_NOT_NULL(square_mat);
    560   ASSERT(n > 0);
    561   size_t i;
    562   float max = square_mat[0];
    563   const size_t n_square = n * n;
    564   const size_t offset = n + 1;
    565   for (i = offset; i < n_square; i += offset) {
    566     if (square_mat[i] > max) {
    567       max = square_mat[i];
    568     }
    569   }
    570   return max;
    571 }
    572 
    573 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
    574   ASSERT_NOT_NULL(square_mat);
    575   size_t i;
    576   const size_t n_square = n * n;
    577   const size_t offset = n + 1;
    578   for (i = 0; i < n_square; i += offset) {
    579     square_mat[i] += u;
    580   }
    581 }
    582 
    583 void matTransposeMultiplyMat(float *out, const float *A,
    584                              size_t nrows, size_t ncols) {
    585   ASSERT_NOT_NULL(out);
    586   ASSERT_NOT_NULL(A);
    587   size_t i;
    588   size_t j;
    589   size_t k;
    590   memset(out, 0, sizeof(float) * ncols * ncols);
    591   for (i = 0; i < ncols; ++i) {
    592     for (j = 0; j < ncols; ++j) {
    593       // Since A' * A is symmetric, can use upper diagonal elements
    594       // to fill in the lower diagonal without recomputing.
    595       if (j < i) {
    596         out[i * ncols + j] = out[j * ncols + i];
    597       } else {
    598         // mat_out[i, j] = ai ' * aj
    599         out[i * ncols + j] = 0;
    600         for (k = 0; k < nrows; ++k) {
    601           out[i * ncols + j] += A[k * ncols + i] *
    602               A[k * ncols + j];
    603         }
    604       }
    605     }
    606   }
    607 }
    608 
    609 void matMultiplyVec(float *out, const float *A, const float *v,
    610                     size_t nrows, size_t ncols) {
    611   ASSERT_NOT_NULL(out);
    612   ASSERT_NOT_NULL(A);
    613   ASSERT_NOT_NULL(v);
    614   size_t i;
    615   for (i = 0; i < nrows; ++i) {
    616     const float *row = &A[i * ncols];
    617     out[i] = vecDot(row, v, ncols);
    618   }
    619 }
    620 
    621 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
    622                              size_t nrows, size_t ncols) {
    623   ASSERT_NOT_NULL(out);
    624   ASSERT_NOT_NULL(A);
    625   ASSERT_NOT_NULL(v);
    626   size_t i, j;
    627   for (i = 0; i < ncols; ++i) {
    628     out[i] = 0;
    629     for (j = 0; j < nrows; ++j) {
    630       out[i] += A[j * ncols + i] * v[j];
    631     }
    632   }
    633 }
    634 
    635 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
    636   ASSERT_NOT_NULL(x);
    637   ASSERT_NOT_NULL(L);
    638   ASSERT_NOT_NULL(b);
    639   ASSERT(n <= INT32_MAX);
    640   int32_t i, j;  // Loops below require signed integers.
    641   int32_t s_n = (int32_t)n; // Signed n.
    642   float sum = 0.0f;
    643   // 1. Solve Ly = b through forward substitution. Use x[] to store y.
    644   for (i = 0; i < s_n; ++i) {
    645     sum = 0.0f;
    646     for (j = 0; j < i; ++j) {
    647       sum += L[i * s_n + j] * x[j];
    648     }
    649     // Check for non-zero diagonals (don't divide by zero).
    650     if (L[i * s_n + i] < EPSILON) {
    651       return false;
    652     }
    653     x[i] = (b[i] - sum) / L[i * s_n + i];
    654   }
    655 
    656   // 2. Solve L'x = y through backwards substitution. Use x[] to store both
    657   // y and x.
    658   for (i = s_n - 1; i >= 0; --i) {
    659     sum = 0.0f;
    660     for (j = i + 1; j < s_n; ++j) {
    661       sum += L[j * s_n + i] * x[j];
    662     }
    663     x[i] = (x[i] - sum) / L[i * s_n + i];
    664   }
    665 
    666   return true;
    667 }
    668 
    669 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
    670   ASSERT_NOT_NULL(L);
    671   ASSERT_NOT_NULL(A);
    672   size_t i, j, k;
    673   float sum = 0.0f;
    674   // initialize L to zero.
    675   memset(L, 0, sizeof(float) * n * n);
    676 
    677   for (i = 0; i < n; ++i) {
    678     // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
    679     sum = 0.0f;
    680     for (k = 0; k < i; ++k) {
    681       sum += L[i * n + k] * L[i * n + k];
    682     }
    683     sum = A[i * n + i] - sum;
    684     // If diagonal element of L is too small, cholesky fails.
    685     if (sum < CHOLESKY_TOLERANCE) {
    686       return false;
    687     }
    688     L[i * n + i] = sqrtf(sum);
    689 
    690     // for j = i+1:N,  compute L[j][i] =
    691     //      (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
    692     for (j = i + 1; j < n; ++j) {
    693       sum = 0.0f;
    694       for (k = 0; k < i; ++k) {
    695         sum += L[i * n + k] * L[j * n + k];
    696       }
    697       // division okay because magnitude of L[i][i] already checked above.
    698       L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
    699     }
    700   }
    701 
    702   return true;
    703 }
    704