Home | History | Annotate | Download | only in math
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "common/math/mat.h"
     18 
     19 #include <assert.h>
     20 #include <float.h>
     21 
     22 #ifdef _OS_BUILD_
     23 #include <nanohub_math.h>
     24 #include <seos.h>
     25 #else
     26 #include <math.h>
     27 #ifndef UNROLLED
     28 #define UNROLLED
     29 #endif
     30 #endif  // _OS_BUILD_
     31 
     32 #include <stddef.h>
     33 #include <string.h>
     34 
     35 #define EPSILON 1E-5
     36 #define CHOLESKY_TOLERANCE 1E-6
     37 
     38 // Forward declarations.
     39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
     40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
     41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
     42                         uint32_t l, uint32_t i, uint32_t j);
     43 
     44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
     45 
     46 void initZeroMatrix(struct Mat33 *A) {
     47   ASSERT_NOT_NULL(A);
     48   memset(A->elem, 0.0f, sizeof(A->elem));
     49 }
     50 
     51 UNROLLED
     52 void initDiagonalMatrix(struct Mat33 *A, float x) {
     53   ASSERT_NOT_NULL(A);
     54   initZeroMatrix(A);
     55 
     56   uint32_t i;
     57   for (i = 0; i < 3; ++i) {
     58     A->elem[i][i] = x;
     59   }
     60 }
     61 
     62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
     63                        const struct Vec3 *v2, const struct Vec3 *v3) {
     64   ASSERT_NOT_NULL(A);
     65   ASSERT_NOT_NULL(v1);
     66   ASSERT_NOT_NULL(v2);
     67   ASSERT_NOT_NULL(v3);
     68   A->elem[0][0] = v1->x;
     69   A->elem[0][1] = v2->x;
     70   A->elem[0][2] = v3->x;
     71 
     72   A->elem[1][0] = v1->y;
     73   A->elem[1][1] = v2->y;
     74   A->elem[1][2] = v3->y;
     75 
     76   A->elem[2][0] = v1->z;
     77   A->elem[2][1] = v2->z;
     78   A->elem[2][2] = v3->z;
     79 }
     80 
     81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
     82   ASSERT_NOT_NULL(out);
     83   ASSERT_NOT_NULL(A);
     84   ASSERT_NOT_NULL(v);
     85   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
     86   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
     87   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
     88 }
     89 
     90 UNROLLED
     91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
     92                    const struct Mat33 *B) {
     93   ASSERT_NOT_NULL(out);
     94   ASSERT_NOT_NULL(A);
     95   ASSERT_NOT_NULL(B);
     96   ASSERT(out != A);
     97   ASSERT(out != B);
     98 
     99   uint32_t i;
    100   for (i = 0; i < 3; ++i) {
    101     uint32_t j;
    102     for (j = 0; j < 3; ++j) {
    103       uint32_t k;
    104       float sum = 0.0f;
    105       for (k = 0; k < 3; ++k) {
    106         sum += A->elem[i][k] * B->elem[k][j];
    107       }
    108 
    109       out->elem[i][j] = sum;
    110     }
    111   }
    112 }
    113 
    114 UNROLLED
    115 void mat33ScalarMul(struct Mat33 *A, float c) {
    116   ASSERT_NOT_NULL(A);
    117   uint32_t i;
    118   for (i = 0; i < 3; ++i) {
    119     uint32_t j;
    120     for (j = 0; j < 3; ++j) {
    121       A->elem[i][j] *= c;
    122     }
    123   }
    124 }
    125 
    126 UNROLLED
    127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
    128   ASSERT_NOT_NULL(out);
    129   ASSERT_NOT_NULL(A);
    130   uint32_t i;
    131   for (i = 0; i < 3; ++i) {
    132     uint32_t j;
    133     for (j = 0; j < 3; ++j) {
    134       out->elem[i][j] += A->elem[i][j];
    135     }
    136   }
    137 }
    138 
    139 UNROLLED
    140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
    141   ASSERT_NOT_NULL(out);
    142   ASSERT_NOT_NULL(A);
    143   uint32_t i;
    144   for (i = 0; i < 3; ++i) {
    145     uint32_t j;
    146     for (j = 0; j < 3; ++j) {
    147       out->elem[i][j] -= A->elem[i][j];
    148     }
    149   }
    150 }
    151 
    152 UNROLLED
    153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
    154   ASSERT_NOT_NULL(A);
    155   uint32_t i;
    156   for (i = 0; i < 3; ++i) {
    157     if (A->elem[i][i] < 0.0f) {
    158       return 0;
    159     }
    160   }
    161 
    162   for (i = 0; i < 3; ++i) {
    163     uint32_t j;
    164     for (j = i + 1; j < 3; ++j) {
    165       if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
    166         return 0;
    167       }
    168     }
    169   }
    170 
    171   return 1;
    172 }
    173 
    174 UNROLLED
    175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
    176                              const struct Mat33 *B) {
    177   ASSERT(out != A);
    178   ASSERT(out != B);
    179   ASSERT_NOT_NULL(out);
    180   ASSERT_NOT_NULL(A);
    181   ASSERT_NOT_NULL(B);
    182   uint32_t i;
    183   for (i = 0; i < 3; ++i) {
    184     uint32_t j;
    185     for (j = 0; j < 3; ++j) {
    186       uint32_t k;
    187       float sum = 0.0f;
    188       for (k = 0; k < 3; ++k) {
    189         sum += A->elem[k][i] * B->elem[k][j];
    190       }
    191 
    192       out->elem[i][j] = sum;
    193     }
    194   }
    195 }
    196 
    197 UNROLLED
    198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
    199                               const struct Mat33 *B) {
    200   ASSERT(out != A);
    201   ASSERT(out != B);
    202   ASSERT_NOT_NULL(out);
    203   ASSERT_NOT_NULL(A);
    204   ASSERT_NOT_NULL(B);
    205   uint32_t i;
    206   for (i = 0; i < 3; ++i) {
    207     uint32_t j;
    208     for (j = 0; j < 3; ++j) {
    209       uint32_t k;
    210       float sum = 0.0f;
    211       for (k = 0; k < 3; ++k) {
    212         sum += A->elem[i][k] * B->elem[j][k];
    213       }
    214 
    215       out->elem[i][j] = sum;
    216     }
    217   }
    218 }
    219 
    220 UNROLLED
    221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
    222   ASSERT_NOT_NULL(out);
    223   ASSERT_NOT_NULL(A);
    224   float t;
    225   initDiagonalMatrix(out, 1.0f);
    226 
    227   struct Mat33 tmp = *A;
    228 
    229   uint32_t i, k;
    230   for (i = 0; i < 3; ++i) {
    231     uint32_t swap = i;
    232     uint32_t j;
    233     for (j = i + 1; j < 3; ++j) {
    234       if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
    235         swap = j;
    236       }
    237     }
    238 
    239     if (swap != i) {
    240       for (k = 0; k < 3; ++k) {
    241         t = tmp.elem[i][k];
    242         tmp.elem[i][k] = tmp.elem[swap][k];
    243         tmp.elem[swap][k] = t;
    244 
    245         t = out->elem[i][k];
    246         out->elem[i][k] = out->elem[swap][k];
    247         out->elem[swap][k] = t;
    248       }
    249     }
    250     // divide by zero guard.
    251     ASSERT(fabs(tmp.elem[i][i]) > 0);
    252     if(!(fabs(tmp.elem[i][i]) > 0)) {
    253       return;
    254     }
    255     t = 1.0f / tmp.elem[i][i];
    256     for (k = 0; k < 3; ++k) {
    257       tmp.elem[i][k] *= t;
    258       out->elem[i][k] *= t;
    259     }
    260 
    261     for (j = 0; j < 3; ++j) {
    262       if (j != i) {
    263         t = tmp.elem[j][i];
    264         for (k = 0; k < 3; ++k) {
    265           tmp.elem[j][k] -= tmp.elem[i][k] * t;
    266           out->elem[j][k] -= out->elem[i][k] * t;
    267         }
    268       }
    269     }
    270   }
    271 }
    272 
    273 UNROLLED
    274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
    275   ASSERT_NOT_NULL(out);
    276   ASSERT_NOT_NULL(A);
    277   uint32_t i;
    278   for (i = 0; i < 3; ++i) {
    279     uint32_t j;
    280     for (j = 0; j < 3; ++j) {
    281       out->elem[i][j] = A->elem[j][i];
    282     }
    283   }
    284 }
    285 
    286 UNROLLED
    287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
    288   ASSERT_NOT_NULL(A);
    289   const uint32_t N = 3;
    290   uint32_t k;
    291 
    292   if (i == j) {
    293     return;
    294   }
    295 
    296   for (k = 0; k < N; ++k) {
    297     float tmp = A->elem[i][k];
    298     A->elem[i][k] = A->elem[j][k];
    299     A->elem[j][k] = tmp;
    300   }
    301 }
    302 
    303 UNROLLED
    304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
    305                         struct Mat33 *eigenvecs) {
    306   ASSERT_NOT_NULL(S);
    307   ASSERT_NOT_NULL(eigenvals);
    308   ASSERT_NOT_NULL(eigenvecs);
    309   const uint32_t N = 3;
    310   uint32_t i, j, k, l, m;
    311 
    312   float _eigenvals[N];
    313 
    314   uint32_t ind[N];
    315   for (k = 0; k < N; ++k) {
    316     ind[k] = mat33Maxind(S, k);
    317     _eigenvals[k] = S->elem[k][k];
    318   }
    319 
    320   initDiagonalMatrix(eigenvecs, 1.0f);
    321 
    322   for (;;) {
    323     m = 0;
    324     for (k = 1; k + 1 < N; ++k) {
    325       if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
    326         m = k;
    327       }
    328     }
    329 
    330     k = m;
    331     l = ind[m];
    332     float p = S->elem[k][l];
    333 
    334     if (fabsf(p) < EPSILON) {
    335       break;
    336     }
    337 
    338     float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
    339 
    340     float t = fabsf(y) + sqrtf(p * p + y * y);
    341     float s = sqrtf(p * p + t * t);
    342     float c = t / s;
    343     s = p / s;
    344     t = p * p / t;
    345 
    346     if (y < 0.0f) {
    347       s = -s;
    348       t = -t;
    349     }
    350 
    351     S->elem[k][l] = 0.0f;
    352 
    353     _eigenvals[k] -= t;
    354     _eigenvals[l] += t;
    355 
    356     for (i = 0; i < k; ++i) {
    357       mat33Rotate(S, c, s, i, k, i, l);
    358     }
    359 
    360     for (i = k + 1; i < l; ++i) {
    361       mat33Rotate(S, c, s, k, i, i, l);
    362     }
    363 
    364     for (i = l + 1; i < N; ++i) {
    365       mat33Rotate(S, c, s, k, i, l, i);
    366     }
    367 
    368     for (i = 0; i < N; ++i) {
    369       float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
    370       eigenvecs->elem[l][i] =
    371           s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
    372       eigenvecs->elem[k][i] = tmp;
    373     }
    374 
    375     ind[k] = mat33Maxind(S, k);
    376     ind[l] = mat33Maxind(S, l);
    377 
    378     float sum = 0.0f;
    379     for (i = 0; i < N; ++i) {
    380       for (j = i + 1; j < N; ++j) {
    381         sum += fabsf(S->elem[i][j]);
    382       }
    383     }
    384 
    385     if (sum < EPSILON) {
    386       break;
    387     }
    388   }
    389 
    390   for (k = 0; k < N; ++k) {
    391     m = k;
    392     for (l = k + 1; l < N; ++l) {
    393       if (_eigenvals[l] > _eigenvals[m]) {
    394         m = l;
    395       }
    396     }
    397 
    398     if (k != m) {
    399       float tmp = _eigenvals[k];
    400       _eigenvals[k] = _eigenvals[m];
    401       _eigenvals[m] = tmp;
    402 
    403       mat33SwapRows(eigenvecs, k, m);
    404     }
    405   }
    406 
    407   initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
    408 }
    409 
    410 // index of largest off-diagonal element in row k
    411 UNROLLED
    412 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
    413   ASSERT_NOT_NULL(A);
    414   const uint32_t N = 3;
    415 
    416   uint32_t m = k + 1;
    417   uint32_t i;
    418 
    419   for (i = k + 2; i < N; ++i) {
    420     if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
    421       m = i;
    422     }
    423   }
    424 
    425   return m;
    426 }
    427 
    428 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
    429                  uint32_t i, uint32_t j) {
    430   ASSERT_NOT_NULL(A);
    431   float tmp = c * A->elem[k][l] - s * A->elem[i][j];
    432   A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
    433   A->elem[k][l] = tmp;
    434 }
    435 
    436 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
    437   ASSERT_NOT_NULL(out);
    438   ASSERT_NOT_NULL(A);
    439   ASSERT_NOT_NULL(v);
    440 
    441   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
    442            A->elem[0][3] * v->w;
    443 
    444   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
    445            A->elem[1][3] * v->w;
    446 
    447   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
    448            A->elem[2][3] * v->w;
    449 
    450   out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
    451            A->elem[3][3] * v->w;
    452 }
    453 
    454 UNROLLED
    455 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
    456   ASSERT_NOT_NULL(LU);
    457   ASSERT_NOT_NULL(pivot);
    458   const uint32_t N = 4;
    459   uint32_t i, j, k;
    460 
    461   for (k = 0; k < N; ++k) {
    462     pivot->elem[k] = k;
    463     float max = fabsf(LU->elem[k][k]);
    464     for (j = k + 1; j < N; ++j) {
    465       if (max < fabsf(LU->elem[j][k])) {
    466         max = fabsf(LU->elem[j][k]);
    467         pivot->elem[k] = j;
    468       }
    469     }
    470 
    471     if (pivot->elem[k] != k) {
    472       mat44SwapRows(LU, k, pivot->elem[k]);
    473     }
    474 
    475     if (fabsf(LU->elem[k][k]) < EPSILON) {
    476       continue;
    477     }
    478 
    479     for (j = k + 1; j < N; ++j) {
    480       LU->elem[k][j] /= LU->elem[k][k];
    481     }
    482 
    483     for (i = k + 1; i < N; ++i) {
    484       for (j = k + 1; j < N; ++j) {
    485         LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
    486       }
    487     }
    488   }
    489 }
    490 
    491 UNROLLED
    492 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
    493   ASSERT_NOT_NULL(A);
    494   const uint32_t N = 4;
    495   uint32_t k;
    496 
    497   if (i == j) {
    498     return;
    499   }
    500 
    501   for (k = 0; k < N; ++k) {
    502     float tmp = A->elem[i][k];
    503     A->elem[i][k] = A->elem[j][k];
    504     A->elem[j][k] = tmp;
    505   }
    506 }
    507 
    508 UNROLLED
    509 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
    510                 const struct Size4 *pivot) {
    511   ASSERT_NOT_NULL(A);
    512   ASSERT_NOT_NULL(x);
    513   ASSERT_NOT_NULL(b);
    514   ASSERT_NOT_NULL(pivot);
    515   const uint32_t N = 4;
    516   uint32_t i, k;
    517 
    518   float bCopy[N];
    519   bCopy[0] = b->x;
    520   bCopy[1] = b->y;
    521   bCopy[2] = b->z;
    522   bCopy[3] = b->w;
    523 
    524   float _x[N];
    525   for (k = 0; k < N; ++k) {
    526     if (pivot->elem[k] != k) {
    527       float tmp = bCopy[k];
    528       bCopy[k] = bCopy[pivot->elem[k]];
    529       bCopy[pivot->elem[k]] = tmp;
    530     }
    531 
    532     _x[k] = bCopy[k];
    533     for (i = 0; i < k; ++i) {
    534       _x[k] -= _x[i] * A->elem[k][i];
    535     }
    536     _x[k] /= A->elem[k][k];
    537   }
    538 
    539   for (k = N; k-- > 0;) {
    540     for (i = k + 1; i < N; ++i) {
    541       _x[k] -= _x[i] * A->elem[k][i];
    542     }
    543   }
    544 
    545   initVec4(x, _x[0], _x[1], _x[2], _x[3]);
    546 }
    547 
    548 float matMaxDiagonalElement(const float *square_mat, size_t n) {
    549   ASSERT_NOT_NULL(square_mat);
    550   ASSERT(n > 0);
    551   size_t i;
    552   float max = square_mat[0];
    553   const size_t n_square = n * n;
    554   const size_t offset = n + 1;
    555   for (i = offset; i < n_square; i += offset) {
    556     if (square_mat[i] > max) {
    557       max = square_mat[i];
    558     }
    559   }
    560   return max;
    561 }
    562 
    563 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
    564   ASSERT_NOT_NULL(square_mat);
    565   size_t i;
    566   const size_t n_square = n * n;
    567   const size_t offset = n + 1;
    568   for (i = 0; i < n_square; i += offset) {
    569     square_mat[i] += u;
    570   }
    571 }
    572 
    573 void matTransposeMultiplyMat(float *out, const float *A,
    574                              size_t nrows, size_t ncols) {
    575   ASSERT_NOT_NULL(out);
    576   ASSERT_NOT_NULL(A);
    577   size_t i;
    578   size_t j;
    579   size_t k;
    580   memset(out, 0, sizeof(float) * ncols * ncols);
    581   for (i = 0; i < ncols; ++i) {
    582     for (j = 0; j < ncols; ++j) {
    583       // Since A' * A is symmetric, can use upper diagonal elements
    584       // to fill in the lower diagonal without recomputing.
    585       if (j < i) {
    586         out[i * ncols + j] = out[j * ncols + i];
    587       } else {
    588         // mat_out[i, j] = ai ' * aj
    589         out[i * ncols + j] = 0;
    590         for (k = 0; k < nrows; ++k) {
    591           out[i * ncols + j] += A[k * ncols + i] *
    592               A[k * ncols + j];
    593         }
    594       }
    595     }
    596   }
    597 }
    598 
    599 void matMultiplyVec(float *out, const float *A, const float *v,
    600                     size_t nrows, size_t ncols) {
    601   ASSERT_NOT_NULL(out);
    602   ASSERT_NOT_NULL(A);
    603   ASSERT_NOT_NULL(v);
    604   size_t i;
    605   for (i = 0; i < nrows; ++i) {
    606     const float *row = &A[i * ncols];
    607     out[i] = vecDot(row, v, (int)ncols);
    608   }
    609 }
    610 
    611 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
    612                              size_t nrows, size_t ncols) {
    613   ASSERT_NOT_NULL(out);
    614   ASSERT_NOT_NULL(A);
    615   ASSERT_NOT_NULL(v);
    616   size_t i, j;
    617   for (i = 0; i < ncols; ++i) {
    618     out[i] = 0;
    619     for (j = 0; j < nrows; ++j) {
    620       out[i] += A[j * ncols + i] * v[j];
    621     }
    622   }
    623 }
    624 
    625 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
    626   ASSERT_NOT_NULL(x);
    627   ASSERT_NOT_NULL(L);
    628   ASSERT_NOT_NULL(b);
    629   ASSERT(n <= INT32_MAX);
    630   int32_t i, j;  // Loops below require signed integers.
    631   int32_t s_n = (int32_t)n; // Signed n.
    632   float sum = 0.0f;
    633   // 1. Solve Ly = b through forward substitution. Use x[] to store y.
    634   for (i = 0; i < s_n; ++i) {
    635     sum = 0.0f;
    636     for (j = 0; j < i; ++j) {
    637       sum += L[i * s_n + j] * x[j];
    638     }
    639     // Check for non-zero diagonals (don't divide by zero).
    640     if (L[i * s_n + i] < EPSILON) {
    641       return false;
    642     }
    643     x[i] = (b[i] - sum) / L[i * s_n + i];
    644   }
    645 
    646   // 2. Solve L'x = y through backwards substitution. Use x[] to store both
    647   // y and x.
    648   for (i = s_n - 1; i >= 0; --i) {
    649     sum = 0.0f;
    650     for (j = i + 1; j < s_n; ++j) {
    651       sum += L[j * s_n + i] * x[j];
    652     }
    653     x[i] = (x[i] - sum) / L[i * s_n + i];
    654   }
    655 
    656   return true;
    657 }
    658 
    659 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
    660   ASSERT_NOT_NULL(L);
    661   ASSERT_NOT_NULL(A);
    662   size_t i, j, k;
    663   float sum = 0.0f;
    664   // initialize L to zero.
    665   memset(L, 0, sizeof(float) * n * n);
    666 
    667   for (i = 0; i < n; ++i) {
    668     // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
    669     sum = 0.0f;
    670     for (k = 0; k < i; ++k) {
    671       sum += L[i * n + k] * L[i * n + k];
    672     }
    673     sum = A[i * n + i] - sum;
    674     // If diagonal element of L is too small, cholesky fails.
    675     if (sum < CHOLESKY_TOLERANCE) {
    676       return false;
    677     }
    678     L[i * n + i] = sqrtf(sum);
    679 
    680     // for j = i+1:N,  compute L[j][i] =
    681     //      (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
    682     for (j = i + 1; j < n; ++j) {
    683       sum = 0.0f;
    684       for (k = 0; k < i; ++k) {
    685         sum += L[i * n + k] * L[j * n + k];
    686       }
    687       // division okay because magnitude of L[i][i] already checked above.
    688       L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
    689     }
    690   }
    691 
    692   return true;
    693 }
    694