1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/math/mat.h"
18 
19 #include <assert.h>
20 #include <float.h>
21 
22 #ifdef _OS_BUILD_
23 #include <nanohub_math.h>
24 #include <seos.h>
25 #else
26 #include <math.h>
27 #ifndef UNROLLED
28 #define UNROLLED
29 #endif
30 #endif  // _OS_BUILD_
31 
32 #include <stddef.h>
33 #include <string.h>
34 
35 #define EPSILON 1E-5f
36 #define CHOLESKY_TOLERANCE 1E-6f
37 
38 // Forward declarations.
39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
42                         uint32_t l, uint32_t i, uint32_t j);
43 
44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
45 
initZeroMatrix(struct Mat33 * A)46 void initZeroMatrix(struct Mat33 *A) {
47   ASSERT_NOT_NULL(A);
48   memset(A->elem, 0.0f, sizeof(A->elem));
49 }
50 
51 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)52 void initDiagonalMatrix(struct Mat33 *A, float x) {
53   ASSERT_NOT_NULL(A);
54   initZeroMatrix(A);
55 
56   uint32_t i;
57   for (i = 0; i < 3; ++i) {
58     A->elem[i][i] = x;
59   }
60 }
61 
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
63                        const struct Vec3 *v2, const struct Vec3 *v3) {
64   ASSERT_NOT_NULL(A);
65   ASSERT_NOT_NULL(v1);
66   ASSERT_NOT_NULL(v2);
67   ASSERT_NOT_NULL(v3);
68   A->elem[0][0] = v1->x;
69   A->elem[0][1] = v2->x;
70   A->elem[0][2] = v3->x;
71 
72   A->elem[1][0] = v1->y;
73   A->elem[1][1] = v2->y;
74   A->elem[1][2] = v3->y;
75 
76   A->elem[2][0] = v1->z;
77   A->elem[2][1] = v2->z;
78   A->elem[2][2] = v3->z;
79 }
80 
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
82   ASSERT_NOT_NULL(out);
83   ASSERT_NOT_NULL(A);
84   ASSERT_NOT_NULL(v);
85   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
86   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
87   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
88 }
89 
90 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
92                    const struct Mat33 *B) {
93   ASSERT_NOT_NULL(out);
94   ASSERT_NOT_NULL(A);
95   ASSERT_NOT_NULL(B);
96   ASSERT(out != A);
97   ASSERT(out != B);
98 
99   uint32_t i;
100   for (i = 0; i < 3; ++i) {
101     uint32_t j;
102     for (j = 0; j < 3; ++j) {
103       uint32_t k;
104       float sum = 0.0f;
105       for (k = 0; k < 3; ++k) {
106         sum += A->elem[i][k] * B->elem[k][j];
107       }
108 
109       out->elem[i][j] = sum;
110     }
111   }
112 }
113 
114 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)115 void mat33ScalarMul(struct Mat33 *A, float c) {
116   ASSERT_NOT_NULL(A);
117   uint32_t i;
118   for (i = 0; i < 3; ++i) {
119     uint32_t j;
120     for (j = 0; j < 3; ++j) {
121       A->elem[i][j] *= c;
122     }
123   }
124 }
125 
126 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
128   ASSERT_NOT_NULL(out);
129   ASSERT_NOT_NULL(A);
130   uint32_t i;
131   for (i = 0; i < 3; ++i) {
132     uint32_t j;
133     for (j = 0; j < 3; ++j) {
134       out->elem[i][j] += A->elem[i][j];
135     }
136   }
137 }
138 
139 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
141   ASSERT_NOT_NULL(out);
142   ASSERT_NOT_NULL(A);
143   uint32_t i;
144   for (i = 0; i < 3; ++i) {
145     uint32_t j;
146     for (j = 0; j < 3; ++j) {
147       out->elem[i][j] -= A->elem[i][j];
148     }
149   }
150 }
151 
152 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
154   ASSERT_NOT_NULL(A);
155   uint32_t i;
156   for (i = 0; i < 3; ++i) {
157     if (A->elem[i][i] < 0.0f) {
158       return 0;
159     }
160   }
161 
162   for (i = 0; i < 3; ++i) {
163     uint32_t j;
164     for (j = i + 1; j < 3; ++j) {
165       if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
166         return 0;
167       }
168     }
169   }
170 
171   return 1;
172 }
173 
174 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
176                              const struct Mat33 *B) {
177   ASSERT(out != A);
178   ASSERT(out != B);
179   ASSERT_NOT_NULL(out);
180   ASSERT_NOT_NULL(A);
181   ASSERT_NOT_NULL(B);
182   uint32_t i;
183   for (i = 0; i < 3; ++i) {
184     uint32_t j;
185     for (j = 0; j < 3; ++j) {
186       uint32_t k;
187       float sum = 0.0f;
188       for (k = 0; k < 3; ++k) {
189         sum += A->elem[k][i] * B->elem[k][j];
190       }
191 
192       out->elem[i][j] = sum;
193     }
194   }
195 }
196 
197 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
199                               const struct Mat33 *B) {
200   ASSERT(out != A);
201   ASSERT(out != B);
202   ASSERT_NOT_NULL(out);
203   ASSERT_NOT_NULL(A);
204   ASSERT_NOT_NULL(B);
205   uint32_t i;
206   for (i = 0; i < 3; ++i) {
207     uint32_t j;
208     for (j = 0; j < 3; ++j) {
209       uint32_t k;
210       float sum = 0.0f;
211       for (k = 0; k < 3; ++k) {
212         sum += A->elem[i][k] * B->elem[j][k];
213       }
214 
215       out->elem[i][j] = sum;
216     }
217   }
218 }
219 
220 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
222   ASSERT_NOT_NULL(out);
223   ASSERT_NOT_NULL(A);
224   float t;
225   initDiagonalMatrix(out, 1.0f);
226 
227   struct Mat33 tmp = *A;
228 
229   uint32_t i, k;
230   for (i = 0; i < 3; ++i) {
231     uint32_t swap = i;
232     uint32_t j;
233     for (j = i + 1; j < 3; ++j) {
234       if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
235         swap = j;
236       }
237     }
238 
239     if (swap != i) {
240       for (k = 0; k < 3; ++k) {
241         t = tmp.elem[i][k];
242         tmp.elem[i][k] = tmp.elem[swap][k];
243         tmp.elem[swap][k] = t;
244 
245         t = out->elem[i][k];
246         out->elem[i][k] = out->elem[swap][k];
247         out->elem[swap][k] = t;
248       }
249     }
250     // divide by zero guard.
251     ASSERT(fabsf(tmp.elem[i][i]) > 0);
252     if(!(fabsf(tmp.elem[i][i]) > 0)) {
253       return;
254     }
255     t = 1.0f / tmp.elem[i][i];
256     for (k = 0; k < 3; ++k) {
257       tmp.elem[i][k] *= t;
258       out->elem[i][k] *= t;
259     }
260 
261     for (j = 0; j < 3; ++j) {
262       if (j != i) {
263         t = tmp.elem[j][i];
264         for (k = 0; k < 3; ++k) {
265           tmp.elem[j][k] -= tmp.elem[i][k] * t;
266           out->elem[j][k] -= out->elem[i][k] * t;
267         }
268       }
269     }
270   }
271 }
272 
273 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
275   ASSERT_NOT_NULL(out);
276   ASSERT_NOT_NULL(A);
277   uint32_t i;
278   for (i = 0; i < 3; ++i) {
279     uint32_t j;
280     for (j = 0; j < 3; ++j) {
281       out->elem[i][j] = A->elem[j][i];
282     }
283   }
284 }
285 
286 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
288   ASSERT_NOT_NULL(A);
289   const uint32_t N = 3;
290   uint32_t k;
291 
292   if (i == j) {
293     return;
294   }
295 
296   for (k = 0; k < N; ++k) {
297     float tmp = A->elem[i][k];
298     A->elem[i][k] = A->elem[j][k];
299     A->elem[j][k] = tmp;
300   }
301 }
302 
303 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
305                         struct Mat33 *eigenvecs) {
306   ASSERT_NOT_NULL(S);
307   ASSERT_NOT_NULL(eigenvals);
308   ASSERT_NOT_NULL(eigenvecs);
309   const uint32_t N = 3;
310   uint32_t i, j, k, l, m;
311 
312   float _eigenvals[N];
313 
314   uint32_t ind[N];
315   for (k = 0; k < N; ++k) {
316     ind[k] = mat33Maxind(S, k);
317     _eigenvals[k] = S->elem[k][k];
318   }
319 
320   initDiagonalMatrix(eigenvecs, 1.0f);
321 
322   for (;;) {
323     m = 0;
324     for (k = 1; k + 1 < N; ++k) {
325       if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
326         m = k;
327       }
328     }
329 
330     k = m;
331     l = ind[m];
332     float p = S->elem[k][l];
333 
334     if (fabsf(p) < EPSILON) {
335       break;
336     }
337 
338     float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
339 
340     float t = fabsf(y) + sqrtf(p * p + y * y);
341     float s = sqrtf(p * p + t * t);
342     float c = t / s;
343     s = p / s;
344     t = p * p / t;
345 
346     if (y < 0.0f) {
347       s = -s;
348       t = -t;
349     }
350 
351     S->elem[k][l] = 0.0f;
352 
353     _eigenvals[k] -= t;
354     _eigenvals[l] += t;
355 
356     for (i = 0; i < k; ++i) {
357       mat33Rotate(S, c, s, i, k, i, l);
358     }
359 
360     for (i = k + 1; i < l; ++i) {
361       mat33Rotate(S, c, s, k, i, i, l);
362     }
363 
364     for (i = l + 1; i < N; ++i) {
365       mat33Rotate(S, c, s, k, i, l, i);
366     }
367 
368     for (i = 0; i < N; ++i) {
369       float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
370       eigenvecs->elem[l][i] =
371           s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
372       eigenvecs->elem[k][i] = tmp;
373     }
374 
375     ind[k] = mat33Maxind(S, k);
376     ind[l] = mat33Maxind(S, l);
377 
378     float sum = 0.0f;
379     for (i = 0; i < N; ++i) {
380       for (j = i + 1; j < N; ++j) {
381         sum += fabsf(S->elem[i][j]);
382       }
383     }
384 
385     if (sum < EPSILON) {
386       break;
387     }
388   }
389 
390   for (k = 0; k < N; ++k) {
391     m = k;
392     for (l = k + 1; l < N; ++l) {
393       if (_eigenvals[l] > _eigenvals[m]) {
394         m = l;
395       }
396     }
397 
398     if (k != m) {
399       float tmp = _eigenvals[k];
400       _eigenvals[k] = _eigenvals[m];
401       _eigenvals[m] = tmp;
402 
403       mat33SwapRows(eigenvecs, k, m);
404     }
405   }
406 
407   initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
408 }
409 
mat33Determinant(const struct Mat33 * A)410 float mat33Determinant(const struct Mat33 *A) {
411   ASSERT_NOT_NULL(A);
412   return A->elem[0][0] *
413       (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
414       - A->elem[0][1] *
415       (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
416       + A->elem[0][2] *
417       (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
418 }
419 
420 // index of largest off-diagonal element in row k
421 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)422 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
423   ASSERT_NOT_NULL(A);
424   const uint32_t N = 3;
425 
426   uint32_t m = k + 1;
427   uint32_t i;
428 
429   for (i = k + 2; i < N; ++i) {
430     if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
431       m = i;
432     }
433   }
434 
435   return m;
436 }
437 
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)438 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
439                  uint32_t i, uint32_t j) {
440   ASSERT_NOT_NULL(A);
441   float tmp = c * A->elem[k][l] - s * A->elem[i][j];
442   A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
443   A->elem[k][l] = tmp;
444 }
445 
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)446 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
447   ASSERT_NOT_NULL(out);
448   ASSERT_NOT_NULL(A);
449   ASSERT_NOT_NULL(v);
450 
451   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
452            A->elem[0][3] * v->w;
453 
454   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
455            A->elem[1][3] * v->w;
456 
457   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
458            A->elem[2][3] * v->w;
459 
460   out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
461            A->elem[3][3] * v->w;
462 }
463 
464 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)465 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
466   ASSERT_NOT_NULL(LU);
467   ASSERT_NOT_NULL(pivot);
468   const uint32_t N = 4;
469   uint32_t i, j, k;
470 
471   for (k = 0; k < N; ++k) {
472     pivot->elem[k] = k;
473     float max = fabsf(LU->elem[k][k]);
474     for (j = k + 1; j < N; ++j) {
475       if (max < fabsf(LU->elem[j][k])) {
476         max = fabsf(LU->elem[j][k]);
477         pivot->elem[k] = j;
478       }
479     }
480 
481     if (pivot->elem[k] != k) {
482       mat44SwapRows(LU, k, pivot->elem[k]);
483     }
484 
485     if (fabsf(LU->elem[k][k]) < EPSILON) {
486       continue;
487     }
488 
489     for (j = k + 1; j < N; ++j) {
490       LU->elem[k][j] /= LU->elem[k][k];
491     }
492 
493     for (i = k + 1; i < N; ++i) {
494       for (j = k + 1; j < N; ++j) {
495         LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
496       }
497     }
498   }
499 }
500 
501 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)502 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
503   ASSERT_NOT_NULL(A);
504   const uint32_t N = 4;
505   uint32_t k;
506 
507   if (i == j) {
508     return;
509   }
510 
511   for (k = 0; k < N; ++k) {
512     float tmp = A->elem[i][k];
513     A->elem[i][k] = A->elem[j][k];
514     A->elem[j][k] = tmp;
515   }
516 }
517 
518 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)519 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
520                 const struct Size4 *pivot) {
521   ASSERT_NOT_NULL(A);
522   ASSERT_NOT_NULL(x);
523   ASSERT_NOT_NULL(b);
524   ASSERT_NOT_NULL(pivot);
525   const uint32_t N = 4;
526   uint32_t i, k;
527 
528   float bCopy[N];
529   bCopy[0] = b->x;
530   bCopy[1] = b->y;
531   bCopy[2] = b->z;
532   bCopy[3] = b->w;
533 
534   float _x[N];
535   for (k = 0; k < N; ++k) {
536     if (pivot->elem[k] != k) {
537       float tmp = bCopy[k];
538       bCopy[k] = bCopy[pivot->elem[k]];
539       bCopy[pivot->elem[k]] = tmp;
540     }
541 
542     _x[k] = bCopy[k];
543     for (i = 0; i < k; ++i) {
544       _x[k] -= _x[i] * A->elem[k][i];
545     }
546     _x[k] /= A->elem[k][k];
547   }
548 
549   for (k = N; k-- > 0;) {
550     for (i = k + 1; i < N; ++i) {
551       _x[k] -= _x[i] * A->elem[k][i];
552     }
553   }
554 
555   initVec4(x, _x[0], _x[1], _x[2], _x[3]);
556 }
557 
matMaxDiagonalElement(const float * square_mat,size_t n)558 float matMaxDiagonalElement(const float *square_mat, size_t n) {
559   ASSERT_NOT_NULL(square_mat);
560   ASSERT(n > 0);
561   size_t i;
562   float max = square_mat[0];
563   const size_t n_square = n * n;
564   const size_t offset = n + 1;
565   for (i = offset; i < n_square; i += offset) {
566     if (square_mat[i] > max) {
567       max = square_mat[i];
568     }
569   }
570   return max;
571 }
572 
matAddConstantDiagonal(float * square_mat,float u,size_t n)573 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
574   ASSERT_NOT_NULL(square_mat);
575   size_t i;
576   const size_t n_square = n * n;
577   const size_t offset = n + 1;
578   for (i = 0; i < n_square; i += offset) {
579     square_mat[i] += u;
580   }
581 }
582 
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)583 void matTransposeMultiplyMat(float *out, const float *A,
584                              size_t nrows, size_t ncols) {
585   ASSERT_NOT_NULL(out);
586   ASSERT_NOT_NULL(A);
587   size_t i;
588   size_t j;
589   size_t k;
590   memset(out, 0, sizeof(float) * ncols * ncols);
591   for (i = 0; i < ncols; ++i) {
592     for (j = 0; j < ncols; ++j) {
593       // Since A' * A is symmetric, can use upper diagonal elements
594       // to fill in the lower diagonal without recomputing.
595       if (j < i) {
596         out[i * ncols + j] = out[j * ncols + i];
597       } else {
598         // mat_out[i, j] = ai ' * aj
599         out[i * ncols + j] = 0;
600         for (k = 0; k < nrows; ++k) {
601           out[i * ncols + j] += A[k * ncols + i] *
602               A[k * ncols + j];
603         }
604       }
605     }
606   }
607 }
608 
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)609 void matMultiplyVec(float *out, const float *A, const float *v,
610                     size_t nrows, size_t ncols) {
611   ASSERT_NOT_NULL(out);
612   ASSERT_NOT_NULL(A);
613   ASSERT_NOT_NULL(v);
614   size_t i;
615   for (i = 0; i < nrows; ++i) {
616     const float *row = &A[i * ncols];
617     out[i] = vecDot(row, v, ncols);
618   }
619 }
620 
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)621 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
622                              size_t nrows, size_t ncols) {
623   ASSERT_NOT_NULL(out);
624   ASSERT_NOT_NULL(A);
625   ASSERT_NOT_NULL(v);
626   size_t i, j;
627   for (i = 0; i < ncols; ++i) {
628     out[i] = 0;
629     for (j = 0; j < nrows; ++j) {
630       out[i] += A[j * ncols + i] * v[j];
631     }
632   }
633 }
634 
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)635 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
636   ASSERT_NOT_NULL(x);
637   ASSERT_NOT_NULL(L);
638   ASSERT_NOT_NULL(b);
639   ASSERT(n <= INT32_MAX);
640   int32_t i, j;  // Loops below require signed integers.
641   int32_t s_n = (int32_t)n; // Signed n.
642   float sum = 0.0f;
643   // 1. Solve Ly = b through forward substitution. Use x[] to store y.
644   for (i = 0; i < s_n; ++i) {
645     sum = 0.0f;
646     for (j = 0; j < i; ++j) {
647       sum += L[i * s_n + j] * x[j];
648     }
649     // Check for non-zero diagonals (don't divide by zero).
650     if (L[i * s_n + i] < EPSILON) {
651       return false;
652     }
653     x[i] = (b[i] - sum) / L[i * s_n + i];
654   }
655 
656   // 2. Solve L'x = y through backwards substitution. Use x[] to store both
657   // y and x.
658   for (i = s_n - 1; i >= 0; --i) {
659     sum = 0.0f;
660     for (j = i + 1; j < s_n; ++j) {
661       sum += L[j * s_n + i] * x[j];
662     }
663     x[i] = (x[i] - sum) / L[i * s_n + i];
664   }
665 
666   return true;
667 }
668 
matCholeskyDecomposition(float * L,const float * A,size_t n)669 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
670   ASSERT_NOT_NULL(L);
671   ASSERT_NOT_NULL(A);
672   size_t i, j, k;
673   float sum = 0.0f;
674   // initialize L to zero.
675   memset(L, 0, sizeof(float) * n * n);
676 
677   for (i = 0; i < n; ++i) {
678     // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
679     sum = 0.0f;
680     for (k = 0; k < i; ++k) {
681       sum += L[i * n + k] * L[i * n + k];
682     }
683     sum = A[i * n + i] - sum;
684     // If diagonal element of L is too small, cholesky fails.
685     if (sum < CHOLESKY_TOLERANCE) {
686       return false;
687     }
688     L[i * n + i] = sqrtf(sum);
689 
690     // for j = i+1:N,  compute L[j][i] =
691     //      (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
692     for (j = i + 1; j < n; ++j) {
693       sum = 0.0f;
694       for (k = 0; k < i; ++k) {
695         sum += L[i * n + k] * L[j * n + k];
696       }
697       // division okay because magnitude of L[i][i] already checked above.
698       L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
699     }
700   }
701 
702   return true;
703 }
704