Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "_cxcore.h"
     43 #include <float.h>
     44 
     45 /////////////////////////////////////////////////////////////////////////////////////////
     46 
     47 #define icvGivens_64f( n, x, y, c, s ) \
     48 {                                      \
     49     int _i;                            \
     50     double* _x = (x);                  \
     51     double* _y = (y);                  \
     52                                        \
     53     for( _i = 0; _i < n; _i++ )        \
     54     {                                  \
     55         double t0 = _x[_i];            \
     56         double t1 = _y[_i];            \
     57         _x[_i] = t0*c + t1*s;          \
     58         _y[_i] = -t0*s + t1*c;         \
     59     }                                  \
     60 }
     61 
     62 
     63 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
     64 static  void
     65 icvMatrAXPY_64f( int m, int n, const double* x, int dx,
     66                  const double* a, double* y, int dy )
     67 {
     68     int i, j;
     69 
     70     for( i = 0; i < m; i++, x += dx, y += dy )
     71     {
     72         double s = a[i];
     73 
     74         for( j = 0; j <= n - 4; j += 4 )
     75         {
     76             double t0 = y[j]   + s*x[j];
     77             double t1 = y[j+1] + s*x[j+1];
     78             y[j]   = t0;
     79             y[j+1] = t1;
     80             t0 = y[j+2] + s*x[j+2];
     81             t1 = y[j+3] + s*x[j+3];
     82             y[j+2] = t0;
     83             y[j+3] = t1;
     84         }
     85 
     86         for( ; j < n; j++ ) y[j] += s*x[j];
     87     }
     88 }
     89 
     90 
     91 /* y[1:m,-1] = h*y[1:m,0:n]*x[0:1,0:n]'*x[-1]  (this is used for U&V reconstruction)
     92    y[1:m,0:n] += h*y[1:m,0:n]*x[0:1,0:n]'*x[0:1,0:n] */
     93 static void
     94 icvMatrAXPY3_64f( int m, int n, const double* x, int l, double* y, double h )
     95 {
     96     int i, j;
     97 
     98     for( i = 1; i < m; i++ )
     99     {
    100         double s = 0;
    101 
    102         y += l;
    103 
    104         for( j = 0; j <= n - 4; j += 4 )
    105             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
    106 
    107         for( ; j < n; j++ )  s += x[j]*y[j];
    108 
    109         s *= h;
    110         y[-1] = s*x[-1];
    111 
    112         for( j = 0; j <= n - 4; j += 4 )
    113         {
    114             double t0 = y[j]   + s*x[j];
    115             double t1 = y[j+1] + s*x[j+1];
    116             y[j]   = t0;
    117             y[j+1] = t1;
    118             t0 = y[j+2] + s*x[j+2];
    119             t1 = y[j+3] + s*x[j+3];
    120             y[j+2] = t0;
    121             y[j+3] = t1;
    122         }
    123 
    124         for( ; j < n; j++ ) y[j] += s*x[j];
    125     }
    126 }
    127 
    128 
    129 #define icvGivens_32f( n, x, y, c, s ) \
    130 {                                      \
    131     int _i;                            \
    132     float* _x = (x);                   \
    133     float* _y = (y);                   \
    134                                        \
    135     for( _i = 0; _i < n; _i++ )        \
    136     {                                  \
    137         double t0 = _x[_i];            \
    138         double t1 = _y[_i];            \
    139         _x[_i] = (float)(t0*c + t1*s); \
    140         _y[_i] = (float)(-t0*s + t1*c);\
    141     }                                  \
    142 }
    143 
    144 static  void
    145 icvMatrAXPY_32f( int m, int n, const float* x, int dx,
    146                  const float* a, float* y, int dy )
    147 {
    148     int i, j;
    149 
    150     for( i = 0; i < m; i++, x += dx, y += dy )
    151     {
    152         double s = a[i];
    153 
    154         for( j = 0; j <= n - 4; j += 4 )
    155         {
    156             double t0 = y[j]   + s*x[j];
    157             double t1 = y[j+1] + s*x[j+1];
    158             y[j]   = (float)t0;
    159             y[j+1] = (float)t1;
    160             t0 = y[j+2] + s*x[j+2];
    161             t1 = y[j+3] + s*x[j+3];
    162             y[j+2] = (float)t0;
    163             y[j+3] = (float)t1;
    164         }
    165 
    166         for( ; j < n; j++ )
    167             y[j] = (float)(y[j] + s*x[j]);
    168     }
    169 }
    170 
    171 
    172 static void
    173 icvMatrAXPY3_32f( int m, int n, const float* x, int l, float* y, double h )
    174 {
    175     int i, j;
    176 
    177     for( i = 1; i < m; i++ )
    178     {
    179         double s = 0;
    180         y += l;
    181 
    182         for( j = 0; j <= n - 4; j += 4 )
    183             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
    184 
    185         for( ; j < n; j++ )  s += x[j]*y[j];
    186 
    187         s *= h;
    188         y[-1] = (float)(s*x[-1]);
    189 
    190         for( j = 0; j <= n - 4; j += 4 )
    191         {
    192             double t0 = y[j]   + s*x[j];
    193             double t1 = y[j+1] + s*x[j+1];
    194             y[j]   = (float)t0;
    195             y[j+1] = (float)t1;
    196             t0 = y[j+2] + s*x[j+2];
    197             t1 = y[j+3] + s*x[j+3];
    198             y[j+2] = (float)t0;
    199             y[j+3] = (float)t1;
    200         }
    201 
    202         for( ; j < n; j++ ) y[j] = (float)(y[j] + s*x[j]);
    203     }
    204 }
    205 
    206 /* accurate hypotenuse calculation */
    207 static double
    208 pythag( double a, double b )
    209 {
    210     a = fabs( a );
    211     b = fabs( b );
    212     if( a > b )
    213     {
    214         b /= a;
    215         a *= sqrt( 1. + b * b );
    216     }
    217     else if( b != 0 )
    218     {
    219         a /= b;
    220         a = b * sqrt( 1. + a * a );
    221     }
    222 
    223     return a;
    224 }
    225 
    226 /****************************************************************************************/
    227 /****************************************************************************************/
    228 
    229 #define MAX_ITERS  30
    230 
    231 static void
    232 icvSVD_64f( double* a, int lda, int m, int n,
    233             double* w,
    234             double* uT, int lduT, int nu,
    235             double* vT, int ldvT,
    236             double* buffer )
    237 {
    238     double* e;
    239     double* temp;
    240     double *w1, *e1;
    241     double *hv;
    242     double ku0 = 0, kv0 = 0;
    243     double anorm = 0;
    244     double *a1, *u0 = uT, *v0 = vT;
    245     double scale, h;
    246     int i, j, k, l;
    247     int nm, m1, n1;
    248     int nv = n;
    249     int iters = 0;
    250     double* hv0 = (double*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
    251 
    252     e = buffer;
    253     w1 = w;
    254     e1 = e + 1;
    255     nm = n;
    256 
    257     temp = buffer + nm;
    258 
    259     memset( w, 0, nm * sizeof( w[0] ));
    260     memset( e, 0, nm * sizeof( e[0] ));
    261 
    262     m1 = m;
    263     n1 = n;
    264 
    265     /* transform a to bi-diagonal form */
    266     for( ;; )
    267     {
    268         int update_u;
    269         int update_v;
    270 
    271         if( m1 == 0 )
    272             break;
    273 
    274         scale = h = 0;
    275         update_u = uT && m1 > m - nu;
    276         hv = update_u ? uT : hv0;
    277 
    278         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
    279         {
    280             double t = a1[0];
    281             scale += fabs( hv[j] = t );
    282         }
    283 
    284         if( scale != 0 )
    285         {
    286             double f = 1./scale, g, s = 0;
    287 
    288             for( j = 0; j < m1; j++ )
    289             {
    290                 double t = (hv[j] *= f);
    291                 s += t * t;
    292             }
    293 
    294             g = sqrt( s );
    295             f = hv[0];
    296             if( f >= 0 )
    297                 g = -g;
    298             hv[0] = f - g;
    299             h = 1. / (f * g - s);
    300 
    301             memset( temp, 0, n1 * sizeof( temp[0] ));
    302 
    303             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
    304             icvMatrAXPY_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
    305             for( k = 1; k < n1; k++ ) temp[k] *= h;
    306 
    307             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
    308             icvMatrAXPY_64f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
    309             *w1 = g*scale;
    310         }
    311         w1++;
    312 
    313         /* store -2/(hv'*hv) */
    314         if( update_u )
    315         {
    316             if( m1 == m )
    317                 ku0 = h;
    318             else
    319                 hv[-1] = h;
    320         }
    321 
    322         a++;
    323         n1--;
    324         if( vT )
    325             vT += ldvT + 1;
    326 
    327         if( n1 == 0 )
    328             break;
    329 
    330         scale = h = 0;
    331         update_v = vT && n1 > n - nv;
    332 
    333         hv = update_v ? vT : hv0;
    334 
    335         for( j = 0; j < n1; j++ )
    336         {
    337             double t = a[j];
    338             scale += fabs( hv[j] = t );
    339         }
    340 
    341         if( scale != 0 )
    342         {
    343             double f = 1./scale, g, s = 0;
    344 
    345             for( j = 0; j < n1; j++ )
    346             {
    347                 double t = (hv[j] *= f);
    348                 s += t * t;
    349             }
    350 
    351             g = sqrt( s );
    352             f = hv[0];
    353             if( f >= 0 )
    354                 g = -g;
    355             hv[0] = f - g;
    356             h = 1. / (f * g - s);
    357             hv[-1] = 0.;
    358 
    359             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
    360             icvMatrAXPY3_64f( m1, n1, hv, lda, a, h );
    361 
    362             *e1 = g*scale;
    363         }
    364         e1++;
    365 
    366         /* store -2/(hv'*hv) */
    367         if( update_v )
    368         {
    369             if( n1 == n )
    370                 kv0 = h;
    371             else
    372                 hv[-1] = h;
    373         }
    374 
    375         a += lda;
    376         m1--;
    377         if( uT )
    378             uT += lduT + 1;
    379     }
    380 
    381     m1 -= m1 != 0;
    382     n1 -= n1 != 0;
    383 
    384     /* accumulate left transformations */
    385     if( uT )
    386     {
    387         m1 = m - m1;
    388         uT = u0 + m1 * lduT;
    389         for( i = m1; i < nu; i++, uT += lduT )
    390         {
    391             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
    392             uT[i] = 1.;
    393         }
    394 
    395         for( i = m1 - 1; i >= 0; i-- )
    396         {
    397             double s;
    398             int lh = nu - i;
    399 
    400             l = m - i;
    401 
    402             hv = u0 + (lduT + 1) * i;
    403             h = i == 0 ? ku0 : hv[-1];
    404 
    405             assert( h <= 0 );
    406 
    407             if( h != 0 )
    408             {
    409                 uT = hv;
    410                 icvMatrAXPY3_64f( lh, l-1, hv+1, lduT, uT+1, h );
    411 
    412                 s = hv[0] * h;
    413                 for( k = 0; k < l; k++ ) hv[k] *= s;
    414                 hv[0] += 1;
    415             }
    416             else
    417             {
    418                 for( j = 1; j < l; j++ )
    419                     hv[j] = 0;
    420                 for( j = 1; j < lh; j++ )
    421                     hv[j * lduT] = 0;
    422                 hv[0] = 1;
    423             }
    424         }
    425         uT = u0;
    426     }
    427 
    428     /* accumulate right transformations */
    429     if( vT )
    430     {
    431         n1 = n - n1;
    432         vT = v0 + n1 * ldvT;
    433         for( i = n1; i < nv; i++, vT += ldvT )
    434         {
    435             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
    436             vT[i] = 1.;
    437         }
    438 
    439         for( i = n1 - 1; i >= 0; i-- )
    440         {
    441             double s;
    442             int lh = nv - i;
    443 
    444             l = n - i;
    445             hv = v0 + (ldvT + 1) * i;
    446             h = i == 0 ? kv0 : hv[-1];
    447 
    448             assert( h <= 0 );
    449 
    450             if( h != 0 )
    451             {
    452                 vT = hv;
    453                 icvMatrAXPY3_64f( lh, l-1, hv+1, ldvT, vT+1, h );
    454 
    455                 s = hv[0] * h;
    456                 for( k = 0; k < l; k++ ) hv[k] *= s;
    457                 hv[0] += 1;
    458             }
    459             else
    460             {
    461                 for( j = 1; j < l; j++ )
    462                     hv[j] = 0;
    463                 for( j = 1; j < lh; j++ )
    464                     hv[j * ldvT] = 0;
    465                 hv[0] = 1;
    466             }
    467         }
    468         vT = v0;
    469     }
    470 
    471     for( i = 0; i < nm; i++ )
    472     {
    473         double tnorm = fabs( w[i] );
    474         tnorm += fabs( e[i] );
    475 
    476         if( anorm < tnorm )
    477             anorm = tnorm;
    478     }
    479 
    480     anorm *= DBL_EPSILON;
    481 
    482     /* diagonalization of the bidiagonal form */
    483     for( k = nm - 1; k >= 0; k-- )
    484     {
    485         double z = 0;
    486         iters = 0;
    487 
    488         for( ;; )               /* do iterations */
    489         {
    490             double c, s, f, g, x, y;
    491             int flag = 0;
    492 
    493             /* test for splitting */
    494             for( l = k; l >= 0; l-- )
    495             {
    496                 if( fabs(e[l]) <= anorm )
    497                 {
    498                     flag = 1;
    499                     break;
    500                 }
    501                 assert( l > 0 );
    502                 if( fabs(w[l - 1]) <= anorm )
    503                     break;
    504             }
    505 
    506             if( !flag )
    507             {
    508                 c = 0;
    509                 s = 1;
    510 
    511                 for( i = l; i <= k; i++ )
    512                 {
    513                     f = s * e[i];
    514 
    515                     e[i] *= c;
    516 
    517                     if( anorm + fabs( f ) == anorm )
    518                         break;
    519 
    520                     g = w[i];
    521                     h = pythag( f, g );
    522                     w[i] = h;
    523                     c = g / h;
    524                     s = -f / h;
    525 
    526                     if( uT )
    527                         icvGivens_64f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
    528                 }
    529             }
    530 
    531             z = w[k];
    532             if( l == k || iters++ == MAX_ITERS )
    533                 break;
    534 
    535             /* shift from bottom 2x2 minor */
    536             x = w[l];
    537             y = w[k - 1];
    538             g = e[k - 1];
    539             h = e[k];
    540             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
    541             g = pythag( f, 1 );
    542             if( f < 0 )
    543                 g = -g;
    544             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
    545             /* next QR transformation */
    546             c = s = 1;
    547 
    548             for( i = l + 1; i <= k; i++ )
    549             {
    550                 g = e[i];
    551                 y = w[i];
    552                 h = s * g;
    553                 g *= c;
    554                 z = pythag( f, h );
    555                 e[i - 1] = z;
    556                 c = f / z;
    557                 s = h / z;
    558                 f = x * c + g * s;
    559                 g = -x * s + g * c;
    560                 h = y * s;
    561                 y *= c;
    562 
    563                 if( vT )
    564                     icvGivens_64f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
    565 
    566                 z = pythag( f, h );
    567                 w[i - 1] = z;
    568 
    569                 /* rotation can be arbitrary if z == 0 */
    570                 if( z != 0 )
    571                 {
    572                     c = f / z;
    573                     s = h / z;
    574                 }
    575                 f = c * g + s * y;
    576                 x = -s * g + c * y;
    577 
    578                 if( uT )
    579                     icvGivens_64f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
    580             }
    581 
    582             e[l] = 0;
    583             e[k] = f;
    584             w[k] = x;
    585         }                       /* end of iteration loop */
    586 
    587         if( iters > MAX_ITERS )
    588             break;
    589 
    590         if( z < 0 )
    591         {
    592             w[k] = -z;
    593             if( vT )
    594             {
    595                 for( j = 0; j < n; j++ )
    596                     vT[j + k * ldvT] = -vT[j + k * ldvT];
    597             }
    598         }
    599     }                           /* end of diagonalization loop */
    600 
    601     /* sort singular values and corresponding values */
    602     for( i = 0; i < nm; i++ )
    603     {
    604         k = i;
    605         for( j = i + 1; j < nm; j++ )
    606             if( w[k] < w[j] )
    607                 k = j;
    608 
    609         if( k != i )
    610         {
    611             double t;
    612             CV_SWAP( w[i], w[k], t );
    613 
    614             if( vT )
    615                 for( j = 0; j < n; j++ )
    616                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
    617 
    618             if( uT )
    619                 for( j = 0; j < m; j++ )
    620                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
    621         }
    622     }
    623 }
    624 
    625 
    626 static void
    627 icvSVD_32f( float* a, int lda, int m, int n,
    628             float* w,
    629             float* uT, int lduT, int nu,
    630             float* vT, int ldvT,
    631             float* buffer )
    632 {
    633     float* e;
    634     float* temp;
    635     float *w1, *e1;
    636     float *hv;
    637     double ku0 = 0, kv0 = 0;
    638     double anorm = 0;
    639     float *a1, *u0 = uT, *v0 = vT;
    640     double scale, h;
    641     int i, j, k, l;
    642     int nm, m1, n1;
    643     int nv = n;
    644     int iters = 0;
    645     float* hv0 = (float*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
    646 
    647     e = buffer;
    648 
    649     w1 = w;
    650     e1 = e + 1;
    651     nm = n;
    652 
    653     temp = buffer + nm;
    654 
    655     memset( w, 0, nm * sizeof( w[0] ));
    656     memset( e, 0, nm * sizeof( e[0] ));
    657 
    658     m1 = m;
    659     n1 = n;
    660 
    661     /* transform a to bi-diagonal form */
    662     for( ;; )
    663     {
    664         int update_u;
    665         int update_v;
    666 
    667         if( m1 == 0 )
    668             break;
    669 
    670         scale = h = 0;
    671 
    672         update_u = uT && m1 > m - nu;
    673         hv = update_u ? uT : hv0;
    674 
    675         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
    676         {
    677             double t = a1[0];
    678             scale += fabs( hv[j] = (float)t );
    679         }
    680 
    681         if( scale != 0 )
    682         {
    683             double f = 1./scale, g, s = 0;
    684 
    685             for( j = 0; j < m1; j++ )
    686             {
    687                 double t = (hv[j] = (float)(hv[j]*f));
    688                 s += t * t;
    689             }
    690 
    691             g = sqrt( s );
    692             f = hv[0];
    693             if( f >= 0 )
    694                 g = -g;
    695             hv[0] = (float)(f - g);
    696             h = 1. / (f * g - s);
    697 
    698             memset( temp, 0, n1 * sizeof( temp[0] ));
    699 
    700             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
    701             icvMatrAXPY_32f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
    702 
    703             for( k = 1; k < n1; k++ ) temp[k] = (float)(temp[k]*h);
    704 
    705             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
    706             icvMatrAXPY_32f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
    707             *w1 = (float)(g*scale);
    708         }
    709         w1++;
    710 
    711         /* store -2/(hv'*hv) */
    712         if( update_u )
    713         {
    714             if( m1 == m )
    715                 ku0 = h;
    716             else
    717                 hv[-1] = (float)h;
    718         }
    719 
    720         a++;
    721         n1--;
    722         if( vT )
    723             vT += ldvT + 1;
    724 
    725         if( n1 == 0 )
    726             break;
    727 
    728         scale = h = 0;
    729         update_v = vT && n1 > n - nv;
    730         hv = update_v ? vT : hv0;
    731 
    732         for( j = 0; j < n1; j++ )
    733         {
    734             double t = a[j];
    735             scale += fabs( hv[j] = (float)t );
    736         }
    737 
    738         if( scale != 0 )
    739         {
    740             double f = 1./scale, g, s = 0;
    741 
    742             for( j = 0; j < n1; j++ )
    743             {
    744                 double t = (hv[j] = (float)(hv[j]*f));
    745                 s += t * t;
    746             }
    747 
    748             g = sqrt( s );
    749             f = hv[0];
    750             if( f >= 0 )
    751                 g = -g;
    752             hv[0] = (float)(f - g);
    753             h = 1. / (f * g - s);
    754             hv[-1] = 0.f;
    755 
    756             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
    757             icvMatrAXPY3_32f( m1, n1, hv, lda, a, h );
    758 
    759             *e1 = (float)(g*scale);
    760         }
    761         e1++;
    762 
    763         /* store -2/(hv'*hv) */
    764         if( update_v )
    765         {
    766             if( n1 == n )
    767                 kv0 = h;
    768             else
    769                 hv[-1] = (float)h;
    770         }
    771 
    772         a += lda;
    773         m1--;
    774         if( uT )
    775             uT += lduT + 1;
    776     }
    777 
    778     m1 -= m1 != 0;
    779     n1 -= n1 != 0;
    780 
    781     /* accumulate left transformations */
    782     if( uT )
    783     {
    784         m1 = m - m1;
    785         uT = u0 + m1 * lduT;
    786         for( i = m1; i < nu; i++, uT += lduT )
    787         {
    788             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
    789             uT[i] = 1.;
    790         }
    791 
    792         for( i = m1 - 1; i >= 0; i-- )
    793         {
    794             double s;
    795             int lh = nu - i;
    796 
    797             l = m - i;
    798 
    799             hv = u0 + (lduT + 1) * i;
    800             h = i == 0 ? ku0 : hv[-1];
    801 
    802             assert( h <= 0 );
    803 
    804             if( h != 0 )
    805             {
    806                 uT = hv;
    807                 icvMatrAXPY3_32f( lh, l-1, hv+1, lduT, uT+1, h );
    808 
    809                 s = hv[0] * h;
    810                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
    811                 hv[0] += 1;
    812             }
    813             else
    814             {
    815                 for( j = 1; j < l; j++ )
    816                     hv[j] = 0;
    817                 for( j = 1; j < lh; j++ )
    818                     hv[j * lduT] = 0;
    819                 hv[0] = 1;
    820             }
    821         }
    822         uT = u0;
    823     }
    824 
    825     /* accumulate right transformations */
    826     if( vT )
    827     {
    828         n1 = n - n1;
    829         vT = v0 + n1 * ldvT;
    830         for( i = n1; i < nv; i++, vT += ldvT )
    831         {
    832             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
    833             vT[i] = 1.;
    834         }
    835 
    836         for( i = n1 - 1; i >= 0; i-- )
    837         {
    838             double s;
    839             int lh = nv - i;
    840 
    841             l = n - i;
    842             hv = v0 + (ldvT + 1) * i;
    843             h = i == 0 ? kv0 : hv[-1];
    844 
    845             assert( h <= 0 );
    846 
    847             if( h != 0 )
    848             {
    849                 vT = hv;
    850                 icvMatrAXPY3_32f( lh, l-1, hv+1, ldvT, vT+1, h );
    851 
    852                 s = hv[0] * h;
    853                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
    854                 hv[0] += 1;
    855             }
    856             else
    857             {
    858                 for( j = 1; j < l; j++ )
    859                     hv[j] = 0;
    860                 for( j = 1; j < lh; j++ )
    861                     hv[j * ldvT] = 0;
    862                 hv[0] = 1;
    863             }
    864         }
    865         vT = v0;
    866     }
    867 
    868     for( i = 0; i < nm; i++ )
    869     {
    870         double tnorm = fabs( w[i] );
    871         tnorm += fabs( e[i] );
    872 
    873         if( anorm < tnorm )
    874             anorm = tnorm;
    875     }
    876 
    877     anorm *= FLT_EPSILON;
    878 
    879     /* diagonalization of the bidiagonal form */
    880     for( k = nm - 1; k >= 0; k-- )
    881     {
    882         double z = 0;
    883         iters = 0;
    884 
    885         for( ;; )               /* do iterations */
    886         {
    887             double c, s, f, g, x, y;
    888             int flag = 0;
    889 
    890             /* test for splitting */
    891             for( l = k; l >= 0; l-- )
    892             {
    893                 if( fabs( e[l] ) <= anorm )
    894                 {
    895                     flag = 1;
    896                     break;
    897                 }
    898                 assert( l > 0 );
    899                 if( fabs( w[l - 1] ) <= anorm )
    900                     break;
    901             }
    902 
    903             if( !flag )
    904             {
    905                 c = 0;
    906                 s = 1;
    907 
    908                 for( i = l; i <= k; i++ )
    909                 {
    910                     f = s * e[i];
    911                     e[i] = (float)(e[i]*c);
    912 
    913                     if( anorm + fabs( f ) == anorm )
    914                         break;
    915 
    916                     g = w[i];
    917                     h = pythag( f, g );
    918                     w[i] = (float)h;
    919                     c = g / h;
    920                     s = -f / h;
    921 
    922                     if( uT )
    923                         icvGivens_32f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
    924                 }
    925             }
    926 
    927             z = w[k];
    928             if( l == k || iters++ == MAX_ITERS )
    929                 break;
    930 
    931             /* shift from bottom 2x2 minor */
    932             x = w[l];
    933             y = w[k - 1];
    934             g = e[k - 1];
    935             h = e[k];
    936             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
    937             g = pythag( f, 1 );
    938             if( f < 0 )
    939                 g = -g;
    940             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
    941             /* next QR transformation */
    942             c = s = 1;
    943 
    944             for( i = l + 1; i <= k; i++ )
    945             {
    946                 g = e[i];
    947                 y = w[i];
    948                 h = s * g;
    949                 g *= c;
    950                 z = pythag( f, h );
    951                 e[i - 1] = (float)z;
    952                 c = f / z;
    953                 s = h / z;
    954                 f = x * c + g * s;
    955                 g = -x * s + g * c;
    956                 h = y * s;
    957                 y *= c;
    958 
    959                 if( vT )
    960                     icvGivens_32f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
    961 
    962                 z = pythag( f, h );
    963                 w[i - 1] = (float)z;
    964 
    965                 /* rotation can be arbitrary if z == 0 */
    966                 if( z != 0 )
    967                 {
    968                     c = f / z;
    969                     s = h / z;
    970                 }
    971                 f = c * g + s * y;
    972                 x = -s * g + c * y;
    973 
    974                 if( uT )
    975                     icvGivens_32f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
    976             }
    977 
    978             e[l] = 0;
    979             e[k] = (float)f;
    980             w[k] = (float)x;
    981         }                       /* end of iteration loop */
    982 
    983         if( iters > MAX_ITERS )
    984             break;
    985 
    986         if( z < 0 )
    987         {
    988             w[k] = (float)(-z);
    989             if( vT )
    990             {
    991                 for( j = 0; j < n; j++ )
    992                     vT[j + k * ldvT] = -vT[j + k * ldvT];
    993             }
    994         }
    995     }                           /* end of diagonalization loop */
    996 
    997     /* sort singular values and corresponding vectors */
    998     for( i = 0; i < nm; i++ )
    999     {
   1000         k = i;
   1001         for( j = i + 1; j < nm; j++ )
   1002             if( w[k] < w[j] )
   1003                 k = j;
   1004 
   1005         if( k != i )
   1006         {
   1007             float t;
   1008             CV_SWAP( w[i], w[k], t );
   1009 
   1010             if( vT )
   1011                 for( j = 0; j < n; j++ )
   1012                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
   1013 
   1014             if( uT )
   1015                 for( j = 0; j < m; j++ )
   1016                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
   1017         }
   1018     }
   1019 }
   1020 
   1021 
   1022 static void
   1023 icvSVBkSb_64f( int m, int n, const double* w,
   1024                const double* uT, int lduT,
   1025                const double* vT, int ldvT,
   1026                const double* b, int ldb, int nb,
   1027                double* x, int ldx, double* buffer )
   1028 {
   1029     double threshold = 0;
   1030     int i, j, nm = MIN( m, n );
   1031 
   1032     if( !b )
   1033         nb = m;
   1034 
   1035     for( i = 0; i < n; i++ )
   1036         memset( x + i*ldx, 0, nb*sizeof(x[0]));
   1037 
   1038     for( i = 0; i < nm; i++ )
   1039         threshold += w[i];
   1040     threshold *= 2*DBL_EPSILON;
   1041 
   1042     /* vT * inv(w) * uT * b */
   1043     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
   1044     {
   1045         double wi = w[i];
   1046 
   1047         if( wi > threshold )
   1048         {
   1049             wi = 1./wi;
   1050 
   1051             if( nb == 1 )
   1052             {
   1053                 double s = 0;
   1054                 if( b )
   1055                 {
   1056                     if( ldb == 1 )
   1057                     {
   1058                         for( j = 0; j <= m - 4; j += 4 )
   1059                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
   1060                         for( ; j < m; j++ )
   1061                             s += uT[j]*b[j];
   1062                     }
   1063                     else
   1064                     {
   1065                         for( j = 0; j < m; j++ )
   1066                             s += uT[j]*b[j*ldb];
   1067                     }
   1068                 }
   1069                 else
   1070                     s = uT[0];
   1071                 s *= wi;
   1072                 if( ldx == 1 )
   1073                 {
   1074                     for( j = 0; j <= n - 4; j += 4 )
   1075                     {
   1076                         double t0 = x[j] + s*vT[j];
   1077                         double t1 = x[j+1] + s*vT[j+1];
   1078                         x[j] = t0;
   1079                         x[j+1] = t1;
   1080                         t0 = x[j+2] + s*vT[j+2];
   1081                         t1 = x[j+3] + s*vT[j+3];
   1082                         x[j+2] = t0;
   1083                         x[j+3] = t1;
   1084                     }
   1085 
   1086                     for( ; j < n; j++ )
   1087                         x[j] += s*vT[j];
   1088                 }
   1089                 else
   1090                 {
   1091                     for( j = 0; j < n; j++ )
   1092                         x[j*ldx] += s*vT[j];
   1093                 }
   1094             }
   1095             else
   1096             {
   1097                 if( b )
   1098                 {
   1099                     memset( buffer, 0, nb*sizeof(buffer[0]));
   1100                     icvMatrAXPY_64f( m, nb, b, ldb, uT, buffer, 0 );
   1101                     for( j = 0; j < nb; j++ )
   1102                         buffer[j] *= wi;
   1103                 }
   1104                 else
   1105                 {
   1106                     for( j = 0; j < nb; j++ )
   1107                         buffer[j] = uT[j]*wi;
   1108                 }
   1109                 icvMatrAXPY_64f( n, nb, buffer, 0, vT, x, ldx );
   1110             }
   1111         }
   1112     }
   1113 }
   1114 
   1115 
   1116 static void
   1117 icvSVBkSb_32f( int m, int n, const float* w,
   1118                const float* uT, int lduT,
   1119                const float* vT, int ldvT,
   1120                const float* b, int ldb, int nb,
   1121                float* x, int ldx, float* buffer )
   1122 {
   1123     float threshold = 0.f;
   1124     int i, j, nm = MIN( m, n );
   1125 
   1126     if( !b )
   1127         nb = m;
   1128 
   1129     for( i = 0; i < n; i++ )
   1130         memset( x + i*ldx, 0, nb*sizeof(x[0]));
   1131 
   1132     for( i = 0; i < nm; i++ )
   1133         threshold += w[i];
   1134     threshold *= 2*FLT_EPSILON;
   1135 
   1136     /* vT * inv(w) * uT * b */
   1137     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
   1138     {
   1139         double wi = w[i];
   1140 
   1141         if( wi > threshold )
   1142         {
   1143             wi = 1./wi;
   1144 
   1145             if( nb == 1 )
   1146             {
   1147                 double s = 0;
   1148                 if( b )
   1149                 {
   1150                     if( ldb == 1 )
   1151                     {
   1152                         for( j = 0; j <= m - 4; j += 4 )
   1153                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
   1154                         for( ; j < m; j++ )
   1155                             s += uT[j]*b[j];
   1156                     }
   1157                     else
   1158                     {
   1159                         for( j = 0; j < m; j++ )
   1160                             s += uT[j]*b[j*ldb];
   1161                     }
   1162                 }
   1163                 else
   1164                     s = uT[0];
   1165                 s *= wi;
   1166 
   1167                 if( ldx == 1 )
   1168                 {
   1169                     for( j = 0; j <= n - 4; j += 4 )
   1170                     {
   1171                         double t0 = x[j] + s*vT[j];
   1172                         double t1 = x[j+1] + s*vT[j+1];
   1173                         x[j] = (float)t0;
   1174                         x[j+1] = (float)t1;
   1175                         t0 = x[j+2] + s*vT[j+2];
   1176                         t1 = x[j+3] + s*vT[j+3];
   1177                         x[j+2] = (float)t0;
   1178                         x[j+3] = (float)t1;
   1179                     }
   1180 
   1181                     for( ; j < n; j++ )
   1182                         x[j] = (float)(x[j] + s*vT[j]);
   1183                 }
   1184                 else
   1185                 {
   1186                     for( j = 0; j < n; j++ )
   1187                         x[j*ldx] = (float)(x[j*ldx] + s*vT[j]);
   1188                 }
   1189             }
   1190             else
   1191             {
   1192                 if( b )
   1193                 {
   1194                     memset( buffer, 0, nb*sizeof(buffer[0]));
   1195                     icvMatrAXPY_32f( m, nb, b, ldb, uT, buffer, 0 );
   1196                     for( j = 0; j < nb; j++ )
   1197                         buffer[j] = (float)(buffer[j]*wi);
   1198                 }
   1199                 else
   1200                 {
   1201                     for( j = 0; j < nb; j++ )
   1202                         buffer[j] = (float)(uT[j]*wi);
   1203                 }
   1204                 icvMatrAXPY_32f( n, nb, buffer, 0, vT, x, ldx );
   1205             }
   1206         }
   1207     }
   1208 }
   1209 
   1210 
   1211 CV_IMPL  void
   1212 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
   1213 {
   1214     uchar* buffer = 0;
   1215     int local_alloc = 0;
   1216 
   1217     CV_FUNCNAME( "cvSVD" );
   1218 
   1219     __BEGIN__;
   1220 
   1221     CvMat astub, *a = (CvMat*)aarr;
   1222     CvMat wstub, *w = (CvMat*)warr;
   1223     CvMat ustub, *u;
   1224     CvMat vstub, *v;
   1225     CvMat tmat;
   1226     uchar* tw = 0;
   1227     int type;
   1228     int a_buf_offset = 0, u_buf_offset = 0, buf_size, pix_size;
   1229     int temp_u = 0, /* temporary storage for U is needed */
   1230         t_svd; /* special case: a->rows < a->cols */
   1231     int m, n;
   1232     int w_rows, w_cols;
   1233     int u_rows = 0, u_cols = 0;
   1234     int w_is_mat = 0;
   1235 
   1236     if( !CV_IS_MAT( a ))
   1237         CV_CALL( a = cvGetMat( a, &astub ));
   1238 
   1239     if( !CV_IS_MAT( w ))
   1240         CV_CALL( w = cvGetMat( w, &wstub ));
   1241 
   1242     if( !CV_ARE_TYPES_EQ( a, w ))
   1243         CV_ERROR( CV_StsUnmatchedFormats, "" );
   1244 
   1245     if( a->rows >= a->cols )
   1246     {
   1247         m = a->rows;
   1248         n = a->cols;
   1249         w_rows = w->rows;
   1250         w_cols = w->cols;
   1251         t_svd = 0;
   1252     }
   1253     else
   1254     {
   1255         CvArr* t;
   1256         CV_SWAP( uarr, varr, t );
   1257 
   1258         flags = (flags & CV_SVD_U_T ? CV_SVD_V_T : 0)|
   1259                 (flags & CV_SVD_V_T ? CV_SVD_U_T : 0);
   1260         m = a->cols;
   1261         n = a->rows;
   1262         w_rows = w->cols;
   1263         w_cols = w->rows;
   1264         t_svd = 1;
   1265     }
   1266 
   1267     u = (CvMat*)uarr;
   1268     v = (CvMat*)varr;
   1269 
   1270     w_is_mat = w_cols > 1 && w_rows > 1;
   1271 
   1272     if( !w_is_mat && CV_IS_MAT_CONT(w->type) && w_cols + w_rows - 1 == n )
   1273         tw = w->data.ptr;
   1274 
   1275     if( u )
   1276     {
   1277         if( !CV_IS_MAT( u ))
   1278             CV_CALL( u = cvGetMat( u, &ustub ));
   1279 
   1280         if( !(flags & CV_SVD_U_T) )
   1281         {
   1282             u_rows = u->rows;
   1283             u_cols = u->cols;
   1284         }
   1285         else
   1286         {
   1287             u_rows = u->cols;
   1288             u_cols = u->rows;
   1289         }
   1290 
   1291         if( !CV_ARE_TYPES_EQ( a, u ))
   1292             CV_ERROR( CV_StsUnmatchedFormats, "" );
   1293 
   1294         if( u_rows != m || (u_cols != m && u_cols != n))
   1295             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U matrix has unappropriate size" :
   1296                                                      "V matrix has unappropriate size" );
   1297 
   1298         temp_u = (u_rows != u_cols && !(flags & CV_SVD_U_T)) || u->data.ptr==a->data.ptr;
   1299 
   1300         if( w_is_mat && u_cols != w_rows )
   1301             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U and W have incompatible sizes" :
   1302                                                      "V and W have incompatible sizes" );
   1303     }
   1304     else
   1305     {
   1306         u = &ustub;
   1307         u->data.ptr = 0;
   1308         u->step = 0;
   1309     }
   1310 
   1311     if( v )
   1312     {
   1313         int v_rows, v_cols;
   1314 
   1315         if( !CV_IS_MAT( v ))
   1316             CV_CALL( v = cvGetMat( v, &vstub ));
   1317 
   1318         if( !(flags & CV_SVD_V_T) )
   1319         {
   1320             v_rows = v->rows;
   1321             v_cols = v->cols;
   1322         }
   1323         else
   1324         {
   1325             v_rows = v->cols;
   1326             v_cols = v->rows;
   1327         }
   1328 
   1329         if( !CV_ARE_TYPES_EQ( a, v ))
   1330             CV_ERROR( CV_StsUnmatchedFormats, "" );
   1331 
   1332         if( v_rows != n || v_cols != n )
   1333             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U matrix has unappropriate size" :
   1334                                                     "V matrix has unappropriate size" );
   1335 
   1336         if( w_is_mat && w_cols != v_cols )
   1337             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U and W have incompatible sizes" :
   1338                                                     "V and W have incompatible sizes" );
   1339     }
   1340     else
   1341     {
   1342         v = &vstub;
   1343         v->data.ptr = 0;
   1344         v->step = 0;
   1345     }
   1346 
   1347     type = CV_MAT_TYPE( a->type );
   1348     pix_size = CV_ELEM_SIZE(type);
   1349     buf_size = n*2 + m;
   1350 
   1351     if( !(flags & CV_SVD_MODIFY_A) )
   1352     {
   1353         a_buf_offset = buf_size;
   1354         buf_size += a->rows*a->cols;
   1355     }
   1356 
   1357     if( temp_u )
   1358     {
   1359         u_buf_offset = buf_size;
   1360         buf_size += u->rows*u->cols;
   1361     }
   1362 
   1363     buf_size *= pix_size;
   1364 
   1365     if( buf_size <= CV_MAX_LOCAL_SIZE )
   1366     {
   1367         buffer = (uchar*)cvStackAlloc( buf_size );
   1368         local_alloc = 1;
   1369     }
   1370     else
   1371     {
   1372         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
   1373     }
   1374 
   1375     if( !(flags & CV_SVD_MODIFY_A) )
   1376     {
   1377         cvInitMatHeader( &tmat, m, n, type,
   1378                          buffer + a_buf_offset*pix_size );
   1379         if( !t_svd )
   1380             cvCopy( a, &tmat );
   1381         else
   1382             cvT( a, &tmat );
   1383         a = &tmat;
   1384     }
   1385 
   1386     if( temp_u )
   1387     {
   1388         cvInitMatHeader( &ustub, u_cols, u_rows, type, buffer + u_buf_offset*pix_size );
   1389         u = &ustub;
   1390     }
   1391 
   1392     if( !tw )
   1393         tw = buffer + (n + m)*pix_size;
   1394 
   1395     if( type == CV_32FC1 )
   1396     {
   1397         icvSVD_32f( a->data.fl, a->step/sizeof(float), a->rows, a->cols,
   1398                    (float*)tw, u->data.fl, u->step/sizeof(float), u_cols,
   1399                    v->data.fl, v->step/sizeof(float), (float*)buffer );
   1400     }
   1401     else if( type == CV_64FC1 )
   1402     {
   1403         icvSVD_64f( a->data.db, a->step/sizeof(double), a->rows, a->cols,
   1404                     (double*)tw, u->data.db, u->step/sizeof(double), u_cols,
   1405                     v->data.db, v->step/sizeof(double), (double*)buffer );
   1406     }
   1407     else
   1408     {
   1409         CV_ERROR( CV_StsUnsupportedFormat, "" );
   1410     }
   1411 
   1412     if( tw != w->data.ptr )
   1413     {
   1414         int shift = w->cols != 1;
   1415         cvSetZero( w );
   1416         if( type == CV_32FC1 )
   1417             for( int i = 0; i < n; i++ )
   1418                 ((float*)(w->data.ptr + i*w->step))[i*shift] = ((float*)tw)[i];
   1419         else
   1420             for( int i = 0; i < n; i++ )
   1421                 ((double*)(w->data.ptr + i*w->step))[i*shift] = ((double*)tw)[i];
   1422     }
   1423 
   1424     if( uarr )
   1425     {
   1426         if( !(flags & CV_SVD_U_T))
   1427             cvT( u, uarr );
   1428         else if( temp_u )
   1429             cvCopy( u, uarr );
   1430         /*CV_CHECK_NANS( uarr );*/
   1431     }
   1432 
   1433     if( varr )
   1434     {
   1435         if( !(flags & CV_SVD_V_T))
   1436             cvT( v, varr );
   1437         /*CV_CHECK_NANS( varr );*/
   1438     }
   1439 
   1440     CV_CHECK_NANS( w );
   1441 
   1442     __END__;
   1443 
   1444     if( buffer && !local_alloc )
   1445         cvFree( &buffer );
   1446 }
   1447 
   1448 
   1449 CV_IMPL void
   1450 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
   1451           const CvArr* varr, const CvArr* barr,
   1452           CvArr* xarr, int flags )
   1453 {
   1454     uchar* buffer = 0;
   1455     int local_alloc = 0;
   1456 
   1457     CV_FUNCNAME( "cvSVBkSb" );
   1458 
   1459     __BEGIN__;
   1460 
   1461     CvMat wstub, *w = (CvMat*)warr;
   1462     CvMat bstub, *b = (CvMat*)barr;
   1463     CvMat xstub, *x = (CvMat*)xarr;
   1464     CvMat ustub, ustub2, *u = (CvMat*)uarr;
   1465     CvMat vstub, vstub2, *v = (CvMat*)varr;
   1466     uchar* tw = 0;
   1467     int type;
   1468     int temp_u = 0, temp_v = 0;
   1469     int u_buf_offset = 0, v_buf_offset = 0, w_buf_offset = 0, t_buf_offset = 0;
   1470     int buf_size = 0, pix_size;
   1471     int m, n, nm;
   1472     int u_rows, u_cols;
   1473     int v_rows, v_cols;
   1474 
   1475     if( !CV_IS_MAT( w ))
   1476         CV_CALL( w = cvGetMat( w, &wstub ));
   1477 
   1478     if( !CV_IS_MAT( u ))
   1479         CV_CALL( u = cvGetMat( u, &ustub ));
   1480 
   1481     if( !CV_IS_MAT( v ))
   1482         CV_CALL( v = cvGetMat( v, &vstub ));
   1483 
   1484     if( !CV_IS_MAT( x ))
   1485         CV_CALL( x = cvGetMat( x, &xstub ));
   1486 
   1487     if( !CV_ARE_TYPES_EQ( w, u ) || !CV_ARE_TYPES_EQ( w, v ) || !CV_ARE_TYPES_EQ( w, x ))
   1488         CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
   1489 
   1490     type = CV_MAT_TYPE( w->type );
   1491     pix_size = CV_ELEM_SIZE(type);
   1492 
   1493     if( !(flags & CV_SVD_U_T) )
   1494     {
   1495         temp_u = 1;
   1496         u_buf_offset = buf_size;
   1497         buf_size += u->cols*u->rows*pix_size;
   1498         u_rows = u->rows;
   1499         u_cols = u->cols;
   1500     }
   1501     else
   1502     {
   1503         u_rows = u->cols;
   1504         u_cols = u->rows;
   1505     }
   1506 
   1507     if( !(flags & CV_SVD_V_T) )
   1508     {
   1509         temp_v = 1;
   1510         v_buf_offset = buf_size;
   1511         buf_size += v->cols*v->rows*pix_size;
   1512         v_rows = v->rows;
   1513         v_cols = v->cols;
   1514     }
   1515     else
   1516     {
   1517         v_rows = v->cols;
   1518         v_cols = v->rows;
   1519     }
   1520 
   1521     m = u_rows;
   1522     n = v_rows;
   1523     nm = MIN(n,m);
   1524 
   1525     if( (u_rows != u_cols && v_rows != v_cols) || x->rows != v_rows )
   1526         CV_ERROR( CV_StsBadSize, "V or U matrix must be square" );
   1527 
   1528     if( (w->rows == 1 || w->cols == 1) && w->rows + w->cols - 1 == nm )
   1529     {
   1530         if( CV_IS_MAT_CONT(w->type) )
   1531             tw = w->data.ptr;
   1532         else
   1533         {
   1534             w_buf_offset = buf_size;
   1535             buf_size += nm*pix_size;
   1536         }
   1537     }
   1538     else
   1539     {
   1540         if( w->cols != v_cols || w->rows != u_cols )
   1541             CV_ERROR( CV_StsBadSize, "W must be 1d array of MIN(m,n) elements or "
   1542                                     "matrix which size matches to U and V" );
   1543         w_buf_offset = buf_size;
   1544         buf_size += nm*pix_size;
   1545     }
   1546 
   1547     if( b )
   1548     {
   1549         if( !CV_IS_MAT( b ))
   1550             CV_CALL( b = cvGetMat( b, &bstub ));
   1551         if( !CV_ARE_TYPES_EQ( w, b ))
   1552             CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
   1553         if( b->cols != x->cols || b->rows != m )
   1554             CV_ERROR( CV_StsUnmatchedSizes, "b matrix must have (m x x->cols) size" );
   1555     }
   1556     else
   1557     {
   1558         b = &bstub;
   1559         memset( b, 0, sizeof(*b));
   1560     }
   1561 
   1562     t_buf_offset = buf_size;
   1563     buf_size += (MAX(m,n) + b->cols)*pix_size;
   1564 
   1565     if( buf_size <= CV_MAX_LOCAL_SIZE )
   1566     {
   1567         buffer = (uchar*)cvStackAlloc( buf_size );
   1568         local_alloc = 1;
   1569     }
   1570     else
   1571         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
   1572 
   1573     if( temp_u )
   1574     {
   1575         cvInitMatHeader( &ustub2, u_cols, u_rows, type, buffer + u_buf_offset );
   1576         cvT( u, &ustub2 );
   1577         u = &ustub2;
   1578     }
   1579 
   1580     if( temp_v )
   1581     {
   1582         cvInitMatHeader( &vstub2, v_cols, v_rows, type, buffer + v_buf_offset );
   1583         cvT( v, &vstub2 );
   1584         v = &vstub2;
   1585     }
   1586 
   1587     if( !tw )
   1588     {
   1589         int i, shift = w->cols > 1 ? pix_size : 0;
   1590         tw = buffer + w_buf_offset;
   1591         for( i = 0; i < nm; i++ )
   1592             memcpy( tw + i*pix_size, w->data.ptr + i*(w->step + shift), pix_size );
   1593     }
   1594 
   1595     if( type == CV_32FC1 )
   1596     {
   1597         icvSVBkSb_32f( m, n, (float*)tw, u->data.fl, u->step/sizeof(float),
   1598                        v->data.fl, v->step/sizeof(float),
   1599                        b->data.fl, b->step/sizeof(float), b->cols,
   1600                        x->data.fl, x->step/sizeof(float),
   1601                        (float*)(buffer + t_buf_offset) );
   1602     }
   1603     else if( type == CV_64FC1 )
   1604     {
   1605         icvSVBkSb_64f( m, n, (double*)tw, u->data.db, u->step/sizeof(double),
   1606                        v->data.db, v->step/sizeof(double),
   1607                        b->data.db, b->step/sizeof(double), b->cols,
   1608                        x->data.db, x->step/sizeof(double),
   1609                        (double*)(buffer + t_buf_offset) );
   1610     }
   1611     else
   1612     {
   1613         CV_ERROR( CV_StsUnsupportedFormat, "" );
   1614     }
   1615 
   1616     __END__;
   1617 
   1618     if( buffer && !local_alloc )
   1619         cvFree( &buffer );
   1620 }
   1621 
   1622 /* End of file. */
   1623