Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //
     12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     13 // Third party copyrights are property of their respective owners.
     14 //
     15 // Redistribution and use in source and binary forms, with or without modification,
     16 // are permitted provided that the following conditions are met:
     17 //
     18 //   * Redistribution's of source code must retain the above copyright notice,
     19 //     this list of conditions and the following disclaimer.
     20 //
     21 //   * Redistribution's in binary form must reproduce the above copyright notice,
     22 //     this list of conditions and the following disclaimer in the documentation
     23 //     and/or other materials provided with the distribution.
     24 //
     25 //   * The name of Intel Corporation may not be used to endorse or promote products
     26 //     derived from this software without specific prior written permission.
     27 //
     28 // This software is provided by the copyright holders and contributors "as is" and
     29 // any express or implied warranties, including, but not limited to, the implied
     30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     31 // In no event shall the Intel Corporation or contributors be liable for any direct,
     32 // indirect, incidental, special, exemplary, or consequential damages
     33 // (including, but not limited to, procurement of substitute goods or services;
     34 // loss of use, data, or profits; or business interruption) however caused
     35 // and on any theory of liability, whether in contract, strict liability,
     36 // or tort (including negligence or otherwise) arising in any way out of
     37 // the use of this software, even if advised of the possibility of such damage.
     38 //
     39 //M*/
     40 
     41 #include "_ml.h"
     42 
     43 CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
     44 {
     45     term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
     46     train_method = RPROP;
     47     bp_dw_scale = bp_moment_scale = 0.1;
     48     rp_dw0 = 0.1; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
     49     rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
     50 }
     51 
     52 
     53 CvANN_MLP_TrainParams::CvANN_MLP_TrainParams( CvTermCriteria _term_crit,
     54                                               int _train_method,
     55                                               double _param1, double _param2 )
     56 {
     57     term_crit = _term_crit;
     58     train_method = _train_method;
     59     bp_dw_scale = bp_moment_scale = 0.1;
     60     rp_dw0 = 1.; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
     61     rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
     62 
     63     if( train_method == RPROP )
     64     {
     65         rp_dw0 = _param1;
     66         if( rp_dw0 < FLT_EPSILON )
     67             rp_dw0 = 1.;
     68         rp_dw_min = _param2;
     69         rp_dw_min = MAX( rp_dw_min, 0 );
     70     }
     71     else if( train_method == BACKPROP )
     72     {
     73         bp_dw_scale = _param1;
     74         if( bp_dw_scale <= 0 )
     75             bp_dw_scale = 0.1;
     76         bp_dw_scale = MAX( bp_dw_scale, 1e-3 );
     77         bp_dw_scale = MIN( bp_dw_scale, 1 );
     78         bp_moment_scale = _param2;
     79         if( bp_moment_scale < 0 )
     80             bp_moment_scale = 0.1;
     81         bp_moment_scale = MIN( bp_moment_scale, 1 );
     82     }
     83     else
     84         train_method = RPROP;
     85 }
     86 
     87 
     88 CvANN_MLP_TrainParams::~CvANN_MLP_TrainParams()
     89 {
     90 }
     91 
     92 
     93 CvANN_MLP::CvANN_MLP()
     94 {
     95     layer_sizes = wbuf = 0;
     96     min_val = max_val = min_val1 = max_val1 = 0.;
     97     weights = 0;
     98     rng = cvRNG(-1);
     99     default_model_name = "my_nn";
    100     clear();
    101 }
    102 
    103 
    104 CvANN_MLP::CvANN_MLP( const CvMat* _layer_sizes,
    105                       int _activ_func,
    106                       double _f_param1, double _f_param2 )
    107 {
    108     layer_sizes = wbuf = 0;
    109     min_val = max_val = min_val1 = max_val1 = 0.;
    110     weights = 0;
    111     rng = cvRNG(-1);
    112     default_model_name = "my_nn";
    113     create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
    114 }
    115 
    116 
    117 CvANN_MLP::~CvANN_MLP()
    118 {
    119     clear();
    120 }
    121 
    122 
    123 void CvANN_MLP::clear()
    124 {
    125     cvReleaseMat( &layer_sizes );
    126     cvReleaseMat( &wbuf );
    127     cvFree( &weights );
    128     activ_func = SIGMOID_SYM;
    129     f_param1 = f_param2 = 1;
    130     max_buf_sz = 1 << 12;
    131 }
    132 
    133 
    134 void CvANN_MLP::set_activ_func( int _activ_func, double _f_param1, double _f_param2 )
    135 {
    136     CV_FUNCNAME( "CvANN_MLP::set_activ_func" );
    137 
    138     __BEGIN__;
    139 
    140     if( _activ_func < 0 || _activ_func > GAUSSIAN )
    141         CV_ERROR( CV_StsOutOfRange, "Unknown activation function" );
    142 
    143     activ_func = _activ_func;
    144 
    145     switch( activ_func )
    146     {
    147     case SIGMOID_SYM:
    148         max_val = 0.95; min_val = -max_val;
    149         max_val1 = 0.98; min_val1 = -max_val1;
    150         if( fabs(_f_param1) < FLT_EPSILON )
    151             _f_param1 = 2./3;
    152         if( fabs(_f_param2) < FLT_EPSILON )
    153             _f_param2 = 1.7159;
    154         break;
    155     case GAUSSIAN:
    156         max_val = 1.; min_val = 0.05;
    157         max_val1 = 1.; min_val1 = 0.02;
    158         if( fabs(_f_param1) < FLT_EPSILON )
    159             _f_param1 = 1.;
    160         if( fabs(_f_param2) < FLT_EPSILON )
    161             _f_param2 = 1.;
    162         break;
    163     default:
    164         min_val = max_val = min_val1 = max_val1 = 0.;
    165         _f_param1 = 1.;
    166         _f_param2 = 0.;
    167     }
    168 
    169     f_param1 = _f_param1;
    170     f_param2 = _f_param2;
    171 
    172     __END__;
    173 }
    174 
    175 
    176 void CvANN_MLP::init_weights()
    177 {
    178     int i, j, k;
    179 
    180     for( i = 1; i < layer_sizes->cols; i++ )
    181     {
    182         int n1 = layer_sizes->data.i[i-1];
    183         int n2 = layer_sizes->data.i[i];
    184         double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
    185         double* w = weights[i];
    186 
    187         // initialize weights using Nguyen-Widrow algorithm
    188         for( j = 0; j < n2; j++ )
    189         {
    190             double s = 0;
    191             for( k = 0; k <= n1; k++ )
    192             {
    193                 val = cvRandReal(&rng)*2-1.;
    194                 w[k*n2 + j] = val;
    195                 s += val;
    196             }
    197 
    198             if( i < layer_sizes->cols - 1 )
    199             {
    200                 s = 1./(s - val);
    201                 for( k = 0; k <= n1; k++ )
    202                     w[k*n2 + j] *= s;
    203                 w[n1*n2 + j] *= G*(-1+j*2./n2);
    204             }
    205         }
    206     }
    207 }
    208 
    209 
    210 void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func,
    211                         double _f_param1, double _f_param2 )
    212 {
    213     CV_FUNCNAME( "CvANN_MLP::create" );
    214 
    215     __BEGIN__;
    216 
    217     int i, l_step, l_count, buf_sz = 0;
    218     int *l_src, *l_dst;
    219 
    220     clear();
    221 
    222     if( !CV_IS_MAT(_layer_sizes) ||
    223         _layer_sizes->cols != 1 && _layer_sizes->rows != 1 ||
    224         CV_MAT_TYPE(_layer_sizes->type) != CV_32SC1 )
    225         CV_ERROR( CV_StsBadArg,
    226         "The array of layer neuron counters must be an integer vector" );
    227 
    228     CV_CALL( set_activ_func( _activ_func, _f_param1, _f_param2 ));
    229 
    230     l_count = _layer_sizes->rows + _layer_sizes->cols - 1;
    231     l_src = _layer_sizes->data.i;
    232     l_step = CV_IS_MAT_CONT(_layer_sizes->type) ? 1 :
    233                 _layer_sizes->step / sizeof(l_src[0]);
    234     CV_CALL( layer_sizes = cvCreateMat( 1, l_count, CV_32SC1 ));
    235     l_dst = layer_sizes->data.i;
    236 
    237     max_count = 0;
    238     for( i = 0; i < l_count; i++ )
    239     {
    240         int n = l_src[i*l_step];
    241         if( n < 1 + (0 < i && i < l_count-1))
    242             CV_ERROR( CV_StsOutOfRange,
    243             "there should be at least one input and one output "
    244             "and every hidden layer must have more than 1 neuron" );
    245         l_dst[i] = n;
    246         max_count = MAX( max_count, n );
    247         if( i > 0 )
    248             buf_sz += (l_dst[i-1]+1)*n;
    249     }
    250 
    251     buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2;
    252 
    253     CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F ));
    254     CV_CALL( weights = (double**)cvAlloc( (l_count+1)*sizeof(weights[0]) ));
    255 
    256     weights[0] = wbuf->data.db;
    257     weights[1] = weights[0] + l_dst[0]*2;
    258     for( i = 1; i < l_count; i++ )
    259         weights[i+1] = weights[i] + (l_dst[i-1] + 1)*l_dst[i];
    260     weights[l_count+1] = weights[l_count] + l_dst[l_count-1]*2;
    261 
    262     __END__;
    263 }
    264 
    265 
    266 float CvANN_MLP::predict( const CvMat* _inputs, CvMat* _outputs ) const
    267 {
    268     CV_FUNCNAME( "CvANN_MLP::predict" );
    269 
    270     __BEGIN__;
    271 
    272     double* buf;
    273     int i, j, n, dn = 0, l_count, dn0, buf_sz, min_buf_sz;
    274 
    275     if( !layer_sizes )
    276         CV_ERROR( CV_StsError, "The network has not been initialized" );
    277 
    278     if( !CV_IS_MAT(_inputs) || !CV_IS_MAT(_outputs) ||
    279         !CV_ARE_TYPES_EQ(_inputs,_outputs) ||
    280         CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
    281         CV_MAT_TYPE(_inputs->type) != CV_64FC1 ||
    282         _inputs->rows != _outputs->rows )
    283         CV_ERROR( CV_StsBadArg, "Both input and output must be floating-point matrices "
    284                                 "of the same type and have the same number of rows" );
    285 
    286     if( _inputs->cols != layer_sizes->data.i[0] )
    287         CV_ERROR( CV_StsBadSize, "input matrix must have the same number of columns as "
    288                                  "the number of neurons in the input layer" );
    289 
    290     if( _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
    291         CV_ERROR( CV_StsBadSize, "output matrix must have the same number of columns as "
    292                                  "the number of neurons in the output layer" );
    293     n = dn0 = _inputs->rows;
    294     min_buf_sz = 2*max_count;
    295     buf_sz = n*min_buf_sz;
    296 
    297     if( buf_sz > max_buf_sz )
    298     {
    299         dn0 = max_buf_sz/min_buf_sz;
    300         dn0 = MAX( dn0, 1 );
    301         buf_sz = dn0*min_buf_sz;
    302     }
    303 
    304     buf = (double*)cvStackAlloc( buf_sz*sizeof(buf[0]) );
    305     l_count = layer_sizes->cols;
    306 
    307     for( i = 0; i < n; i += dn )
    308     {
    309         CvMat hdr[2], _w, *layer_in = &hdr[0], *layer_out = &hdr[1], *temp;
    310         dn = MIN( dn0, n - i );
    311 
    312         cvGetRows( _inputs, layer_in, i, i + dn );
    313         cvInitMatHeader( layer_out, dn, layer_in->cols, CV_64F, buf );
    314 
    315         scale_input( layer_in, layer_out );
    316         CV_SWAP( layer_in, layer_out, temp );
    317 
    318         for( j = 1; j < l_count; j++ )
    319         {
    320             double* data = buf + (j&1 ? max_count*dn0 : 0);
    321             int cols = layer_sizes->data.i[j];
    322 
    323             cvInitMatHeader( layer_out, dn, cols, CV_64F, data );
    324             cvInitMatHeader( &_w, layer_in->cols, layer_out->cols, CV_64F, weights[j] );
    325             cvGEMM( layer_in, &_w, 1, 0, 0, layer_out );
    326             calc_activ_func( layer_out, _w.data.db + _w.rows*_w.cols );
    327 
    328             CV_SWAP( layer_in, layer_out, temp );
    329         }
    330 
    331         cvGetRows( _outputs, layer_out, i, i + dn );
    332         scale_output( layer_in, layer_out );
    333     }
    334 
    335     __END__;
    336 
    337     return 0.f;
    338 }
    339 
    340 
    341 void CvANN_MLP::scale_input( const CvMat* _src, CvMat* _dst ) const
    342 {
    343     int i, j, cols = _src->cols;
    344     double* dst = _dst->data.db;
    345     const double* w = weights[0];
    346     int step = _src->step;
    347 
    348     if( CV_MAT_TYPE( _src->type ) == CV_32F )
    349     {
    350         const float* src = _src->data.fl;
    351         step /= sizeof(src[0]);
    352 
    353         for( i = 0; i < _src->rows; i++, src += step, dst += cols )
    354             for( j = 0; j < cols; j++ )
    355                 dst[j] = src[j]*w[j*2] + w[j*2+1];
    356     }
    357     else
    358     {
    359         const double* src = _src->data.db;
    360         step /= sizeof(src[0]);
    361 
    362         for( i = 0; i < _src->rows; i++, src += step, dst += cols )
    363             for( j = 0; j < cols; j++ )
    364                 dst[j] = src[j]*w[j*2] + w[j*2+1];
    365     }
    366 }
    367 
    368 
    369 void CvANN_MLP::scale_output( const CvMat* _src, CvMat* _dst ) const
    370 {
    371     int i, j, cols = _src->cols;
    372     const double* src = _src->data.db;
    373     const double* w = weights[layer_sizes->cols];
    374     int step = _dst->step;
    375 
    376     if( CV_MAT_TYPE( _dst->type ) == CV_32F )
    377     {
    378         float* dst = _dst->data.fl;
    379         step /= sizeof(dst[0]);
    380 
    381         for( i = 0; i < _src->rows; i++, src += cols, dst += step )
    382             for( j = 0; j < cols; j++ )
    383                 dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
    384     }
    385     else
    386     {
    387         double* dst = _dst->data.db;
    388         step /= sizeof(dst[0]);
    389 
    390         for( i = 0; i < _src->rows; i++, src += cols, dst += step )
    391             for( j = 0; j < cols; j++ )
    392                 dst[j] = src[j]*w[j*2] + w[j*2+1];
    393     }
    394 }
    395 
    396 
    397 void CvANN_MLP::calc_activ_func( CvMat* sums, const double* bias ) const
    398 {
    399     int i, j, n = sums->rows, cols = sums->cols;
    400     double* data = sums->data.db;
    401     double scale = 0, scale2 = f_param2;
    402 
    403     switch( activ_func )
    404     {
    405     case IDENTITY:
    406         scale = 1.;
    407         break;
    408     case SIGMOID_SYM:
    409         scale = -f_param1;
    410         break;
    411     case GAUSSIAN:
    412         scale = -f_param1*f_param1;
    413         break;
    414     default:
    415         ;
    416     }
    417 
    418     assert( CV_IS_MAT_CONT(sums->type) );
    419 
    420     if( activ_func != GAUSSIAN )
    421     {
    422         for( i = 0; i < n; i++, data += cols )
    423             for( j = 0; j < cols; j++ )
    424                 data[j] = (data[j] + bias[j])*scale;
    425 
    426         if( activ_func == IDENTITY )
    427             return;
    428     }
    429     else
    430     {
    431         for( i = 0; i < n; i++, data += cols )
    432             for( j = 0; j < cols; j++ )
    433             {
    434                 double t = data[j] + bias[j];
    435                 data[j] = t*t*scale;
    436             }
    437     }
    438 
    439     cvExp( sums, sums );
    440 
    441     n *= cols;
    442     data -= n;
    443 
    444     switch( activ_func )
    445     {
    446     case SIGMOID_SYM:
    447         for( i = 0; i <= n - 4; i += 4 )
    448         {
    449             double x0 = 1.+data[i], x1 = 1.+data[i+1], x2 = 1.+data[i+2], x3 = 1.+data[i+3];
    450             double a = x0*x1, b = x2*x3, d = scale2/(a*b), t0, t1;
    451             a *= d; b *= d;
    452             t0 = (2 - x0)*b*x1; t1 = (2 - x1)*b*x0;
    453             data[i] = t0; data[i+1] = t1;
    454             t0 = (2 - x2)*a*x3; t1 = (2 - x3)*a*x2;
    455             data[i+2] = t0; data[i+3] = t1;
    456         }
    457 
    458         for( ; i < n; i++ )
    459         {
    460             double t = scale2*(1. - data[i])/(1. + data[i]);
    461             data[i] = t;
    462         }
    463         break;
    464 
    465     case GAUSSIAN:
    466         for( i = 0; i < n; i++ )
    467             data[i] = scale2*data[i];
    468         break;
    469 
    470     default:
    471         ;
    472     }
    473 }
    474 
    475 
    476 void CvANN_MLP::calc_activ_func_deriv( CvMat* _xf, CvMat* _df,
    477                                        const double* bias ) const
    478 {
    479     int i, j, n = _xf->rows, cols = _xf->cols;
    480     double* xf = _xf->data.db;
    481     double* df = _df->data.db;
    482     double scale, scale2 = f_param2;
    483     assert( CV_IS_MAT_CONT( _xf->type & _df->type ) );
    484 
    485     if( activ_func == IDENTITY )
    486     {
    487         for( i = 0; i < n; i++, xf += cols, df += cols )
    488             for( j = 0; j < cols; j++ )
    489             {
    490                 xf[j] += bias[j];
    491                 df[j] = 1;
    492             }
    493         return;
    494     }
    495     else if( activ_func == GAUSSIAN )
    496     {
    497         scale = -f_param1*f_param1;
    498         scale2 *= scale;
    499         for( i = 0; i < n; i++, xf += cols, df += cols )
    500             for( j = 0; j < cols; j++ )
    501             {
    502                 double t = xf[j] + bias[j];
    503                 df[j] = t*2*scale2;
    504                 xf[j] = t*t*scale;
    505             }
    506     }
    507     else
    508     {
    509         scale = -f_param1;
    510         for( i = 0; i < n; i++, xf += cols, df += cols )
    511             for( j = 0; j < cols; j++ )
    512                 xf[j] = (xf[j] + bias[j])*scale;
    513     }
    514 
    515     cvExp( _xf, _xf );
    516 
    517     n *= cols;
    518     xf -= n; df -= n;
    519 
    520     // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
    521     // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
    522     // 2*a*exp(-ax)/(1+exp(-ax))^2
    523     switch( activ_func )
    524     {
    525     case SIGMOID_SYM:
    526         scale *= -2*f_param2;
    527         for( i = 0; i <= n - 4; i += 4 )
    528         {
    529             double x0 = 1.+xf[i], x1 = 1.+xf[i+1], x2 = 1.+xf[i+2], x3 = 1.+xf[i+3];
    530             double a = x0*x1, b = x2*x3, d = 1./(a*b), t0, t1;
    531             a *= d; b *= d;
    532 
    533             t0 = b*x1; t1 = b*x0;
    534             df[i] = scale*xf[i]*t0*t0;
    535             df[i+1] = scale*xf[i+1]*t1*t1;
    536             t0 *= scale2*(2 - x0); t1 *= scale2*(2 - x1);
    537             xf[i] = t0; xf[i+1] = t1;
    538 
    539             t0 = a*x3; t1 = a*x2;
    540             df[i+2] = scale*xf[i+2]*t0*t0;
    541             df[i+3] = scale*xf[i+3]*t1*t1;
    542             t0 *= scale2*(2 - x2); t1 *= scale2*(2 - x3);
    543             xf[i+2] = t0; xf[i+3] = t1;
    544         }
    545 
    546         for( ; i < n; i++ )
    547         {
    548             double t0 = 1./(1. + xf[i]);
    549             double t1 = scale*xf[i]*t0*t0;
    550             t0 *= scale2*(1. - xf[i]);
    551             df[i] = t1;
    552             xf[i] = t0;
    553         }
    554         break;
    555 
    556     case GAUSSIAN:
    557         for( i = 0; i < n; i++ )
    558             df[i] *= xf[i];
    559         break;
    560     default:
    561         ;
    562     }
    563 }
    564 
    565 
    566 void CvANN_MLP::calc_input_scale( const CvVectors* vecs, int flags )
    567 {
    568     bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    569     bool no_scale = (flags & NO_INPUT_SCALE) != 0;
    570     double* scale = weights[0];
    571     int count = vecs->count;
    572 
    573     if( reset_weights )
    574     {
    575         int i, j, vcount = layer_sizes->data.i[0];
    576         int type = vecs->type;
    577         double a = no_scale ? 1. : 0.;
    578 
    579         for( j = 0; j < vcount; j++ )
    580             scale[2*j] = a, scale[j*2+1] = 0.;
    581 
    582         if( no_scale )
    583             return;
    584 
    585         for( i = 0; i < count; i++ )
    586         {
    587             const float* f = vecs->data.fl[i];
    588             const double* d = vecs->data.db[i];
    589             for( j = 0; j < vcount; j++ )
    590             {
    591                 double t = type == CV_32F ? (double)f[j] : d[j];
    592                 scale[j*2] += t;
    593                 scale[j*2+1] += t*t;
    594             }
    595         }
    596 
    597         for( j = 0; j < vcount; j++ )
    598         {
    599             double s = scale[j*2], s2 = scale[j*2+1];
    600             double m = s/count, sigma2 = s2/count - m*m;
    601             scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
    602             scale[j*2+1] = -m*scale[j*2];
    603         }
    604     }
    605 }
    606 
    607 
    608 void CvANN_MLP::calc_output_scale( const CvVectors* vecs, int flags )
    609 {
    610     int i, j, vcount = layer_sizes->data.i[layer_sizes->cols-1];
    611     int type = vecs->type;
    612     double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
    613     bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    614     bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
    615     int l_count = layer_sizes->cols;
    616     double* scale = weights[l_count];
    617     double* inv_scale = weights[l_count+1];
    618     int count = vecs->count;
    619 
    620     CV_FUNCNAME( "CvANN_MLP::calc_output_scale" );
    621 
    622     __BEGIN__;
    623 
    624     if( reset_weights )
    625     {
    626         double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
    627 
    628         for( j = 0; j < vcount; j++ )
    629         {
    630             scale[2*j] = inv_scale[2*j] = a0;
    631             scale[j*2+1] = inv_scale[2*j+1] = b0;
    632         }
    633 
    634         if( no_scale )
    635             EXIT;
    636     }
    637 
    638     for( i = 0; i < count; i++ )
    639     {
    640         const float* f = vecs->data.fl[i];
    641         const double* d = vecs->data.db[i];
    642 
    643         for( j = 0; j < vcount; j++ )
    644         {
    645             double t = type == CV_32F ? (double)f[j] : d[j];
    646 
    647             if( reset_weights )
    648             {
    649                 double mj = scale[j*2], Mj = scale[j*2+1];
    650                 if( mj > t ) mj = t;
    651                 if( Mj < t ) Mj = t;
    652 
    653                 scale[j*2] = mj;
    654                 scale[j*2+1] = Mj;
    655             }
    656             else
    657             {
    658                 t = t*scale[j*2] + scale[2*j+1];
    659                 if( t < m1 || t > M1 )
    660                     CV_ERROR( CV_StsOutOfRange,
    661                     "Some of new output training vector components run exceed the original range too much" );
    662             }
    663         }
    664     }
    665 
    666     if( reset_weights )
    667         for( j = 0; j < vcount; j++ )
    668         {
    669             // map mj..Mj to m..M
    670             double mj = scale[j*2], Mj = scale[j*2+1];
    671             double a, b;
    672             double delta = Mj - mj;
    673             if( delta < DBL_EPSILON )
    674                 a = 1, b = (M + m - Mj - mj)*0.5;
    675             else
    676                 a = (M - m)/delta, b = m - mj*a;
    677             inv_scale[j*2] = a; inv_scale[j*2+1] = b;
    678             a = 1./a; b = -b*a;
    679             scale[j*2] = a; scale[j*2+1] = b;
    680         }
    681 
    682     __END__;
    683 }
    684 
    685 
    686 bool CvANN_MLP::prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
    687             const CvMat* _sample_weights, const CvMat* _sample_idx,
    688             CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags )
    689 {
    690     bool ok = false;
    691     CvMat* sample_idx = 0;
    692     CvVectors ivecs, ovecs;
    693     double* sw = 0;
    694     int count = 0;
    695 
    696     CV_FUNCNAME( "CvANN_MLP::prepare_to_train" );
    697 
    698     ivecs.data.ptr = ovecs.data.ptr = 0;
    699     assert( _ivecs && _ovecs );
    700 
    701     __BEGIN__;
    702 
    703     const int* sidx = 0;
    704     int i, sw_type = 0, sw_count = 0;
    705     int sw_step = 0;
    706     double sw_sum = 0;
    707 
    708     if( !layer_sizes )
    709         CV_ERROR( CV_StsError,
    710         "The network has not been created. Use method create or the appropriate constructor" );
    711 
    712     if( !CV_IS_MAT(_inputs) || CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
    713         CV_MAT_TYPE(_inputs->type) != CV_64FC1 || _inputs->cols != layer_sizes->data.i[0] )
    714         CV_ERROR( CV_StsBadArg,
    715         "input training data should be a floating-point matrix with"
    716         "the number of rows equal to the number of training samples and "
    717         "the number of columns equal to the size of 0-th (input) layer" );
    718 
    719     if( !CV_IS_MAT(_outputs) || CV_MAT_TYPE(_outputs->type) != CV_32FC1 &&
    720         CV_MAT_TYPE(_outputs->type) != CV_64FC1 ||
    721         _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
    722         CV_ERROR( CV_StsBadArg,
    723         "output training data should be a floating-point matrix with"
    724         "the number of rows equal to the number of training samples and "
    725         "the number of columns equal to the size of last (output) layer" );
    726 
    727     if( _inputs->rows != _outputs->rows )
    728         CV_ERROR( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
    729 
    730     if( _sample_idx )
    731     {
    732         CV_CALL( sample_idx = cvPreprocessIndexArray( _sample_idx, _inputs->rows ));
    733         sidx = sample_idx->data.i;
    734         count = sample_idx->cols + sample_idx->rows - 1;
    735     }
    736     else
    737         count = _inputs->rows;
    738 
    739     if( _sample_weights )
    740     {
    741         if( !CV_IS_MAT(_sample_weights) )
    742             CV_ERROR( CV_StsBadArg, "sample_weights (if passed) must be a valid matrix" );
    743 
    744         sw_type = CV_MAT_TYPE(_sample_weights->type);
    745         sw_count = _sample_weights->cols + _sample_weights->rows - 1;
    746 
    747         if( sw_type != CV_32FC1 && sw_type != CV_64FC1 ||
    748             _sample_weights->cols != 1 && _sample_weights->rows != 1 ||
    749             sw_count != count && sw_count != _inputs->rows )
    750             CV_ERROR( CV_StsBadArg,
    751             "sample_weights must be 1d floating-point vector containing weights "
    752             "of all or selected training samples" );
    753 
    754         sw_step = CV_IS_MAT_CONT(_sample_weights->type) ? 1 :
    755             _sample_weights->step/CV_ELEM_SIZE(sw_type);
    756 
    757         CV_CALL( sw = (double*)cvAlloc( count*sizeof(sw[0]) ));
    758     }
    759 
    760     CV_CALL( ivecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ivecs.data.ptr[0]) ));
    761     CV_CALL( ovecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ovecs.data.ptr[0]) ));
    762 
    763     ivecs.type = CV_MAT_TYPE(_inputs->type);
    764     ovecs.type = CV_MAT_TYPE(_outputs->type);
    765     ivecs.count = ovecs.count = count;
    766 
    767     for( i = 0; i < count; i++ )
    768     {
    769         int idx = sidx ? sidx[i] : i;
    770         ivecs.data.ptr[i] = _inputs->data.ptr + idx*_inputs->step;
    771         ovecs.data.ptr[i] = _outputs->data.ptr + idx*_outputs->step;
    772         if( sw )
    773         {
    774             int si = sw_count == count ? i : idx;
    775             double w = sw_type == CV_32FC1 ?
    776                 (double)_sample_weights->data.fl[si*sw_step] :
    777                 _sample_weights->data.db[si*sw_step];
    778             sw[i] = w;
    779             if( w < 0 )
    780                 CV_ERROR( CV_StsOutOfRange, "some of sample weights are negative" );
    781             sw_sum += w;
    782         }
    783     }
    784 
    785     // normalize weights
    786     if( sw )
    787     {
    788         sw_sum = sw_sum > DBL_EPSILON ? 1./sw_sum : 0;
    789         for( i = 0; i < count; i++ )
    790             sw[i] *= sw_sum;
    791     }
    792 
    793     calc_input_scale( &ivecs, _flags );
    794     CV_CALL( calc_output_scale( &ovecs, _flags ));
    795 
    796     ok = true;
    797 
    798     __END__;
    799 
    800     if( !ok )
    801     {
    802         cvFree( &ivecs.data.ptr );
    803         cvFree( &ovecs.data.ptr );
    804         cvFree( &sw );
    805     }
    806 
    807     cvReleaseMat( &sample_idx );
    808     *_ivecs = ivecs;
    809     *_ovecs = ovecs;
    810     *_sw = sw;
    811 
    812     return ok;
    813 }
    814 
    815 
    816 int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs,
    817                       const CvMat* _sample_weights, const CvMat* _sample_idx,
    818                       CvANN_MLP_TrainParams _params, int flags )
    819 {
    820     const int MAX_ITER = 1000;
    821     const double DEFAULT_EPSILON = FLT_EPSILON;
    822 
    823     double* sw = 0;
    824     CvVectors x0, u;
    825     int iter = -1;
    826 
    827     x0.data.ptr = u.data.ptr = 0;
    828 
    829     CV_FUNCNAME( "CvANN_MLP::train" );
    830 
    831     __BEGIN__;
    832 
    833     int max_iter;
    834     double epsilon;
    835 
    836     params = _params;
    837 
    838     // initialize training data
    839     CV_CALL( prepare_to_train( _inputs, _outputs, _sample_weights,
    840                                _sample_idx, &x0, &u, &sw, flags ));
    841 
    842     // ... and link weights
    843     if( !(flags & UPDATE_WEIGHTS) )
    844         init_weights();
    845 
    846     max_iter = params.term_crit.type & CV_TERMCRIT_ITER ? params.term_crit.max_iter : MAX_ITER;
    847     max_iter = MIN( max_iter, MAX_ITER );
    848     max_iter = MAX( max_iter, 1 );
    849 
    850     epsilon = params.term_crit.type & CV_TERMCRIT_EPS ? params.term_crit.epsilon : DEFAULT_EPSILON;
    851     epsilon = MAX(epsilon, DBL_EPSILON);
    852 
    853     params.term_crit.type = CV_TERMCRIT_ITER + CV_TERMCRIT_EPS;
    854     params.term_crit.max_iter = max_iter;
    855     params.term_crit.epsilon = epsilon;
    856 
    857     if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
    858     {
    859         CV_CALL( iter = train_backprop( x0, u, sw ));
    860     }
    861     else
    862     {
    863         CV_CALL( iter = train_rprop( x0, u, sw ));
    864     }
    865 
    866     __END__;
    867 
    868     cvFree( &x0.data.ptr );
    869     cvFree( &u.data.ptr );
    870     cvFree( &sw );
    871 
    872     return iter;
    873 }
    874 
    875 
    876 int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
    877 {
    878     CvMat* dw = 0;
    879     CvMat* buf = 0;
    880     double **x = 0, **df = 0;
    881     CvMat* _idx = 0;
    882     int iter = -1, count = x0.count;
    883 
    884     CV_FUNCNAME( "CvANN_MLP::train_backprop" );
    885 
    886     __BEGIN__;
    887 
    888     int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
    889     double *buf_ptr;
    890     double prev_E = DBL_MAX*0.5, E = 0, epsilon;
    891 
    892     max_iter = params.term_crit.max_iter*count;
    893     epsilon = params.term_crit.epsilon*count;
    894 
    895     l_count = layer_sizes->cols;
    896     ivcount = layer_sizes->data.i[0];
    897     ovcount = layer_sizes->data.i[l_count-1];
    898 
    899     // allocate buffers
    900     for( i = 0; i < l_count; i++ )
    901         total += layer_sizes->data.i[i] + 1;
    902 
    903     CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    904     cvZero( dw );
    905     CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
    906     CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
    907     for( i = 0; i < count; i++ )
    908         _idx->data.i[i] = i;
    909 
    910     CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    911     df = x + total;
    912     buf_ptr = buf->data.db;
    913 
    914     for( j = 0; j < l_count; j++ )
    915     {
    916         x[j] = buf_ptr;
    917         df[j] = x[j] + layer_sizes->data.i[j];
    918         buf_ptr += (df[j] - x[j])*2;
    919     }
    920 
    921     // run back-propagation loop
    922     /*
    923         y_i = w_i*x_{i-1}
    924         x_i = f(y_i)
    925         E = 1/2*||u - x_N||^2
    926         grad_N = (x_N - u)*f'(y_i)
    927         dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
    928         w_i(t+1) = w_i(t) + dw_i(t)
    929         grad_{i-1} = w_i^t*grad_i
    930     */
    931     for( iter = 0; iter < max_iter; iter++ )
    932     {
    933         int idx = iter % count;
    934         double* w = weights[0];
    935         double sweight = sw ? count*sw[idx] : 1.;
    936         CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
    937         CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;
    938 
    939         if( idx == 0 )
    940         {
    941             if( fabs(prev_E - E) < epsilon )
    942                 break;
    943             prev_E = E;
    944             E = 0;
    945 
    946             // shuffle indices
    947             for( i = 0; i < count; i++ )
    948             {
    949                 int tt;
    950                 j = (unsigned)cvRandInt(&rng) % count;
    951                 k = (unsigned)cvRandInt(&rng) % count;
    952                 CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
    953             }
    954         }
    955 
    956         idx = _idx->data.i[idx];
    957 
    958         if( x0.type == CV_32F )
    959         {
    960             const float* x0data = x0.data.fl[idx];
    961             for( j = 0; j < ivcount; j++ )
    962                 x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
    963         }
    964         else
    965         {
    966             const double* x0data = x0.data.db[idx];
    967             for( j = 0; j < ivcount; j++ )
    968                 x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
    969         }
    970 
    971         cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );
    972 
    973         // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
    974         for( i = 1; i < l_count; i++ )
    975         {
    976             cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
    977             cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
    978             cvGEMM( x1, &_w, 1, 0, 0, x2 );
    979             _df = *x2;
    980             _df.data.db = df[i];
    981             calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
    982             CV_SWAP( x1, x2, temp );
    983         }
    984 
    985         cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
    986         *grad2 = *grad1;
    987         grad2->data.db = buf_ptr + max_count;
    988 
    989         w = weights[l_count+1];
    990 
    991         // calculate error
    992         if( u.type == CV_32F )
    993         {
    994             const float* udata = u.data.fl[idx];
    995             for( k = 0; k < ovcount; k++ )
    996             {
    997                 double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
    998                 grad1->data.db[k] = t*sweight;
    999                 E += t*t;
   1000             }
   1001         }
   1002         else
   1003         {
   1004             const double* udata = u.data.db[idx];
   1005             for( k = 0; k < ovcount; k++ )
   1006             {
   1007                 double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
   1008                 grad1->data.db[k] = t*sweight;
   1009                 E += t*t;
   1010             }
   1011         }
   1012         E *= sweight;
   1013 
   1014         // backward pass, update weights
   1015         for( i = l_count-1; i > 0; i-- )
   1016         {
   1017             int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
   1018             cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
   1019             cvMul( grad1, &_df, grad1 );
   1020             cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
   1021             cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
   1022             cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
   1023             x[i-1][n1] = 1.;
   1024             cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
   1025             cvAdd( &_w, &_dw, &_w );
   1026             if( i > 1 )
   1027             {
   1028                 grad2->cols = n1;
   1029                 _w.rows = n1;
   1030                 cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
   1031             }
   1032             CV_SWAP( grad1, grad2, temp );
   1033         }
   1034     }
   1035 
   1036     iter /= count;
   1037 
   1038     __END__;
   1039 
   1040     cvReleaseMat( &dw );
   1041     cvReleaseMat( &buf );
   1042     cvReleaseMat( &_idx );
   1043     cvFree( &x );
   1044 
   1045     return iter;
   1046 }
   1047 
   1048 
   1049 int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
   1050 {
   1051     const int max_buf_sz = 1 << 16;
   1052     CvMat* dw = 0;
   1053     CvMat* dEdw = 0;
   1054     CvMat* prev_dEdw_sign = 0;
   1055     CvMat* buf = 0;
   1056     double **x = 0, **df = 0;
   1057     int iter = -1, count = x0.count;
   1058 
   1059     CV_FUNCNAME( "CvANN_MLP::train" );
   1060 
   1061     __BEGIN__;
   1062 
   1063     int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount=0;
   1064     double *buf_ptr;
   1065     double prev_E = DBL_MAX*0.5, epsilon;
   1066     double dw_plus, dw_minus, dw_min, dw_max;
   1067     double inv_count;
   1068 
   1069     max_iter = params.term_crit.max_iter;
   1070     epsilon = params.term_crit.epsilon;
   1071     dw_plus = params.rp_dw_plus;
   1072     dw_minus = params.rp_dw_minus;
   1073     dw_min = params.rp_dw_min;
   1074     dw_max = params.rp_dw_max;
   1075 
   1076     l_count = layer_sizes->cols;
   1077     ivcount = layer_sizes->data.i[0];
   1078     ovcount = layer_sizes->data.i[l_count-1];
   1079 
   1080     // allocate buffers
   1081     for( i = 0; i < l_count; i++ )
   1082         total += layer_sizes->data.i[i];
   1083 
   1084     CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
   1085     cvSet( dw, cvScalarAll(params.rp_dw0) );
   1086     CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
   1087     cvZero( dEdw );
   1088     CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 ));
   1089     cvZero( prev_dEdw_sign );
   1090 
   1091     inv_count = 1./count;
   1092     dcount0 = max_buf_sz/(2*total);
   1093     dcount0 = MAX( dcount0, 1 );
   1094     dcount0 = MIN( dcount0, count );
   1095     buf_sz = dcount0*(total + max_count)*2;
   1096 
   1097     CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F ));
   1098 
   1099     CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
   1100     df = x + total;
   1101     buf_ptr = buf->data.db;
   1102 
   1103     for( i = 0; i < l_count; i++ )
   1104     {
   1105         x[i] = buf_ptr;
   1106         df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
   1107         buf_ptr += (df[i] - x[i])*2;
   1108     }
   1109 
   1110     // run rprop loop
   1111     /*
   1112         y_i(t) = w_i(t)*x_{i-1}(t)
   1113         x_i(t) = f(y_i(t))
   1114         E = sum_over_all_samples(1/2*||u - x_N||^2)
   1115         grad_N = (x_N - u)*f'(y_i)
   1116 
   1117                       MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
   1118         dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
   1119                       dw_i{jk}(t-1) else
   1120 
   1121         if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
   1122            dE/dw_i{jk}(t)<-0
   1123         else
   1124            w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
   1125         grad_{i-1}(t) = w_i^t(t)*grad_i(t)
   1126     */
   1127     for( iter = 0; iter < max_iter; iter++ )
   1128     {
   1129         int n1, n2, si, j, k;
   1130         double* w;
   1131         CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df;
   1132         CvMat *x1, *x2, *grad1, *grad2, *temp;
   1133         double E = 0;
   1134 
   1135         // first, iterate through all the samples and compute dEdw
   1136         for( si = 0; si < count; si += dcount )
   1137         {
   1138             dcount = MIN( count - si, dcount0 );
   1139             w = weights[0];
   1140             grad1 = &ghdr1; grad2 = &ghdr2;
   1141             x1 = &hdr1; x2 = &hdr2;
   1142 
   1143             // grab and preprocess input data
   1144             if( x0.type == CV_32F )
   1145                 for( i = 0; i < dcount; i++ )
   1146                 {
   1147                     const float* x0data = x0.data.fl[si+i];
   1148                     double* xdata = x[0]+i*ivcount;
   1149                     for( j = 0; j < ivcount; j++ )
   1150                         xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
   1151                 }
   1152             else
   1153                 for( i = 0; i < dcount; i++ )
   1154                 {
   1155                     const double* x0data = x0.data.db[si+i];
   1156                     double* xdata = x[0]+i*ivcount;
   1157                     for( j = 0; j < ivcount; j++ )
   1158                         xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
   1159                 }
   1160 
   1161             cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] );
   1162 
   1163             // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
   1164             for( i = 1; i < l_count; i++ )
   1165             {
   1166                 cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] );
   1167                 cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
   1168                 cvGEMM( x1, &_w, 1, 0, 0, x2 );
   1169                 _df = *x2;
   1170                 _df.data.db = df[i];
   1171                 calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
   1172                 CV_SWAP( x1, x2, temp );
   1173             }
   1174 
   1175             cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr );
   1176             w = weights[l_count+1];
   1177             grad2->data.db = buf_ptr + max_count*dcount;
   1178 
   1179             // calculate error
   1180             if( u.type == CV_32F )
   1181                 for( i = 0; i < dcount; i++ )
   1182                 {
   1183                     const float* udata = u.data.fl[si+i];
   1184                     const double* xdata = x[l_count-1] + i*ovcount;
   1185                     double* gdata = grad1->data.db + i*ovcount;
   1186                     double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
   1187 
   1188                     for( j = 0; j < ovcount; j++ )
   1189                     {
   1190                         double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
   1191                         gdata[j] = t*sweight;
   1192                         E1 += t*t;
   1193                     }
   1194                     E += sweight*E1;
   1195                 }
   1196             else
   1197                 for( i = 0; i < dcount; i++ )
   1198                 {
   1199                     const double* udata = u.data.db[si+i];
   1200                     const double* xdata = x[l_count-1] + i*ovcount;
   1201                     double* gdata = grad1->data.db + i*ovcount;
   1202                     double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
   1203 
   1204                     for( j = 0; j < ovcount; j++ )
   1205                     {
   1206                         double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
   1207                         gdata[j] = t*sweight;
   1208                         E1 += t*t;
   1209                     }
   1210                     E += sweight*E1;
   1211                 }
   1212 
   1213             // backward pass, update dEdw
   1214             for( i = l_count-1; i > 0; i-- )
   1215             {
   1216                 n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
   1217                 cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
   1218                 cvMul( grad1, &_df, grad1 );
   1219                 cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
   1220                 cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
   1221                 cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
   1222                 // update bias part of dEdw
   1223                 for( k = 0; k < dcount; k++ )
   1224                 {
   1225                     double* dst = _dEdw.data.db + n1*n2;
   1226                     const double* src = grad1->data.db + k*n2;
   1227                     for( j = 0; j < n2; j++ )
   1228                         dst[j] += src[j];
   1229                 }
   1230                 cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
   1231                 cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
   1232 
   1233                 if( i > 1 )
   1234                     cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
   1235                 CV_SWAP( grad1, grad2, temp );
   1236             }
   1237         }
   1238 
   1239         // now update weights
   1240         for( i = 1; i < l_count; i++ )
   1241         {
   1242             n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
   1243             for( k = 0; k <= n1; k++ )
   1244             {
   1245                 double* wk = weights[i]+k*n2;
   1246                 size_t delta = wk - weights[0];
   1247                 double* dwk = dw->data.db + delta;
   1248                 double* dEdwk = dEdw->data.db + delta;
   1249                 char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta);
   1250 
   1251                 for( j = 0; j < n2; j++ )
   1252                 {
   1253                     double Eval = dEdwk[j];
   1254                     double dval = dwk[j];
   1255                     double wval = wk[j];
   1256                     int s = CV_SIGN(Eval);
   1257                     int ss = prevEk[j]*s;
   1258                     if( ss > 0 )
   1259                     {
   1260                         dval *= dw_plus;
   1261                         dval = MIN( dval, dw_max );
   1262                         dwk[j] = dval;
   1263                         wk[j] = wval + dval*s;
   1264                     }
   1265                     else if( ss < 0 )
   1266                     {
   1267                         dval *= dw_minus;
   1268                         dval = MAX( dval, dw_min );
   1269                         prevEk[j] = 0;
   1270                         dwk[j] = dval;
   1271                         wk[j] = wval + dval*s;
   1272                     }
   1273                     else
   1274                     {
   1275                         prevEk[j] = (char)s;
   1276                         wk[j] = wval + dval*s;
   1277                     }
   1278                     dEdwk[j] = 0.;
   1279                 }
   1280             }
   1281         }
   1282 
   1283         if( fabs(prev_E - E) < epsilon )
   1284             break;
   1285         prev_E = E;
   1286         E = 0;
   1287     }
   1288 
   1289     __END__;
   1290 
   1291     cvReleaseMat( &dw );
   1292     cvReleaseMat( &dEdw );
   1293     cvReleaseMat( &prev_dEdw_sign );
   1294     cvReleaseMat( &buf );
   1295     cvFree( &x );
   1296 
   1297     return iter;
   1298 }
   1299 
   1300 
   1301 void CvANN_MLP::write_params( CvFileStorage* fs )
   1302 {
   1303     //CV_FUNCNAME( "CvANN_MLP::write_params" );
   1304 
   1305     __BEGIN__;
   1306 
   1307     const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
   1308                             activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
   1309                             activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
   1310 
   1311     if( activ_func_name )
   1312         cvWriteString( fs, "activation_function", activ_func_name );
   1313     else
   1314         cvWriteInt( fs, "activation_function", activ_func );
   1315 
   1316     if( activ_func != IDENTITY )
   1317     {
   1318         cvWriteReal( fs, "f_param1", f_param1 );
   1319         cvWriteReal( fs, "f_param2", f_param2 );
   1320     }
   1321 
   1322     cvWriteReal( fs, "min_val", min_val );
   1323     cvWriteReal( fs, "max_val", max_val );
   1324     cvWriteReal( fs, "min_val1", min_val1 );
   1325     cvWriteReal( fs, "max_val1", max_val1 );
   1326 
   1327     cvStartWriteStruct( fs, "training_params", CV_NODE_MAP );
   1328     if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
   1329     {
   1330         cvWriteString( fs, "train_method", "BACKPROP" );
   1331         cvWriteReal( fs, "dw_scale", params.bp_dw_scale );
   1332         cvWriteReal( fs, "moment_scale", params.bp_moment_scale );
   1333     }
   1334     else if( params.train_method == CvANN_MLP_TrainParams::RPROP )
   1335     {
   1336         cvWriteString( fs, "train_method", "RPROP" );
   1337         cvWriteReal( fs, "dw0", params.rp_dw0 );
   1338         cvWriteReal( fs, "dw_plus", params.rp_dw_plus );
   1339         cvWriteReal( fs, "dw_minus", params.rp_dw_minus );
   1340         cvWriteReal( fs, "dw_min", params.rp_dw_min );
   1341         cvWriteReal( fs, "dw_max", params.rp_dw_max );
   1342     }
   1343 
   1344     cvStartWriteStruct( fs, "term_criteria", CV_NODE_MAP + CV_NODE_FLOW );
   1345     if( params.term_crit.type & CV_TERMCRIT_EPS )
   1346         cvWriteReal( fs, "epsilon", params.term_crit.epsilon );
   1347     if( params.term_crit.type & CV_TERMCRIT_ITER )
   1348         cvWriteInt( fs, "iterations", params.term_crit.max_iter );
   1349     cvEndWriteStruct( fs );
   1350 
   1351     cvEndWriteStruct( fs );
   1352 
   1353     __END__;
   1354 }
   1355 
   1356 
   1357 void CvANN_MLP::write( CvFileStorage* fs, const char* name )
   1358 {
   1359     CV_FUNCNAME( "CvANN_MLP::write" );
   1360 
   1361     __BEGIN__;
   1362 
   1363     int i, l_count = layer_sizes->cols;
   1364 
   1365     if( !layer_sizes )
   1366         CV_ERROR( CV_StsError, "The network has not been initialized" );
   1367 
   1368     cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_ANN_MLP );
   1369 
   1370     cvWrite( fs, "layer_sizes", layer_sizes );
   1371 
   1372     write_params( fs );
   1373 
   1374     cvStartWriteStruct( fs, "input_scale", CV_NODE_SEQ + CV_NODE_FLOW );
   1375     cvWriteRawData( fs, weights[0], layer_sizes->data.i[0]*2, "d" );
   1376     cvEndWriteStruct( fs );
   1377 
   1378     cvStartWriteStruct( fs, "output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
   1379     cvWriteRawData( fs, weights[l_count], layer_sizes->data.i[l_count-1]*2, "d" );
   1380     cvEndWriteStruct( fs );
   1381 
   1382     cvStartWriteStruct( fs, "inv_output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
   1383     cvWriteRawData( fs, weights[l_count+1], layer_sizes->data.i[l_count-1]*2, "d" );
   1384     cvEndWriteStruct( fs );
   1385 
   1386     cvStartWriteStruct( fs, "weights", CV_NODE_SEQ );
   1387     for( i = 1; i < l_count; i++ )
   1388     {
   1389         cvStartWriteStruct( fs, 0, CV_NODE_SEQ + CV_NODE_FLOW );
   1390         cvWriteRawData( fs, weights[i], (layer_sizes->data.i[i-1]+1)*layer_sizes->data.i[i], "d" );
   1391         cvEndWriteStruct( fs );
   1392     }
   1393 
   1394     cvEndWriteStruct( fs );
   1395 
   1396     __END__;
   1397 }
   1398 
   1399 
   1400 void CvANN_MLP::read_params( CvFileStorage* fs, CvFileNode* node )
   1401 {
   1402     //CV_FUNCNAME( "CvANN_MLP::read_params" );
   1403 
   1404     __BEGIN__;
   1405 
   1406     const char* activ_func_name = cvReadStringByName( fs, node, "activation_function", 0 );
   1407     CvFileNode* tparams_node;
   1408 
   1409     if( activ_func_name )
   1410         activ_func = strcmp( activ_func_name, "SIGMOID_SYM" ) == 0 ? SIGMOID_SYM :
   1411                      strcmp( activ_func_name, "IDENTITY" ) == 0 ? IDENTITY :
   1412                      strcmp( activ_func_name, "GAUSSIAN" ) == 0 ? GAUSSIAN : 0;
   1413     else
   1414         activ_func = cvReadIntByName( fs, node, "activation_function" );
   1415 
   1416     f_param1 = cvReadRealByName( fs, node, "f_param1", 0 );
   1417     f_param2 = cvReadRealByName( fs, node, "f_param2", 0 );
   1418 
   1419     set_activ_func( activ_func, f_param1, f_param2 );
   1420 
   1421     min_val = cvReadRealByName( fs, node, "min_val", 0. );
   1422     max_val = cvReadRealByName( fs, node, "max_val", 1. );
   1423     min_val1 = cvReadRealByName( fs, node, "min_val1", 0. );
   1424     max_val1 = cvReadRealByName( fs, node, "max_val1", 1. );
   1425 
   1426     tparams_node = cvGetFileNodeByName( fs, node, "training_params" );
   1427     params = CvANN_MLP_TrainParams();
   1428 
   1429     if( tparams_node )
   1430     {
   1431         const char* tmethod_name = cvReadStringByName( fs, tparams_node, "train_method", "" );
   1432         CvFileNode* tcrit_node;
   1433 
   1434         if( strcmp( tmethod_name, "BACKPROP" ) == 0 )
   1435         {
   1436             params.train_method = CvANN_MLP_TrainParams::BACKPROP;
   1437             params.bp_dw_scale = cvReadRealByName( fs, tparams_node, "dw_scale", 0 );
   1438             params.bp_moment_scale = cvReadRealByName( fs, tparams_node, "moment_scale", 0 );
   1439         }
   1440         else if( strcmp( tmethod_name, "RPROP" ) == 0 )
   1441         {
   1442             params.train_method = CvANN_MLP_TrainParams::RPROP;
   1443             params.rp_dw0 = cvReadRealByName( fs, tparams_node, "dw0", 0 );
   1444             params.rp_dw_plus = cvReadRealByName( fs, tparams_node, "dw_plus", 0 );
   1445             params.rp_dw_minus = cvReadRealByName( fs, tparams_node, "dw_minus", 0 );
   1446             params.rp_dw_min = cvReadRealByName( fs, tparams_node, "dw_min", 0 );
   1447             params.rp_dw_max = cvReadRealByName( fs, tparams_node, "dw_max", 0 );
   1448         }
   1449 
   1450         tcrit_node = cvGetFileNodeByName( fs, tparams_node, "term_criteria" );
   1451         if( tcrit_node )
   1452         {
   1453             params.term_crit.epsilon = cvReadRealByName( fs, tcrit_node, "epsilon", -1 );
   1454             params.term_crit.max_iter = cvReadIntByName( fs, tcrit_node, "iterations", -1 );
   1455             params.term_crit.type = (params.term_crit.epsilon >= 0 ? CV_TERMCRIT_EPS : 0) +
   1456                                    (params.term_crit.max_iter >= 0 ? CV_TERMCRIT_ITER : 0);
   1457         }
   1458     }
   1459 
   1460     __END__;
   1461 }
   1462 
   1463 
   1464 void CvANN_MLP::read( CvFileStorage* fs, CvFileNode* node )
   1465 {
   1466     CvMat* _layer_sizes = 0;
   1467 
   1468     CV_FUNCNAME( "CvANN_MLP::read" );
   1469 
   1470     __BEGIN__;
   1471 
   1472     CvFileNode* w;
   1473     CvSeqReader reader;
   1474     int i, l_count;
   1475 
   1476     _layer_sizes = (CvMat*)cvReadByName( fs, node, "layer_sizes" );
   1477     CV_CALL( create( _layer_sizes, SIGMOID_SYM, 0, 0 ));
   1478     l_count = layer_sizes->cols;
   1479 
   1480     CV_CALL( read_params( fs, node ));
   1481 
   1482     w = cvGetFileNodeByName( fs, node, "input_scale" );
   1483     if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
   1484         w->data.seq->total != layer_sizes->data.i[0]*2 )
   1485         CV_ERROR( CV_StsParseError, "input_scale tag is not found or is invalid" );
   1486 
   1487     CV_CALL( cvReadRawData( fs, w, weights[0], "d" ));
   1488 
   1489     w = cvGetFileNodeByName( fs, node, "output_scale" );
   1490     if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
   1491         w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
   1492         CV_ERROR( CV_StsParseError, "output_scale tag is not found or is invalid" );
   1493 
   1494     CV_CALL( cvReadRawData( fs, w, weights[l_count], "d" ));
   1495 
   1496     w = cvGetFileNodeByName( fs, node, "inv_output_scale" );
   1497     if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
   1498         w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
   1499         CV_ERROR( CV_StsParseError, "inv_output_scale tag is not found or is invalid" );
   1500 
   1501     CV_CALL( cvReadRawData( fs, w, weights[l_count+1], "d" ));
   1502 
   1503     w = cvGetFileNodeByName( fs, node, "weights" );
   1504     if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
   1505         w->data.seq->total != l_count - 1 )
   1506         CV_ERROR( CV_StsParseError, "weights tag is not found or is invalid" );
   1507 
   1508     cvStartReadSeq( w->data.seq, &reader );
   1509 
   1510     for( i = 1; i < l_count; i++ )
   1511     {
   1512         w = (CvFileNode*)reader.ptr;
   1513         CV_CALL( cvReadRawData( fs, w, weights[i], "d" ));
   1514         CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
   1515     }
   1516 
   1517     __END__;
   1518 }
   1519 
   1520 /* End of file. */
   1521