Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "_cv.h"
     43 
     44 
     45 /****************************************************************************************\
     46                                     Base Image Filter
     47 \****************************************************************************************/
     48 
     49 static void default_x_filter_func( const uchar*, uchar*, void* )
     50 {
     51 }
     52 
     53 static void default_y_filter_func( uchar**, uchar*, int, int, void* )
     54 {
     55 }
     56 
     57 CvBaseImageFilter::CvBaseImageFilter()
     58 {
     59     min_depth = CV_8U;
     60     buffer = 0;
     61     rows = 0;
     62     max_width = 0;
     63     x_func = default_x_filter_func;
     64     y_func = default_y_filter_func;
     65 }
     66 
     67 
     68 CvBaseImageFilter::CvBaseImageFilter( int _max_width, int _src_type, int _dst_type,
     69                                       bool _is_separable, CvSize _ksize, CvPoint _anchor,
     70                                       int _border_mode, CvScalar _border_value )
     71 {
     72     min_depth = CV_8U;
     73     buffer = 0;
     74     rows = 0;
     75     max_width = 0;
     76     x_func = default_x_filter_func;
     77     y_func = default_y_filter_func;
     78 
     79     init( _max_width, _src_type, _dst_type, _is_separable,
     80           _ksize, _anchor, _border_mode, _border_value );
     81 }
     82 
     83 
     84 void CvBaseImageFilter::clear()
     85 {
     86     cvFree( &buffer );
     87     rows = 0;
     88 }
     89 
     90 
     91 CvBaseImageFilter::~CvBaseImageFilter()
     92 {
     93     clear();
     94 }
     95 
     96 
     97 void CvBaseImageFilter::get_work_params()
     98 {
     99     int min_rows = max_ky*2 + 3, rows = MAX(min_rows,10), row_sz;
    100     int width = max_width, trow_sz = 0;
    101 
    102     if( is_separable )
    103     {
    104         int max_depth = MAX(CV_MAT_DEPTH(src_type), CV_MAT_DEPTH(dst_type));
    105         int max_cn = MAX(CV_MAT_CN(src_type), CV_MAT_CN(dst_type));
    106         max_depth = MAX( max_depth, min_depth );
    107         work_type = CV_MAKETYPE( max_depth, max_cn );
    108         trow_sz = cvAlign( (max_width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN );
    109     }
    110     else
    111     {
    112         work_type = src_type;
    113         width += ksize.width - 1;
    114     }
    115     row_sz = cvAlign( width*CV_ELEM_SIZE(work_type), ALIGN );
    116     buf_size = rows*row_sz;
    117     buf_size = MIN( buf_size, 1 << 16 );
    118     buf_size = MAX( buf_size, min_rows*row_sz );
    119     max_rows = (buf_size/row_sz)*3 + max_ky*2 + 8;
    120     buf_size += trow_sz;
    121 }
    122 
    123 
    124 void CvBaseImageFilter::init( int _max_width, int _src_type, int _dst_type,
    125                               bool _is_separable, CvSize _ksize, CvPoint _anchor,
    126                               int _border_mode, CvScalar _border_value )
    127 {
    128     CV_FUNCNAME( "CvBaseImageFilter::init" );
    129 
    130     __BEGIN__;
    131 
    132     int total_buf_sz, src_pix_sz, row_tab_sz, bsz;
    133     uchar* ptr;
    134 
    135     if( !(buffer && _max_width <= max_width && _src_type == src_type &&
    136         _dst_type == dst_type && _is_separable == is_separable &&
    137         _ksize.width == ksize.width && _ksize.height == ksize.height &&
    138         _anchor.x == anchor.x && _anchor.y == anchor.y) )
    139         clear();
    140 
    141     is_separable = _is_separable != 0;
    142     max_width = _max_width; //MAX(_max_width,_ksize.width);
    143     src_type = CV_MAT_TYPE(_src_type);
    144     dst_type = CV_MAT_TYPE(_dst_type);
    145     ksize = _ksize;
    146     anchor = _anchor;
    147 
    148     if( anchor.x == -1 )
    149         anchor.x = ksize.width / 2;
    150     if( anchor.y == -1 )
    151         anchor.y = ksize.height / 2;
    152 
    153     max_ky = MAX( anchor.y, ksize.height - anchor.y - 1 );
    154     border_mode = _border_mode;
    155     border_value = _border_value;
    156 
    157     if( ksize.width <= 0 || ksize.height <= 0 ||
    158         (unsigned)anchor.x >= (unsigned)ksize.width ||
    159         (unsigned)anchor.y >= (unsigned)ksize.height )
    160         CV_ERROR( CV_StsOutOfRange, "invalid kernel size and/or anchor position" );
    161 
    162     if( border_mode != IPL_BORDER_CONSTANT && border_mode != IPL_BORDER_REPLICATE &&
    163         border_mode != IPL_BORDER_REFLECT && border_mode != IPL_BORDER_REFLECT_101 )
    164         CV_ERROR( CV_StsBadArg, "Invalid/unsupported border mode" );
    165 
    166     get_work_params();
    167 
    168     prev_width = 0;
    169     prev_x_range = cvSlice(0,0);
    170 
    171     buf_size = cvAlign( buf_size, ALIGN );
    172 
    173     src_pix_sz = CV_ELEM_SIZE(src_type);
    174     border_tab_sz1 = anchor.x*src_pix_sz;
    175     border_tab_sz = (ksize.width-1)*src_pix_sz;
    176     bsz = cvAlign( border_tab_sz*sizeof(int), ALIGN );
    177 
    178     assert( max_rows > max_ky*2 );
    179     row_tab_sz = cvAlign( max_rows*sizeof(uchar*), ALIGN );
    180     total_buf_sz = buf_size + row_tab_sz + bsz;
    181 
    182     CV_CALL( ptr = buffer = (uchar*)cvAlloc( total_buf_sz ));
    183 
    184     rows = (uchar**)ptr;
    185     ptr += row_tab_sz;
    186     border_tab = (int*)ptr;
    187     ptr += bsz;
    188 
    189     buf_start = ptr;
    190     const_row = 0;
    191 
    192     if( border_mode == IPL_BORDER_CONSTANT )
    193         cvScalarToRawData( &border_value, border_tab, src_type, 0 );
    194 
    195     __END__;
    196 }
    197 
    198 
    199 void CvBaseImageFilter::start_process( CvSlice x_range, int width )
    200 {
    201     int mode = border_mode;
    202     int pix_sz = CV_ELEM_SIZE(src_type), work_pix_sz = CV_ELEM_SIZE(work_type);
    203     int bsz = buf_size, bw = x_range.end_index - x_range.start_index, bw1 = bw + ksize.width - 1;
    204     int tr_step = cvAlign(bw1*pix_sz, ALIGN );
    205     int i, j, k, ofs;
    206 
    207     if( x_range.start_index == prev_x_range.start_index &&
    208         x_range.end_index == prev_x_range.end_index &&
    209         width == prev_width )
    210         return;
    211 
    212     prev_x_range = x_range;
    213     prev_width = width;
    214 
    215     if( !is_separable )
    216         bw = bw1;
    217     else
    218         bsz -= tr_step;
    219 
    220     buf_step = cvAlign(bw*work_pix_sz, ALIGN);
    221 
    222     if( mode == IPL_BORDER_CONSTANT )
    223         bsz -= buf_step;
    224     buf_max_count = bsz/buf_step;
    225     buf_max_count = MIN( buf_max_count, max_rows - max_ky*2 );
    226     buf_end = buf_start + buf_max_count*buf_step;
    227 
    228     if( mode == IPL_BORDER_CONSTANT )
    229     {
    230         int i, tab_len = ksize.width*pix_sz;
    231         uchar* bt = (uchar*)border_tab;
    232         uchar* trow = buf_end;
    233         const_row = buf_end + (is_separable ? 1 : 0)*tr_step;
    234 
    235         for( i = pix_sz; i < tab_len; i++ )
    236             bt[i] = bt[i - pix_sz];
    237         for( i = 0; i < pix_sz; i++ )
    238             trow[i] = bt[i];
    239         for( i = pix_sz; i < tr_step; i++ )
    240             trow[i] = trow[i - pix_sz];
    241         if( is_separable )
    242             x_func( trow, const_row, this );
    243         return;
    244     }
    245 
    246     if( x_range.end_index - x_range.start_index <= 1 )
    247         mode = IPL_BORDER_REPLICATE;
    248 
    249     width = (width - 1)*pix_sz;
    250     ofs = (anchor.x-x_range.start_index)*pix_sz;
    251 
    252     for( k = 0; k < 2; k++ )
    253     {
    254         int idx, delta;
    255         int i1, i2, di;
    256 
    257         if( k == 0 )
    258         {
    259             idx = (x_range.start_index - 1)*pix_sz;
    260             delta = di = -pix_sz;
    261             i1 = border_tab_sz1 - pix_sz;
    262             i2 = -pix_sz;
    263         }
    264         else
    265         {
    266             idx = x_range.end_index*pix_sz;
    267             delta = di = pix_sz;
    268             i1 = border_tab_sz1;
    269             i2 = border_tab_sz;
    270         }
    271 
    272         if( (unsigned)idx > (unsigned)width )
    273         {
    274             int shift = mode == IPL_BORDER_REFLECT_101 ? pix_sz : 0;
    275             idx = k == 0 ? shift : width - shift;
    276             delta = -delta;
    277         }
    278 
    279         for( i = i1; i != i2; i += di )
    280         {
    281             for( j = 0; j < pix_sz; j++ )
    282                 border_tab[i + j] = idx + ofs + j;
    283             if( mode != IPL_BORDER_REPLICATE )
    284             {
    285                 if( (delta > 0 && idx == width) ||
    286                     (delta < 0 && idx == 0) )
    287                 {
    288                     if( mode == IPL_BORDER_REFLECT_101 )
    289                         idx -= delta*2;
    290                     delta = -delta;
    291                 }
    292                 else
    293                     idx += delta;
    294             }
    295         }
    296     }
    297 }
    298 
    299 
    300 void CvBaseImageFilter::make_y_border( int row_count, int top_rows, int bottom_rows )
    301 {
    302     int i;
    303 
    304     if( border_mode == IPL_BORDER_CONSTANT ||
    305         border_mode == IPL_BORDER_REPLICATE )
    306     {
    307         uchar* row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[max_ky];
    308 
    309         for( i = 0; i < top_rows && rows[i] == 0; i++ )
    310             rows[i] = row1;
    311 
    312         row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[row_count-1];
    313         for( i = 0; i < bottom_rows; i++ )
    314             rows[i + row_count] = row1;
    315     }
    316     else
    317     {
    318         int j, dj = 1, shift = border_mode == IPL_BORDER_REFLECT_101;
    319 
    320         for( i = top_rows-1, j = top_rows+shift; i >= 0; i-- )
    321         {
    322             if( rows[i] == 0 )
    323                 rows[i] = rows[j];
    324             j += dj;
    325             if( dj > 0 && j >= row_count )
    326             {
    327                 if( !bottom_rows )
    328                     break;
    329                 j -= 1 + shift;
    330                 dj = -dj;
    331             }
    332         }
    333 
    334         for( i = 0, j = row_count-1-shift; i < bottom_rows; i++, j-- )
    335             rows[i + row_count] = rows[j];
    336     }
    337 }
    338 
    339 
    340 int CvBaseImageFilter::fill_cyclic_buffer( const uchar* src, int src_step,
    341                                            int y0, int y1, int y2 )
    342 {
    343     int i, y = y0, bsz1 = border_tab_sz1, bsz = border_tab_sz;
    344     int pix_size = CV_ELEM_SIZE(src_type);
    345     int width = prev_x_range.end_index - prev_x_range.start_index, width_n = width*pix_size;
    346     bool can_use_src_as_trow = false; //is_separable && width >= ksize.width;
    347 
    348     // fill the cyclic buffer
    349     for( ; buf_count < buf_max_count && y < y2; buf_count++, y++, src += src_step )
    350     {
    351         uchar* trow = is_separable ? buf_end : buf_tail;
    352         uchar* bptr = can_use_src_as_trow && y1 < y && y+1 < y2 ? (uchar*)(src - bsz1) : trow;
    353 
    354         if( bptr != trow )
    355         {
    356             for( i = 0; i < bsz1; i++ )
    357                 trow[i] = bptr[i];
    358             for( ; i < bsz; i++ )
    359                 trow[i] = bptr[i + width_n];
    360         }
    361         else if( !(((size_t)(bptr + bsz1)|(size_t)src|width_n) & (sizeof(int)-1)) )
    362             for( i = 0; i < width_n; i += sizeof(int) )
    363                 *(int*)(bptr + i + bsz1) = *(int*)(src + i);
    364         else
    365             for( i = 0; i < width_n; i++ )
    366                 bptr[i + bsz1] = src[i];
    367 
    368         if( border_mode != IPL_BORDER_CONSTANT )
    369         {
    370             for( i = 0; i < bsz1; i++ )
    371             {
    372                 int j = border_tab[i];
    373                 bptr[i] = bptr[j];
    374             }
    375             for( ; i < bsz; i++ )
    376             {
    377                 int j = border_tab[i];
    378                 bptr[i + width_n] = bptr[j];
    379             }
    380         }
    381         else
    382         {
    383             const uchar *bt = (uchar*)border_tab;
    384             for( i = 0; i < bsz1; i++ )
    385                 bptr[i] = bt[i];
    386 
    387             for( ; i < bsz; i++ )
    388                 bptr[i + width_n] = bt[i];
    389         }
    390 
    391         if( is_separable )
    392         {
    393             x_func( bptr, buf_tail, this );
    394             if( bptr != trow )
    395             {
    396                 for( i = 0; i < bsz1; i++ )
    397                     bptr[i] = trow[i];
    398                 for( ; i < bsz; i++ )
    399                     bptr[i + width_n] = trow[i];
    400             }
    401         }
    402 
    403         buf_tail += buf_step;
    404         if( buf_tail >= buf_end )
    405             buf_tail = buf_start;
    406     }
    407 
    408     return y - y0;
    409 }
    410 
    411 int CvBaseImageFilter::process( const CvMat* src, CvMat* dst,
    412                                 CvRect src_roi, CvPoint dst_origin, int flags )
    413 {
    414     int rows_processed = 0;
    415 
    416     /*
    417         check_parameters
    418         initialize_horizontal_border_reloc_tab_if_not_initialized_yet
    419 
    420         for_each_source_row: src starts from src_roi.y, buf starts with the first available row
    421             1) if separable,
    422                    1a.1) copy source row to temporary buffer, form a border using border reloc tab.
    423                    1a.2) apply row-wise filter (symmetric, asymmetric or generic)
    424                else
    425                    1b.1) copy source row to the buffer, form a border
    426             2) if the buffer is full, or it is the last source row:
    427                  2.1) if stage != middle, form the pointers to other "virtual" rows.
    428                  if separable
    429                     2a.2) apply column-wise filter, store the results.
    430                  else
    431                     2b.2) form a sparse (offset,weight) tab
    432                     2b.3) apply generic non-separable filter, store the results
    433             3) update row pointers etc.
    434     */
    435 
    436     CV_FUNCNAME( "CvBaseImageFilter::process" );
    437 
    438     __BEGIN__;
    439 
    440     int i, width, _src_y1, _src_y2;
    441     int src_x, src_y, src_y1, src_y2, dst_y;
    442     int pix_size = CV_ELEM_SIZE(src_type);
    443     uchar *sptr = 0, *dptr;
    444     int phase = flags & (CV_START|CV_END|CV_MIDDLE);
    445     bool isolated_roi = (flags & CV_ISOLATED_ROI) != 0;
    446 
    447     if( !CV_IS_MAT(src) )
    448         CV_ERROR( CV_StsBadArg, "" );
    449 
    450     if( CV_MAT_TYPE(src->type) != src_type )
    451         CV_ERROR( CV_StsUnmatchedFormats, "" );
    452 
    453     width = src->cols;
    454 
    455     if( src_roi.width == -1 && src_roi.x == 0 )
    456         src_roi.width = width;
    457 
    458     if( src_roi.height == -1 && src_roi.y == 0 )
    459     {
    460         src_roi.y = 0;
    461         src_roi.height = src->rows;
    462     }
    463 
    464     if( src_roi.width > max_width ||
    465         src_roi.x < 0 || src_roi.width < 0 ||
    466         src_roi.y < 0 || src_roi.height < 0 ||
    467         src_roi.x + src_roi.width > width ||
    468         src_roi.y + src_roi.height > src->rows )
    469         CV_ERROR( CV_StsOutOfRange, "Too large source image or its ROI" );
    470 
    471     src_x = src_roi.x;
    472     _src_y1 = 0;
    473     _src_y2 = src->rows;
    474 
    475     if( isolated_roi )
    476     {
    477         src_roi.x = 0;
    478         width = src_roi.width;
    479         _src_y1 = src_roi.y;
    480         _src_y2 = src_roi.y + src_roi.height;
    481     }
    482 
    483     if( !CV_IS_MAT(dst) )
    484         CV_ERROR( CV_StsBadArg, "" );
    485 
    486     if( CV_MAT_TYPE(dst->type) != dst_type )
    487         CV_ERROR( CV_StsUnmatchedFormats, "" );
    488 
    489     if( dst_origin.x < 0 || dst_origin.y < 0 )
    490         CV_ERROR( CV_StsOutOfRange, "Incorrect destination ROI origin" );
    491 
    492     if( phase == CV_WHOLE )
    493         phase = CV_START | CV_END;
    494     phase &= CV_START | CV_END | CV_MIDDLE;
    495 
    496     // initialize horizontal border relocation tab if it is not initialized yet
    497     if( phase & CV_START )
    498         start_process( cvSlice(src_roi.x, src_roi.x + src_roi.width), width );
    499     else if( prev_width != width || prev_x_range.start_index != src_roi.x ||
    500         prev_x_range.end_index != src_roi.x + src_roi.width )
    501         CV_ERROR( CV_StsBadArg,
    502             "In a middle or at the end the horizontal placement of the stripe can not be changed" );
    503 
    504     dst_y = dst_origin.y;
    505     src_y1 = src_roi.y;
    506     src_y2 = src_roi.y + src_roi.height;
    507 
    508     if( phase & CV_START )
    509     {
    510         for( i = 0; i <= max_ky*2; i++ )
    511             rows[i] = 0;
    512 
    513         src_y1 -= max_ky;
    514         top_rows = bottom_rows = 0;
    515 
    516         if( src_y1 < _src_y1 )
    517         {
    518             top_rows = _src_y1 - src_y1;
    519             src_y1 = _src_y1;
    520         }
    521 
    522         buf_head = buf_tail = buf_start;
    523         buf_count = 0;
    524     }
    525 
    526     if( phase & CV_END )
    527     {
    528         src_y2 += max_ky;
    529 
    530         if( src_y2 > _src_y2 )
    531         {
    532             bottom_rows = src_y2 - _src_y2;
    533             src_y2 = _src_y2;
    534         }
    535     }
    536 
    537     dptr = dst->data.ptr + dst_origin.y*dst->step + dst_origin.x*CV_ELEM_SIZE(dst_type);
    538     sptr = src->data.ptr + src_y1*src->step + src_x*pix_size;
    539 
    540     for( src_y = src_y1; src_y < src_y2; )
    541     {
    542         uchar* bptr;
    543         int row_count, delta;
    544 
    545         delta = fill_cyclic_buffer( sptr, src->step, src_y, src_y1, src_y2 );
    546 
    547         src_y += delta;
    548         sptr += src->step*delta;
    549 
    550         // initialize the cyclic buffer row pointers
    551         bptr = buf_head;
    552         for( i = 0; i < buf_count; i++ )
    553         {
    554             rows[i+top_rows] = bptr;
    555             bptr += buf_step;
    556             if( bptr >= buf_end )
    557                 bptr = buf_start;
    558         }
    559 
    560         row_count = top_rows + buf_count;
    561 
    562         if( !rows[0] || ((phase & CV_END) && src_y == src_y2) )
    563         {
    564             int br = (phase & CV_END) && src_y == src_y2 ? bottom_rows : 0;
    565             make_y_border( row_count, top_rows, br );
    566             row_count += br;
    567         }
    568 
    569         if( rows[0] && row_count > max_ky*2 )
    570         {
    571             int count = row_count - max_ky*2;
    572             if( dst_y + count > dst->rows )
    573                 CV_ERROR( CV_StsOutOfRange, "The destination image can not fit the result" );
    574 
    575             assert( count >= 0 );
    576             y_func( rows + max_ky - anchor.y, dptr, dst->step, count, this );
    577             row_count -= count;
    578             dst_y += count;
    579             dptr += dst->step*count;
    580             for( bptr = row_count > 0 ?rows[count] : 0; buf_head != bptr && buf_count > 0; buf_count-- )
    581             {
    582                 buf_head += buf_step;
    583                 if( buf_head >= buf_end )
    584                     buf_head = buf_start;
    585             }
    586             rows_processed += count;
    587             top_rows = MAX(top_rows - count, 0);
    588         }
    589     }
    590 
    591     __END__;
    592 
    593     return rows_processed;
    594 }
    595 
    596 
    597 /****************************************************************************************\
    598                                     Separable Linear Filter
    599 \****************************************************************************************/
    600 
    601 static void icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params );
    602 static void icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step,
    603                                     int count, void* params );
    604 static void icvFilterColSymm_32s16s( const int** src, short* dst, int dst_step,
    605                                      int count, void* params );
    606 static void icvFilterRowSymm_8u32f( const uchar* src, float* dst, void* params );
    607 static void icvFilterRow_8u32f( const uchar* src, float* dst, void* params );
    608 static void icvFilterRowSymm_16s32f( const short* src, float* dst, void* params );
    609 static void icvFilterRow_16s32f( const short* src, float* dst, void* params );
    610 static void icvFilterRowSymm_16u32f( const ushort* src, float* dst, void* params );
    611 static void icvFilterRow_16u32f( const ushort* src, float* dst, void* params );
    612 static void icvFilterRowSymm_32f( const float* src, float* dst, void* params );
    613 static void icvFilterRow_32f( const float* src, float* dst, void* params );
    614 
    615 static void icvFilterColSymm_32f8u( const float** src, uchar* dst, int dst_step,
    616                                     int count, void* params );
    617 static void icvFilterCol_32f8u( const float** src, uchar* dst, int dst_step,
    618                                 int count, void* params );
    619 static void icvFilterColSymm_32f16s( const float** src, short* dst, int dst_step,
    620                                      int count, void* params );
    621 static void icvFilterCol_32f16s( const float** src, short* dst, int dst_step,
    622                                  int count, void* params );
    623 static void icvFilterColSymm_32f16u( const float** src, ushort* dst, int dst_step,
    624                                      int count, void* params );
    625 static void icvFilterCol_32f16u( const float** src, ushort* dst, int dst_step,
    626                                  int count, void* params );
    627 static void icvFilterColSymm_32f( const float** src, float* dst, int dst_step,
    628                                   int count, void* params );
    629 static void icvFilterCol_32f( const float** src, float* dst, int dst_step,
    630                               int count, void* params );
    631 
    632 CvSepFilter::CvSepFilter()
    633 {
    634     min_depth = CV_32F;
    635     kx = ky = 0;
    636     kx_flags = ky_flags = 0;
    637 }
    638 
    639 
    640 CvSepFilter::CvSepFilter( int _max_width, int _src_type, int _dst_type,
    641                           const CvMat* _kx, const CvMat* _ky,
    642                           CvPoint _anchor, int _border_mode,
    643                           CvScalar _border_value )
    644 {
    645     min_depth = CV_32F;
    646     kx = ky = 0;
    647     init( _max_width, _src_type, _dst_type, _kx, _ky, _anchor, _border_mode, _border_value );
    648 }
    649 
    650 
    651 void CvSepFilter::clear()
    652 {
    653     cvReleaseMat( &kx );
    654     cvReleaseMat( &ky );
    655     CvBaseImageFilter::clear();
    656 }
    657 
    658 
    659 CvSepFilter::~CvSepFilter()
    660 {
    661     clear();
    662 }
    663 
    664 
    665 #undef FILTER_BITS
    666 #define FILTER_BITS 8
    667 
    668 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type,
    669                         const CvMat* _kx, const CvMat* _ky,
    670                         CvPoint _anchor, int _border_mode,
    671                         CvScalar _border_value )
    672 {
    673     CV_FUNCNAME( "CvSepFilter::init" );
    674 
    675     __BEGIN__;
    676 
    677     CvSize _ksize;
    678     int filter_type;
    679     int i, xsz, ysz;
    680     int convert_filters = 0;
    681     double xsum = 0, ysum = 0;
    682     const float eps = FLT_EPSILON*100.f;
    683 
    684     if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) ||
    685         (_kx->cols != 1 && _kx->rows != 1) ||
    686         (_ky->cols != 1 && _ky->rows != 1) ||
    687         CV_MAT_CN(_kx->type) != 1 || CV_MAT_CN(_ky->type) != 1 ||
    688         !CV_ARE_TYPES_EQ(_kx,_ky) )
    689         CV_ERROR( CV_StsBadArg,
    690         "Both kernels must be valid 1d single-channel vectors of the same types" );
    691 
    692     if( CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type) )
    693         CV_ERROR( CV_StsUnmatchedFormats, "Input and output must have the same number of channels" );
    694 
    695     filter_type = MAX( CV_32F, CV_MAT_DEPTH(_kx->type) );
    696 
    697     _ksize.width = _kx->rows + _kx->cols - 1;
    698     _ksize.height = _ky->rows + _ky->cols - 1;
    699 
    700     CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
    701                                       _anchor, _border_mode, _border_value ));
    702 
    703     if( !(kx && CV_ARE_SIZES_EQ(kx,_kx)) )
    704     {
    705         cvReleaseMat( &kx );
    706         CV_CALL( kx = cvCreateMat( _kx->rows, _kx->cols, filter_type ));
    707     }
    708 
    709     if( !(ky && CV_ARE_SIZES_EQ(ky,_ky)) )
    710     {
    711         cvReleaseMat( &ky );
    712         CV_CALL( ky = cvCreateMat( _ky->rows, _ky->cols, filter_type ));
    713     }
    714 
    715     CV_CALL( cvConvert( _kx, kx ));
    716     CV_CALL( cvConvert( _ky, ky ));
    717 
    718     xsz = kx->rows + kx->cols - 1;
    719     ysz = ky->rows + ky->cols - 1;
    720     kx_flags = ky_flags = ASYMMETRICAL + SYMMETRICAL + POSITIVE + SUM_TO_1 + INTEGER;
    721 
    722     if( !(xsz & 1) )
    723         kx_flags &= ~(ASYMMETRICAL + SYMMETRICAL);
    724     if( !(ysz & 1) )
    725         ky_flags &= ~(ASYMMETRICAL + SYMMETRICAL);
    726 
    727     for( i = 0; i < xsz; i++ )
    728     {
    729         float v = kx->data.fl[i];
    730         xsum += v;
    731         if( v < 0 )
    732             kx_flags &= ~POSITIVE;
    733         if( fabs(v - cvRound(v)) > eps )
    734             kx_flags &= ~INTEGER;
    735         if( fabs(v - kx->data.fl[xsz - i - 1]) > eps )
    736             kx_flags &= ~SYMMETRICAL;
    737         if( fabs(v + kx->data.fl[xsz - i - 1]) > eps )
    738             kx_flags &= ~ASYMMETRICAL;
    739     }
    740 
    741     if( fabs(xsum - 1.) > eps )
    742         kx_flags &= ~SUM_TO_1;
    743 
    744     for( i = 0; i < ysz; i++ )
    745     {
    746         float v = ky->data.fl[i];
    747         ysum += v;
    748         if( v < 0 )
    749             ky_flags &= ~POSITIVE;
    750         if( fabs(v - cvRound(v)) > eps )
    751             ky_flags &= ~INTEGER;
    752         if( fabs(v - ky->data.fl[ysz - i - 1]) > eps )
    753             ky_flags &= ~SYMMETRICAL;
    754         if( fabs(v + ky->data.fl[ysz - i - 1]) > eps )
    755             ky_flags &= ~ASYMMETRICAL;
    756     }
    757 
    758     if( fabs(ysum - 1.) > eps )
    759         ky_flags &= ~SUM_TO_1;
    760 
    761     x_func = 0;
    762     y_func = 0;
    763 
    764     if( CV_MAT_DEPTH(src_type) == CV_8U )
    765     {
    766         if( CV_MAT_DEPTH(dst_type) == CV_8U &&
    767             ((kx_flags&ky_flags) & (SYMMETRICAL + POSITIVE + SUM_TO_1)) == SYMMETRICAL + POSITIVE + SUM_TO_1 )
    768         {
    769             x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s;
    770             y_func = (CvColumnFilterFunc)icvFilterColSymm_32s8u;
    771             kx_flags &= ~INTEGER;
    772             ky_flags &= ~INTEGER;
    773             convert_filters = 1;
    774         }
    775         else if( CV_MAT_DEPTH(dst_type) == CV_16S &&
    776             (kx_flags & (SYMMETRICAL + ASYMMETRICAL)) && (kx_flags & INTEGER) &&
    777             (ky_flags & (SYMMETRICAL + ASYMMETRICAL)) && (ky_flags & INTEGER) )
    778         {
    779             x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s;
    780             y_func = (CvColumnFilterFunc)icvFilterColSymm_32s16s;
    781             convert_filters = 1;
    782         }
    783         else
    784         {
    785             if( CV_MAT_DEPTH(dst_type) > CV_32F )
    786                 CV_ERROR( CV_StsUnsupportedFormat, "8u->64f separable filtering is not supported" );
    787 
    788             if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
    789                 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32f;
    790             else
    791                 x_func = (CvRowFilterFunc)icvFilterRow_8u32f;
    792         }
    793     }
    794     else if( CV_MAT_DEPTH(src_type) == CV_16U )
    795     {
    796         if( CV_MAT_DEPTH(dst_type) > CV_32F )
    797             CV_ERROR( CV_StsUnsupportedFormat, "16u->64f separable filtering is not supported" );
    798 
    799         if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
    800             x_func = (CvRowFilterFunc)icvFilterRowSymm_16u32f;
    801         else
    802             x_func = (CvRowFilterFunc)icvFilterRow_16u32f;
    803     }
    804     else if( CV_MAT_DEPTH(src_type) == CV_16S )
    805     {
    806         if( CV_MAT_DEPTH(dst_type) > CV_32F )
    807             CV_ERROR( CV_StsUnsupportedFormat, "16s->64f separable filtering is not supported" );
    808 
    809         if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
    810             x_func = (CvRowFilterFunc)icvFilterRowSymm_16s32f;
    811         else
    812             x_func = (CvRowFilterFunc)icvFilterRow_16s32f;
    813     }
    814     else if( CV_MAT_DEPTH(src_type) == CV_32F )
    815     {
    816         if( CV_MAT_DEPTH(dst_type) != CV_32F )
    817             CV_ERROR( CV_StsUnsupportedFormat, "When the input has 32f data type, the output must also have 32f type" );
    818 
    819         if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) )
    820             x_func = (CvRowFilterFunc)icvFilterRowSymm_32f;
    821         else
    822             x_func = (CvRowFilterFunc)icvFilterRow_32f;
    823     }
    824     else
    825         CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" );
    826 
    827     if( !y_func )
    828     {
    829         if( CV_MAT_DEPTH(dst_type) == CV_8U )
    830         {
    831             if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
    832                 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f8u;
    833             else
    834                 y_func = (CvColumnFilterFunc)icvFilterCol_32f8u;
    835         }
    836         else if( CV_MAT_DEPTH(dst_type) == CV_16U )
    837         {
    838             if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
    839                 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16u;
    840             else
    841                 y_func = (CvColumnFilterFunc)icvFilterCol_32f16u;
    842         }
    843         else if( CV_MAT_DEPTH(dst_type) == CV_16S )
    844         {
    845             if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
    846                 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16s;
    847             else
    848                 y_func = (CvColumnFilterFunc)icvFilterCol_32f16s;
    849         }
    850         else if( CV_MAT_DEPTH(dst_type) == CV_32F )
    851         {
    852             if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) )
    853                 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f;
    854             else
    855                 y_func = (CvColumnFilterFunc)icvFilterCol_32f;
    856         }
    857         else
    858             CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" );
    859     }
    860 
    861     if( convert_filters )
    862     {
    863         int scale = kx_flags & ky_flags & INTEGER ? 1 : (1 << FILTER_BITS);
    864         int sum;
    865 
    866         for( i = sum = 0; i < xsz; i++ )
    867         {
    868             int t = cvRound(kx->data.fl[i]*scale);
    869             kx->data.i[i] = t;
    870             sum += t;
    871         }
    872         if( scale > 1 )
    873             kx->data.i[xsz/2] += scale - sum;
    874 
    875         for( i = sum = 0; i < ysz; i++ )
    876         {
    877             int t = cvRound(ky->data.fl[i]*scale);
    878             ky->data.i[i] = t;
    879             sum += t;
    880         }
    881         if( scale > 1 )
    882             ky->data.i[ysz/2] += scale - sum;
    883         kx->type = (kx->type & ~CV_MAT_DEPTH_MASK) | CV_32S;
    884         ky->type = (ky->type & ~CV_MAT_DEPTH_MASK) | CV_32S;
    885     }
    886 
    887     __END__;
    888 }
    889 
    890 
    // Initializes the separable filter by delegating to the base class: all
    // eight arguments are handed to CvBaseImageFilter::init() unchanged and in
    // the same order. Nothing else is done here.
    891 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type,
    892                         bool _is_separable, CvSize _ksize,
    893                         CvPoint _anchor, int _border_mode,
    894                         CvScalar _border_value )
    895 {
    896     CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable,
    897                              _ksize, _anchor, _border_mode, _border_value );
    898 }
    899 
    900 
    // Row (horizontal) pass: uchar source -> int output, integer x kernel.
    //
    //   src    - source row. Taps are read at offsets -(ksize/2) .. +(ksize/2)
    //            pixels around src + (ksize/2)*cn, so the row must provide
    //            width + 2*(ksize/2) pixels (presumably already border-extended
    //            by the caller -- TODO confirm).
    //   dst    - receives width*cn int results.
    //   params - the CvSepFilter supplying the x kernel, its flags, the row
    //            width and the source type (from which the channel count is taken).
    //
    // Only the centre and the right half of the kernel are read (kx[0..ksize/2]
    // once kx has been moved to the centre). When the SYMMETRICAL flag is set,
    // mirrored samples are added; otherwise they are subtracted.
    901 static void
    902 icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params )
    903 {
    904     const CvSepFilter* state = (const CvSepFilter*)params;
    905     const CvMat* _kx = state->get_x_kernel();
    906     const int* kx = _kx->data.i;
            // For a vector-shaped kernel (one of cols/rows is 1) this is its length.
    907     int ksize = _kx->cols + _kx->rows - 1;
    908     int i = 0, j, k, width = state->get_width();
    909     int cn = CV_MAT_CN(state->get_src_type());
    910     int ksize2 = ksize/2, ksize2n = ksize2*cn;
    911     int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL;
            // s points at the sample under the centre tap of the first output.
    912     const uchar* s = src + ksize2n;
    913 
            // kx[0] is now the centre tap; width counts elements, not pixels.
    914     kx += ksize2;
    915     width *= cn;
    916 
    917     if( is_symm )
    918     {
                // Single tap equal to 1: just widen the samples to int.
    919         if( ksize == 1 && kx[0] == 1 )
    920         {
    921             for( i = 0; i <= width - 2; i += 2 )
    922             {
    923                 int s0 = s[i], s1 = s[i+1];
    924                 dst[i] = s0; dst[i+1] = s1;
    925             }
                    // The loop above indexed s[i] without advancing s; catch it
                    // up so the tail loop below continues at the right sample.
    926             s += i;
    927         }
    928         else if( ksize == 3 )
    929         {
                    // 3-tap kernel [1 2 1].
    930             if( kx[0] == 2 && kx[1] == 1 )
    931                 for( ; i <= width - 2; i += 2, s += 2 )
    932                 {
    933                     int s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn];
    934                     dst[i] = s0; dst[i+1] = s1;
    935                 }
                    // 3-tap kernel [3 10 3].
    936             else if( kx[0] == 10 && kx[1] == 3 )
    937                 for( ; i <= width - 2; i += 2, s += 2 )
    938                 {
    939                     int s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3;
    940                     dst[i] = s0; dst[i+1] = s1;
    941                 }
                    // 3-tap kernel [64 128 64], i.e. [1 2 1] shifted left by 6 bits.
    942             else if( kx[0] == 2*64 && kx[1] == 1*64 )
    943                 for( ; i <= width - 2; i += 2, s += 2 )
    944                 {
    945                     int s0 = (s[0]*2 + s[-cn] + s[cn]) << 6;
    946                     int s1 = (s[1]*2 + s[1-cn] + s[1+cn]) << 6;
    947                     dst[i] = s0; dst[i+1] = s1;
    948                 }
                    // Any other symmetric 3-tap kernel.
    949             else
    950             {
    951                 int k0 = kx[0], k1 = kx[1];
    952                 for( ; i <= width - 2; i += 2, s += 2 )
    953                 {
    954                     int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1, s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1;
    955                     dst[i] = s0; dst[i+1] = s1;
    956                 }
    957             }
    958         }
    959         else if( ksize == 5 )
    960         {
    961             int k0 = kx[0], k1 = kx[1], k2 = kx[2];
                    // 5-tap kernel [16 64 96 64 16], i.e. [1 4 6 4 1] shifted left by 4 bits.
    962             if( k0 == 6*16 && k1 == 4*16 && k2 == 1*16 )
    963                 for( ; i <= width - 2; i += 2, s += 2 )
    964                 {
    965                     int s0 = (s[0]*6 + (s[-cn] + s[cn])*4 + (s[-cn*2] + s[cn*2])*1) << 4;
    966                     int s1 = (s[1]*6 + (s[1-cn] + s[1+cn])*4 + (s[1-cn*2] + s[1+cn*2])*1) << 4;
    967                     dst[i] = s0; dst[i+1] = s1;
    968                 }
                    // Any other symmetric 5-tap kernel.
    969             else
    970                 for( ; i <= width - 2; i += 2, s += 2 )
    971                 {
    972                     int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1 + (s[-cn*2] + s[cn*2])*k2;
    973                     int s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1 + (s[1-cn*2] + s[1+cn*2])*k2;
    974                     dst[i] = s0; dst[i+1] = s1;
    975                 }
    976         }
                // Every other kernel size: four outputs per iteration.
    977         else
    978             for( ; i <= width - 4; i += 4, s += 4 )
    979             {
    980                 int f = kx[0];
    981                 int s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3];
    982                 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
    983                 {
    984                     f = kx[k];
    985                     s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]);
    986                     s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]);
    987                 }
    988 
    989                 dst[i] = s0; dst[i+1] = s1;
    990                 dst[i+2] = s2; dst[i+3] = s3;
    991             }
    992 
                // Outputs left over by whichever unrolled loop ran above.
    993         for( ; i < width; i++, s++ )
    994         {
    995             int s0 = kx[0]*s[0];
    996             for( k = 1, j = cn; k <= ksize2; k++, j += cn )
    997                 s0 += kx[k]*(s[j] + s[-j]);
    998             dst[i] = s0;
    999         }
   1000     }
   1001     else
   1002     {
                // 3-tap kernel with centre 0 and right tap 1: right neighbour
                // minus left neighbour.
   1003         if( ksize == 3 && kx[0] == 0 && kx[1] == 1 )
   1004             for( ; i <= width - 2; i += 2, s += 2 )
   1005             {
   1006                 int s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn];
   1007                 dst[i] = s0; dst[i+1] = s1;
   1008             }
                // General case, four outputs per iteration; the centre tap is
                // not used in this loop.
   1009         else
   1010             for( ; i <= width - 4; i += 4, s += 4 )
   1011             {
   1012                 int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
   1013                 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1014                 {
   1015                     int f = kx[k];
   1016                     s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]);
   1017                     s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]);
   1018                 }
   1019 
   1020                 dst[i] = s0; dst[i+1] = s1;
   1021                 dst[i+2] = s2; dst[i+3] = s3;
   1022             }
   1023 
                // Leftover outputs.
                // NOTE(review): unlike the unrolled loop above, this one adds
                // kx[0]*s[0]; the two agree only when the centre tap is 0.
   1024         for( ; i < width; i++, s++ )
   1025         {
   1026             int s0 = kx[0]*s[0];
   1027             for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1028                 s0 += kx[k]*(s[j] - s[-j]);
   1029             dst[i] = s0;
   1030         }
   1031     }
   1032 }
   1033 
   1034 
   // Generates icvFilterRow_<flavor>(): the general row (horizontal) pass that
   // makes no use of kernel symmetry.
   //
   //   flavor     - suffix of the generated function name
   //   srctype    - element type of the source row
   //   dsttype    - element type of the output row; the x kernel is read as
   //                an array of this type as well
   //   load_macro - applied to every source element before it is multiplied
   //
   // For each of the width*cn outputs the generated function computes
   //     dst[i] = sum over k in [0, ksize) of kx[k] * load_macro(src[i + k*cn])
   // accumulating in double and casting the result to dsttype. Four outputs
   // are produced per iteration; a scalar loop finishes the remaining ones.
   // The source row is read up to index width*cn - 1 + (ksize - 1)*cn.
   1035 #define ICV_FILTER_ROW( flavor, srctype, dsttype, load_macro )      \
   1036 static void                                                         \
   1037 icvFilterRow_##flavor(const srctype* src, dsttype* dst, void*params)\
   1038 {                                                                   \
   1039     const CvSepFilter* state = (const CvSepFilter*)params;          \
   1040     const CvMat* _kx = state->get_x_kernel();                       \
   1041     const dsttype* kx = (const dsttype*)(_kx->data.ptr);            \
   1042     int ksize = _kx->cols + _kx->rows - 1;                          \
   1043     int i = 0, k, width = state->get_width();                       \
   1044     int cn = CV_MAT_CN(state->get_src_type());                      \
   1045     const srctype* s;                                               \
   1046                                                                     \
   1047     width *= cn;                                                    \
   1048                                                                     \
   1049     for( ; i <= width - 4; i += 4 )                                 \
   1050     {                                                               \
   1051         double f = kx[0];                                           \
   1052         double s0=f*load_macro(src[i]), s1=f*load_macro(src[i+1]),  \
   1053                 s2=f*load_macro(src[i+2]), s3=f*load_macro(src[i+3]);\
   1054         for( k = 1, s = src + i + cn; k < ksize; k++, s += cn )     \
   1055         {                                                           \
   1056             f = kx[k];                                              \
   1057             s0 += f*load_macro(s[0]);                               \
   1058             s1 += f*load_macro(s[1]);                               \
   1059             s2 += f*load_macro(s[2]);                               \
   1060             s3 += f*load_macro(s[3]);                               \
   1061         }                                                           \
   1062         dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1;               \
   1063         dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3;             \
   1064     }                                                               \
   1065                                                                     \
   1066     for( ; i < width; i++ )                                         \
   1067     {                                                               \
   1068         double s0 = (double)kx[0]*load_macro(src[i]);               \
   1069         for( k = 1, s = src + i + cn; k < ksize; k++, s += cn )     \
   1070             s0 += (double)kx[k]*load_macro(s[0]);                   \
   1071         dst[i] = (dsttype)s0;                                       \
   1072     }                                                               \
   1073 }
   1074 
   1075 
   // General row filters with float output for uchar, short, ushort and float
   // source rows: icvFilterRow_8u32f, _16s32f, _16u32f and _32f. Only the
   // uchar variant loads samples through CV_8TO32F; the others use CV_NOP.
   1076 ICV_FILTER_ROW( 8u32f, uchar, float, CV_8TO32F )
   1077 ICV_FILTER_ROW( 16s32f, short, float, CV_NOP )
   1078 ICV_FILTER_ROW( 16u32f, ushort, float, CV_NOP )
   1079 ICV_FILTER_ROW( 32f, float, float, CV_NOP )
   1080 
   1081 
   // Generates icvFilterRowSymm_<flavor>(): the row (horizontal) pass that
   // exploits kernel symmetry by reading only the centre and right half of
   // the x kernel.
   //
   //   flavor     - suffix of the generated function name
   //   srctype    - element type of the source row
   //   dsttype    - element type of the output row; the x kernel is read as
   //                an array of this type as well
   //   load_macro - applied to the centre sample and to the SUM or DIFFERENCE
   //                of each mirrored pair of samples (not to each sample alone)
   //
   // With s = src + (ksize/2)*cn + i and kx moved to the centre tap:
   //   SYMMETRICAL flag set:
   //     dst[i] = kx[0]*load_macro(s[0])
   //            + sum over k in [1, ksize/2] of kx[k]*load_macro(s[k*cn] + s[-k*cn])
   //   otherwise (the centre tap is not used):
   //     dst[i] = sum over k in [1, ksize/2] of kx[k]*load_macro(s[k*cn] - s[-k*cn])
   // Sums are accumulated in double and cast to dsttype. Four outputs are
   // produced per iteration; a scalar loop finishes the remaining ones. The
   // source row must provide ksize/2 pixels on each side of every output.
   1082 #define ICV_FILTER_ROW_SYMM( flavor, srctype, dsttype, load_macro ) \
   1083 static void                                                         \
   1084 icvFilterRowSymm_##flavor( const srctype* src,                      \
   1085                            dsttype* dst, void* params )             \
   1086 {                                                                   \
   1087     const CvSepFilter* state = (const CvSepFilter*)params;          \
   1088     const CvMat* _kx = state->get_x_kernel();                       \
   1089     const dsttype* kx = (const dsttype*)(_kx->data.ptr);            \
   1090     int ksize = _kx->cols + _kx->rows - 1;                          \
   1091     int i = 0, j, k, width = state->get_width();                    \
   1092     int cn = CV_MAT_CN(state->get_src_type());                      \
   1093     int is_symm=state->get_x_kernel_flags()&CvSepFilter::SYMMETRICAL;\
   1094     int ksize2 = ksize/2, ksize2n = ksize2*cn;                      \
   1095     const srctype* s = src + ksize2n;                               \
   1096                                                                     \
   1097     kx += ksize2;                                                   \
   1098     width *= cn;                                                    \
   1099                                                                     \
   1100     if( is_symm )                                                   \
   1101     {                                                               \
   1102         for( ; i <= width - 4; i += 4, s += 4 )                     \
   1103         {                                                           \
   1104             double f = kx[0];                                       \
   1105             double s0=f*load_macro(s[0]), s1=f*load_macro(s[1]),    \
   1106                    s2=f*load_macro(s[2]), s3=f*load_macro(s[3]);    \
   1107             for( k = 1, j = cn; k <= ksize2; k++, j += cn )         \
   1108             {                                                       \
   1109                 f = kx[k];                                          \
   1110                 s0 += f*load_macro(s[j] + s[-j]);                   \
   1111                 s1 += f*load_macro(s[j+1] + s[-j+1]);               \
   1112                 s2 += f*load_macro(s[j+2] + s[-j+2]);               \
   1113                 s3 += f*load_macro(s[j+3] + s[-j+3]);               \
   1114             }                                                       \
   1115                                                                     \
   1116             dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1;           \
   1117             dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3;         \
   1118         }                                                           \
   1119                                                                     \
   1120         for( ; i < width; i++, s++ )                                \
   1121         {                                                           \
   1122             double s0 = (double)kx[0]*load_macro(s[0]);             \
   1123             for( k = 1, j = cn; k <= ksize2; k++, j += cn )         \
   1124                 s0 += (double)kx[k]*load_macro(s[j] + s[-j]);       \
   1125             dst[i] = (dsttype)s0;                                   \
   1126         }                                                           \
   1127     }                                                               \
   1128     else                                                            \
   1129     {                                                               \
   1130         for( ; i <= width - 4; i += 4, s += 4 )                     \
   1131         {                                                           \
   1132             double s0 = 0, s1 = 0, s2 = 0, s3 = 0;                  \
   1133             for( k = 1, j = cn; k <= ksize2; k++, j += cn )         \
   1134             {                                                       \
   1135                 double f = kx[k];                                   \
   1136                 s0 += f*load_macro(s[j] - s[-j]);                   \
   1137                 s1 += f*load_macro(s[j+1] - s[-j+1]);               \
   1138                 s2 += f*load_macro(s[j+2] - s[-j+2]);               \
   1139                 s3 += f*load_macro(s[j+3] - s[-j+3]);               \
   1140             }                                                       \
   1141                                                                     \
   1142             dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1;           \
   1143             dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3;         \
   1144         }                                                           \
   1145                                                                     \
   1146         for( ; i < width; i++, s++ )                                \
   1147         {                                                           \
   1148             double s0 = 0;                                          \
   1149             for( k = 1, j = cn; k <= ksize2; k++, j += cn )         \
   1150                 s0 += (double)kx[k]*load_macro(s[j] - s[-j]);       \
   1151             dst[i] = (dsttype)s0;                                   \
   1152         }                                                           \
   1153     }                                                               \
   1154 }
   1155 
   1156 
   // Symmetry-exploiting row filters with float output for uchar, short and
   // ushort source rows: icvFilterRowSymm_8u32f, _16s32f and _16u32f. The
   // float -> float variant is not generated here; it is written by hand below.
   1157 ICV_FILTER_ROW_SYMM( 8u32f, uchar, float, CV_8TO32F )
   1158 ICV_FILTER_ROW_SYMM( 16s32f, short, float, CV_NOP )
   1159 ICV_FILTER_ROW_SYMM( 16u32f, ushort, float, CV_NOP )
   1160 
   // Row (horizontal) pass: float source -> float output, float x kernel,
   // exploiting kernel symmetry.
   //
   //   src    - source row. Taps are read at offsets -(ksize/2) .. +(ksize/2)
   //            pixels around src + (ksize/2)*cn, so the row must provide
   //            width + 2*(ksize/2) pixels (presumably already border-extended
   //            by the caller -- TODO confirm).
   //   dst    - receives width*cn float results.
   //   params - the CvSepFilter supplying the x kernel, its flags, the row
   //            width and the source type (from which the channel count is taken).
   //
   // When the SYMMETRICAL flag is set mirrored samples are added, otherwise
   // they are subtracted. The special-cased 3-tap kernels are evaluated in
   // float arithmetic; the general loops accumulate in double and cast to float.
   1161 static void
   1162 icvFilterRowSymm_32f( const float* src, float* dst, void* params )
   1163 {
   1164     const CvSepFilter* state = (const CvSepFilter*)params;
   1165     const CvMat* _kx = state->get_x_kernel();
   1166     const float* kx = _kx->data.fl;
   1167     int ksize = _kx->cols + _kx->rows - 1;
   1168     int i = 0, j, k, width = state->get_width();
   1169     int cn = CV_MAT_CN(state->get_src_type());
   1170     int ksize2 = ksize/2, ksize2n = ksize2*cn;
   1171     int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL;
            // s points at the sample under the centre tap of the first output.
   1172     const float* s = src + ksize2n;
   1173 
            // kx[0] is now the centre tap; width counts elements, not pixels.
   1174     kx += ksize2;
   1175     width *= cn;
   1176 
   1177     if( is_symm )
   1178     {
                // 3-tap kernel [1 2 1] (matched within FLT_EPSILON).
   1179         if( ksize == 3 && fabs(kx[0]-2.) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON )
   1180             for( ; i <= width - 2; i += 2, s += 2 )
   1181             {
   1182                 float s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn];
   1183                 dst[i] = s0; dst[i+1] = s1;
   1184             }
                // 3-tap kernel [3 10 3] (matched within FLT_EPSILON).
   1185         else if( ksize == 3 && fabs(kx[0]-10.) <= FLT_EPSILON && fabs(kx[1]-3.) <= FLT_EPSILON )
   1186             for( ; i <= width - 2; i += 2, s += 2 )
   1187             {
   1188                 float s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3;
   1189                 dst[i] = s0; dst[i+1] = s1;
   1190             }
                // General symmetric kernel: four outputs per iteration.
   1191         else
   1192             for( ; i <= width - 4; i += 4, s += 4 )
   1193             {
   1194                 double f = kx[0];
   1195                 double s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3];
   1196                 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1197                 {
   1198                     f = kx[k];
   1199                     s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]);
   1200                     s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]);
   1201                 }
   1202 
   1203                 dst[i] = (float)s0; dst[i+1] = (float)s1;
   1204                 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
   1205             }
   1206 
                // Outputs left over by whichever unrolled loop ran above.
   1207         for( ; i < width; i++, s++ )
   1208         {
   1209             double s0 = (double)kx[0]*s[0];
   1210             for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1211                 s0 += (double)kx[k]*(s[j] + s[-j]);
   1212             dst[i] = (float)s0;
   1213         }
   1214     }
   1215     else
   1216     {
                // 3-tap kernel with centre 0 and right tap 1 (within FLT_EPSILON):
                // right neighbour minus left neighbour.
   1217         if( ksize == 3 && fabs(kx[0]) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON )
   1218             for( ; i <= width - 2; i += 2, s += 2 )
   1219             {
   1220                 float s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn];
   1221                 dst[i] = s0; dst[i+1] = s1;
   1222             }
                // General case, four outputs per iteration; the centre tap is
                // not used in this loop.
   1223         else
   1224             for( ; i <= width - 4; i += 4, s += 4 )
   1225             {
   1226                 double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
   1227                 for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1228                 {
   1229                     double f = kx[k];
   1230                     s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]);
   1231                     s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]);
   1232                 }
   1233 
   1234                 dst[i] = (float)s0; dst[i+1] = (float)s1;
   1235                 dst[i+2] = (float)s2; dst[i+3] = (float)s3;
   1236             }
   1237 
                // Leftover outputs.
                // NOTE(review): unlike the unrolled loop above, this one adds
                // kx[0]*s[0]; the two agree only when the centre tap is 0.
   1238         for( ; i < width; i++, s++ )
   1239         {
   1240             double s0 = (double)kx[0]*s[0];
   1241             for( k = 1, j = cn; k <= ksize2; k++, j += cn )
   1242                 s0 += (double)kx[k]*(s[j] - s[-j]);
   1243             dst[i] = (float)s0;
   1244         }
   1245     }
   1246 }
   1247 
   1248 
   // Column (vertical) pass: int intermediate rows -> uchar output, integer
   // y kernel, mirrored rows always added (there is no subtracting branch).
   //
   //   src      - array of row pointers. After src is advanced by ksize/2, rows
   //              src[-ksize/2] .. src[+ksize/2] are combined for each output
   //              row, so count + 2*(ksize/2) valid pointers are required.
   //   dst      - first output row.
   //   dst_step - added to dst (a uchar pointer) after every output row.
   //   count    - number of output rows to produce.
   //   params   - the CvSepFilter supplying the y kernel, the row width and the
   //              source type (from which the channel count is taken).
   //
   // Every sum is passed through CV_DESCALE(..., FILTER_BITS*2) and then
   // narrowed with a plain (uchar) cast; no saturation is applied here.
   // NOTE(review): the kernel-setup code selects this function only when both
   // kernels are symmetrical, positive and sum to 1, and scales both by
   // 1 << FILTER_BITS, which presumably keeps results within 0..255 -- confirm.
   1249 static void
   1250 icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step, int count, void* params )
   1251 {
   1252     const CvSepFilter* state = (const CvSepFilter*)params;
   1253     const CvMat* _ky = state->get_y_kernel();
   1254     const int* ky = _ky->data.i;
   1255     int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
   1256     int i, k, width = state->get_width();
   1257     int cn = CV_MAT_CN(state->get_src_type());
   1258 
            // width counts elements; src[0] and ky[0] become the centre row / tap.
   1259     width *= cn;
   1260     src += ksize2;
   1261     ky += ksize2;
   1262 
   1263     for( ; count--; dst += dst_step, src++ )
   1264     {
                // 3-tap kernel: row pointers and taps hoisted out of the pixel loop.
   1265         if( ksize == 3 )
   1266         {
   1267             const int* sptr0 = src[-1], *sptr1 = src[0], *sptr2 = src[1];
   1268             int k0 = ky[0], k1 = ky[1];
   1269             for( i = 0; i <= width - 2; i += 2 )
   1270             {
   1271                 int s0 = sptr1[i]*k0 + (sptr0[i] + sptr2[i])*k1;
   1272                 int s1 = sptr1[i+1]*k0 + (sptr0[i+1] + sptr2[i+1])*k1;
   1273                 s0 = CV_DESCALE(s0, FILTER_BITS*2);
   1274                 s1 = CV_DESCALE(s1, FILTER_BITS*2);
   1275                 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
   1276             }
   1277         }
                // 5-tap kernel, same idea.
   1278         else if( ksize == 5 )
   1279         {
   1280             const int* sptr0 = src[-2], *sptr1 = src[-1],
   1281                 *sptr2 = src[0], *sptr3 = src[1], *sptr4 = src[2];
   1282             int k0 = ky[0], k1 = ky[1], k2 = ky[2];
   1283             for( i = 0; i <= width - 2; i += 2 )
   1284             {
   1285                 int s0 = sptr2[i]*k0 + (sptr1[i] + sptr3[i])*k1 + (sptr0[i] + sptr4[i])*k2;
   1286                 int s1 = sptr2[i+1]*k0 + (sptr1[i+1] + sptr3[i+1])*k1 + (sptr0[i+1] + sptr4[i+1])*k2;
   1287                 s0 = CV_DESCALE(s0, FILTER_BITS*2);
   1288                 s1 = CV_DESCALE(s1, FILTER_BITS*2);
   1289                 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
   1290             }
   1291         }
                // Any other kernel size: four outputs per iteration.
   1292         else
   1293             for( i = 0; i <= width - 4; i += 4 )
   1294             {
   1295                 int f = ky[0];
   1296                 const int* sptr = src[0] + i, *sptr2;
   1297                 int s0 = f*sptr[0], s1 = f*sptr[1], s2 = f*sptr[2], s3 = f*sptr[3];
   1298                 for( k = 1; k <= ksize2; k++ )
   1299                 {
   1300                     sptr = src[k] + i;
   1301                     sptr2 = src[-k] + i;
   1302                     f = ky[k];
   1303                     s0 += f*(sptr[0] + sptr2[0]);
   1304                     s1 += f*(sptr[1] + sptr2[1]);
   1305                     s2 += f*(sptr[2] + sptr2[2]);
   1306                     s3 += f*(sptr[3] + sptr2[3]);
   1307                 }
   1308 
   1309                 s0 = CV_DESCALE(s0, FILTER_BITS*2);
   1310                 s1 = CV_DESCALE(s1, FILTER_BITS*2);
   1311                 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1;
   1312                 s2 = CV_DESCALE(s2, FILTER_BITS*2);
   1313                 s3 = CV_DESCALE(s3, FILTER_BITS*2);
   1314                 dst[i+2] = (uchar)s2; dst[i+3] = (uchar)s3;
   1315             }
   1316 
                // Outputs left over by whichever unrolled loop ran above
                // (every branch leaves i at the first unprocessed element).
   1317         for( ; i < width; i++ )
   1318         {
   1319             int s0 = ky[0]*src[0][i];
   1320             for( k = 1; k <= ksize2; k++ )
   1321                 s0 += ky[k]*(src[k][i] + src[-k][i]);
   1322 
   1323             s0 = CV_DESCALE(s0, FILTER_BITS*2);
   1324             dst[i] = (uchar)s0;
   1325         }
   1326     }
   1327 }
   1328 
   1329 
   // Column (vertical) pass: int intermediate rows -> short output, integer
   // y kernel, exploiting kernel symmetry.
   //
   //   src      - array of row pointers. After src is advanced by ksize/2, rows
   //              src[-ksize/2] .. src[+ksize/2] are combined for each output
   //              row, so count + 2*(ksize/2) valid pointers are required.
   //   dst      - first output row.
   //   dst_step - distance between output rows in bytes; converted below to a
   //              count of short elements.
   //   count    - number of output rows to produce.
   //   params   - the CvSepFilter supplying the y kernel, its flags, the row
   //              width and the source type (from which the channel count is taken).
   //
   // When the SYMMETRICAL flag is set mirrored rows are added, otherwise they
   // are subtracted. Every result is narrowed with CV_CAST_16S. The fast paths
   // for the [1 2 1], [3 10 3] and [-1 0 1]/[1 0 -1] kernels used to narrow
   // with a plain (short) cast, so an out-of-range sum wrapped around there
   // while the general loop and the leftover-pixel loop of the very same row
   // went through CV_CAST_16S; all paths now use the same conversion.
   1330 static void
   1331 icvFilterColSymm_32s16s( const int** src, short* dst,
   1332                          int dst_step, int count, void* params )
   1333 {
   1334     const CvSepFilter* state = (const CvSepFilter*)params;
   1335     const CvMat* _ky = state->get_y_kernel();
   1336     const int* ky = (const int*)_ky->data.ptr;
   1337     int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
   1338     int i = 0, k, width = state->get_width();
   1339     int cn = CV_MAT_CN(state->get_src_type());
   1340     int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;
            // ky has not been moved to the centre yet, so for a 3-tap kernel
            // ky[1] is the centre tap and ky[2] the tap after it.
   1341     int is_1_2_1 = is_symm && ksize == 3 && ky[1] == 2 && ky[2] == 1;
   1342     int is_3_10_3 = is_symm && ksize == 3 && ky[1] == 10 && ky[2] == 3;
            // 0 when the fast path does not apply, otherwise the sign of the
            // last tap: +1 for [-1 0 1], -1 for [1 0 -1].
   1343     int is_m1_0_1 = !is_symm && ksize == 3 && ky[1] == 0 &&
   1344         ky[2]*ky[2] == 1 ? (ky[2] > 0 ? 1 : -1) : 0;
   1345 
            // width counts elements; src[0] and ky[0] become the centre row / tap.
   1346     width *= cn;
   1347     src += ksize2;
   1348     ky += ksize2;
   1349     dst_step /= sizeof(dst[0]);
   1350 
   1351     if( is_symm )
   1352     {
   1353         for( ; count--; dst += dst_step, src++ )
   1354         {
   1355             if( is_1_2_1 )
   1356             {
   1357                 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1];
   1358 
   1359                 for( i = 0; i <= width - 2; i += 2 )
   1360                 {
   1361                     int s0 = src0[i] + src1[i]*2 + src2[i],
   1362                         s1 = src0[i+1] + src1[i+1]*2 + src2[i+1];
   1363 
   1364                     dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
   1365                 }
   1366             }
   1367             else if( is_3_10_3 )
   1368             {
   1369                 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1];
   1370 
   1371                 for( i = 0; i <= width - 2; i += 2 )
   1372                 {
   1373                     int s0 = src1[i]*10 + (src0[i] + src2[i])*3,
   1374                         s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3;
   1375 
   1376                     dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
   1377                 }
   1378             }
                    // Any other symmetric kernel: four outputs per iteration.
   1379             else
   1380                 for( i = 0; i <= width - 4; i += 4 )
   1381                 {
   1382                     int f = ky[0];
   1383                     const int* sptr = src[0] + i, *sptr2;
   1384                     int s0 = f*sptr[0], s1 = f*sptr[1],
   1385                         s2 = f*sptr[2], s3 = f*sptr[3];
   1386                     for( k = 1; k <= ksize2; k++ )
   1387                     {
   1388                         sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
   1389                         s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]);
   1390                         s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]);
   1391                     }
   1392 
   1393                     dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
   1394                     dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3);
   1395                 }
   1396 
                    // Outputs left over by whichever unrolled loop ran above.
   1397             for( ; i < width; i++ )
   1398             {
   1399                 int s0 = ky[0]*src[0][i];
   1400                 for( k = 1; k <= ksize2; k++ )
   1401                     s0 += ky[k]*(src[k][i] + src[-k][i]);
   1402                 dst[i] = CV_CAST_16S(s0);
   1403             }
   1404         }
   1405     }
   1406     else
   1407     {
   1408         for( ; count--; dst += dst_step, src++ )
   1409         {
   1410             if( is_m1_0_1 )
   1411             {
                        // The sign stored in is_m1_0_1 picks which neighbouring
                        // row is the minuend, so one loop serves both kernels.
   1412                 const int *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1];
   1413 
   1414                 for( i = 0; i <= width - 2; i += 2 )
   1415                 {
   1416                     int s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1];
   1417                     dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
   1418                 }
   1419             }
                    // General case, four outputs per iteration; the centre tap
                    // is not used in this loop.
   1420             else
   1421                 for( i = 0; i <= width - 4; i += 4 )
   1422                 {
   1423                     int f = ky[0];
   1424                     const int* sptr = src[0] + i, *sptr2;
   1425                     int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
   1426                     for( k = 1; k <= ksize2; k++ )
   1427                     {
   1428                         sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
   1429                         s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]);
   1430                         s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]);
   1431                     }
   1432 
   1433                     dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
   1434                     dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3);
   1435                 }
   1436 
                    // Leftover outputs.
                    // NOTE(review): unlike the unrolled loop above, this one adds
                    // ky[0]*src[0][i]; the two agree only when the centre tap is 0.
   1437             for( ; i < width; i++ )
   1438             {
   1439                 int s0 = ky[0]*src[0][i];
   1440                 for( k = 1; k <= ksize2; k++ )
   1441                     s0 += ky[k]*(src[k][i] - src[-k][i]);
   1442                 dst[i] = CV_CAST_16S(s0);
   1443             }
   1444         }
   1445     }
   1446 }
   1447 
   1448 
   1449 #define ICV_FILTER_COL( flavor, srctype, dsttype, worktype,     \
   1450                         cast_macro1, cast_macro2 )              \
   1451 static void                                                     \
   1452 icvFilterCol_##flavor( const srctype** src, dsttype* dst,       \
   1453                        int dst_step, int count, void* params )  \
   1454 {                                                               \
   1455     const CvSepFilter* state = (const CvSepFilter*)params;      \
   1456     const CvMat* _ky = state->get_y_kernel();                   \
   1457     const srctype* ky = (const srctype*)_ky->data.ptr;          \
   1458     int ksize = _ky->cols + _ky->rows - 1;                      \
   1459     int i, k, width = state->get_width();                       \
   1460     int cn = CV_MAT_CN(state->get_src_type());                  \
   1461                                                                 \
   1462     width *= cn;                                                \
   1463     dst_step /= sizeof(dst[0]);                                 \
   1464                                                                 \
   1465     for( ; count--; dst += dst_step, src++ )                    \
   1466     {                                                           \
   1467         for( i = 0; i <= width - 4; i += 4 )                    \
   1468         {                                                       \
   1469             double f = ky[0];                                   \
   1470             const srctype* sptr = src[0] + i;                   \
   1471             double s0 = f*sptr[0], s1 = f*sptr[1],              \
   1472                    s2 = f*sptr[2], s3 = f*sptr[3];              \
   1473             worktype t0, t1;                                    \
   1474             for( k = 1; k < ksize; k++ )                        \
   1475             {                                                   \
   1476                 sptr = src[k] + i; f = ky[k];                   \
   1477                 s0 += f*sptr[0]; s1 += f*sptr[1];               \
   1478                 s2 += f*sptr[2]; s3 += f*sptr[3];               \
   1479             }                                                   \
   1480                                                                 \
   1481             t0 = cast_macro1(s0); t1 = cast_macro1(s1);         \
   1482             dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1);   \
   1483             t0 = cast_macro1(s2); t1 = cast_macro1(s3);         \
   1484             dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
   1485         }                                                       \
   1486                                                                 \
   1487         for( ; i < width; i++ )                                 \
   1488         {                                                       \
   1489             double s0 = (double)ky[0]*src[0][i];                \
   1490             worktype t0;                                        \
   1491             for( k = 1; k < ksize; k++ )                        \
   1492                 s0 += (double)ky[k]*src[k][i];                  \
   1493             t0 = cast_macro1(s0);                               \
   1494             dst[i] = cast_macro2(t0);                           \
   1495         }                                                       \
   1496     }                                                           \
   1497 }
   1498 
   1499 
   1500 ICV_FILTER_COL( 32f8u, float, uchar, int, cvRound, CV_CAST_8U )
   1501 ICV_FILTER_COL( 32f16s, float, short, int, cvRound, CV_CAST_16S )
   1502 ICV_FILTER_COL( 32f16u, float, ushort, int, cvRound, CV_CAST_16U )
   1503 
   1504 #define ICV_FILTER_COL_SYMM( flavor, srctype, dsttype, worktype,    \
   1505                              cast_macro1, cast_macro2 )             \
   1506 static void                                                         \
   1507 icvFilterColSymm_##flavor( const srctype** src, dsttype* dst,       \
   1508                            int dst_step, int count, void* params )  \
   1509 {                                                                   \
   1510     const CvSepFilter* state = (const CvSepFilter*)params;          \
   1511     const CvMat* _ky = state->get_y_kernel();                       \
   1512     const srctype* ky = (const srctype*)_ky->data.ptr;              \
   1513     int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;        \
   1514     int i, k, width = state->get_width();                           \
   1515     int cn = CV_MAT_CN(state->get_src_type());                      \
   1516     int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;\
   1517                                                                     \
   1518     width *= cn;                                                    \
   1519     src += ksize2;                                                  \
   1520     ky += ksize2;                                                   \
   1521     dst_step /= sizeof(dst[0]);                                     \
   1522                                                                     \
   1523     if( is_symm )                                                   \
   1524     {                                                               \
   1525         for( ; count--; dst += dst_step, src++ )                    \
   1526         {                                                           \
   1527             for( i = 0; i <= width - 4; i += 4 )                    \
   1528             {                                                       \
   1529                 double f = ky[0];                                   \
   1530                 const srctype* sptr = src[0] + i, *sptr2;           \
   1531                 double s0 = f*sptr[0], s1 = f*sptr[1],              \
   1532                        s2 = f*sptr[2], s3 = f*sptr[3];              \
   1533                 worktype t0, t1;                                    \
   1534                 for( k = 1; k <= ksize2; k++ )                      \
   1535                 {                                                   \
   1536                     sptr = src[k] + i;                              \
   1537                     sptr2 = src[-k] + i;                            \
   1538                     f = ky[k];                                      \
   1539                     s0 += f*(sptr[0] + sptr2[0]);                   \
   1540                     s1 += f*(sptr[1] + sptr2[1]);                   \
   1541                     s2 += f*(sptr[2] + sptr2[2]);                   \
   1542                     s3 += f*(sptr[3] + sptr2[3]);                   \
   1543                 }                                                   \
   1544                                                                     \
   1545                 t0 = cast_macro1(s0); t1 = cast_macro1(s1);         \
   1546                 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1);   \
   1547                 t0 = cast_macro1(s2); t1 = cast_macro1(s3);         \
   1548                 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
   1549             }                                                       \
   1550                                                                     \
   1551             for( ; i < width; i++ )                                 \
   1552             {                                                       \
   1553                 double s0 = (double)ky[0]*src[0][i];                \
   1554                 worktype t0;                                        \
   1555                 for( k = 1; k <= ksize2; k++ )                      \
   1556                     s0 += (double)ky[k]*(src[k][i] + src[-k][i]);   \
   1557                 t0 = cast_macro1(s0);                               \
   1558                 dst[i] = cast_macro2(t0);                           \
   1559             }                                                       \
   1560         }                                                           \
   1561     }                                                               \
   1562     else                                                            \
   1563     {                                                               \
   1564         for( ; count--; dst += dst_step, src++ )                    \
   1565         {                                                           \
   1566             for( i = 0; i <= width - 4; i += 4 )                    \
   1567             {                                                       \
   1568                 double f = ky[0];                                   \
   1569                 const srctype* sptr = src[0] + i, *sptr2;           \
   1570                 double s0 = 0, s1 = 0, s2 = 0, s3 = 0;              \
   1571                 worktype t0, t1;                                    \
   1572                 for( k = 1; k <= ksize2; k++ )                      \
   1573                 {                                                   \
   1574                     sptr = src[k] + i;                              \
   1575                     sptr2 = src[-k] + i;                            \
   1576                     f = ky[k];                                      \
   1577                     s0 += f*(sptr[0] - sptr2[0]);                   \
   1578                     s1 += f*(sptr[1] - sptr2[1]);                   \
   1579                     s2 += f*(sptr[2] - sptr2[2]);                   \
   1580                     s3 += f*(sptr[3] - sptr2[3]);                   \
   1581                 }                                                   \
   1582                                                                     \
   1583                 t0 = cast_macro1(s0); t1 = cast_macro1(s1);         \
   1584                 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1);   \
   1585                 t0 = cast_macro1(s2); t1 = cast_macro1(s3);         \
   1586                 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \
   1587             }                                                       \
   1588                                                                     \
   1589             for( ; i < width; i++ )                                 \
   1590             {                                                       \
   1591                 double s0 = (double)ky[0]*src[0][i];                \
   1592                 worktype t0;                                        \
   1593                 for( k = 1; k <= ksize2; k++ )                      \
   1594                     s0 += (double)ky[k]*(src[k][i] - src[-k][i]);   \
   1595                 t0 = cast_macro1(s0);                               \
   1596                 dst[i] = cast_macro2(t0);                           \
   1597             }                                                       \
   1598         }                                                           \
   1599     }                                                               \
   1600 }
   1601 
   1602 
   1603 ICV_FILTER_COL_SYMM( 32f8u, float, uchar, int, cvRound, CV_CAST_8U )
   1604 ICV_FILTER_COL_SYMM( 32f16s, float, short, int, cvRound, CV_CAST_16S )
   1605 ICV_FILTER_COL_SYMM( 32f16u, float, ushort, int, cvRound, CV_CAST_16U )
   1606 
   1607 
   1608 static void
   1609 icvFilterCol_32f( const float** src, float* dst,
   1610                   int dst_step, int count, void* params )
   1611 {
   1612     const CvSepFilter* state = (const CvSepFilter*)params;
   1613     const CvMat* _ky = state->get_y_kernel();
   1614     const float* ky = (const float*)_ky->data.ptr;
   1615     int ksize = _ky->cols + _ky->rows - 1;
   1616     int i, k, width = state->get_width();
   1617     int cn = CV_MAT_CN(state->get_src_type());
   1618 
   1619     width *= cn;
   1620     dst_step /= sizeof(dst[0]);
   1621 
   1622     for( ; count--; dst += dst_step, src++ )
   1623     {
   1624         for( i = 0; i <= width - 4; i += 4 )
   1625         {
   1626             double f = ky[0];
   1627             const float* sptr = src[0] + i;
   1628             double s0 = f*sptr[0], s1 = f*sptr[1],
   1629                    s2 = f*sptr[2], s3 = f*sptr[3];
   1630             for( k = 1; k < ksize; k++ )
   1631             {
   1632                 sptr = src[k] + i; f = ky[k];
   1633                 s0 += f*sptr[0]; s1 += f*sptr[1];
   1634                 s2 += f*sptr[2]; s3 += f*sptr[3];
   1635             }
   1636 
   1637             dst[i] = (float)s0; dst[i+1] = (float)s1;
   1638             dst[i+2] = (float)s2; dst[i+3] = (float)s3;
   1639         }
   1640 
   1641         for( ; i < width; i++ )
   1642         {
   1643             double s0 = (double)ky[0]*src[0][i];
   1644             for( k = 1; k < ksize; k++ )
   1645                 s0 += (double)ky[k]*src[k][i];
   1646             dst[i] = (float)s0;
   1647         }
   1648     }
   1649 }
   1650 
   1651 
   1652 static void
   1653 icvFilterColSymm_32f( const float** src, float* dst,
   1654                       int dst_step, int count, void* params )
   1655 {
   1656     const CvSepFilter* state = (const CvSepFilter*)params;
   1657     const CvMat* _ky = state->get_y_kernel();
   1658     const float* ky = (const float*)_ky->data.ptr;
   1659     int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2;
   1660     int i = 0, k, width = state->get_width();
   1661     int cn = CV_MAT_CN(state->get_src_type());
   1662     int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;
   1663     int is_1_2_1 = is_symm && ksize == 3 &&
   1664         fabs(ky[1] - 2.) <= FLT_EPSILON && fabs(ky[2] - 1.) <= FLT_EPSILON;
   1665     int is_3_10_3 = is_symm && ksize == 3 &&
   1666             fabs(ky[1] - 10.) <= FLT_EPSILON && fabs(ky[2] - 3.) <= FLT_EPSILON;
   1667     int is_m1_0_1 = !is_symm && ksize == 3 &&
   1668             fabs(ky[1]) <= FLT_EPSILON && fabs(ky[2]*ky[2] - 1.) <= FLT_EPSILON ?
   1669             (ky[2] > 0 ? 1 : -1) : 0;
   1670 
   1671     width *= cn;
   1672     src += ksize2;
   1673     ky += ksize2;
   1674     dst_step /= sizeof(dst[0]);
   1675 
   1676     if( is_symm )
   1677     {
   1678         for( ; count--; dst += dst_step, src++ )
   1679         {
   1680             if( is_1_2_1 )
   1681             {
   1682                 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1];
   1683 
   1684                 for( i = 0; i <= width - 4; i += 4 )
   1685                 {
   1686                     float s0 = src0[i] + src1[i]*2 + src2[i],
   1687                           s1 = src0[i+1] + src1[i+1]*2 + src2[i+1],
   1688                           s2 = src0[i+2] + src1[i+2]*2 + src2[i+2],
   1689                           s3 = src0[i+3] + src1[i+3]*2 + src2[i+3];
   1690 
   1691                     dst[i] = s0; dst[i+1] = s1;
   1692                     dst[i+2] = s2; dst[i+3] = s3;
   1693                 }
   1694             }
   1695             else if( is_3_10_3 )
   1696             {
   1697                 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1];
   1698 
   1699                 for( i = 0; i <= width - 4; i += 4 )
   1700                 {
   1701                     float s0 = src1[i]*10 + (src0[i] + src2[i])*3,
   1702                           s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3,
   1703                           s2 = src1[i+2]*10 + (src0[i+2] + src2[i+2])*3,
   1704                           s3 = src1[i+3]*10 + (src0[i+3] + src2[i+3])*3;
   1705 
   1706                     dst[i] = s0; dst[i+1] = s1;
   1707                     dst[i+2] = s2; dst[i+3] = s3;
   1708                 }
   1709             }
   1710             else
   1711                 for( i = 0; i <= width - 4; i += 4 )
   1712                 {
   1713                     double f = ky[0];
   1714                     const float* sptr = src[0] + i, *sptr2;
   1715                     double s0 = f*sptr[0], s1 = f*sptr[1],
   1716                            s2 = f*sptr[2], s3 = f*sptr[3];
   1717                     for( k = 1; k <= ksize2; k++ )
   1718                     {
   1719                         sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
   1720                         s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]);
   1721                         s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]);
   1722                     }
   1723 
   1724                     dst[i] = (float)s0; dst[i+1] = (float)s1;
   1725                     dst[i+2] = (float)s2; dst[i+3] = (float)s3;
   1726                 }
   1727 
   1728             for( ; i < width; i++ )
   1729             {
   1730                 double s0 = (double)ky[0]*src[0][i];
   1731                 for( k = 1; k <= ksize2; k++ )
   1732                     s0 += (double)ky[k]*(src[k][i] + src[-k][i]);
   1733                 dst[i] = (float)s0;
   1734             }
   1735         }
   1736     }
   1737     else
   1738     {
   1739         for( ; count--; dst += dst_step, src++ )
   1740         {
   1741             if( is_m1_0_1 )
   1742             {
   1743                 const float *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1];
   1744 
   1745                 for( i = 0; i <= width - 4; i += 4 )
   1746                 {
   1747                     float s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1],
   1748                           s2 = src2[i+2] - src0[i+2], s3 = src2[i+3] - src0[i+3];
   1749                     dst[i] = s0; dst[i+1] = s1;
   1750                     dst[i+2] = s2; dst[i+3] = s3;
   1751                 }
   1752             }
   1753             else
   1754                 for( i = 0; i <= width - 4; i += 4 )
   1755                 {
   1756                     double f = ky[0];
   1757                     const float* sptr = src[0] + i, *sptr2;
   1758                     double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
   1759                     for( k = 1; k <= ksize2; k++ )
   1760                     {
   1761                         sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k];
   1762                         s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]);
   1763                         s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]);
   1764                     }
   1765 
   1766                     dst[i] = (float)s0; dst[i+1] = (float)s1;
   1767                     dst[i+2] = (float)s2; dst[i+3] = (float)s3;
   1768                 }
   1769 
   1770             for( ; i < width; i++ )
   1771             {
   1772                 double s0 = (double)ky[0]*src[0][i];
   1773                 for( k = 1; k <= ksize2; k++ )
   1774                     s0 += (double)ky[k]*(src[k][i] - src[-k][i]);
   1775                 dst[i] = (float)s0;
   1776             }
   1777         }
   1778     }
   1779 }
   1780 
   1781 
   1782 #define SMALL_GAUSSIAN_SIZE  7
   1783 
   1784 void CvSepFilter::init_gaussian_kernel( CvMat* kernel, double sigma )
   1785 {
   1786     static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE/2+1] =
   1787     {
   1788         {1.f},
   1789         {0.5f, 0.25f},
   1790         {0.375f, 0.25f, 0.0625f},
   1791         {0.28125f, 0.21875f, 0.109375f, 0.03125f}
   1792     };
   1793 
   1794     CV_FUNCNAME( "CvSepFilter::init_gaussian_kernel" );
   1795 
   1796     __BEGIN__;
   1797 
   1798     int type, i, n, step;
   1799     const float* fixed_kernel = 0;
   1800     double sigmaX, scale2X, sum;
   1801     float* cf;
   1802     double* cd;
   1803 
   1804     if( !CV_IS_MAT(kernel) )
   1805         CV_ERROR( CV_StsBadArg, "kernel is not a valid matrix" );
   1806 
   1807     type = CV_MAT_TYPE(kernel->type);
   1808 
   1809     if( (kernel->cols != 1 && kernel->rows != 1) ||
   1810         (kernel->cols + kernel->rows - 1) % 2 == 0 ||
   1811         (type != CV_32FC1 && type != CV_64FC1) )
   1812         CV_ERROR( CV_StsBadSize, "kernel should be 1D floating-point vector of odd (2*k+1) size" );
   1813 
   1814     n = kernel->cols + kernel->rows - 1;
   1815 
   1816     if( n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 )
   1817         fixed_kernel = small_gaussian_tab[n>>1];
   1818 
   1819     sigmaX = sigma > 0 ? sigma : (n/2 - 1)*0.3 + 0.8;
   1820     scale2X = -0.5/(sigmaX*sigmaX);
   1821     step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
   1822     cf = kernel->data.fl;
   1823     cd = kernel->data.db;
   1824 
   1825     sum = fixed_kernel ? -fixed_kernel[0] : -1.;
   1826 
   1827     for( i = 0; i <= n/2; i++ )
   1828     {
   1829         double t = fixed_kernel ? (double)fixed_kernel[i] : exp(scale2X*i*i);
   1830         if( type == CV_32FC1 )
   1831         {
   1832             cf[(n/2+i)*step] = (float)t;
   1833             sum += cf[(n/2+i)*step]*2;
   1834         }
   1835         else
   1836         {
   1837             cd[(n/2+i)*step] = t;
   1838             sum += cd[(n/2+i)*step]*2;
   1839         }
   1840     }
   1841 
   1842     sum = 1./sum;
   1843     for( i = 0; i <= n/2; i++ )
   1844     {
   1845         if( type == CV_32FC1 )
   1846             cf[(n/2+i)*step] = cf[(n/2-i)*step] = (float)(cf[(n/2+i)*step]*sum);
   1847         else
   1848             cd[(n/2+i)*step] = cd[(n/2-i)*step] = cd[(n/2+i)*step]*sum;
   1849     }
   1850 
   1851     __END__;
   1852 }
   1853 
   1854 
   1855 void CvSepFilter::init_sobel_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags )
   1856 {
   1857     CV_FUNCNAME( "CvSepFilter::init_sobel_kernel" );
   1858 
   1859     __BEGIN__;
   1860 
   1861     int i, j, k, msz;
   1862     int* kerI;
   1863     bool normalize = (flags & NORMALIZE_KERNEL) != 0;
   1864     bool flip = (flags & FLIP_KERNEL) != 0;
   1865 
   1866     if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) )
   1867         CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" );
   1868 
   1869     msz = MAX( _kx->cols + _kx->rows, _ky->cols + _ky->rows );
   1870     if( msz > 32 )
   1871         CV_ERROR( CV_StsOutOfRange, "Too large kernel size" );
   1872 
   1873     kerI = (int*)cvStackAlloc( msz*sizeof(kerI[0]) );
   1874 
   1875     if( dx < 0 || dy < 0 || dx+dy <= 0 )
   1876         CV_ERROR( CV_StsOutOfRange,
   1877             "Both derivative orders (dx and dy) must be non-negative "
   1878             "and at least one of them must be positive." );
   1879 
   1880     for( k = 0; k < 2; k++ )
   1881     {
   1882         CvMat* kernel = k == 0 ? _kx : _ky;
   1883         int order = k == 0 ? dx : dy;
   1884         int n = kernel->cols + kernel->rows - 1, step;
   1885         int type = CV_MAT_TYPE(kernel->type);
   1886         double scale = !normalize ? 1. : 1./(1 << (n-order-1));
   1887         int iscale = 1;
   1888 
   1889         if( (kernel->cols != 1 && kernel->rows != 1) ||
   1890             (kernel->cols + kernel->rows - 1) % 2 == 0 ||
   1891             (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) )
   1892             CV_ERROR( CV_StsOutOfRange,
   1893             "Both kernels must be 1D floating-point or integer vectors of odd (2*k+1) size." );
   1894 
   1895         if( normalize && n > 1 && type == CV_32SC1 )
   1896             CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" );
   1897 
   1898         if( n <= order )
   1899             CV_ERROR( CV_StsOutOfRange,
   1900             "Derivative order must be smaller than the corresponding kernel size" );
   1901 
   1902         if( n == 1 )
   1903             kerI[0] = 1;
   1904         else if( n == 3 )
   1905         {
   1906             if( order == 0 )
   1907                 kerI[0] = 1, kerI[1] = 2, kerI[2] = 1;
   1908             else if( order == 1 )
   1909                 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
   1910             else
   1911                 kerI[0] = 1, kerI[1] = -2, kerI[2] = 1;
   1912         }
   1913         else
   1914         {
   1915             int oldval, newval;
   1916             kerI[0] = 1;
   1917             for( i = 0; i < n; i++ )
   1918                 kerI[i+1] = 0;
   1919 
   1920             for( i = 0; i < n - order - 1; i++ )
   1921             {
   1922                 oldval = kerI[0];
   1923                 for( j = 1; j <= n; j++ )
   1924                 {
   1925                     newval = kerI[j]+kerI[j-1];
   1926                     kerI[j-1] = oldval;
   1927                     oldval = newval;
   1928                 }
   1929             }
   1930 
   1931             for( i = 0; i < order; i++ )
   1932             {
   1933                 oldval = -kerI[0];
   1934                 for( j = 1; j <= n; j++ )
   1935                 {
   1936                     newval = kerI[j-1] - kerI[j];
   1937                     kerI[j-1] = oldval;
   1938                     oldval = newval;
   1939                 }
   1940             }
   1941         }
   1942 
   1943         step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
   1944         if( flip && (order & 1) && k )
   1945             iscale = -iscale, scale = -scale;
   1946 
   1947         for( i = 0; i < n; i++ )
   1948         {
   1949             if( type == CV_32SC1 )
   1950                 kernel->data.i[i*step] = kerI[i]*iscale;
   1951             else if( type == CV_32FC1 )
   1952                 kernel->data.fl[i*step] = (float)(kerI[i]*scale);
   1953             else
   1954                 kernel->data.db[i*step] = kerI[i]*scale;
   1955         }
   1956     }
   1957 
   1958     __END__;
   1959 }
   1960 
   1961 
   1962 void CvSepFilter::init_scharr_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags )
   1963 {
   1964     CV_FUNCNAME( "CvSepFilter::init_scharr_kernel" );
   1965 
   1966     __BEGIN__;
   1967 
   1968     int i, k;
   1969     int kerI[3];
   1970     bool normalize = (flags & NORMALIZE_KERNEL) != 0;
   1971     bool flip = (flags & FLIP_KERNEL) != 0;
   1972 
   1973     if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) )
   1974         CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" );
   1975 
   1976     if( ((dx|dy)&~1) || dx+dy != 1 )
   1977         CV_ERROR( CV_StsOutOfRange,
   1978             "Scharr kernel can only be used for 1st order derivatives" );
   1979 
   1980     for( k = 0; k < 2; k++ )
   1981     {
   1982         CvMat* kernel = k == 0 ? _kx : _ky;
   1983         int order = k == 0 ? dx : dy;
   1984         int n = kernel->cols + kernel->rows - 1, step;
   1985         int type = CV_MAT_TYPE(kernel->type);
   1986         double scale = !normalize ? 1. : order == 0 ? 1./16 : 1./2;
   1987         int iscale = 1;
   1988 
   1989         if( (kernel->cols != 1 && kernel->rows != 1) ||
   1990             kernel->cols + kernel->rows - 1 != 3 ||
   1991             (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) )
   1992             CV_ERROR( CV_StsOutOfRange,
   1993             "Both kernels must be 1D floating-point or integer vectors containing 3 elements each." );
   1994 
   1995         if( normalize && type == CV_32SC1 )
   1996             CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" );
   1997 
   1998         if( order == 0 )
   1999             kerI[0] = 3, kerI[1] = 10, kerI[2] = 3;
   2000         else
   2001             kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
   2002 
   2003         step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type);
   2004         if( flip && (order & 1) && k )
   2005             iscale = -iscale, scale = -scale;
   2006 
   2007         for( i = 0; i < n; i++ )
   2008         {
   2009             if( type == CV_32SC1 )
   2010                 kernel->data.i[i*step] = kerI[i]*iscale;
   2011             else if( type == CV_32FC1 )
   2012                 kernel->data.fl[i*step] = (float)(kerI[i]*scale);
   2013             else
   2014                 kernel->data.db[i*step] = kerI[i]*scale;
   2015         }
   2016     }
   2017 
   2018     __END__;
   2019 }
   2020 
   2021 
   2022 void CvSepFilter::init_deriv( int _max_width, int _src_type, int _dst_type,
   2023                               int dx, int dy, int aperture_size, int flags )
   2024 {
   2025     CV_FUNCNAME( "CvSepFilter::init_deriv" );
   2026 
   2027     __BEGIN__;
   2028 
   2029     int kx_size = aperture_size == CV_SCHARR ? 3 : aperture_size, ky_size = kx_size;
   2030     float kx_data[CV_MAX_SOBEL_KSIZE], ky_data[CV_MAX_SOBEL_KSIZE];
   2031     CvMat _kx, _ky;
   2032 
   2033     if( kx_size <= 0 || ky_size > CV_MAX_SOBEL_KSIZE )
   2034         CV_ERROR( CV_StsOutOfRange, "Incorrect aperture_size" );
   2035 
   2036     if( kx_size == 1 && dx )
   2037         kx_size = 3;
   2038     if( ky_size == 1 && dy )
   2039         ky_size = 3;
   2040 
   2041     _kx = cvMat( 1, kx_size, CV_32FC1, kx_data );
   2042     _ky = cvMat( 1, ky_size, CV_32FC1, ky_data );
   2043 
   2044     if( aperture_size == CV_SCHARR )
   2045     {
   2046         CV_CALL( init_scharr_kernel( &_kx, &_ky, dx, dy, flags ));
   2047     }
   2048     else
   2049     {
   2050         CV_CALL( init_sobel_kernel( &_kx, &_ky, dx, dy, flags ));
   2051     }
   2052 
   2053     CV_CALL( init( _max_width, _src_type, _dst_type, &_kx, &_ky ));
   2054 
   2055     __END__;
   2056 }
   2057 
   2058 
   2059 void CvSepFilter::init_gaussian( int _max_width, int _src_type, int _dst_type,
   2060                                  int gaussian_size, double sigma )
   2061 {
   2062     float* kdata = 0;
   2063 
   2064     CV_FUNCNAME( "CvSepFilter::init_gaussian" );
   2065 
   2066     __BEGIN__;
   2067 
   2068     CvMat _kernel;
   2069 
   2070     if( gaussian_size <= 0 || gaussian_size > 1024 )
   2071         CV_ERROR( CV_StsBadSize, "Incorrect size of gaussian kernel" );
   2072 
   2073     kdata = (float*)cvStackAlloc(gaussian_size*sizeof(kdata[0]));
   2074     _kernel = cvMat( 1, gaussian_size, CV_32F, kdata );
   2075 
   2076     CV_CALL( init_gaussian_kernel( &_kernel, sigma ));
   2077     CV_CALL( init( _max_width, _src_type, _dst_type, &_kernel, &_kernel ));
   2078 
   2079     __END__;
   2080 }
   2081 
   2082 
   2083 /****************************************************************************************\
   2084                               Non-separable Linear Filter
   2085 \****************************************************************************************/
   2086 
   2087 static void icvLinearFilter_8u( const uchar** src, uchar* dst, int dst_step,
   2088                                 int count, void* params );
   2089 static void icvLinearFilter_16s( const short** src, short* dst, int dst_step,
   2090                                  int count, void* params );
   2091 static void icvLinearFilter_16u( const ushort** src, ushort* dst, int dst_step,
   2092                                  int count, void* params );
   2093 static void icvLinearFilter_32f( const float** src, float* dst, int dst_step,
   2094                                  int count, void* params );
   2095 
   2096 CvLinearFilter::CvLinearFilter()
   2097 {
   2098     kernel = 0;
   2099     k_sparse = 0;
   2100 }
   2101 
   2102 CvLinearFilter::CvLinearFilter( int _max_width, int _src_type, int _dst_type,
   2103                                 const CvMat* _kernel, CvPoint _anchor,
   2104                                 int _border_mode, CvScalar _border_value )
   2105 {
   2106     kernel = 0;
   2107     k_sparse = 0;
   2108     init( _max_width, _src_type, _dst_type, _kernel,
   2109           _anchor, _border_mode, _border_value );
   2110 }
   2111 
   2112 
   2113 void CvLinearFilter::clear()
   2114 {
   2115     cvReleaseMat( &kernel );
   2116     cvFree( &k_sparse );
   2117     CvBaseImageFilter::clear();
   2118 }
   2119 
   2120 
   2121 CvLinearFilter::~CvLinearFilter()
   2122 {
   2123     clear();
   2124 }
   2125 
   2126 
   2127 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type,
   2128                            const CvMat* _kernel, CvPoint _anchor,
   2129                            int _border_mode, CvScalar _border_value )
   2130 {
   2131     CV_FUNCNAME( "CvLinearFilter::init" );
   2132 
   2133     __BEGIN__;
   2134 
   2135     int depth = CV_MAT_DEPTH(_src_type);
   2136     int cn = CV_MAT_CN(_src_type);
   2137     CvPoint* nz_loc;
   2138     float* coeffs;
   2139     int i, j, k = 0;
   2140 
   2141     if( !CV_IS_MAT(_kernel) )
   2142         CV_ERROR( CV_StsBadArg, "kernel is not valid matrix" );
   2143 
   2144     _src_type = CV_MAT_TYPE(_src_type);
   2145     _dst_type = CV_MAT_TYPE(_dst_type);
   2146 
   2147     if( _src_type != _dst_type )
   2148         CV_ERROR( CV_StsUnmatchedFormats,
   2149         "The source and destination image types must be the same" );
   2150 
   2151     CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type,
   2152         false, cvGetMatSize(_kernel), _anchor, _border_mode, _border_value ));
   2153 
   2154     if( !(kernel && k_sparse && ksize.width == kernel->cols && ksize.height == kernel->rows ))
   2155     {
   2156         cvReleaseMat( &kernel );
   2157         cvFree( &k_sparse );
   2158         CV_CALL( kernel = cvCreateMat( ksize.height, ksize.width, CV_32FC1 ));
   2159         CV_CALL( k_sparse = (uchar*)cvAlloc(
   2160             ksize.width*ksize.height*(2*sizeof(int) + sizeof(uchar*) + sizeof(float))));
   2161     }
   2162 
   2163     CV_CALL( cvConvert( _kernel, kernel ));
   2164 
   2165     nz_loc = (CvPoint*)k_sparse;
   2166     for( i = 0; i < ksize.height; i++ )
   2167     {
   2168         for( j = 0; j < ksize.width; j++ )
   2169             if( fabs(((float*)(kernel->data.ptr + i*kernel->step))[j])>FLT_EPSILON )
   2170                 nz_loc[k++] = cvPoint(j,i);
   2171     }
   2172     if( k == 0 )
   2173         nz_loc[k++] = anchor;
   2174     k_sparse_count = k;
   2175     coeffs = (float*)((uchar**)(nz_loc + k_sparse_count) + k_sparse_count);
   2176 
   2177     for( k = 0; k < k_sparse_count; k++ )
   2178     {
   2179         coeffs[k] = CV_MAT_ELEM( *kernel, float, nz_loc[k].y, nz_loc[k].x );
   2180         nz_loc[k].x *= cn;
   2181     }
   2182 
   2183     x_func = 0;
   2184     if( depth == CV_8U )
   2185         y_func = (CvColumnFilterFunc)icvLinearFilter_8u;
   2186     else if( depth == CV_16S )
   2187         y_func = (CvColumnFilterFunc)icvLinearFilter_16s;
   2188     else if( depth == CV_16U )
   2189         y_func = (CvColumnFilterFunc)icvLinearFilter_16u;
   2190     else if( depth == CV_32F )
   2191         y_func = (CvColumnFilterFunc)icvLinearFilter_32f;
   2192     else
   2193         CV_ERROR( CV_StsUnsupportedFormat, "Unsupported image type" );
   2194 
   2195     __END__;
   2196 }
   2197 
   2198 
   2199 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type,
   2200                            bool _is_separable, CvSize _ksize,
   2201                            CvPoint _anchor, int _border_mode,
   2202                            CvScalar _border_value )
   2203 {
   2204     CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable,
   2205                              _ksize, _anchor, _border_mode, _border_value );
   2206 }
   2207 
   2208 
   /*
      ICV_FILTER generates icvLinearFilter_<flavor>, the callback that applies
      the sparse kernel of a CvLinearFilter (passed through params) to `count`
      output rows.

        flavor       suffix of the generated function name
        arrtype      element type of the source and destination rows
        worktype     intermediate type produced by cast_macro1
        load_macro   applied to every source element before multiplication
        cast_macro1  converts the double accumulator to worktype
        cast_macro2  converts worktype to arrtype

      src is an array of row pointers that advances by one row per output row;
      dst_step arrives in bytes and is converted to elements. For every output
      row the per-tap source pointers are refreshed first, then the row is
      processed four elements at a time with a scalar tail loop.
   */
   #define ICV_FILTER( flavor, arrtype, worktype, load_macro,          \
                       cast_macro1, cast_macro2 )                      \
   static void                                                         \
   icvLinearFilter_##flavor( const arrtype** src, arrtype* dst,        \
                       int dst_step, int count, void* params )         \
   {                                                                   \
       CvLinearFilter* filter = (CvLinearFilter*)params;               \
       int row_len = filter->get_width();                              \
       const int channels = CV_MAT_CN(filter->get_src_type());         \
       CvPoint* taps = (CvPoint*)filter->get_kernel_sparse_buf();      \
       const int tap_count = filter->get_kernel_sparse_count();        \
       const arrtype** tap_rows = (const arrtype**)(taps + tap_count); \
       const float* weights = (const float*)(tap_rows + tap_count);    \
       int x, t;                                                       \
                                                                       \
       row_len *= channels;                                            \
       dst_step /= sizeof(dst[0]);                                     \
                                                                       \
       while( count > 0 )                                              \
       {                                                               \
           for( t = 0; t < tap_count; t++ )                            \
               tap_rows[t] = src[taps[t].y] + taps[t].x;               \
                                                                       \
           for( x = 0; x <= row_len - 4; x += 4 )                      \
           {                                                           \
               double acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0;          \
               worktype r0, r1, r2, r3;                                \
                                                                       \
               for( t = 0; t < tap_count; t++ )                        \
               {                                                       \
                   const arrtype* px = tap_rows[t] + x;                \
                   float w = weights[t];                               \
                   acc0 += w*load_macro(px[0]);                        \
                   acc1 += w*load_macro(px[1]);                        \
                   acc2 += w*load_macro(px[2]);                        \
                   acc3 += w*load_macro(px[3]);                        \
               }                                                       \
                                                                       \
               r0 = cast_macro1(acc0);                                 \
               r1 = cast_macro1(acc1);                                 \
               r2 = cast_macro1(acc2);                                 \
               r3 = cast_macro1(acc3);                                 \
               dst[x] = cast_macro2(r0);                               \
               dst[x+1] = cast_macro2(r1);                             \
               dst[x+2] = cast_macro2(r2);                             \
               dst[x+3] = cast_macro2(r3);                             \
           }                                                           \
                                                                       \
           for( ; x < row_len; x++ )                                   \
           {                                                           \
               double acc = 0;                                         \
               worktype r;                                             \
                                                                       \
               for( t = 0; t < tap_count; t++ )                        \
               {                                                       \
                   float w = weights[t];                               \
                   acc += w*load_macro(tap_rows[t][x]);                \
               }                                                       \
                                                                       \
               r = cast_macro1(acc);                                   \
               dst[x] = cast_macro2(r);                                \
           }                                                           \
                                                                       \
           count--;                                                    \
           dst += dst_step;                                            \
           src++;                                                      \
       }                                                               \
   }
   2277 
   2278 
   2279 ICV_FILTER( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U )
   2280 ICV_FILTER( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
   2281 ICV_FILTER( 16s, short, int, CV_NOP, cvRound, CV_CAST_16S )
   2282 ICV_FILTER( 32f, float, float, CV_NOP, CV_CAST_32F, CV_NOP )
   2283 
   2284 
   2285 /////////////////////// common functions for working with IPP filters ////////////////////
   2286 
   2287 CvMat* icvIPPFilterInit( const CvMat* src, int stripe_size, CvSize ksize )
   2288 {
   2289     CvSize temp_size;
   2290     int pix_size = CV_ELEM_SIZE(src->type);
   2291     temp_size.width = cvAlign(src->cols + ksize.width - 1,8/CV_ELEM_SIZE(src->type & CV_MAT_DEPTH_MASK));
   2292     //temp_size.width = src->cols + ksize.width - 1;
   2293     temp_size.height = (stripe_size*2 + temp_size.width*pix_size) / (temp_size.width*pix_size*2);
   2294     temp_size.height = MAX( temp_size.height, ksize.height );
   2295     temp_size.height = MIN( temp_size.height, src->rows + ksize.height - 1 );
   2296 
   2297     return cvCreateMat( temp_size.height, temp_size.width, src->type );
   2298 }
   2299 
   2300 
   2301 int icvIPPFilterNextStripe( const CvMat* src, CvMat* temp, int y,
   2302                             CvSize ksize, CvPoint anchor )
   2303 {
   2304     int pix_size = CV_ELEM_SIZE(src->type);
   2305     int src_step = src->step ? src->step : CV_STUB_STEP;
   2306     int temp_step = temp->step ? temp->step : CV_STUB_STEP;
   2307     int i, dy, src_y1 = 0, src_y2;
   2308     int temp_rows;
   2309     uchar* temp_ptr = temp->data.ptr;
   2310     CvSize stripe_size, temp_size;
   2311 
   2312     dy = MIN( temp->rows - ksize.height + 1, src->rows - y );
   2313     if( y > 0 )
   2314     {
   2315         int temp_ready = ksize.height - 1;
   2316 
   2317         for( i = 0; i < temp_ready; i++ )
   2318             memcpy( temp_ptr + temp_step*i, temp_ptr +
   2319                     temp_step*(temp->rows - temp_ready + i), temp_step );
   2320 
   2321         temp_ptr += temp_ready*temp_step;
   2322         temp_rows = dy;
   2323         src_y1 = y + temp_ready - anchor.y;
   2324         src_y2 = src_y1 + dy;
   2325         if( src_y1 >= src->rows )
   2326         {
   2327             src_y1 = src->rows - 1;
   2328             src_y2 = src->rows;
   2329         }
   2330     }
   2331     else
   2332     {
   2333         temp_rows = dy + ksize.height - 1;
   2334         src_y2 = temp_rows - anchor.y;
   2335     }
   2336 
   2337     src_y2 = MIN( src_y2, src->rows );
   2338 
   2339     stripe_size = cvSize(src->cols, src_y2 - src_y1);
   2340     temp_size = cvSize(temp->cols, temp_rows);
   2341     icvCopyReplicateBorder_8u( src->data.ptr + src_y1*src_step, src_step,
   2342                       stripe_size, temp_ptr, temp_step, temp_size,
   2343                       (y == 0 ? anchor.y : 0), anchor.x, pix_size );
   2344     return dy;
   2345 }
   2346 
   2347 
   2348 /////////////////////////////// IPP separable filter functions ///////////////////////////
   2349 
   2350 icvFilterRow_8u_C1R_t icvFilterRow_8u_C1R_p = 0;
   2351 icvFilterRow_8u_C3R_t icvFilterRow_8u_C3R_p = 0;
   2352 icvFilterRow_8u_C4R_t icvFilterRow_8u_C4R_p = 0;
   2353 icvFilterRow_16s_C1R_t icvFilterRow_16s_C1R_p = 0;
   2354 icvFilterRow_16s_C3R_t icvFilterRow_16s_C3R_p = 0;
   2355 icvFilterRow_16s_C4R_t icvFilterRow_16s_C4R_p = 0;
   2356 icvFilterRow_32f_C1R_t icvFilterRow_32f_C1R_p = 0;
   2357 icvFilterRow_32f_C3R_t icvFilterRow_32f_C3R_p = 0;
   2358 icvFilterRow_32f_C4R_t icvFilterRow_32f_C4R_p = 0;
   2359 
   2360 icvFilterColumn_8u_C1R_t icvFilterColumn_8u_C1R_p = 0;
   2361 icvFilterColumn_8u_C3R_t icvFilterColumn_8u_C3R_p = 0;
   2362 icvFilterColumn_8u_C4R_t icvFilterColumn_8u_C4R_p = 0;
   2363 icvFilterColumn_16s_C1R_t icvFilterColumn_16s_C1R_p = 0;
   2364 icvFilterColumn_16s_C3R_t icvFilterColumn_16s_C3R_p = 0;
   2365 icvFilterColumn_16s_C4R_t icvFilterColumn_16s_C4R_p = 0;
   2366 icvFilterColumn_32f_C1R_t icvFilterColumn_32f_C1R_p = 0;
   2367 icvFilterColumn_32f_C3R_t icvFilterColumn_32f_C3R_p = 0;
   2368 icvFilterColumn_32f_C4R_t icvFilterColumn_32f_C4R_p = 0;
   2369 
   2370 //////////////////////////////////////////////////////////////////////////////////////////
   2371 
   2372 typedef CvStatus (CV_STDCALL * CvIPPSepFilterFunc)
   2373     ( const void* src, int srcstep, void* dst, int dststep,
   2374       CvSize size, const float* kernel, int ksize, int anchor );
   2375 
   2376 int icvIPPSepFilter( const CvMat* src, CvMat* dst, const CvMat* kernelX,
   2377                      const CvMat* kernelY, CvPoint anchor )
   2378 {
            // Separable filtering of src into dst through the IPP row/column
            // function pointers, processed stripe by stripe.
            //
            // kernelX and kernelY must be continuous CV_32FC1 vectors (1xN or Nx1),
            // anchor must lie inside them, and src/dst must agree in type and size;
            // otherwise CV_StsError is raised.
            //
            // Returns 1 once every stripe has been processed. The result stays 0
            // when icvFilterRow_8u_C1R_p is null, when the pixel type has no
            // matching function pair, or when an error is raised.
   2379     int result = 0;
   2380 
   2381     CvMat* top_bottom = 0;
   2382     CvMat* vout_hin = 0;
   2383     CvMat* dst_buf = 0;
   2384 
   2385     CV_FUNCNAME( "icvIPPSepFilter" );
   2386 
   2387     __BEGIN__;
   2388 
   2389     CvSize ksize;
   2390     CvPoint el_anchor;
   2391     CvSize size;
   2392     int type, depth, pix_size;
   2393     int i, x, y, dy = 0, prev_dy = 0, max_dy;
   2394     CvMat vout;
   2395     CvIPPSepFilterFunc x_func = 0, y_func = 0;
   2396     int src_step, top_bottom_step;
   2397     float *kx, *ky;
   2398     int align, stripe_size;
   2399 
            // presumably a null pointer means the IPP functions were not loaded;
            // the caller gets 0 and is expected to use another implementation
   2400     if( !icvFilterRow_8u_C1R_p )
   2401         EXIT;
   2402 
   2403     if( !CV_ARE_TYPES_EQ( src, dst ) || !CV_ARE_SIZES_EQ( src, dst ) ||
   2404         !CV_IS_MAT_CONT(kernelX->type & kernelY->type) ||
   2405         CV_MAT_TYPE(kernelX->type) != CV_32FC1 ||
   2406         CV_MAT_TYPE(kernelY->type) != CV_32FC1 ||
   2407         (kernelX->cols != 1 && kernelX->rows != 1) ||
   2408         (kernelY->cols != 1 && kernelY->rows != 1) ||
   2409         (unsigned)anchor.x >= (unsigned)(kernelX->cols + kernelX->rows - 1) ||
   2410         (unsigned)anchor.y >= (unsigned)(kernelY->cols + kernelY->rows - 1) )
   2411         CV_ERROR( CV_StsError, "Internal Error: incorrect parameters" );
   2412 
            // each kernel is a vector, so cols + rows - 1 is its length
   2413     ksize.width = kernelX->cols + kernelX->rows - 1;
   2414     ksize.height = kernelY->cols + kernelY->rows - 1;
   2415 
   2416     /*if( ksize.width <= 5 && ksize.height <= 5 )
   2417     {
   2418         float* ker = (float*)cvStackAlloc( ksize.width*ksize.height*sizeof(ker[0]));
   2419         CvMat kernel = cvMat( ksize.height, ksize.width, CV_32F, ker );
   2420         for( y = 0, i = 0; y < ksize.height; y++ )
   2421             for( x = 0; x < ksize.width; x++, i++ )
   2422                 ker[i] = kernelY->data.fl[y]*kernelX->data.fl[x];
   2423 
   2424         CV_CALL( cvFilter2D( src, dst, &kernel, anchor ));
   2425         EXIT;
   2426     }*/
   2427 
   2428     type = CV_MAT_TYPE(src->type);
   2429     depth = CV_MAT_DEPTH(type);
   2430     pix_size = CV_ELEM_SIZE(type);
   2431 
            // pick the row (x_func) and column (y_func) pair for the pixel type;
            // any other type leaves the function with result == 0
   2432     if( type == CV_8UC1 )
   2433         x_func = icvFilterRow_8u_C1R_p, y_func = icvFilterColumn_8u_C1R_p;
   2434     else if( type == CV_8UC3 )
   2435         x_func = icvFilterRow_8u_C3R_p, y_func = icvFilterColumn_8u_C3R_p;
   2436     else if( type == CV_8UC4 )
   2437         x_func = icvFilterRow_8u_C4R_p, y_func = icvFilterColumn_8u_C4R_p;
   2438     else if( type == CV_16SC1 )
   2439         x_func = icvFilterRow_16s_C1R_p, y_func = icvFilterColumn_16s_C1R_p;
   2440     else if( type == CV_16SC3 )
   2441         x_func = icvFilterRow_16s_C3R_p, y_func = icvFilterColumn_16s_C3R_p;
   2442     else if( type == CV_16SC4 )
   2443         x_func = icvFilterRow_16s_C4R_p, y_func = icvFilterColumn_16s_C4R_p;
   2444     else if( type == CV_32FC1 )
   2445         x_func = icvFilterRow_32f_C1R_p, y_func = icvFilterColumn_32f_C1R_p;
   2446     else if( type == CV_32FC3 )
   2447         x_func = icvFilterRow_32f_C3R_p, y_func = icvFilterColumn_32f_C3R_p;
   2448     else if( type == CV_32FC4 )
   2449         x_func = icvFilterRow_32f_C4R_p, y_func = icvFilterColumn_32f_C4R_p;
   2450     else
   2451         EXIT;
   2452 
   2453     size = cvGetMatSize(src);
            // in-place operation gets half the stripe budget (1 << 15 vs 1 << 16)
   2454     stripe_size = src->data.ptr == dst->data.ptr ? 1 << 15 : 1 << 16;
   2455     max_dy = MAX( ksize.height - 1, stripe_size/(size.width + ksize.width - 1));
   2456     max_dy = MIN( max_dy, size.height + ksize.height - 1 );
   2457 
   2458     align = 8/CV_ELEM_SIZE(depth);
   2459 
            // scratch image for stripes that need replicated rows above/below src
   2460     CV_CALL( top_bottom = cvCreateMat( ksize.height*2, cvAlign(size.width,align), type ));
   2461 
            // column-pass output; ksize.width - 1 extra columns leave room for the
            // horizontal border written on both sides of each row
   2462     CV_CALL( vout_hin = cvCreateMat( max_dy + ksize.height,
   2463         cvAlign(size.width + ksize.width - 1, align), type ));
   2464 
            // in-place only: the row-pass output is parked here and copied into dst
            // after the next stripe's column pass has read its source rows
   2465     if( src->data.ptr == dst->data.ptr && size.height )
   2466         CV_CALL( dst_buf = cvCreateMat( max_dy + ksize.height,
   2467             cvAlign(size.width, align), type ));
   2468 
   2469     kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
   2470     ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
   2471 
   2472     // mirror the kernels
   2473     for( i = 0; i < ksize.width; i++ )
   2474         kx[i] = kernelX->data.fl[ksize.width - i - 1];
   2475 
   2476     for( i = 0; i < ksize.height; i++ )
   2477         ky[i] = kernelY->data.fl[ksize.height - i - 1];
   2478 
            // the anchor expressed in the coordinates of the mirrored kernels
   2479     el_anchor = cvPoint( ksize.width - anchor.x - 1, ksize.height - anchor.y - 1 );
   2480 
            // vout is the size.width-wide window of vout_hin that starts anchor.x
            // columns in, so the left border fits in front of it
   2481     cvGetCols( vout_hin, &vout, anchor.x, anchor.x + size.width );
   2482 
   2483     src_step = src->step ? src->step : CV_STUB_STEP;
   2484     top_bottom_step = top_bottom->step ? top_bottom->step : CV_STUB_STEP;
   2485     vout.step = vout.step ? vout.step : CV_STUB_STEP;
   2486 
   2487     for( y = 0; y < size.height; y += dy )
   2488     {
   2489         const CvMat *vin = src, *hout = dst;
   2490         int src_y = y, dst_y = y;
   2491         dy = MIN( max_dy, size.height - (ksize.height - anchor.y - 1) - y );
   2492 
                // NOTE(review): with anchor.y == 0 and ksize.height > 1 the value of
                // dy above can reach 0 without entering the branch below, in which
                // case y would not advance unless y_func rejects the empty stripe.
                // TODO confirm callers never pass such an anchor.
   2493         if( y < anchor.y || dy < anchor.y )
   2494         {
                    // stripe touches the top or bottom edge: build a copy with
                    // replicated rows in top_bottom and filter that instead of src
   2495             int ay = anchor.y;
   2496             CvSize src_stripe_size = size;
   2497 
   2498             if( y < anchor.y )
   2499             {
   2500                 src_y = 0;
   2501                 dy = MIN( anchor.y, size.height );
   2502                 src_stripe_size.height = MIN( dy + ksize.height - anchor.y - 1, size.height );
   2503             }
   2504             else
   2505             {
   2506                 src_y = MAX( y - anchor.y, 0 );
   2507                 dy = size.height - y;
   2508                 src_stripe_size.height = MIN( dy + anchor.y, size.height );
   2509                 ay = MAX( anchor.y - y, 0 );
   2510             }
   2511 
   2512             icvCopyReplicateBorder_8u( src->data.ptr + src_y*src_step, src_step,
   2513                 src_stripe_size, top_bottom->data.ptr, top_bottom_step,
   2514                 cvSize(size.width, dy + ksize.height - 1), ay, 0, pix_size );
   2515             vin = top_bottom;
   2516             src_y = anchor.y;
   2517         }
   2518 
   2519         // do vertical convolution
   2520         IPPI_CALL( y_func( vin->data.ptr + src_y*vin->step, vin->step ? vin->step : CV_STUB_STEP,
   2521                            vout.data.ptr, vout.step, cvSize(size.width, dy),
   2522                            ky, ksize.height, el_anchor.y ));
   2523 
   2524         // now it's time to copy the previously processed stripe to the input/output image
   2525         if( src->data.ptr == dst->data.ptr )
   2526         {
   2527             for( i = 0; i < prev_dy; i++ )
   2528                 memcpy( dst->data.ptr + (y - prev_dy + i)*dst->step,
   2529                         dst_buf->data.ptr + i*dst_buf->step, size.width*pix_size );
                    // every stripe but the last one is written to dst_buf first;
                    // the last stripe goes straight into dst
   2530             if( y + dy < size.height )
   2531             {
   2532                 hout = dst_buf;
   2533                 dst_y = 0;
   2534             }
   2535         }
   2536 
   2537         // create a border for every line by replicating the left-most/right-most elements
   2538         for( i = 0; i < dy; i++ )
   2539         {
   2540             uchar* ptr = vout.data.ptr + i*vout.step;
                    // byte-wise copy: each byte takes the value one pixel further in
   2541             for( x = -1; x >= -anchor.x*pix_size; x-- )
   2542                 ptr[x] = ptr[x + pix_size];
   2543             for( x = size.width*pix_size; x < (size.width+ksize.width-anchor.x-1)*pix_size; x++ )
   2544                 ptr[x] = ptr[x - pix_size];
   2545         }
   2546 
   2547         // do horizontal convolution
   2548         IPPI_CALL( x_func( vout.data.ptr, vout.step, hout->data.ptr + dst_y*hout->step,
   2549                            hout->step ? hout->step : CV_STUB_STEP,
   2550                            cvSize(size.width, dy), kx, ksize.width, el_anchor.x ));
   2551         prev_dy = dy;
   2552     }
   2553 
   2554     result = 1;
   2555 
   2556     __END__;
   2557 
            // the temporaries are released after __END__, next to the final return
   2558     cvReleaseMat( &vout_hin );
   2559     cvReleaseMat( &dst_buf );
   2560     cvReleaseMat( &top_bottom );
   2561 
   2562     return result;
   2563 }
   2564 
   2565 //////////////////////////////////////////////////////////////////////////////////////////
   2566 
   2567 //////////////////////////////// IPP generic filter functions ////////////////////////////
   2568 
   2569 icvFilter_8u_C1R_t icvFilter_8u_C1R_p = 0;
   2570 icvFilter_8u_C3R_t icvFilter_8u_C3R_p = 0;
   2571 icvFilter_8u_C4R_t icvFilter_8u_C4R_p = 0;
   2572 icvFilter_16s_C1R_t icvFilter_16s_C1R_p = 0;
   2573 icvFilter_16s_C3R_t icvFilter_16s_C3R_p = 0;
   2574 icvFilter_16s_C4R_t icvFilter_16s_C4R_p = 0;
   2575 icvFilter_32f_C1R_t icvFilter_32f_C1R_p = 0;
   2576 icvFilter_32f_C3R_t icvFilter_32f_C3R_p = 0;
   2577 icvFilter_32f_C4R_t icvFilter_32f_C4R_p = 0;
   2578 
   2579 
   2580 typedef CvStatus (CV_STDCALL * CvFilterIPPFunc)
   2581 ( const void* src, int srcstep, void* dst, int dststep, CvSize size,
   2582   const float* kernel, CvSize ksize, CvPoint anchor );
   2583 
   2584 CV_IMPL void
   2585 cvFilter2D( const CvArr* _src, CvArr* _dst, const CvMat* kernel, CvPoint anchor )
   2586 {
   2587     const int dft_filter_size = 100;
   2588 
   2589     CvLinearFilter filter;
   2590     CvMat* ipp_kernel = 0;
   2591 
   2592     // below that approximate size OpenCV is faster
   2593     const int ipp_lower_limit = 20;
   2594     CvMat* temp = 0;
   2595 
   2596     CV_FUNCNAME( "cvFilter2D" );
   2597 
   2598     __BEGIN__;
   2599 
   2600     int coi1 = 0, coi2 = 0;
   2601     CvMat srcstub, *src = (CvMat*)_src;
   2602     CvMat dststub, *dst = (CvMat*)_dst;
   2603     int type;
   2604 
   2605     CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
   2606     CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
   2607 
   2608     if( coi1 != 0 || coi2 != 0 )
   2609         CV_ERROR( CV_BadCOI, "" );
   2610 
   2611     type = CV_MAT_TYPE( src->type );
   2612 
   2613     if( !CV_ARE_SIZES_EQ( src, dst ))
   2614         CV_ERROR( CV_StsUnmatchedSizes, "" );
   2615 
   2616     if( !CV_ARE_TYPES_EQ( src, dst ))
   2617         CV_ERROR( CV_StsUnmatchedFormats, "" );
   2618 
   2619     if( anchor.x == -1 && anchor.y == -1 )
   2620         anchor = cvPoint(kernel->cols/2,kernel->rows/2);
   2621 
   2622     if( kernel->cols*kernel->rows >= dft_filter_size &&
   2623         kernel->cols <= src->cols && kernel->rows <= src->rows )
   2624     {
   2625         if( src->data.ptr == dst->data.ptr )
   2626         {
   2627             temp = cvCloneMat( src );
   2628             src = temp;
   2629         }
   2630         icvCrossCorr( src, kernel, dst, anchor );
   2631         EXIT;
   2632     }
   2633 
   2634     if( icvFilter_8u_C1R_p && (src->rows >= ipp_lower_limit || src->cols >= ipp_lower_limit) )
   2635     {
   2636         CvFilterIPPFunc ipp_func =
   2637                 type == CV_8UC1 ? (CvFilterIPPFunc)icvFilter_8u_C1R_p :
   2638                 type == CV_8UC3 ? (CvFilterIPPFunc)icvFilter_8u_C3R_p :
   2639                 type == CV_8UC4 ? (CvFilterIPPFunc)icvFilter_8u_C4R_p :
   2640                 type == CV_16SC1 ? (CvFilterIPPFunc)icvFilter_16s_C1R_p :
   2641                 type == CV_16SC3 ? (CvFilterIPPFunc)icvFilter_16s_C3R_p :
   2642                 type == CV_16SC4 ? (CvFilterIPPFunc)icvFilter_16s_C4R_p :
   2643                 type == CV_32FC1 ? (CvFilterIPPFunc)icvFilter_32f_C1R_p :
   2644                 type == CV_32FC3 ? (CvFilterIPPFunc)icvFilter_32f_C3R_p :
   2645                 type == CV_32FC4 ? (CvFilterIPPFunc)icvFilter_32f_C4R_p : 0;
   2646 
   2647         if( ipp_func )
   2648         {
   2649             CvSize el_size = { kernel->cols, kernel->rows };
   2650             CvPoint el_anchor;
   2651             int stripe_size = 1 << 16; // the optimal value may depend on CPU cache,
   2652                                        // overhead of current IPP code etc.
   2653             const uchar* shifted_ptr;
   2654             int i, j, y, dy = 0;
   2655             int temp_step, dst_step = dst->step ? dst->step : CV_STUB_STEP;
   2656 
   2657             if( (unsigned)anchor.x >= (unsigned)kernel->cols ||
   2658                 (unsigned)anchor.y >= (unsigned)kernel->rows )
   2659                 CV_ERROR( CV_StsOutOfRange, "anchor point is out of kernel" );
   2660 
   2661             el_anchor = cvPoint( el_size.width - anchor.x - 1, el_size.height - anchor.y - 1 );
   2662 
   2663             CV_CALL( ipp_kernel = cvCreateMat( kernel->rows, kernel->cols, CV_32FC1 ));
   2664             CV_CALL( cvConvert( kernel, ipp_kernel ));
   2665 
   2666             // mirror the kernel around the center
   2667             for( i = 0; i < (el_size.height+1)/2; i++ )
   2668             {
   2669                 float* top_row = ipp_kernel->data.fl + el_size.width*i;
   2670                 float* bottom_row = ipp_kernel->data.fl + el_size.width*(el_size.height - i - 1);
   2671 
   2672                 for( j = 0; j < (el_size.width+1)/2; j++ )
   2673                 {
   2674                     float a = top_row[j], b = top_row[el_size.width - j - 1];
   2675                     float c = bottom_row[j], d = bottom_row[el_size.width - j - 1];
   2676                     top_row[j] = d;
   2677                     top_row[el_size.width - j - 1] = c;
   2678                     bottom_row[j] = b;
   2679                     bottom_row[el_size.width - j - 1] = a;
   2680                 }
   2681             }
   2682 
   2683             CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
   2684 
   2685             shifted_ptr = temp->data.ptr +
   2686                 anchor.y*temp->step + anchor.x*CV_ELEM_SIZE(type);
   2687             temp_step = temp->step ? temp->step : CV_STUB_STEP;
   2688 
   2689             for( y = 0; y < src->rows; y += dy )
   2690             {
   2691                 dy = icvIPPFilterNextStripe( src, temp, y, el_size, anchor );
   2692                 IPPI_CALL( ipp_func( shifted_ptr, temp_step,
   2693                     dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
   2694                     ipp_kernel->data.fl, el_size, el_anchor ));
   2695             }
   2696             EXIT;
   2697         }
   2698     }
   2699 
   2700     CV_CALL( filter.init( src->cols, type, type, kernel, anchor ));
   2701     CV_CALL( filter.process( src, dst ));
   2702 
   2703     __END__;
   2704 
   2705     cvReleaseMat( &temp );
   2706     cvReleaseMat( &ipp_kernel );
   2707 }
   2708 
   2709 
   2710 /* End of file. */
   2711