1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // Intel License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000, Intel Corporation, all rights reserved. 14 // Third party copyrights are property of their respective owners. 15 // 16 // Redistribution and use in source and binary forms, with or without modification, 17 // are permitted provided that the following conditions are met: 18 // 19 // * Redistribution's of source code must retain the above copyright notice, 20 // this list of conditions and the following disclaimer. 21 // 22 // * Redistribution's in binary form must reproduce the above copyright notice, 23 // this list of conditions and the following disclaimer in the documentation 24 // and/or other materials provided with the distribution. 25 // 26 // * The name of Intel Corporation may not be used to endorse or promote products 27 // derived from this software without specific prior written permission. 28 // 29 // This software is provided by the copyright holders and contributors "as is" and 30 // any express or implied warranties, including, but not limited to, the implied 31 // warranties of merchantability and fitness for a particular purpose are disclaimed. 32 // In no event shall the Intel Corporation or contributors be liable for any direct, 33 // indirect, incidental, special, exemplary, or consequential damages 34 // (including, but not limited to, procurement of substitute goods or services; 35 // loss of use, data, or profits; or business interruption) however caused 36 // and on any theory of liability, whether in contract, strict liability, 37 // or tort (including negligence or otherwise) arising in any way out of 38 // the use of this software, even if advised of the possibility of such damage. 39 // 40 //M*/ 41 42 #include "_cv.h" 43 44 45 /****************************************************************************************\ 46 Base Image Filter 47 \****************************************************************************************/ 48 49 static void default_x_filter_func( const uchar*, uchar*, void* ) 50 { 51 } 52 53 static void default_y_filter_func( uchar**, uchar*, int, int, void* ) 54 { 55 } 56 57 CvBaseImageFilter::CvBaseImageFilter() 58 { 59 min_depth = CV_8U; 60 buffer = 0; 61 rows = 0; 62 max_width = 0; 63 x_func = default_x_filter_func; 64 y_func = default_y_filter_func; 65 } 66 67 68 CvBaseImageFilter::CvBaseImageFilter( int _max_width, int _src_type, int _dst_type, 69 bool _is_separable, CvSize _ksize, CvPoint _anchor, 70 int _border_mode, CvScalar _border_value ) 71 { 72 min_depth = CV_8U; 73 buffer = 0; 74 rows = 0; 75 max_width = 0; 76 x_func = default_x_filter_func; 77 y_func = default_y_filter_func; 78 79 init( _max_width, _src_type, _dst_type, _is_separable, 80 _ksize, _anchor, _border_mode, _border_value ); 81 } 82 83 84 void CvBaseImageFilter::clear() 85 { 86 cvFree( &buffer ); 87 rows = 0; 88 } 89 90 91 CvBaseImageFilter::~CvBaseImageFilter() 92 { 93 clear(); 94 } 95 96 97 void CvBaseImageFilter::get_work_params() 98 { 99 int min_rows = max_ky*2 + 3, rows = MAX(min_rows,10), row_sz; 100 int width = max_width, trow_sz = 0; 101 102 if( is_separable ) 103 { 104 int max_depth = MAX(CV_MAT_DEPTH(src_type), CV_MAT_DEPTH(dst_type)); 105 int max_cn = MAX(CV_MAT_CN(src_type), CV_MAT_CN(dst_type)); 106 max_depth = MAX( max_depth, min_depth ); 107 work_type = CV_MAKETYPE( max_depth, max_cn ); 108 trow_sz = cvAlign( (max_width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN ); 109 } 110 else 111 { 112 work_type = src_type; 113 width += ksize.width - 1; 114 } 115 row_sz = cvAlign( width*CV_ELEM_SIZE(work_type), ALIGN ); 116 buf_size = rows*row_sz; 117 buf_size = MIN( buf_size, 1 << 16 ); 118 buf_size = MAX( buf_size, min_rows*row_sz ); 119 max_rows = (buf_size/row_sz)*3 + max_ky*2 + 8; 120 buf_size += trow_sz; 121 } 122 123 124 void CvBaseImageFilter::init( int _max_width, int _src_type, int _dst_type, 125 bool _is_separable, CvSize _ksize, CvPoint _anchor, 126 int _border_mode, CvScalar _border_value ) 127 { 128 CV_FUNCNAME( "CvBaseImageFilter::init" ); 129 130 __BEGIN__; 131 132 int total_buf_sz, src_pix_sz, row_tab_sz, bsz; 133 uchar* ptr; 134 135 if( !(buffer && _max_width <= max_width && _src_type == src_type && 136 _dst_type == dst_type && _is_separable == is_separable && 137 _ksize.width == ksize.width && _ksize.height == ksize.height && 138 _anchor.x == anchor.x && _anchor.y == anchor.y) ) 139 clear(); 140 141 is_separable = _is_separable != 0; 142 max_width = _max_width; //MAX(_max_width,_ksize.width); 143 src_type = CV_MAT_TYPE(_src_type); 144 dst_type = CV_MAT_TYPE(_dst_type); 145 ksize = _ksize; 146 anchor = _anchor; 147 148 if( anchor.x == -1 ) 149 anchor.x = ksize.width / 2; 150 if( anchor.y == -1 ) 151 anchor.y = ksize.height / 2; 152 153 max_ky = MAX( anchor.y, ksize.height - anchor.y - 1 ); 154 border_mode = _border_mode; 155 border_value = _border_value; 156 157 if( ksize.width <= 0 || ksize.height <= 0 || 158 (unsigned)anchor.x >= (unsigned)ksize.width || 159 (unsigned)anchor.y >= (unsigned)ksize.height ) 160 CV_ERROR( CV_StsOutOfRange, "invalid kernel size and/or anchor position" ); 161 162 if( border_mode != IPL_BORDER_CONSTANT && border_mode != IPL_BORDER_REPLICATE && 163 border_mode != IPL_BORDER_REFLECT && border_mode != IPL_BORDER_REFLECT_101 ) 164 CV_ERROR( CV_StsBadArg, "Invalid/unsupported border mode" ); 165 166 get_work_params(); 167 168 prev_width = 0; 169 prev_x_range = cvSlice(0,0); 170 171 buf_size = cvAlign( buf_size, ALIGN ); 172 173 src_pix_sz = CV_ELEM_SIZE(src_type); 174 border_tab_sz1 = anchor.x*src_pix_sz; 175 border_tab_sz = (ksize.width-1)*src_pix_sz; 176 bsz = cvAlign( border_tab_sz*sizeof(int), ALIGN ); 177 178 assert( max_rows > max_ky*2 ); 179 row_tab_sz = cvAlign( max_rows*sizeof(uchar*), ALIGN ); 180 total_buf_sz = buf_size + row_tab_sz + bsz; 181 182 CV_CALL( ptr = buffer = (uchar*)cvAlloc( total_buf_sz )); 183 184 rows = (uchar**)ptr; 185 ptr += row_tab_sz; 186 border_tab = (int*)ptr; 187 ptr += bsz; 188 189 buf_start = ptr; 190 const_row = 0; 191 192 if( border_mode == IPL_BORDER_CONSTANT ) 193 cvScalarToRawData( &border_value, border_tab, src_type, 0 ); 194 195 __END__; 196 } 197 198 199 void CvBaseImageFilter::start_process( CvSlice x_range, int width ) 200 { 201 int mode = border_mode; 202 int pix_sz = CV_ELEM_SIZE(src_type), work_pix_sz = CV_ELEM_SIZE(work_type); 203 int bsz = buf_size, bw = x_range.end_index - x_range.start_index, bw1 = bw + ksize.width - 1; 204 int tr_step = cvAlign(bw1*pix_sz, ALIGN ); 205 int i, j, k, ofs; 206 207 if( x_range.start_index == prev_x_range.start_index && 208 x_range.end_index == prev_x_range.end_index && 209 width == prev_width ) 210 return; 211 212 prev_x_range = x_range; 213 prev_width = width; 214 215 if( !is_separable ) 216 bw = bw1; 217 else 218 bsz -= tr_step; 219 220 buf_step = cvAlign(bw*work_pix_sz, ALIGN); 221 222 if( mode == IPL_BORDER_CONSTANT ) 223 bsz -= buf_step; 224 buf_max_count = bsz/buf_step; 225 buf_max_count = MIN( buf_max_count, max_rows - max_ky*2 ); 226 buf_end = buf_start + buf_max_count*buf_step; 227 228 if( mode == IPL_BORDER_CONSTANT ) 229 { 230 int i, tab_len = ksize.width*pix_sz; 231 uchar* bt = (uchar*)border_tab; 232 uchar* trow = buf_end; 233 const_row = buf_end + (is_separable ? 1 : 0)*tr_step; 234 235 for( i = pix_sz; i < tab_len; i++ ) 236 bt[i] = bt[i - pix_sz]; 237 for( i = 0; i < pix_sz; i++ ) 238 trow[i] = bt[i]; 239 for( i = pix_sz; i < tr_step; i++ ) 240 trow[i] = trow[i - pix_sz]; 241 if( is_separable ) 242 x_func( trow, const_row, this ); 243 return; 244 } 245 246 if( x_range.end_index - x_range.start_index <= 1 ) 247 mode = IPL_BORDER_REPLICATE; 248 249 width = (width - 1)*pix_sz; 250 ofs = (anchor.x-x_range.start_index)*pix_sz; 251 252 for( k = 0; k < 2; k++ ) 253 { 254 int idx, delta; 255 int i1, i2, di; 256 257 if( k == 0 ) 258 { 259 idx = (x_range.start_index - 1)*pix_sz; 260 delta = di = -pix_sz; 261 i1 = border_tab_sz1 - pix_sz; 262 i2 = -pix_sz; 263 } 264 else 265 { 266 idx = x_range.end_index*pix_sz; 267 delta = di = pix_sz; 268 i1 = border_tab_sz1; 269 i2 = border_tab_sz; 270 } 271 272 if( (unsigned)idx > (unsigned)width ) 273 { 274 int shift = mode == IPL_BORDER_REFLECT_101 ? pix_sz : 0; 275 idx = k == 0 ? shift : width - shift; 276 delta = -delta; 277 } 278 279 for( i = i1; i != i2; i += di ) 280 { 281 for( j = 0; j < pix_sz; j++ ) 282 border_tab[i + j] = idx + ofs + j; 283 if( mode != IPL_BORDER_REPLICATE ) 284 { 285 if( (delta > 0 && idx == width) || 286 (delta < 0 && idx == 0) ) 287 { 288 if( mode == IPL_BORDER_REFLECT_101 ) 289 idx -= delta*2; 290 delta = -delta; 291 } 292 else 293 idx += delta; 294 } 295 } 296 } 297 } 298 299 300 void CvBaseImageFilter::make_y_border( int row_count, int top_rows, int bottom_rows ) 301 { 302 int i; 303 304 if( border_mode == IPL_BORDER_CONSTANT || 305 border_mode == IPL_BORDER_REPLICATE ) 306 { 307 uchar* row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[max_ky]; 308 309 for( i = 0; i < top_rows && rows[i] == 0; i++ ) 310 rows[i] = row1; 311 312 row1 = border_mode == IPL_BORDER_CONSTANT ? const_row : rows[row_count-1]; 313 for( i = 0; i < bottom_rows; i++ ) 314 rows[i + row_count] = row1; 315 } 316 else 317 { 318 int j, dj = 1, shift = border_mode == IPL_BORDER_REFLECT_101; 319 320 for( i = top_rows-1, j = top_rows+shift; i >= 0; i-- ) 321 { 322 if( rows[i] == 0 ) 323 rows[i] = rows[j]; 324 j += dj; 325 if( dj > 0 && j >= row_count ) 326 { 327 if( !bottom_rows ) 328 break; 329 j -= 1 + shift; 330 dj = -dj; 331 } 332 } 333 334 for( i = 0, j = row_count-1-shift; i < bottom_rows; i++, j-- ) 335 rows[i + row_count] = rows[j]; 336 } 337 } 338 339 340 int CvBaseImageFilter::fill_cyclic_buffer( const uchar* src, int src_step, 341 int y0, int y1, int y2 ) 342 { 343 int i, y = y0, bsz1 = border_tab_sz1, bsz = border_tab_sz; 344 int pix_size = CV_ELEM_SIZE(src_type); 345 int width = prev_x_range.end_index - prev_x_range.start_index, width_n = width*pix_size; 346 bool can_use_src_as_trow = false; //is_separable && width >= ksize.width; 347 348 // fill the cyclic buffer 349 for( ; buf_count < buf_max_count && y < y2; buf_count++, y++, src += src_step ) 350 { 351 uchar* trow = is_separable ? buf_end : buf_tail; 352 uchar* bptr = can_use_src_as_trow && y1 < y && y+1 < y2 ? (uchar*)(src - bsz1) : trow; 353 354 if( bptr != trow ) 355 { 356 for( i = 0; i < bsz1; i++ ) 357 trow[i] = bptr[i]; 358 for( ; i < bsz; i++ ) 359 trow[i] = bptr[i + width_n]; 360 } 361 else if( !(((size_t)(bptr + bsz1)|(size_t)src|width_n) & (sizeof(int)-1)) ) 362 for( i = 0; i < width_n; i += sizeof(int) ) 363 *(int*)(bptr + i + bsz1) = *(int*)(src + i); 364 else 365 for( i = 0; i < width_n; i++ ) 366 bptr[i + bsz1] = src[i]; 367 368 if( border_mode != IPL_BORDER_CONSTANT ) 369 { 370 for( i = 0; i < bsz1; i++ ) 371 { 372 int j = border_tab[i]; 373 bptr[i] = bptr[j]; 374 } 375 for( ; i < bsz; i++ ) 376 { 377 int j = border_tab[i]; 378 bptr[i + width_n] = bptr[j]; 379 } 380 } 381 else 382 { 383 const uchar *bt = (uchar*)border_tab; 384 for( i = 0; i < bsz1; i++ ) 385 bptr[i] = bt[i]; 386 387 for( ; i < bsz; i++ ) 388 bptr[i + width_n] = bt[i]; 389 } 390 391 if( is_separable ) 392 { 393 x_func( bptr, buf_tail, this ); 394 if( bptr != trow ) 395 { 396 for( i = 0; i < bsz1; i++ ) 397 bptr[i] = trow[i]; 398 for( ; i < bsz; i++ ) 399 bptr[i + width_n] = trow[i]; 400 } 401 } 402 403 buf_tail += buf_step; 404 if( buf_tail >= buf_end ) 405 buf_tail = buf_start; 406 } 407 408 return y - y0; 409 } 410 411 int CvBaseImageFilter::process( const CvMat* src, CvMat* dst, 412 CvRect src_roi, CvPoint dst_origin, int flags ) 413 { 414 int rows_processed = 0; 415 416 /* 417 check_parameters 418 initialize_horizontal_border_reloc_tab_if_not_initialized_yet 419 420 for_each_source_row: src starts from src_roi.y, buf starts with the first available row 421 1) if separable, 422 1a.1) copy source row to temporary buffer, form a border using border reloc tab. 423 1a.2) apply row-wise filter (symmetric, asymmetric or generic) 424 else 425 1b.1) copy source row to the buffer, form a border 426 2) if the buffer is full, or it is the last source row: 427 2.1) if stage != middle, form the pointers to other "virtual" rows. 428 if separable 429 2a.2) apply column-wise filter, store the results. 430 else 431 2b.2) form a sparse (offset,weight) tab 432 2b.3) apply generic non-separable filter, store the results 433 3) update row pointers etc. 434 */ 435 436 CV_FUNCNAME( "CvBaseImageFilter::process" ); 437 438 __BEGIN__; 439 440 int i, width, _src_y1, _src_y2; 441 int src_x, src_y, src_y1, src_y2, dst_y; 442 int pix_size = CV_ELEM_SIZE(src_type); 443 uchar *sptr = 0, *dptr; 444 int phase = flags & (CV_START|CV_END|CV_MIDDLE); 445 bool isolated_roi = (flags & CV_ISOLATED_ROI) != 0; 446 447 if( !CV_IS_MAT(src) ) 448 CV_ERROR( CV_StsBadArg, "" ); 449 450 if( CV_MAT_TYPE(src->type) != src_type ) 451 CV_ERROR( CV_StsUnmatchedFormats, "" ); 452 453 width = src->cols; 454 455 if( src_roi.width == -1 && src_roi.x == 0 ) 456 src_roi.width = width; 457 458 if( src_roi.height == -1 && src_roi.y == 0 ) 459 { 460 src_roi.y = 0; 461 src_roi.height = src->rows; 462 } 463 464 if( src_roi.width > max_width || 465 src_roi.x < 0 || src_roi.width < 0 || 466 src_roi.y < 0 || src_roi.height < 0 || 467 src_roi.x + src_roi.width > width || 468 src_roi.y + src_roi.height > src->rows ) 469 CV_ERROR( CV_StsOutOfRange, "Too large source image or its ROI" ); 470 471 src_x = src_roi.x; 472 _src_y1 = 0; 473 _src_y2 = src->rows; 474 475 if( isolated_roi ) 476 { 477 src_roi.x = 0; 478 width = src_roi.width; 479 _src_y1 = src_roi.y; 480 _src_y2 = src_roi.y + src_roi.height; 481 } 482 483 if( !CV_IS_MAT(dst) ) 484 CV_ERROR( CV_StsBadArg, "" ); 485 486 if( CV_MAT_TYPE(dst->type) != dst_type ) 487 CV_ERROR( CV_StsUnmatchedFormats, "" ); 488 489 if( dst_origin.x < 0 || dst_origin.y < 0 ) 490 CV_ERROR( CV_StsOutOfRange, "Incorrect destination ROI origin" ); 491 492 if( phase == CV_WHOLE ) 493 phase = CV_START | CV_END; 494 phase &= CV_START | CV_END | CV_MIDDLE; 495 496 // initialize horizontal border relocation tab if it is not initialized yet 497 if( phase & CV_START ) 498 start_process( cvSlice(src_roi.x, src_roi.x + src_roi.width), width ); 499 else if( prev_width != width || prev_x_range.start_index != src_roi.x || 500 prev_x_range.end_index != src_roi.x + src_roi.width ) 501 CV_ERROR( CV_StsBadArg, 502 "In a middle or at the end the horizontal placement of the stripe can not be changed" ); 503 504 dst_y = dst_origin.y; 505 src_y1 = src_roi.y; 506 src_y2 = src_roi.y + src_roi.height; 507 508 if( phase & CV_START ) 509 { 510 for( i = 0; i <= max_ky*2; i++ ) 511 rows[i] = 0; 512 513 src_y1 -= max_ky; 514 top_rows = bottom_rows = 0; 515 516 if( src_y1 < _src_y1 ) 517 { 518 top_rows = _src_y1 - src_y1; 519 src_y1 = _src_y1; 520 } 521 522 buf_head = buf_tail = buf_start; 523 buf_count = 0; 524 } 525 526 if( phase & CV_END ) 527 { 528 src_y2 += max_ky; 529 530 if( src_y2 > _src_y2 ) 531 { 532 bottom_rows = src_y2 - _src_y2; 533 src_y2 = _src_y2; 534 } 535 } 536 537 dptr = dst->data.ptr + dst_origin.y*dst->step + dst_origin.x*CV_ELEM_SIZE(dst_type); 538 sptr = src->data.ptr + src_y1*src->step + src_x*pix_size; 539 540 for( src_y = src_y1; src_y < src_y2; ) 541 { 542 uchar* bptr; 543 int row_count, delta; 544 545 delta = fill_cyclic_buffer( sptr, src->step, src_y, src_y1, src_y2 ); 546 547 src_y += delta; 548 sptr += src->step*delta; 549 550 // initialize the cyclic buffer row pointers 551 bptr = buf_head; 552 for( i = 0; i < buf_count; i++ ) 553 { 554 rows[i+top_rows] = bptr; 555 bptr += buf_step; 556 if( bptr >= buf_end ) 557 bptr = buf_start; 558 } 559 560 row_count = top_rows + buf_count; 561 562 if( !rows[0] || ((phase & CV_END) && src_y == src_y2) ) 563 { 564 int br = (phase & CV_END) && src_y == src_y2 ? bottom_rows : 0; 565 make_y_border( row_count, top_rows, br ); 566 row_count += br; 567 } 568 569 if( rows[0] && row_count > max_ky*2 ) 570 { 571 int count = row_count - max_ky*2; 572 if( dst_y + count > dst->rows ) 573 CV_ERROR( CV_StsOutOfRange, "The destination image can not fit the result" ); 574 575 assert( count >= 0 ); 576 y_func( rows + max_ky - anchor.y, dptr, dst->step, count, this ); 577 row_count -= count; 578 dst_y += count; 579 dptr += dst->step*count; 580 for( bptr = row_count > 0 ?rows[count] : 0; buf_head != bptr && buf_count > 0; buf_count-- ) 581 { 582 buf_head += buf_step; 583 if( buf_head >= buf_end ) 584 buf_head = buf_start; 585 } 586 rows_processed += count; 587 top_rows = MAX(top_rows - count, 0); 588 } 589 } 590 591 __END__; 592 593 return rows_processed; 594 } 595 596 597 /****************************************************************************************\ 598 Separable Linear Filter 599 \****************************************************************************************/ 600 601 static void icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params ); 602 static void icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step, 603 int count, void* params ); 604 static void icvFilterColSymm_32s16s( const int** src, short* dst, int dst_step, 605 int count, void* params ); 606 static void icvFilterRowSymm_8u32f( const uchar* src, float* dst, void* params ); 607 static void icvFilterRow_8u32f( const uchar* src, float* dst, void* params ); 608 static void icvFilterRowSymm_16s32f( const short* src, float* dst, void* params ); 609 static void icvFilterRow_16s32f( const short* src, float* dst, void* params ); 610 static void icvFilterRowSymm_16u32f( const ushort* src, float* dst, void* params ); 611 static void icvFilterRow_16u32f( const ushort* src, float* dst, void* params ); 612 static void icvFilterRowSymm_32f( const float* src, float* dst, void* params ); 613 static void icvFilterRow_32f( const float* src, float* dst, void* params ); 614 615 static void icvFilterColSymm_32f8u( const float** src, uchar* dst, int dst_step, 616 int count, void* params ); 617 static void icvFilterCol_32f8u( const float** src, uchar* dst, int dst_step, 618 int count, void* params ); 619 static void icvFilterColSymm_32f16s( const float** src, short* dst, int dst_step, 620 int count, void* params ); 621 static void icvFilterCol_32f16s( const float** src, short* dst, int dst_step, 622 int count, void* params ); 623 static void icvFilterColSymm_32f16u( const float** src, ushort* dst, int dst_step, 624 int count, void* params ); 625 static void icvFilterCol_32f16u( const float** src, ushort* dst, int dst_step, 626 int count, void* params ); 627 static void icvFilterColSymm_32f( const float** src, float* dst, int dst_step, 628 int count, void* params ); 629 static void icvFilterCol_32f( const float** src, float* dst, int dst_step, 630 int count, void* params ); 631 632 CvSepFilter::CvSepFilter() 633 { 634 min_depth = CV_32F; 635 kx = ky = 0; 636 kx_flags = ky_flags = 0; 637 } 638 639 640 CvSepFilter::CvSepFilter( int _max_width, int _src_type, int _dst_type, 641 const CvMat* _kx, const CvMat* _ky, 642 CvPoint _anchor, int _border_mode, 643 CvScalar _border_value ) 644 { 645 min_depth = CV_32F; 646 kx = ky = 0; 647 init( _max_width, _src_type, _dst_type, _kx, _ky, _anchor, _border_mode, _border_value ); 648 } 649 650 651 void CvSepFilter::clear() 652 { 653 cvReleaseMat( &kx ); 654 cvReleaseMat( &ky ); 655 CvBaseImageFilter::clear(); 656 } 657 658 659 CvSepFilter::~CvSepFilter() 660 { 661 clear(); 662 } 663 664 665 #undef FILTER_BITS 666 #define FILTER_BITS 8 667 668 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type, 669 const CvMat* _kx, const CvMat* _ky, 670 CvPoint _anchor, int _border_mode, 671 CvScalar _border_value ) 672 { 673 CV_FUNCNAME( "CvSepFilter::init" ); 674 675 __BEGIN__; 676 677 CvSize _ksize; 678 int filter_type; 679 int i, xsz, ysz; 680 int convert_filters = 0; 681 double xsum = 0, ysum = 0; 682 const float eps = FLT_EPSILON*100.f; 683 684 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) || 685 (_kx->cols != 1 && _kx->rows != 1) || 686 (_ky->cols != 1 && _ky->rows != 1) || 687 CV_MAT_CN(_kx->type) != 1 || CV_MAT_CN(_ky->type) != 1 || 688 !CV_ARE_TYPES_EQ(_kx,_ky) ) 689 CV_ERROR( CV_StsBadArg, 690 "Both kernels must be valid 1d single-channel vectors of the same types" ); 691 692 if( CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type) ) 693 CV_ERROR( CV_StsUnmatchedFormats, "Input and output must have the same number of channels" ); 694 695 filter_type = MAX( CV_32F, CV_MAT_DEPTH(_kx->type) ); 696 697 _ksize.width = _kx->rows + _kx->cols - 1; 698 _ksize.height = _ky->rows + _ky->cols - 1; 699 700 CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize, 701 _anchor, _border_mode, _border_value )); 702 703 if( !(kx && CV_ARE_SIZES_EQ(kx,_kx)) ) 704 { 705 cvReleaseMat( &kx ); 706 CV_CALL( kx = cvCreateMat( _kx->rows, _kx->cols, filter_type )); 707 } 708 709 if( !(ky && CV_ARE_SIZES_EQ(ky,_ky)) ) 710 { 711 cvReleaseMat( &ky ); 712 CV_CALL( ky = cvCreateMat( _ky->rows, _ky->cols, filter_type )); 713 } 714 715 CV_CALL( cvConvert( _kx, kx )); 716 CV_CALL( cvConvert( _ky, ky )); 717 718 xsz = kx->rows + kx->cols - 1; 719 ysz = ky->rows + ky->cols - 1; 720 kx_flags = ky_flags = ASYMMETRICAL + SYMMETRICAL + POSITIVE + SUM_TO_1 + INTEGER; 721 722 if( !(xsz & 1) ) 723 kx_flags &= ~(ASYMMETRICAL + SYMMETRICAL); 724 if( !(ysz & 1) ) 725 ky_flags &= ~(ASYMMETRICAL + SYMMETRICAL); 726 727 for( i = 0; i < xsz; i++ ) 728 { 729 float v = kx->data.fl[i]; 730 xsum += v; 731 if( v < 0 ) 732 kx_flags &= ~POSITIVE; 733 if( fabs(v - cvRound(v)) > eps ) 734 kx_flags &= ~INTEGER; 735 if( fabs(v - kx->data.fl[xsz - i - 1]) > eps ) 736 kx_flags &= ~SYMMETRICAL; 737 if( fabs(v + kx->data.fl[xsz - i - 1]) > eps ) 738 kx_flags &= ~ASYMMETRICAL; 739 } 740 741 if( fabs(xsum - 1.) > eps ) 742 kx_flags &= ~SUM_TO_1; 743 744 for( i = 0; i < ysz; i++ ) 745 { 746 float v = ky->data.fl[i]; 747 ysum += v; 748 if( v < 0 ) 749 ky_flags &= ~POSITIVE; 750 if( fabs(v - cvRound(v)) > eps ) 751 ky_flags &= ~INTEGER; 752 if( fabs(v - ky->data.fl[ysz - i - 1]) > eps ) 753 ky_flags &= ~SYMMETRICAL; 754 if( fabs(v + ky->data.fl[ysz - i - 1]) > eps ) 755 ky_flags &= ~ASYMMETRICAL; 756 } 757 758 if( fabs(ysum - 1.) > eps ) 759 ky_flags &= ~SUM_TO_1; 760 761 x_func = 0; 762 y_func = 0; 763 764 if( CV_MAT_DEPTH(src_type) == CV_8U ) 765 { 766 if( CV_MAT_DEPTH(dst_type) == CV_8U && 767 ((kx_flags&ky_flags) & (SYMMETRICAL + POSITIVE + SUM_TO_1)) == SYMMETRICAL + POSITIVE + SUM_TO_1 ) 768 { 769 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s; 770 y_func = (CvColumnFilterFunc)icvFilterColSymm_32s8u; 771 kx_flags &= ~INTEGER; 772 ky_flags &= ~INTEGER; 773 convert_filters = 1; 774 } 775 else if( CV_MAT_DEPTH(dst_type) == CV_16S && 776 (kx_flags & (SYMMETRICAL + ASYMMETRICAL)) && (kx_flags & INTEGER) && 777 (ky_flags & (SYMMETRICAL + ASYMMETRICAL)) && (ky_flags & INTEGER) ) 778 { 779 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32s; 780 y_func = (CvColumnFilterFunc)icvFilterColSymm_32s16s; 781 convert_filters = 1; 782 } 783 else 784 { 785 if( CV_MAT_DEPTH(dst_type) > CV_32F ) 786 CV_ERROR( CV_StsUnsupportedFormat, "8u->64f separable filtering is not supported" ); 787 788 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) ) 789 x_func = (CvRowFilterFunc)icvFilterRowSymm_8u32f; 790 else 791 x_func = (CvRowFilterFunc)icvFilterRow_8u32f; 792 } 793 } 794 else if( CV_MAT_DEPTH(src_type) == CV_16U ) 795 { 796 if( CV_MAT_DEPTH(dst_type) > CV_32F ) 797 CV_ERROR( CV_StsUnsupportedFormat, "16u->64f separable filtering is not supported" ); 798 799 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) ) 800 x_func = (CvRowFilterFunc)icvFilterRowSymm_16u32f; 801 else 802 x_func = (CvRowFilterFunc)icvFilterRow_16u32f; 803 } 804 else if( CV_MAT_DEPTH(src_type) == CV_16S ) 805 { 806 if( CV_MAT_DEPTH(dst_type) > CV_32F ) 807 CV_ERROR( CV_StsUnsupportedFormat, "16s->64f separable filtering is not supported" ); 808 809 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) ) 810 x_func = (CvRowFilterFunc)icvFilterRowSymm_16s32f; 811 else 812 x_func = (CvRowFilterFunc)icvFilterRow_16s32f; 813 } 814 else if( CV_MAT_DEPTH(src_type) == CV_32F ) 815 { 816 if( CV_MAT_DEPTH(dst_type) != CV_32F ) 817 CV_ERROR( CV_StsUnsupportedFormat, "When the input has 32f data type, the output must also have 32f type" ); 818 819 if( kx_flags & (SYMMETRICAL + ASYMMETRICAL) ) 820 x_func = (CvRowFilterFunc)icvFilterRowSymm_32f; 821 else 822 x_func = (CvRowFilterFunc)icvFilterRow_32f; 823 } 824 else 825 CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" ); 826 827 if( !y_func ) 828 { 829 if( CV_MAT_DEPTH(dst_type) == CV_8U ) 830 { 831 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) ) 832 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f8u; 833 else 834 y_func = (CvColumnFilterFunc)icvFilterCol_32f8u; 835 } 836 else if( CV_MAT_DEPTH(dst_type) == CV_16U ) 837 { 838 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) ) 839 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16u; 840 else 841 y_func = (CvColumnFilterFunc)icvFilterCol_32f16u; 842 } 843 else if( CV_MAT_DEPTH(dst_type) == CV_16S ) 844 { 845 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) ) 846 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f16s; 847 else 848 y_func = (CvColumnFilterFunc)icvFilterCol_32f16s; 849 } 850 else if( CV_MAT_DEPTH(dst_type) == CV_32F ) 851 { 852 if( ky_flags & (SYMMETRICAL + ASYMMETRICAL) ) 853 y_func = (CvColumnFilterFunc)icvFilterColSymm_32f; 854 else 855 y_func = (CvColumnFilterFunc)icvFilterCol_32f; 856 } 857 else 858 CV_ERROR( CV_StsUnsupportedFormat, "Unknown or unsupported input data type" ); 859 } 860 861 if( convert_filters ) 862 { 863 int scale = kx_flags & ky_flags & INTEGER ? 1 : (1 << FILTER_BITS); 864 int sum; 865 866 for( i = sum = 0; i < xsz; i++ ) 867 { 868 int t = cvRound(kx->data.fl[i]*scale); 869 kx->data.i[i] = t; 870 sum += t; 871 } 872 if( scale > 1 ) 873 kx->data.i[xsz/2] += scale - sum; 874 875 for( i = sum = 0; i < ysz; i++ ) 876 { 877 int t = cvRound(ky->data.fl[i]*scale); 878 ky->data.i[i] = t; 879 sum += t; 880 } 881 if( scale > 1 ) 882 ky->data.i[ysz/2] += scale - sum; 883 kx->type = (kx->type & ~CV_MAT_DEPTH_MASK) | CV_32S; 884 ky->type = (ky->type & ~CV_MAT_DEPTH_MASK) | CV_32S; 885 } 886 887 __END__; 888 } 889 890 891 void CvSepFilter::init( int _max_width, int _src_type, int _dst_type, 892 bool _is_separable, CvSize _ksize, 893 CvPoint _anchor, int _border_mode, 894 CvScalar _border_value ) 895 { 896 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable, 897 _ksize, _anchor, _border_mode, _border_value ); 898 } 899 900 901 static void 902 icvFilterRowSymm_8u32s( const uchar* src, int* dst, void* params ) 903 { 904 const CvSepFilter* state = (const CvSepFilter*)params; 905 const CvMat* _kx = state->get_x_kernel(); 906 const int* kx = _kx->data.i; 907 int ksize = _kx->cols + _kx->rows - 1; 908 int i = 0, j, k, width = state->get_width(); 909 int cn = CV_MAT_CN(state->get_src_type()); 910 int ksize2 = ksize/2, ksize2n = ksize2*cn; 911 int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL; 912 const uchar* s = src + ksize2n; 913 914 kx += ksize2; 915 width *= cn; 916 917 if( is_symm ) 918 { 919 if( ksize == 1 && kx[0] == 1 ) 920 { 921 for( i = 0; i <= width - 2; i += 2 ) 922 { 923 int s0 = s[i], s1 = s[i+1]; 924 dst[i] = s0; dst[i+1] = s1; 925 } 926 s += i; 927 } 928 else if( ksize == 3 ) 929 { 930 if( kx[0] == 2 && kx[1] == 1 ) 931 for( ; i <= width - 2; i += 2, s += 2 ) 932 { 933 int s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn]; 934 dst[i] = s0; dst[i+1] = s1; 935 } 936 else if( kx[0] == 10 && kx[1] == 3 ) 937 for( ; i <= width - 2; i += 2, s += 2 ) 938 { 939 int s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3; 940 dst[i] = s0; dst[i+1] = s1; 941 } 942 else if( kx[0] == 2*64 && kx[1] == 1*64 ) 943 for( ; i <= width - 2; i += 2, s += 2 ) 944 { 945 int s0 = (s[0]*2 + s[-cn] + s[cn]) << 6; 946 int s1 = (s[1]*2 + s[1-cn] + s[1+cn]) << 6; 947 dst[i] = s0; dst[i+1] = s1; 948 } 949 else 950 { 951 int k0 = kx[0], k1 = kx[1]; 952 for( ; i <= width - 2; i += 2, s += 2 ) 953 { 954 int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1, s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1; 955 dst[i] = s0; dst[i+1] = s1; 956 } 957 } 958 } 959 else if( ksize == 5 ) 960 { 961 int k0 = kx[0], k1 = kx[1], k2 = kx[2]; 962 if( k0 == 6*16 && k1 == 4*16 && k2 == 1*16 ) 963 for( ; i <= width - 2; i += 2, s += 2 ) 964 { 965 int s0 = (s[0]*6 + (s[-cn] + s[cn])*4 + (s[-cn*2] + s[cn*2])*1) << 4; 966 int s1 = (s[1]*6 + (s[1-cn] + s[1+cn])*4 + (s[1-cn*2] + s[1+cn*2])*1) << 4; 967 dst[i] = s0; dst[i+1] = s1; 968 } 969 else 970 for( ; i <= width - 2; i += 2, s += 2 ) 971 { 972 int s0 = s[0]*k0 + (s[-cn] + s[cn])*k1 + (s[-cn*2] + s[cn*2])*k2; 973 int s1 = s[1]*k0 + (s[1-cn] + s[1+cn])*k1 + (s[1-cn*2] + s[1+cn*2])*k2; 974 dst[i] = s0; dst[i+1] = s1; 975 } 976 } 977 else 978 for( ; i <= width - 4; i += 4, s += 4 ) 979 { 980 int f = kx[0]; 981 int s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3]; 982 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 983 { 984 f = kx[k]; 985 s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]); 986 s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]); 987 } 988 989 dst[i] = s0; dst[i+1] = s1; 990 dst[i+2] = s2; dst[i+3] = s3; 991 } 992 993 for( ; i < width; i++, s++ ) 994 { 995 int s0 = kx[0]*s[0]; 996 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 997 s0 += kx[k]*(s[j] + s[-j]); 998 dst[i] = s0; 999 } 1000 } 1001 else 1002 { 1003 if( ksize == 3 && kx[0] == 0 && kx[1] == 1 ) 1004 for( ; i <= width - 2; i += 2, s += 2 ) 1005 { 1006 int s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn]; 1007 dst[i] = s0; dst[i+1] = s1; 1008 } 1009 else 1010 for( ; i <= width - 4; i += 4, s += 4 ) 1011 { 1012 int s0 = 0, s1 = 0, s2 = 0, s3 = 0; 1013 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1014 { 1015 int f = kx[k]; 1016 s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]); 1017 s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]); 1018 } 1019 1020 dst[i] = s0; dst[i+1] = s1; 1021 dst[i+2] = s2; dst[i+3] = s3; 1022 } 1023 1024 for( ; i < width; i++, s++ ) 1025 { 1026 int s0 = kx[0]*s[0]; 1027 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1028 s0 += kx[k]*(s[j] - s[-j]); 1029 dst[i] = s0; 1030 } 1031 } 1032 } 1033 1034 1035 #define ICV_FILTER_ROW( flavor, srctype, dsttype, load_macro ) \ 1036 static void \ 1037 icvFilterRow_##flavor(const srctype* src, dsttype* dst, void*params)\ 1038 { \ 1039 const CvSepFilter* state = (const CvSepFilter*)params; \ 1040 const CvMat* _kx = state->get_x_kernel(); \ 1041 const dsttype* kx = (const dsttype*)(_kx->data.ptr); \ 1042 int ksize = _kx->cols + _kx->rows - 1; \ 1043 int i = 0, k, width = state->get_width(); \ 1044 int cn = CV_MAT_CN(state->get_src_type()); \ 1045 const srctype* s; \ 1046 \ 1047 width *= cn; \ 1048 \ 1049 for( ; i <= width - 4; i += 4 ) \ 1050 { \ 1051 double f = kx[0]; \ 1052 double s0=f*load_macro(src[i]), s1=f*load_macro(src[i+1]), \ 1053 s2=f*load_macro(src[i+2]), s3=f*load_macro(src[i+3]);\ 1054 for( k = 1, s = src + i + cn; k < ksize; k++, s += cn ) \ 1055 { \ 1056 f = kx[k]; \ 1057 s0 += f*load_macro(s[0]); \ 1058 s1 += f*load_macro(s[1]); \ 1059 s2 += f*load_macro(s[2]); \ 1060 s3 += f*load_macro(s[3]); \ 1061 } \ 1062 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \ 1063 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \ 1064 } \ 1065 \ 1066 for( ; i < width; i++ ) \ 1067 { \ 1068 double s0 = (double)kx[0]*load_macro(src[i]); \ 1069 for( k = 1, s = src + i + cn; k < ksize; k++, s += cn ) \ 1070 s0 += (double)kx[k]*load_macro(s[0]); \ 1071 dst[i] = (dsttype)s0; \ 1072 } \ 1073 } 1074 1075 1076 ICV_FILTER_ROW( 8u32f, uchar, float, CV_8TO32F ) 1077 ICV_FILTER_ROW( 16s32f, short, float, CV_NOP ) 1078 ICV_FILTER_ROW( 16u32f, ushort, float, CV_NOP ) 1079 ICV_FILTER_ROW( 32f, float, float, CV_NOP ) 1080 1081 1082 #define ICV_FILTER_ROW_SYMM( flavor, srctype, dsttype, load_macro ) \ 1083 static void \ 1084 icvFilterRowSymm_##flavor( const srctype* src, \ 1085 dsttype* dst, void* params ) \ 1086 { \ 1087 const CvSepFilter* state = (const CvSepFilter*)params; \ 1088 const CvMat* _kx = state->get_x_kernel(); \ 1089 const dsttype* kx = (const dsttype*)(_kx->data.ptr); \ 1090 int ksize = _kx->cols + _kx->rows - 1; \ 1091 int i = 0, j, k, width = state->get_width(); \ 1092 int cn = CV_MAT_CN(state->get_src_type()); \ 1093 int is_symm=state->get_x_kernel_flags()&CvSepFilter::SYMMETRICAL;\ 1094 int ksize2 = ksize/2, ksize2n = ksize2*cn; \ 1095 const srctype* s = src + ksize2n; \ 1096 \ 1097 kx += ksize2; \ 1098 width *= cn; \ 1099 \ 1100 if( is_symm ) \ 1101 { \ 1102 for( ; i <= width - 4; i += 4, s += 4 ) \ 1103 { \ 1104 double f = kx[0]; \ 1105 double s0=f*load_macro(s[0]), s1=f*load_macro(s[1]), \ 1106 s2=f*load_macro(s[2]), s3=f*load_macro(s[3]); \ 1107 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \ 1108 { \ 1109 f = kx[k]; \ 1110 s0 += f*load_macro(s[j] + s[-j]); \ 1111 s1 += f*load_macro(s[j+1] + s[-j+1]); \ 1112 s2 += f*load_macro(s[j+2] + s[-j+2]); \ 1113 s3 += f*load_macro(s[j+3] + s[-j+3]); \ 1114 } \ 1115 \ 1116 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \ 1117 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \ 1118 } \ 1119 \ 1120 for( ; i < width; i++, s++ ) \ 1121 { \ 1122 double s0 = (double)kx[0]*load_macro(s[0]); \ 1123 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \ 1124 s0 += (double)kx[k]*load_macro(s[j] + s[-j]); \ 1125 dst[i] = (dsttype)s0; \ 1126 } \ 1127 } \ 1128 else \ 1129 { \ 1130 for( ; i <= width - 4; i += 4, s += 4 ) \ 1131 { \ 1132 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \ 1133 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \ 1134 { \ 1135 double f = kx[k]; \ 1136 s0 += f*load_macro(s[j] - s[-j]); \ 1137 s1 += f*load_macro(s[j+1] - s[-j+1]); \ 1138 s2 += f*load_macro(s[j+2] - s[-j+2]); \ 1139 s3 += f*load_macro(s[j+3] - s[-j+3]); \ 1140 } \ 1141 \ 1142 dst[i] = (dsttype)s0; dst[i+1] = (dsttype)s1; \ 1143 dst[i+2] = (dsttype)s2; dst[i+3] = (dsttype)s3; \ 1144 } \ 1145 \ 1146 for( ; i < width; i++, s++ ) \ 1147 { \ 1148 double s0 = 0; \ 1149 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) \ 1150 s0 += (double)kx[k]*load_macro(s[j] - s[-j]); \ 1151 dst[i] = (dsttype)s0; \ 1152 } \ 1153 } \ 1154 } 1155 1156 1157 ICV_FILTER_ROW_SYMM( 8u32f, uchar, float, CV_8TO32F ) 1158 ICV_FILTER_ROW_SYMM( 16s32f, short, float, CV_NOP ) 1159 ICV_FILTER_ROW_SYMM( 16u32f, ushort, float, CV_NOP ) 1160 1161 static void 1162 icvFilterRowSymm_32f( const float* src, float* dst, void* params ) 1163 { 1164 const CvSepFilter* state = (const CvSepFilter*)params; 1165 const CvMat* _kx = state->get_x_kernel(); 1166 const float* kx = _kx->data.fl; 1167 int ksize = _kx->cols + _kx->rows - 1; 1168 int i = 0, j, k, width = state->get_width(); 1169 int cn = CV_MAT_CN(state->get_src_type()); 1170 int ksize2 = ksize/2, ksize2n = ksize2*cn; 1171 int is_symm = state->get_x_kernel_flags() & CvSepFilter::SYMMETRICAL; 1172 const float* s = src + ksize2n; 1173 1174 kx += ksize2; 1175 width *= cn; 1176 1177 if( is_symm ) 1178 { 1179 if( ksize == 3 && fabs(kx[0]-2.) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON ) 1180 for( ; i <= width - 2; i += 2, s += 2 ) 1181 { 1182 float s0 = s[-cn] + s[0]*2 + s[cn], s1 = s[1-cn] + s[1]*2 + s[1+cn]; 1183 dst[i] = s0; dst[i+1] = s1; 1184 } 1185 else if( ksize == 3 && fabs(kx[0]-10.) <= FLT_EPSILON && fabs(kx[1]-3.) <= FLT_EPSILON ) 1186 for( ; i <= width - 2; i += 2, s += 2 ) 1187 { 1188 float s0 = s[0]*10 + (s[-cn] + s[cn])*3, s1 = s[1]*10 + (s[1-cn] + s[1+cn])*3; 1189 dst[i] = s0; dst[i+1] = s1; 1190 } 1191 else 1192 for( ; i <= width - 4; i += 4, s += 4 ) 1193 { 1194 double f = kx[0]; 1195 double s0 = f*s[0], s1 = f*s[1], s2 = f*s[2], s3 = f*s[3]; 1196 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1197 { 1198 f = kx[k]; 1199 s0 += f*(s[j] + s[-j]); s1 += f*(s[j+1] + s[-j+1]); 1200 s2 += f*(s[j+2] + s[-j+2]); s3 += f*(s[j+3] + s[-j+3]); 1201 } 1202 1203 dst[i] = (float)s0; dst[i+1] = (float)s1; 1204 dst[i+2] = (float)s2; dst[i+3] = (float)s3; 1205 } 1206 1207 for( ; i < width; i++, s++ ) 1208 { 1209 double s0 = (double)kx[0]*s[0]; 1210 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1211 s0 += (double)kx[k]*(s[j] + s[-j]); 1212 dst[i] = (float)s0; 1213 } 1214 } 1215 else 1216 { 1217 if( ksize == 3 && fabs(kx[0]) <= FLT_EPSILON && fabs(kx[1]-1.) <= FLT_EPSILON ) 1218 for( ; i <= width - 2; i += 2, s += 2 ) 1219 { 1220 float s0 = s[cn] - s[-cn], s1 = s[1+cn] - s[1-cn]; 1221 dst[i] = s0; dst[i+1] = s1; 1222 } 1223 else 1224 for( ; i <= width - 4; i += 4, s += 4 ) 1225 { 1226 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; 1227 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1228 { 1229 double f = kx[k]; 1230 s0 += f*(s[j] - s[-j]); s1 += f*(s[j+1] - s[-j+1]); 1231 s2 += f*(s[j+2] - s[-j+2]); s3 += f*(s[j+3] - s[-j+3]); 1232 } 1233 1234 dst[i] = (float)s0; dst[i+1] = (float)s1; 1235 dst[i+2] = (float)s2; dst[i+3] = (float)s3; 1236 } 1237 1238 for( ; i < width; i++, s++ ) 1239 { 1240 double s0 = (double)kx[0]*s[0]; 1241 for( k = 1, j = cn; k <= ksize2; k++, j += cn ) 1242 s0 += (double)kx[k]*(s[j] - s[-j]); 1243 dst[i] = (float)s0; 1244 } 1245 } 1246 } 1247 1248 1249 static void 1250 icvFilterColSymm_32s8u( const int** src, uchar* dst, int dst_step, int count, void* params ) 1251 { 1252 const CvSepFilter* state = (const CvSepFilter*)params; 1253 const CvMat* _ky = state->get_y_kernel(); 1254 const int* ky = _ky->data.i; 1255 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2; 1256 int i, k, width = state->get_width(); 1257 int cn = CV_MAT_CN(state->get_src_type()); 1258 1259 width *= cn; 1260 src += ksize2; 1261 ky += ksize2; 1262 1263 for( ; count--; dst += dst_step, src++ ) 1264 { 1265 if( ksize == 3 ) 1266 { 1267 const int* sptr0 = src[-1], *sptr1 = src[0], *sptr2 = src[1]; 1268 int k0 = ky[0], k1 = ky[1]; 1269 for( i = 0; i <= width - 2; i += 2 ) 1270 { 1271 int s0 = sptr1[i]*k0 + (sptr0[i] + sptr2[i])*k1; 1272 int s1 = sptr1[i+1]*k0 + (sptr0[i+1] + sptr2[i+1])*k1; 1273 s0 = CV_DESCALE(s0, FILTER_BITS*2); 1274 s1 = CV_DESCALE(s1, FILTER_BITS*2); 1275 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1; 1276 } 1277 } 1278 else if( ksize == 5 ) 1279 { 1280 const int* sptr0 = src[-2], *sptr1 = src[-1], 1281 *sptr2 = src[0], *sptr3 = src[1], *sptr4 = src[2]; 1282 int k0 = ky[0], k1 = ky[1], k2 = ky[2]; 1283 for( i = 0; i <= width - 2; i += 2 ) 1284 { 1285 int s0 = sptr2[i]*k0 + (sptr1[i] + sptr3[i])*k1 + (sptr0[i] + sptr4[i])*k2; 1286 int s1 = sptr2[i+1]*k0 + (sptr1[i+1] + sptr3[i+1])*k1 + (sptr0[i+1] + sptr4[i+1])*k2; 1287 s0 = CV_DESCALE(s0, FILTER_BITS*2); 1288 s1 = CV_DESCALE(s1, FILTER_BITS*2); 1289 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1; 1290 } 1291 } 1292 else 1293 for( i = 0; i <= width - 4; i += 4 ) 1294 { 1295 int f = ky[0]; 1296 const int* sptr = src[0] + i, *sptr2; 1297 int s0 = f*sptr[0], s1 = f*sptr[1], s2 = f*sptr[2], s3 = f*sptr[3]; 1298 for( k = 1; k <= ksize2; k++ ) 1299 { 1300 sptr = src[k] + i; 1301 sptr2 = src[-k] + i; 1302 f = ky[k]; 1303 s0 += f*(sptr[0] + sptr2[0]); 1304 s1 += f*(sptr[1] + sptr2[1]); 1305 s2 += f*(sptr[2] + sptr2[2]); 1306 s3 += f*(sptr[3] + sptr2[3]); 1307 } 1308 1309 s0 = CV_DESCALE(s0, FILTER_BITS*2); 1310 s1 = CV_DESCALE(s1, FILTER_BITS*2); 1311 dst[i] = (uchar)s0; dst[i+1] = (uchar)s1; 1312 s2 = CV_DESCALE(s2, FILTER_BITS*2); 1313 s3 = CV_DESCALE(s3, FILTER_BITS*2); 1314 dst[i+2] = (uchar)s2; dst[i+3] = (uchar)s3; 1315 } 1316 1317 for( ; i < width; i++ ) 1318 { 1319 int s0 = ky[0]*src[0][i]; 1320 for( k = 1; k <= ksize2; k++ ) 1321 s0 += ky[k]*(src[k][i] + src[-k][i]); 1322 1323 s0 = CV_DESCALE(s0, FILTER_BITS*2); 1324 dst[i] = (uchar)s0; 1325 } 1326 } 1327 } 1328 1329 1330 static void 1331 icvFilterColSymm_32s16s( const int** src, short* dst, 1332 int dst_step, int count, void* params ) 1333 { 1334 const CvSepFilter* state = (const CvSepFilter*)params; 1335 const CvMat* _ky = state->get_y_kernel(); 1336 const int* ky = (const int*)_ky->data.ptr; 1337 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2; 1338 int i = 0, k, width = state->get_width(); 1339 int cn = CV_MAT_CN(state->get_src_type()); 1340 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL; 1341 int is_1_2_1 = is_symm && ksize == 3 && ky[1] == 2 && ky[2] == 1; 1342 int is_3_10_3 = is_symm && ksize == 3 && ky[1] == 10 && ky[2] == 3; 1343 int is_m1_0_1 = !is_symm && ksize == 3 && ky[1] == 0 && 1344 ky[2]*ky[2] == 1 ? (ky[2] > 0 ? 1 : -1) : 0; 1345 1346 width *= cn; 1347 src += ksize2; 1348 ky += ksize2; 1349 dst_step /= sizeof(dst[0]); 1350 1351 if( is_symm ) 1352 { 1353 for( ; count--; dst += dst_step, src++ ) 1354 { 1355 if( is_1_2_1 ) 1356 { 1357 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1]; 1358 1359 for( i = 0; i <= width - 2; i += 2 ) 1360 { 1361 int s0 = src0[i] + src1[i]*2 + src2[i], 1362 s1 = src0[i+1] + src1[i+1]*2 + src2[i+1]; 1363 1364 dst[i] = (short)s0; dst[i+1] = (short)s1; 1365 } 1366 } 1367 else if( is_3_10_3 ) 1368 { 1369 const int *src0 = src[-1], *src1 = src[0], *src2 = src[1]; 1370 1371 for( i = 0; i <= width - 2; i += 2 ) 1372 { 1373 int s0 = src1[i]*10 + (src0[i] + src2[i])*3, 1374 s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3; 1375 1376 dst[i] = (short)s0; dst[i+1] = (short)s1; 1377 } 1378 } 1379 else 1380 for( i = 0; i <= width - 4; i += 4 ) 1381 { 1382 int f = ky[0]; 1383 const int* sptr = src[0] + i, *sptr2; 1384 int s0 = f*sptr[0], s1 = f*sptr[1], 1385 s2 = f*sptr[2], s3 = f*sptr[3]; 1386 for( k = 1; k <= ksize2; k++ ) 1387 { 1388 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k]; 1389 s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]); 1390 s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]); 1391 } 1392 1393 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1); 1394 dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3); 1395 } 1396 1397 for( ; i < width; i++ ) 1398 { 1399 int s0 = ky[0]*src[0][i]; 1400 for( k = 1; k <= ksize2; k++ ) 1401 s0 += ky[k]*(src[k][i] + src[-k][i]); 1402 dst[i] = CV_CAST_16S(s0); 1403 } 1404 } 1405 } 1406 else 1407 { 1408 for( ; count--; dst += dst_step, src++ ) 1409 { 1410 if( is_m1_0_1 ) 1411 { 1412 const int *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1]; 1413 1414 for( i = 0; i <= width - 2; i += 2 ) 1415 { 1416 int s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1]; 1417 dst[i] = (short)s0; dst[i+1] = (short)s1; 1418 } 1419 } 1420 else 1421 for( i = 0; i <= width - 4; i += 4 ) 1422 { 1423 int f = ky[0]; 1424 const int* sptr = src[0] + i, *sptr2; 1425 int s0 = 0, s1 = 0, s2 = 0, s3 = 0; 1426 for( k = 1; k <= ksize2; k++ ) 1427 { 1428 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k]; 1429 s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]); 1430 s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]); 1431 } 1432 1433 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1); 1434 dst[i+2] = CV_CAST_16S(s2); dst[i+3] = CV_CAST_16S(s3); 1435 } 1436 1437 for( ; i < width; i++ ) 1438 { 1439 int s0 = ky[0]*src[0][i]; 1440 for( k = 1; k <= ksize2; k++ ) 1441 s0 += ky[k]*(src[k][i] - src[-k][i]); 1442 dst[i] = CV_CAST_16S(s0); 1443 } 1444 } 1445 } 1446 } 1447 1448 1449 #define ICV_FILTER_COL( flavor, srctype, dsttype, worktype, \ 1450 cast_macro1, cast_macro2 ) \ 1451 static void \ 1452 icvFilterCol_##flavor( const srctype** src, dsttype* dst, \ 1453 int dst_step, int count, void* params ) \ 1454 { \ 1455 const CvSepFilter* state = (const CvSepFilter*)params; \ 1456 const CvMat* _ky = state->get_y_kernel(); \ 1457 const srctype* ky = (const srctype*)_ky->data.ptr; \ 1458 int ksize = _ky->cols + _ky->rows - 1; \ 1459 int i, k, width = state->get_width(); \ 1460 int cn = CV_MAT_CN(state->get_src_type()); \ 1461 \ 1462 width *= cn; \ 1463 dst_step /= sizeof(dst[0]); \ 1464 \ 1465 for( ; count--; dst += dst_step, src++ ) \ 1466 { \ 1467 for( i = 0; i <= width - 4; i += 4 ) \ 1468 { \ 1469 double f = ky[0]; \ 1470 const srctype* sptr = src[0] + i; \ 1471 double s0 = f*sptr[0], s1 = f*sptr[1], \ 1472 s2 = f*sptr[2], s3 = f*sptr[3]; \ 1473 worktype t0, t1; \ 1474 for( k = 1; k < ksize; k++ ) \ 1475 { \ 1476 sptr = src[k] + i; f = ky[k]; \ 1477 s0 += f*sptr[0]; s1 += f*sptr[1]; \ 1478 s2 += f*sptr[2]; s3 += f*sptr[3]; \ 1479 } \ 1480 \ 1481 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \ 1482 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \ 1483 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \ 1484 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \ 1485 } \ 1486 \ 1487 for( ; i < width; i++ ) \ 1488 { \ 1489 double s0 = (double)ky[0]*src[0][i]; \ 1490 worktype t0; \ 1491 for( k = 1; k < ksize; k++ ) \ 1492 s0 += (double)ky[k]*src[k][i]; \ 1493 t0 = cast_macro1(s0); \ 1494 dst[i] = cast_macro2(t0); \ 1495 } \ 1496 } \ 1497 } 1498 1499 1500 ICV_FILTER_COL( 32f8u, float, uchar, int, cvRound, CV_CAST_8U ) 1501 ICV_FILTER_COL( 32f16s, float, short, int, cvRound, CV_CAST_16S ) 1502 ICV_FILTER_COL( 32f16u, float, ushort, int, cvRound, CV_CAST_16U ) 1503 1504 #define ICV_FILTER_COL_SYMM( flavor, srctype, dsttype, worktype, \ 1505 cast_macro1, cast_macro2 ) \ 1506 static void \ 1507 icvFilterColSymm_##flavor( const srctype** src, dsttype* dst, \ 1508 int dst_step, int count, void* params ) \ 1509 { \ 1510 const CvSepFilter* state = (const CvSepFilter*)params; \ 1511 const CvMat* _ky = state->get_y_kernel(); \ 1512 const srctype* ky = (const srctype*)_ky->data.ptr; \ 1513 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2; \ 1514 int i, k, width = state->get_width(); \ 1515 int cn = CV_MAT_CN(state->get_src_type()); \ 1516 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL;\ 1517 \ 1518 width *= cn; \ 1519 src += ksize2; \ 1520 ky += ksize2; \ 1521 dst_step /= sizeof(dst[0]); \ 1522 \ 1523 if( is_symm ) \ 1524 { \ 1525 for( ; count--; dst += dst_step, src++ ) \ 1526 { \ 1527 for( i = 0; i <= width - 4; i += 4 ) \ 1528 { \ 1529 double f = ky[0]; \ 1530 const srctype* sptr = src[0] + i, *sptr2; \ 1531 double s0 = f*sptr[0], s1 = f*sptr[1], \ 1532 s2 = f*sptr[2], s3 = f*sptr[3]; \ 1533 worktype t0, t1; \ 1534 for( k = 1; k <= ksize2; k++ ) \ 1535 { \ 1536 sptr = src[k] + i; \ 1537 sptr2 = src[-k] + i; \ 1538 f = ky[k]; \ 1539 s0 += f*(sptr[0] + sptr2[0]); \ 1540 s1 += f*(sptr[1] + sptr2[1]); \ 1541 s2 += f*(sptr[2] + sptr2[2]); \ 1542 s3 += f*(sptr[3] + sptr2[3]); \ 1543 } \ 1544 \ 1545 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \ 1546 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \ 1547 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \ 1548 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \ 1549 } \ 1550 \ 1551 for( ; i < width; i++ ) \ 1552 { \ 1553 double s0 = (double)ky[0]*src[0][i]; \ 1554 worktype t0; \ 1555 for( k = 1; k <= ksize2; k++ ) \ 1556 s0 += (double)ky[k]*(src[k][i] + src[-k][i]); \ 1557 t0 = cast_macro1(s0); \ 1558 dst[i] = cast_macro2(t0); \ 1559 } \ 1560 } \ 1561 } \ 1562 else \ 1563 { \ 1564 for( ; count--; dst += dst_step, src++ ) \ 1565 { \ 1566 for( i = 0; i <= width - 4; i += 4 ) \ 1567 { \ 1568 double f = ky[0]; \ 1569 const srctype* sptr = src[0] + i, *sptr2; \ 1570 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \ 1571 worktype t0, t1; \ 1572 for( k = 1; k <= ksize2; k++ ) \ 1573 { \ 1574 sptr = src[k] + i; \ 1575 sptr2 = src[-k] + i; \ 1576 f = ky[k]; \ 1577 s0 += f*(sptr[0] - sptr2[0]); \ 1578 s1 += f*(sptr[1] - sptr2[1]); \ 1579 s2 += f*(sptr[2] - sptr2[2]); \ 1580 s3 += f*(sptr[3] - sptr2[3]); \ 1581 } \ 1582 \ 1583 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \ 1584 dst[i]=cast_macro2(t0); dst[i+1]=cast_macro2(t1); \ 1585 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \ 1586 dst[i+2]=cast_macro2(t0); dst[i+3]=cast_macro2(t1); \ 1587 } \ 1588 \ 1589 for( ; i < width; i++ ) \ 1590 { \ 1591 double s0 = (double)ky[0]*src[0][i]; \ 1592 worktype t0; \ 1593 for( k = 1; k <= ksize2; k++ ) \ 1594 s0 += (double)ky[k]*(src[k][i] - src[-k][i]); \ 1595 t0 = cast_macro1(s0); \ 1596 dst[i] = cast_macro2(t0); \ 1597 } \ 1598 } \ 1599 } \ 1600 } 1601 1602 1603 ICV_FILTER_COL_SYMM( 32f8u, float, uchar, int, cvRound, CV_CAST_8U ) 1604 ICV_FILTER_COL_SYMM( 32f16s, float, short, int, cvRound, CV_CAST_16S ) 1605 ICV_FILTER_COL_SYMM( 32f16u, float, ushort, int, cvRound, CV_CAST_16U ) 1606 1607 1608 static void 1609 icvFilterCol_32f( const float** src, float* dst, 1610 int dst_step, int count, void* params ) 1611 { 1612 const CvSepFilter* state = (const CvSepFilter*)params; 1613 const CvMat* _ky = state->get_y_kernel(); 1614 const float* ky = (const float*)_ky->data.ptr; 1615 int ksize = _ky->cols + _ky->rows - 1; 1616 int i, k, width = state->get_width(); 1617 int cn = CV_MAT_CN(state->get_src_type()); 1618 1619 width *= cn; 1620 dst_step /= sizeof(dst[0]); 1621 1622 for( ; count--; dst += dst_step, src++ ) 1623 { 1624 for( i = 0; i <= width - 4; i += 4 ) 1625 { 1626 double f = ky[0]; 1627 const float* sptr = src[0] + i; 1628 double s0 = f*sptr[0], s1 = f*sptr[1], 1629 s2 = f*sptr[2], s3 = f*sptr[3]; 1630 for( k = 1; k < ksize; k++ ) 1631 { 1632 sptr = src[k] + i; f = ky[k]; 1633 s0 += f*sptr[0]; s1 += f*sptr[1]; 1634 s2 += f*sptr[2]; s3 += f*sptr[3]; 1635 } 1636 1637 dst[i] = (float)s0; dst[i+1] = (float)s1; 1638 dst[i+2] = (float)s2; dst[i+3] = (float)s3; 1639 } 1640 1641 for( ; i < width; i++ ) 1642 { 1643 double s0 = (double)ky[0]*src[0][i]; 1644 for( k = 1; k < ksize; k++ ) 1645 s0 += (double)ky[k]*src[k][i]; 1646 dst[i] = (float)s0; 1647 } 1648 } 1649 } 1650 1651 1652 static void 1653 icvFilterColSymm_32f( const float** src, float* dst, 1654 int dst_step, int count, void* params ) 1655 { 1656 const CvSepFilter* state = (const CvSepFilter*)params; 1657 const CvMat* _ky = state->get_y_kernel(); 1658 const float* ky = (const float*)_ky->data.ptr; 1659 int ksize = _ky->cols + _ky->rows - 1, ksize2 = ksize/2; 1660 int i = 0, k, width = state->get_width(); 1661 int cn = CV_MAT_CN(state->get_src_type()); 1662 int is_symm = state->get_y_kernel_flags() & CvSepFilter::SYMMETRICAL; 1663 int is_1_2_1 = is_symm && ksize == 3 && 1664 fabs(ky[1] - 2.) <= FLT_EPSILON && fabs(ky[2] - 1.) <= FLT_EPSILON; 1665 int is_3_10_3 = is_symm && ksize == 3 && 1666 fabs(ky[1] - 10.) <= FLT_EPSILON && fabs(ky[2] - 3.) <= FLT_EPSILON; 1667 int is_m1_0_1 = !is_symm && ksize == 3 && 1668 fabs(ky[1]) <= FLT_EPSILON && fabs(ky[2]*ky[2] - 1.) <= FLT_EPSILON ? 1669 (ky[2] > 0 ? 1 : -1) : 0; 1670 1671 width *= cn; 1672 src += ksize2; 1673 ky += ksize2; 1674 dst_step /= sizeof(dst[0]); 1675 1676 if( is_symm ) 1677 { 1678 for( ; count--; dst += dst_step, src++ ) 1679 { 1680 if( is_1_2_1 ) 1681 { 1682 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1]; 1683 1684 for( i = 0; i <= width - 4; i += 4 ) 1685 { 1686 float s0 = src0[i] + src1[i]*2 + src2[i], 1687 s1 = src0[i+1] + src1[i+1]*2 + src2[i+1], 1688 s2 = src0[i+2] + src1[i+2]*2 + src2[i+2], 1689 s3 = src0[i+3] + src1[i+3]*2 + src2[i+3]; 1690 1691 dst[i] = s0; dst[i+1] = s1; 1692 dst[i+2] = s2; dst[i+3] = s3; 1693 } 1694 } 1695 else if( is_3_10_3 ) 1696 { 1697 const float *src0 = src[-1], *src1 = src[0], *src2 = src[1]; 1698 1699 for( i = 0; i <= width - 4; i += 4 ) 1700 { 1701 float s0 = src1[i]*10 + (src0[i] + src2[i])*3, 1702 s1 = src1[i+1]*10 + (src0[i+1] + src2[i+1])*3, 1703 s2 = src1[i+2]*10 + (src0[i+2] + src2[i+2])*3, 1704 s3 = src1[i+3]*10 + (src0[i+3] + src2[i+3])*3; 1705 1706 dst[i] = s0; dst[i+1] = s1; 1707 dst[i+2] = s2; dst[i+3] = s3; 1708 } 1709 } 1710 else 1711 for( i = 0; i <= width - 4; i += 4 ) 1712 { 1713 double f = ky[0]; 1714 const float* sptr = src[0] + i, *sptr2; 1715 double s0 = f*sptr[0], s1 = f*sptr[1], 1716 s2 = f*sptr[2], s3 = f*sptr[3]; 1717 for( k = 1; k <= ksize2; k++ ) 1718 { 1719 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k]; 1720 s0 += f*(sptr[0] + sptr2[0]); s1 += f*(sptr[1] + sptr2[1]); 1721 s2 += f*(sptr[2] + sptr2[2]); s3 += f*(sptr[3] + sptr2[3]); 1722 } 1723 1724 dst[i] = (float)s0; dst[i+1] = (float)s1; 1725 dst[i+2] = (float)s2; dst[i+3] = (float)s3; 1726 } 1727 1728 for( ; i < width; i++ ) 1729 { 1730 double s0 = (double)ky[0]*src[0][i]; 1731 for( k = 1; k <= ksize2; k++ ) 1732 s0 += (double)ky[k]*(src[k][i] + src[-k][i]); 1733 dst[i] = (float)s0; 1734 } 1735 } 1736 } 1737 else 1738 { 1739 for( ; count--; dst += dst_step, src++ ) 1740 { 1741 if( is_m1_0_1 ) 1742 { 1743 const float *src0 = src[-is_m1_0_1], *src2 = src[is_m1_0_1]; 1744 1745 for( i = 0; i <= width - 4; i += 4 ) 1746 { 1747 float s0 = src2[i] - src0[i], s1 = src2[i+1] - src0[i+1], 1748 s2 = src2[i+2] - src0[i+2], s3 = src2[i+3] - src0[i+3]; 1749 dst[i] = s0; dst[i+1] = s1; 1750 dst[i+2] = s2; dst[i+3] = s3; 1751 } 1752 } 1753 else 1754 for( i = 0; i <= width - 4; i += 4 ) 1755 { 1756 double f = ky[0]; 1757 const float* sptr = src[0] + i, *sptr2; 1758 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; 1759 for( k = 1; k <= ksize2; k++ ) 1760 { 1761 sptr = src[k] + i; sptr2 = src[-k] + i; f = ky[k]; 1762 s0 += f*(sptr[0] - sptr2[0]); s1 += f*(sptr[1] - sptr2[1]); 1763 s2 += f*(sptr[2] - sptr2[2]); s3 += f*(sptr[3] - sptr2[3]); 1764 } 1765 1766 dst[i] = (float)s0; dst[i+1] = (float)s1; 1767 dst[i+2] = (float)s2; dst[i+3] = (float)s3; 1768 } 1769 1770 for( ; i < width; i++ ) 1771 { 1772 double s0 = (double)ky[0]*src[0][i]; 1773 for( k = 1; k <= ksize2; k++ ) 1774 s0 += (double)ky[k]*(src[k][i] - src[-k][i]); 1775 dst[i] = (float)s0; 1776 } 1777 } 1778 } 1779 } 1780 1781 1782 #define SMALL_GAUSSIAN_SIZE 7 1783 1784 void CvSepFilter::init_gaussian_kernel( CvMat* kernel, double sigma ) 1785 { 1786 static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE/2+1] = 1787 { 1788 {1.f}, 1789 {0.5f, 0.25f}, 1790 {0.375f, 0.25f, 0.0625f}, 1791 {0.28125f, 0.21875f, 0.109375f, 0.03125f} 1792 }; 1793 1794 CV_FUNCNAME( "CvSepFilter::init_gaussian_kernel" ); 1795 1796 __BEGIN__; 1797 1798 int type, i, n, step; 1799 const float* fixed_kernel = 0; 1800 double sigmaX, scale2X, sum; 1801 float* cf; 1802 double* cd; 1803 1804 if( !CV_IS_MAT(kernel) ) 1805 CV_ERROR( CV_StsBadArg, "kernel is not a valid matrix" ); 1806 1807 type = CV_MAT_TYPE(kernel->type); 1808 1809 if( (kernel->cols != 1 && kernel->rows != 1) || 1810 (kernel->cols + kernel->rows - 1) % 2 == 0 || 1811 (type != CV_32FC1 && type != CV_64FC1) ) 1812 CV_ERROR( CV_StsBadSize, "kernel should be 1D floating-point vector of odd (2*k+1) size" ); 1813 1814 n = kernel->cols + kernel->rows - 1; 1815 1816 if( n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ) 1817 fixed_kernel = small_gaussian_tab[n>>1]; 1818 1819 sigmaX = sigma > 0 ? sigma : (n/2 - 1)*0.3 + 0.8; 1820 scale2X = -0.5/(sigmaX*sigmaX); 1821 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type); 1822 cf = kernel->data.fl; 1823 cd = kernel->data.db; 1824 1825 sum = fixed_kernel ? -fixed_kernel[0] : -1.; 1826 1827 for( i = 0; i <= n/2; i++ ) 1828 { 1829 double t = fixed_kernel ? (double)fixed_kernel[i] : exp(scale2X*i*i); 1830 if( type == CV_32FC1 ) 1831 { 1832 cf[(n/2+i)*step] = (float)t; 1833 sum += cf[(n/2+i)*step]*2; 1834 } 1835 else 1836 { 1837 cd[(n/2+i)*step] = t; 1838 sum += cd[(n/2+i)*step]*2; 1839 } 1840 } 1841 1842 sum = 1./sum; 1843 for( i = 0; i <= n/2; i++ ) 1844 { 1845 if( type == CV_32FC1 ) 1846 cf[(n/2+i)*step] = cf[(n/2-i)*step] = (float)(cf[(n/2+i)*step]*sum); 1847 else 1848 cd[(n/2+i)*step] = cd[(n/2-i)*step] = cd[(n/2+i)*step]*sum; 1849 } 1850 1851 __END__; 1852 } 1853 1854 1855 void CvSepFilter::init_sobel_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags ) 1856 { 1857 CV_FUNCNAME( "CvSepFilter::init_sobel_kernel" ); 1858 1859 __BEGIN__; 1860 1861 int i, j, k, msz; 1862 int* kerI; 1863 bool normalize = (flags & NORMALIZE_KERNEL) != 0; 1864 bool flip = (flags & FLIP_KERNEL) != 0; 1865 1866 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) ) 1867 CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" ); 1868 1869 msz = MAX( _kx->cols + _kx->rows, _ky->cols + _ky->rows ); 1870 if( msz > 32 ) 1871 CV_ERROR( CV_StsOutOfRange, "Too large kernel size" ); 1872 1873 kerI = (int*)cvStackAlloc( msz*sizeof(kerI[0]) ); 1874 1875 if( dx < 0 || dy < 0 || dx+dy <= 0 ) 1876 CV_ERROR( CV_StsOutOfRange, 1877 "Both derivative orders (dx and dy) must be non-negative " 1878 "and at least one of them must be positive." ); 1879 1880 for( k = 0; k < 2; k++ ) 1881 { 1882 CvMat* kernel = k == 0 ? _kx : _ky; 1883 int order = k == 0 ? dx : dy; 1884 int n = kernel->cols + kernel->rows - 1, step; 1885 int type = CV_MAT_TYPE(kernel->type); 1886 double scale = !normalize ? 1. : 1./(1 << (n-order-1)); 1887 int iscale = 1; 1888 1889 if( (kernel->cols != 1 && kernel->rows != 1) || 1890 (kernel->cols + kernel->rows - 1) % 2 == 0 || 1891 (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) ) 1892 CV_ERROR( CV_StsOutOfRange, 1893 "Both kernels must be 1D floating-point or integer vectors of odd (2*k+1) size." ); 1894 1895 if( normalize && n > 1 && type == CV_32SC1 ) 1896 CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" ); 1897 1898 if( n <= order ) 1899 CV_ERROR( CV_StsOutOfRange, 1900 "Derivative order must be smaller than the corresponding kernel size" ); 1901 1902 if( n == 1 ) 1903 kerI[0] = 1; 1904 else if( n == 3 ) 1905 { 1906 if( order == 0 ) 1907 kerI[0] = 1, kerI[1] = 2, kerI[2] = 1; 1908 else if( order == 1 ) 1909 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1; 1910 else 1911 kerI[0] = 1, kerI[1] = -2, kerI[2] = 1; 1912 } 1913 else 1914 { 1915 int oldval, newval; 1916 kerI[0] = 1; 1917 for( i = 0; i < n; i++ ) 1918 kerI[i+1] = 0; 1919 1920 for( i = 0; i < n - order - 1; i++ ) 1921 { 1922 oldval = kerI[0]; 1923 for( j = 1; j <= n; j++ ) 1924 { 1925 newval = kerI[j]+kerI[j-1]; 1926 kerI[j-1] = oldval; 1927 oldval = newval; 1928 } 1929 } 1930 1931 for( i = 0; i < order; i++ ) 1932 { 1933 oldval = -kerI[0]; 1934 for( j = 1; j <= n; j++ ) 1935 { 1936 newval = kerI[j-1] - kerI[j]; 1937 kerI[j-1] = oldval; 1938 oldval = newval; 1939 } 1940 } 1941 } 1942 1943 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type); 1944 if( flip && (order & 1) && k ) 1945 iscale = -iscale, scale = -scale; 1946 1947 for( i = 0; i < n; i++ ) 1948 { 1949 if( type == CV_32SC1 ) 1950 kernel->data.i[i*step] = kerI[i]*iscale; 1951 else if( type == CV_32FC1 ) 1952 kernel->data.fl[i*step] = (float)(kerI[i]*scale); 1953 else 1954 kernel->data.db[i*step] = kerI[i]*scale; 1955 } 1956 } 1957 1958 __END__; 1959 } 1960 1961 1962 void CvSepFilter::init_scharr_kernel( CvMat* _kx, CvMat* _ky, int dx, int dy, int flags ) 1963 { 1964 CV_FUNCNAME( "CvSepFilter::init_scharr_kernel" ); 1965 1966 __BEGIN__; 1967 1968 int i, k; 1969 int kerI[3]; 1970 bool normalize = (flags & NORMALIZE_KERNEL) != 0; 1971 bool flip = (flags & FLIP_KERNEL) != 0; 1972 1973 if( !CV_IS_MAT(_kx) || !CV_IS_MAT(_ky) ) 1974 CV_ERROR( CV_StsBadArg, "One of the kernel matrices is not valid" ); 1975 1976 if( ((dx|dy)&~1) || dx+dy != 1 ) 1977 CV_ERROR( CV_StsOutOfRange, 1978 "Scharr kernel can only be used for 1st order derivatives" ); 1979 1980 for( k = 0; k < 2; k++ ) 1981 { 1982 CvMat* kernel = k == 0 ? _kx : _ky; 1983 int order = k == 0 ? dx : dy; 1984 int n = kernel->cols + kernel->rows - 1, step; 1985 int type = CV_MAT_TYPE(kernel->type); 1986 double scale = !normalize ? 1. : order == 0 ? 1./16 : 1./2; 1987 int iscale = 1; 1988 1989 if( (kernel->cols != 1 && kernel->rows != 1) || 1990 kernel->cols + kernel->rows - 1 != 3 || 1991 (type != CV_32FC1 && type != CV_64FC1 && type != CV_32SC1) ) 1992 CV_ERROR( CV_StsOutOfRange, 1993 "Both kernels must be 1D floating-point or integer vectors containing 3 elements each." ); 1994 1995 if( normalize && type == CV_32SC1 ) 1996 CV_ERROR( CV_StsBadArg, "Integer kernel can not be normalized" ); 1997 1998 if( order == 0 ) 1999 kerI[0] = 3, kerI[1] = 10, kerI[2] = 3; 2000 else 2001 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1; 2002 2003 step = kernel->rows == 1 ? 1 : kernel->step/CV_ELEM_SIZE1(type); 2004 if( flip && (order & 1) && k ) 2005 iscale = -iscale, scale = -scale; 2006 2007 for( i = 0; i < n; i++ ) 2008 { 2009 if( type == CV_32SC1 ) 2010 kernel->data.i[i*step] = kerI[i]*iscale; 2011 else if( type == CV_32FC1 ) 2012 kernel->data.fl[i*step] = (float)(kerI[i]*scale); 2013 else 2014 kernel->data.db[i*step] = kerI[i]*scale; 2015 } 2016 } 2017 2018 __END__; 2019 } 2020 2021 2022 void CvSepFilter::init_deriv( int _max_width, int _src_type, int _dst_type, 2023 int dx, int dy, int aperture_size, int flags ) 2024 { 2025 CV_FUNCNAME( "CvSepFilter::init_deriv" ); 2026 2027 __BEGIN__; 2028 2029 int kx_size = aperture_size == CV_SCHARR ? 3 : aperture_size, ky_size = kx_size; 2030 float kx_data[CV_MAX_SOBEL_KSIZE], ky_data[CV_MAX_SOBEL_KSIZE]; 2031 CvMat _kx, _ky; 2032 2033 if( kx_size <= 0 || ky_size > CV_MAX_SOBEL_KSIZE ) 2034 CV_ERROR( CV_StsOutOfRange, "Incorrect aperture_size" ); 2035 2036 if( kx_size == 1 && dx ) 2037 kx_size = 3; 2038 if( ky_size == 1 && dy ) 2039 ky_size = 3; 2040 2041 _kx = cvMat( 1, kx_size, CV_32FC1, kx_data ); 2042 _ky = cvMat( 1, ky_size, CV_32FC1, ky_data ); 2043 2044 if( aperture_size == CV_SCHARR ) 2045 { 2046 CV_CALL( init_scharr_kernel( &_kx, &_ky, dx, dy, flags )); 2047 } 2048 else 2049 { 2050 CV_CALL( init_sobel_kernel( &_kx, &_ky, dx, dy, flags )); 2051 } 2052 2053 CV_CALL( init( _max_width, _src_type, _dst_type, &_kx, &_ky )); 2054 2055 __END__; 2056 } 2057 2058 2059 void CvSepFilter::init_gaussian( int _max_width, int _src_type, int _dst_type, 2060 int gaussian_size, double sigma ) 2061 { 2062 float* kdata = 0; 2063 2064 CV_FUNCNAME( "CvSepFilter::init_gaussian" ); 2065 2066 __BEGIN__; 2067 2068 CvMat _kernel; 2069 2070 if( gaussian_size <= 0 || gaussian_size > 1024 ) 2071 CV_ERROR( CV_StsBadSize, "Incorrect size of gaussian kernel" ); 2072 2073 kdata = (float*)cvStackAlloc(gaussian_size*sizeof(kdata[0])); 2074 _kernel = cvMat( 1, gaussian_size, CV_32F, kdata ); 2075 2076 CV_CALL( init_gaussian_kernel( &_kernel, sigma )); 2077 CV_CALL( init( _max_width, _src_type, _dst_type, &_kernel, &_kernel )); 2078 2079 __END__; 2080 } 2081 2082 2083 /****************************************************************************************\ 2084 Non-separable Linear Filter 2085 \****************************************************************************************/ 2086 2087 static void icvLinearFilter_8u( const uchar** src, uchar* dst, int dst_step, 2088 int count, void* params ); 2089 static void icvLinearFilter_16s( const short** src, short* dst, int dst_step, 2090 int count, void* params ); 2091 static void icvLinearFilter_16u( const ushort** src, ushort* dst, int dst_step, 2092 int count, void* params ); 2093 static void icvLinearFilter_32f( const float** src, float* dst, int dst_step, 2094 int count, void* params ); 2095 2096 CvLinearFilter::CvLinearFilter() 2097 { 2098 kernel = 0; 2099 k_sparse = 0; 2100 } 2101 2102 CvLinearFilter::CvLinearFilter( int _max_width, int _src_type, int _dst_type, 2103 const CvMat* _kernel, CvPoint _anchor, 2104 int _border_mode, CvScalar _border_value ) 2105 { 2106 kernel = 0; 2107 k_sparse = 0; 2108 init( _max_width, _src_type, _dst_type, _kernel, 2109 _anchor, _border_mode, _border_value ); 2110 } 2111 2112 2113 void CvLinearFilter::clear() 2114 { 2115 cvReleaseMat( &kernel ); 2116 cvFree( &k_sparse ); 2117 CvBaseImageFilter::clear(); 2118 } 2119 2120 2121 CvLinearFilter::~CvLinearFilter() 2122 { 2123 clear(); 2124 } 2125 2126 2127 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type, 2128 const CvMat* _kernel, CvPoint _anchor, 2129 int _border_mode, CvScalar _border_value ) 2130 { 2131 CV_FUNCNAME( "CvLinearFilter::init" ); 2132 2133 __BEGIN__; 2134 2135 int depth = CV_MAT_DEPTH(_src_type); 2136 int cn = CV_MAT_CN(_src_type); 2137 CvPoint* nz_loc; 2138 float* coeffs; 2139 int i, j, k = 0; 2140 2141 if( !CV_IS_MAT(_kernel) ) 2142 CV_ERROR( CV_StsBadArg, "kernel is not valid matrix" ); 2143 2144 _src_type = CV_MAT_TYPE(_src_type); 2145 _dst_type = CV_MAT_TYPE(_dst_type); 2146 2147 if( _src_type != _dst_type ) 2148 CV_ERROR( CV_StsUnmatchedFormats, 2149 "The source and destination image types must be the same" ); 2150 2151 CV_CALL( CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 2152 false, cvGetMatSize(_kernel), _anchor, _border_mode, _border_value )); 2153 2154 if( !(kernel && k_sparse && ksize.width == kernel->cols && ksize.height == kernel->rows )) 2155 { 2156 cvReleaseMat( &kernel ); 2157 cvFree( &k_sparse ); 2158 CV_CALL( kernel = cvCreateMat( ksize.height, ksize.width, CV_32FC1 )); 2159 CV_CALL( k_sparse = (uchar*)cvAlloc( 2160 ksize.width*ksize.height*(2*sizeof(int) + sizeof(uchar*) + sizeof(float)))); 2161 } 2162 2163 CV_CALL( cvConvert( _kernel, kernel )); 2164 2165 nz_loc = (CvPoint*)k_sparse; 2166 for( i = 0; i < ksize.height; i++ ) 2167 { 2168 for( j = 0; j < ksize.width; j++ ) 2169 if( fabs(((float*)(kernel->data.ptr + i*kernel->step))[j])>FLT_EPSILON ) 2170 nz_loc[k++] = cvPoint(j,i); 2171 } 2172 if( k == 0 ) 2173 nz_loc[k++] = anchor; 2174 k_sparse_count = k; 2175 coeffs = (float*)((uchar**)(nz_loc + k_sparse_count) + k_sparse_count); 2176 2177 for( k = 0; k < k_sparse_count; k++ ) 2178 { 2179 coeffs[k] = CV_MAT_ELEM( *kernel, float, nz_loc[k].y, nz_loc[k].x ); 2180 nz_loc[k].x *= cn; 2181 } 2182 2183 x_func = 0; 2184 if( depth == CV_8U ) 2185 y_func = (CvColumnFilterFunc)icvLinearFilter_8u; 2186 else if( depth == CV_16S ) 2187 y_func = (CvColumnFilterFunc)icvLinearFilter_16s; 2188 else if( depth == CV_16U ) 2189 y_func = (CvColumnFilterFunc)icvLinearFilter_16u; 2190 else if( depth == CV_32F ) 2191 y_func = (CvColumnFilterFunc)icvLinearFilter_32f; 2192 else 2193 CV_ERROR( CV_StsUnsupportedFormat, "Unsupported image type" ); 2194 2195 __END__; 2196 } 2197 2198 2199 void CvLinearFilter::init( int _max_width, int _src_type, int _dst_type, 2200 bool _is_separable, CvSize _ksize, 2201 CvPoint _anchor, int _border_mode, 2202 CvScalar _border_value ) 2203 { 2204 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, _is_separable, 2205 _ksize, _anchor, _border_mode, _border_value ); 2206 } 2207 2208 2209 #define ICV_FILTER( flavor, arrtype, worktype, load_macro, \ 2210 cast_macro1, cast_macro2 ) \ 2211 static void \ 2212 icvLinearFilter_##flavor( const arrtype** src, arrtype* dst, \ 2213 int dst_step, int count, void* params ) \ 2214 { \ 2215 CvLinearFilter* state = (CvLinearFilter*)params; \ 2216 int width = state->get_width(); \ 2217 int cn = CV_MAT_CN(state->get_src_type()); \ 2218 int i, k; \ 2219 CvPoint* k_sparse = (CvPoint*)state->get_kernel_sparse_buf(); \ 2220 int k_count = state->get_kernel_sparse_count(); \ 2221 const arrtype** k_ptr = (const arrtype**)(k_sparse + k_count); \ 2222 const arrtype** k_end = k_ptr + k_count; \ 2223 const float* k_coeffs = (const float*)(k_ptr + k_count); \ 2224 \ 2225 width *= cn; \ 2226 dst_step /= sizeof(dst[0]); \ 2227 \ 2228 for( ; count > 0; count--, dst += dst_step, src++ ) \ 2229 { \ 2230 for( k = 0; k < k_count; k++ ) \ 2231 k_ptr[k] = src[k_sparse[k].y] + k_sparse[k].x; \ 2232 \ 2233 for( i = 0; i <= width - 4; i += 4 ) \ 2234 { \ 2235 const arrtype** kp = k_ptr; \ 2236 const float* kc = k_coeffs; \ 2237 double s0 = 0, s1 = 0, s2 = 0, s3 = 0; \ 2238 worktype t0, t1; \ 2239 \ 2240 while( kp != k_end ) \ 2241 { \ 2242 const arrtype* sptr = (*kp++) + i; \ 2243 float f = *kc++; \ 2244 s0 += f*load_macro(sptr[0]); \ 2245 s1 += f*load_macro(sptr[1]); \ 2246 s2 += f*load_macro(sptr[2]); \ 2247 s3 += f*load_macro(sptr[3]); \ 2248 } \ 2249 \ 2250 t0 = cast_macro1(s0); t1 = cast_macro1(s1); \ 2251 dst[i] = cast_macro2(t0); \ 2252 dst[i+1] = cast_macro2(t1); \ 2253 t0 = cast_macro1(s2); t1 = cast_macro1(s3); \ 2254 dst[i+2] = cast_macro2(t0); \ 2255 dst[i+3] = cast_macro2(t1); \ 2256 } \ 2257 \ 2258 for( ; i < width; i++ ) \ 2259 { \ 2260 const arrtype** kp = k_ptr; \ 2261 const float* kc = k_coeffs; \ 2262 double s0 = 0; \ 2263 worktype t0; \ 2264 \ 2265 while( kp != k_end ) \ 2266 { \ 2267 const arrtype* sptr = *kp++; \ 2268 float f = *kc++; \ 2269 s0 += f*load_macro(sptr[i]); \ 2270 } \ 2271 \ 2272 t0 = cast_macro1(s0); \ 2273 dst[i] = cast_macro2(t0); \ 2274 } \ 2275 } \ 2276 } 2277 2278 2279 ICV_FILTER( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U ) 2280 ICV_FILTER( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U ) 2281 ICV_FILTER( 16s, short, int, CV_NOP, cvRound, CV_CAST_16S ) 2282 ICV_FILTER( 32f, float, float, CV_NOP, CV_CAST_32F, CV_NOP ) 2283 2284 2285 /////////////////////// common functions for working with IPP filters //////////////////// 2286 2287 CvMat* icvIPPFilterInit( const CvMat* src, int stripe_size, CvSize ksize ) 2288 { 2289 CvSize temp_size; 2290 int pix_size = CV_ELEM_SIZE(src->type); 2291 temp_size.width = cvAlign(src->cols + ksize.width - 1,8/CV_ELEM_SIZE(src->type & CV_MAT_DEPTH_MASK)); 2292 //temp_size.width = src->cols + ksize.width - 1; 2293 temp_size.height = (stripe_size*2 + temp_size.width*pix_size) / (temp_size.width*pix_size*2); 2294 temp_size.height = MAX( temp_size.height, ksize.height ); 2295 temp_size.height = MIN( temp_size.height, src->rows + ksize.height - 1 ); 2296 2297 return cvCreateMat( temp_size.height, temp_size.width, src->type ); 2298 } 2299 2300 2301 int icvIPPFilterNextStripe( const CvMat* src, CvMat* temp, int y, 2302 CvSize ksize, CvPoint anchor ) 2303 { 2304 int pix_size = CV_ELEM_SIZE(src->type); 2305 int src_step = src->step ? src->step : CV_STUB_STEP; 2306 int temp_step = temp->step ? temp->step : CV_STUB_STEP; 2307 int i, dy, src_y1 = 0, src_y2; 2308 int temp_rows; 2309 uchar* temp_ptr = temp->data.ptr; 2310 CvSize stripe_size, temp_size; 2311 2312 dy = MIN( temp->rows - ksize.height + 1, src->rows - y ); 2313 if( y > 0 ) 2314 { 2315 int temp_ready = ksize.height - 1; 2316 2317 for( i = 0; i < temp_ready; i++ ) 2318 memcpy( temp_ptr + temp_step*i, temp_ptr + 2319 temp_step*(temp->rows - temp_ready + i), temp_step ); 2320 2321 temp_ptr += temp_ready*temp_step; 2322 temp_rows = dy; 2323 src_y1 = y + temp_ready - anchor.y; 2324 src_y2 = src_y1 + dy; 2325 if( src_y1 >= src->rows ) 2326 { 2327 src_y1 = src->rows - 1; 2328 src_y2 = src->rows; 2329 } 2330 } 2331 else 2332 { 2333 temp_rows = dy + ksize.height - 1; 2334 src_y2 = temp_rows - anchor.y; 2335 } 2336 2337 src_y2 = MIN( src_y2, src->rows ); 2338 2339 stripe_size = cvSize(src->cols, src_y2 - src_y1); 2340 temp_size = cvSize(temp->cols, temp_rows); 2341 icvCopyReplicateBorder_8u( src->data.ptr + src_y1*src_step, src_step, 2342 stripe_size, temp_ptr, temp_step, temp_size, 2343 (y == 0 ? anchor.y : 0), anchor.x, pix_size ); 2344 return dy; 2345 } 2346 2347 2348 /////////////////////////////// IPP separable filter functions /////////////////////////// 2349 2350 icvFilterRow_8u_C1R_t icvFilterRow_8u_C1R_p = 0; 2351 icvFilterRow_8u_C3R_t icvFilterRow_8u_C3R_p = 0; 2352 icvFilterRow_8u_C4R_t icvFilterRow_8u_C4R_p = 0; 2353 icvFilterRow_16s_C1R_t icvFilterRow_16s_C1R_p = 0; 2354 icvFilterRow_16s_C3R_t icvFilterRow_16s_C3R_p = 0; 2355 icvFilterRow_16s_C4R_t icvFilterRow_16s_C4R_p = 0; 2356 icvFilterRow_32f_C1R_t icvFilterRow_32f_C1R_p = 0; 2357 icvFilterRow_32f_C3R_t icvFilterRow_32f_C3R_p = 0; 2358 icvFilterRow_32f_C4R_t icvFilterRow_32f_C4R_p = 0; 2359 2360 icvFilterColumn_8u_C1R_t icvFilterColumn_8u_C1R_p = 0; 2361 icvFilterColumn_8u_C3R_t icvFilterColumn_8u_C3R_p = 0; 2362 icvFilterColumn_8u_C4R_t icvFilterColumn_8u_C4R_p = 0; 2363 icvFilterColumn_16s_C1R_t icvFilterColumn_16s_C1R_p = 0; 2364 icvFilterColumn_16s_C3R_t icvFilterColumn_16s_C3R_p = 0; 2365 icvFilterColumn_16s_C4R_t icvFilterColumn_16s_C4R_p = 0; 2366 icvFilterColumn_32f_C1R_t icvFilterColumn_32f_C1R_p = 0; 2367 icvFilterColumn_32f_C3R_t icvFilterColumn_32f_C3R_p = 0; 2368 icvFilterColumn_32f_C4R_t icvFilterColumn_32f_C4R_p = 0; 2369 2370 ////////////////////////////////////////////////////////////////////////////////////////// 2371 2372 typedef CvStatus (CV_STDCALL * CvIPPSepFilterFunc) 2373 ( const void* src, int srcstep, void* dst, int dststep, 2374 CvSize size, const float* kernel, int ksize, int anchor ); 2375 2376 int icvIPPSepFilter( const CvMat* src, CvMat* dst, const CvMat* kernelX, 2377 const CvMat* kernelY, CvPoint anchor ) 2378 { 2379 int result = 0; 2380 2381 CvMat* top_bottom = 0; 2382 CvMat* vout_hin = 0; 2383 CvMat* dst_buf = 0; 2384 2385 CV_FUNCNAME( "icvIPPSepFilter" ); 2386 2387 __BEGIN__; 2388 2389 CvSize ksize; 2390 CvPoint el_anchor; 2391 CvSize size; 2392 int type, depth, pix_size; 2393 int i, x, y, dy = 0, prev_dy = 0, max_dy; 2394 CvMat vout; 2395 CvIPPSepFilterFunc x_func = 0, y_func = 0; 2396 int src_step, top_bottom_step; 2397 float *kx, *ky; 2398 int align, stripe_size; 2399 2400 if( !icvFilterRow_8u_C1R_p ) 2401 EXIT; 2402 2403 if( !CV_ARE_TYPES_EQ( src, dst ) || !CV_ARE_SIZES_EQ( src, dst ) || 2404 !CV_IS_MAT_CONT(kernelX->type & kernelY->type) || 2405 CV_MAT_TYPE(kernelX->type) != CV_32FC1 || 2406 CV_MAT_TYPE(kernelY->type) != CV_32FC1 || 2407 (kernelX->cols != 1 && kernelX->rows != 1) || 2408 (kernelY->cols != 1 && kernelY->rows != 1) || 2409 (unsigned)anchor.x >= (unsigned)(kernelX->cols + kernelX->rows - 1) || 2410 (unsigned)anchor.y >= (unsigned)(kernelY->cols + kernelY->rows - 1) ) 2411 CV_ERROR( CV_StsError, "Internal Error: incorrect parameters" ); 2412 2413 ksize.width = kernelX->cols + kernelX->rows - 1; 2414 ksize.height = kernelY->cols + kernelY->rows - 1; 2415 2416 /*if( ksize.width <= 5 && ksize.height <= 5 ) 2417 { 2418 float* ker = (float*)cvStackAlloc( ksize.width*ksize.height*sizeof(ker[0])); 2419 CvMat kernel = cvMat( ksize.height, ksize.width, CV_32F, ker ); 2420 for( y = 0, i = 0; y < ksize.height; y++ ) 2421 for( x = 0; x < ksize.width; x++, i++ ) 2422 ker[i] = kernelY->data.fl[y]*kernelX->data.fl[x]; 2423 2424 CV_CALL( cvFilter2D( src, dst, &kernel, anchor )); 2425 EXIT; 2426 }*/ 2427 2428 type = CV_MAT_TYPE(src->type); 2429 depth = CV_MAT_DEPTH(type); 2430 pix_size = CV_ELEM_SIZE(type); 2431 2432 if( type == CV_8UC1 ) 2433 x_func = icvFilterRow_8u_C1R_p, y_func = icvFilterColumn_8u_C1R_p; 2434 else if( type == CV_8UC3 ) 2435 x_func = icvFilterRow_8u_C3R_p, y_func = icvFilterColumn_8u_C3R_p; 2436 else if( type == CV_8UC4 ) 2437 x_func = icvFilterRow_8u_C4R_p, y_func = icvFilterColumn_8u_C4R_p; 2438 else if( type == CV_16SC1 ) 2439 x_func = icvFilterRow_16s_C1R_p, y_func = icvFilterColumn_16s_C1R_p; 2440 else if( type == CV_16SC3 ) 2441 x_func = icvFilterRow_16s_C3R_p, y_func = icvFilterColumn_16s_C3R_p; 2442 else if( type == CV_16SC4 ) 2443 x_func = icvFilterRow_16s_C4R_p, y_func = icvFilterColumn_16s_C4R_p; 2444 else if( type == CV_32FC1 ) 2445 x_func = icvFilterRow_32f_C1R_p, y_func = icvFilterColumn_32f_C1R_p; 2446 else if( type == CV_32FC3 ) 2447 x_func = icvFilterRow_32f_C3R_p, y_func = icvFilterColumn_32f_C3R_p; 2448 else if( type == CV_32FC4 ) 2449 x_func = icvFilterRow_32f_C4R_p, y_func = icvFilterColumn_32f_C4R_p; 2450 else 2451 EXIT; 2452 2453 size = cvGetMatSize(src); 2454 stripe_size = src->data.ptr == dst->data.ptr ? 1 << 15 : 1 << 16; 2455 max_dy = MAX( ksize.height - 1, stripe_size/(size.width + ksize.width - 1)); 2456 max_dy = MIN( max_dy, size.height + ksize.height - 1 ); 2457 2458 align = 8/CV_ELEM_SIZE(depth); 2459 2460 CV_CALL( top_bottom = cvCreateMat( ksize.height*2, cvAlign(size.width,align), type )); 2461 2462 CV_CALL( vout_hin = cvCreateMat( max_dy + ksize.height, 2463 cvAlign(size.width + ksize.width - 1, align), type )); 2464 2465 if( src->data.ptr == dst->data.ptr && size.height ) 2466 CV_CALL( dst_buf = cvCreateMat( max_dy + ksize.height, 2467 cvAlign(size.width, align), type )); 2468 2469 kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) ); 2470 ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) ); 2471 2472 // mirror the kernels 2473 for( i = 0; i < ksize.width; i++ ) 2474 kx[i] = kernelX->data.fl[ksize.width - i - 1]; 2475 2476 for( i = 0; i < ksize.height; i++ ) 2477 ky[i] = kernelY->data.fl[ksize.height - i - 1]; 2478 2479 el_anchor = cvPoint( ksize.width - anchor.x - 1, ksize.height - anchor.y - 1 ); 2480 2481 cvGetCols( vout_hin, &vout, anchor.x, anchor.x + size.width ); 2482 2483 src_step = src->step ? src->step : CV_STUB_STEP; 2484 top_bottom_step = top_bottom->step ? top_bottom->step : CV_STUB_STEP; 2485 vout.step = vout.step ? vout.step : CV_STUB_STEP; 2486 2487 for( y = 0; y < size.height; y += dy ) 2488 { 2489 const CvMat *vin = src, *hout = dst; 2490 int src_y = y, dst_y = y; 2491 dy = MIN( max_dy, size.height - (ksize.height - anchor.y - 1) - y ); 2492 2493 if( y < anchor.y || dy < anchor.y ) 2494 { 2495 int ay = anchor.y; 2496 CvSize src_stripe_size = size; 2497 2498 if( y < anchor.y ) 2499 { 2500 src_y = 0; 2501 dy = MIN( anchor.y, size.height ); 2502 src_stripe_size.height = MIN( dy + ksize.height - anchor.y - 1, size.height ); 2503 } 2504 else 2505 { 2506 src_y = MAX( y - anchor.y, 0 ); 2507 dy = size.height - y; 2508 src_stripe_size.height = MIN( dy + anchor.y, size.height ); 2509 ay = MAX( anchor.y - y, 0 ); 2510 } 2511 2512 icvCopyReplicateBorder_8u( src->data.ptr + src_y*src_step, src_step, 2513 src_stripe_size, top_bottom->data.ptr, top_bottom_step, 2514 cvSize(size.width, dy + ksize.height - 1), ay, 0, pix_size ); 2515 vin = top_bottom; 2516 src_y = anchor.y; 2517 } 2518 2519 // do vertical convolution 2520 IPPI_CALL( y_func( vin->data.ptr + src_y*vin->step, vin->step ? vin->step : CV_STUB_STEP, 2521 vout.data.ptr, vout.step, cvSize(size.width, dy), 2522 ky, ksize.height, el_anchor.y )); 2523 2524 // now it's time to copy the previously processed stripe to the input/output image 2525 if( src->data.ptr == dst->data.ptr ) 2526 { 2527 for( i = 0; i < prev_dy; i++ ) 2528 memcpy( dst->data.ptr + (y - prev_dy + i)*dst->step, 2529 dst_buf->data.ptr + i*dst_buf->step, size.width*pix_size ); 2530 if( y + dy < size.height ) 2531 { 2532 hout = dst_buf; 2533 dst_y = 0; 2534 } 2535 } 2536 2537 // create a border for every line by replicating the left-most/right-most elements 2538 for( i = 0; i < dy; i++ ) 2539 { 2540 uchar* ptr = vout.data.ptr + i*vout.step; 2541 for( x = -1; x >= -anchor.x*pix_size; x-- ) 2542 ptr[x] = ptr[x + pix_size]; 2543 for( x = size.width*pix_size; x < (size.width+ksize.width-anchor.x-1)*pix_size; x++ ) 2544 ptr[x] = ptr[x - pix_size]; 2545 } 2546 2547 // do horizontal convolution 2548 IPPI_CALL( x_func( vout.data.ptr, vout.step, hout->data.ptr + dst_y*hout->step, 2549 hout->step ? hout->step : CV_STUB_STEP, 2550 cvSize(size.width, dy), kx, ksize.width, el_anchor.x )); 2551 prev_dy = dy; 2552 } 2553 2554 result = 1; 2555 2556 __END__; 2557 2558 cvReleaseMat( &vout_hin ); 2559 cvReleaseMat( &dst_buf ); 2560 cvReleaseMat( &top_bottom ); 2561 2562 return result; 2563 } 2564 2565 ////////////////////////////////////////////////////////////////////////////////////////// 2566 2567 //////////////////////////////// IPP generic filter functions //////////////////////////// 2568 2569 icvFilter_8u_C1R_t icvFilter_8u_C1R_p = 0; 2570 icvFilter_8u_C3R_t icvFilter_8u_C3R_p = 0; 2571 icvFilter_8u_C4R_t icvFilter_8u_C4R_p = 0; 2572 icvFilter_16s_C1R_t icvFilter_16s_C1R_p = 0; 2573 icvFilter_16s_C3R_t icvFilter_16s_C3R_p = 0; 2574 icvFilter_16s_C4R_t icvFilter_16s_C4R_p = 0; 2575 icvFilter_32f_C1R_t icvFilter_32f_C1R_p = 0; 2576 icvFilter_32f_C3R_t icvFilter_32f_C3R_p = 0; 2577 icvFilter_32f_C4R_t icvFilter_32f_C4R_p = 0; 2578 2579 2580 typedef CvStatus (CV_STDCALL * CvFilterIPPFunc) 2581 ( const void* src, int srcstep, void* dst, int dststep, CvSize size, 2582 const float* kernel, CvSize ksize, CvPoint anchor ); 2583 2584 CV_IMPL void 2585 cvFilter2D( const CvArr* _src, CvArr* _dst, const CvMat* kernel, CvPoint anchor ) 2586 { 2587 const int dft_filter_size = 100; 2588 2589 CvLinearFilter filter; 2590 CvMat* ipp_kernel = 0; 2591 2592 // below that approximate size OpenCV is faster 2593 const int ipp_lower_limit = 20; 2594 CvMat* temp = 0; 2595 2596 CV_FUNCNAME( "cvFilter2D" ); 2597 2598 __BEGIN__; 2599 2600 int coi1 = 0, coi2 = 0; 2601 CvMat srcstub, *src = (CvMat*)_src; 2602 CvMat dststub, *dst = (CvMat*)_dst; 2603 int type; 2604 2605 CV_CALL( src = cvGetMat( src, &srcstub, &coi1 )); 2606 CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 )); 2607 2608 if( coi1 != 0 || coi2 != 0 ) 2609 CV_ERROR( CV_BadCOI, "" ); 2610 2611 type = CV_MAT_TYPE( src->type ); 2612 2613 if( !CV_ARE_SIZES_EQ( src, dst )) 2614 CV_ERROR( CV_StsUnmatchedSizes, "" ); 2615 2616 if( !CV_ARE_TYPES_EQ( src, dst )) 2617 CV_ERROR( CV_StsUnmatchedFormats, "" ); 2618 2619 if( anchor.x == -1 && anchor.y == -1 ) 2620 anchor = cvPoint(kernel->cols/2,kernel->rows/2); 2621 2622 if( kernel->cols*kernel->rows >= dft_filter_size && 2623 kernel->cols <= src->cols && kernel->rows <= src->rows ) 2624 { 2625 if( src->data.ptr == dst->data.ptr ) 2626 { 2627 temp = cvCloneMat( src ); 2628 src = temp; 2629 } 2630 icvCrossCorr( src, kernel, dst, anchor ); 2631 EXIT; 2632 } 2633 2634 if( icvFilter_8u_C1R_p && (src->rows >= ipp_lower_limit || src->cols >= ipp_lower_limit) ) 2635 { 2636 CvFilterIPPFunc ipp_func = 2637 type == CV_8UC1 ? (CvFilterIPPFunc)icvFilter_8u_C1R_p : 2638 type == CV_8UC3 ? (CvFilterIPPFunc)icvFilter_8u_C3R_p : 2639 type == CV_8UC4 ? (CvFilterIPPFunc)icvFilter_8u_C4R_p : 2640 type == CV_16SC1 ? (CvFilterIPPFunc)icvFilter_16s_C1R_p : 2641 type == CV_16SC3 ? (CvFilterIPPFunc)icvFilter_16s_C3R_p : 2642 type == CV_16SC4 ? (CvFilterIPPFunc)icvFilter_16s_C4R_p : 2643 type == CV_32FC1 ? (CvFilterIPPFunc)icvFilter_32f_C1R_p : 2644 type == CV_32FC3 ? (CvFilterIPPFunc)icvFilter_32f_C3R_p : 2645 type == CV_32FC4 ? (CvFilterIPPFunc)icvFilter_32f_C4R_p : 0; 2646 2647 if( ipp_func ) 2648 { 2649 CvSize el_size = { kernel->cols, kernel->rows }; 2650 CvPoint el_anchor; 2651 int stripe_size = 1 << 16; // the optimal value may depend on CPU cache, 2652 // overhead of current IPP code etc. 2653 const uchar* shifted_ptr; 2654 int i, j, y, dy = 0; 2655 int temp_step, dst_step = dst->step ? dst->step : CV_STUB_STEP; 2656 2657 if( (unsigned)anchor.x >= (unsigned)kernel->cols || 2658 (unsigned)anchor.y >= (unsigned)kernel->rows ) 2659 CV_ERROR( CV_StsOutOfRange, "anchor point is out of kernel" ); 2660 2661 el_anchor = cvPoint( el_size.width - anchor.x - 1, el_size.height - anchor.y - 1 ); 2662 2663 CV_CALL( ipp_kernel = cvCreateMat( kernel->rows, kernel->cols, CV_32FC1 )); 2664 CV_CALL( cvConvert( kernel, ipp_kernel )); 2665 2666 // mirror the kernel around the center 2667 for( i = 0; i < (el_size.height+1)/2; i++ ) 2668 { 2669 float* top_row = ipp_kernel->data.fl + el_size.width*i; 2670 float* bottom_row = ipp_kernel->data.fl + el_size.width*(el_size.height - i - 1); 2671 2672 for( j = 0; j < (el_size.width+1)/2; j++ ) 2673 { 2674 float a = top_row[j], b = top_row[el_size.width - j - 1]; 2675 float c = bottom_row[j], d = bottom_row[el_size.width - j - 1]; 2676 top_row[j] = d; 2677 top_row[el_size.width - j - 1] = c; 2678 bottom_row[j] = b; 2679 bottom_row[el_size.width - j - 1] = a; 2680 } 2681 } 2682 2683 CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size )); 2684 2685 shifted_ptr = temp->data.ptr + 2686 anchor.y*temp->step + anchor.x*CV_ELEM_SIZE(type); 2687 temp_step = temp->step ? temp->step : CV_STUB_STEP; 2688 2689 for( y = 0; y < src->rows; y += dy ) 2690 { 2691 dy = icvIPPFilterNextStripe( src, temp, y, el_size, anchor ); 2692 IPPI_CALL( ipp_func( shifted_ptr, temp_step, 2693 dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy), 2694 ipp_kernel->data.fl, el_size, el_anchor )); 2695 } 2696 EXIT; 2697 } 2698 } 2699 2700 CV_CALL( filter.init( src->cols, type, type, kernel, anchor )); 2701 CV_CALL( filter.process( src, dst )); 2702 2703 __END__; 2704 2705 cvReleaseMat( &temp ); 2706 cvReleaseMat( &ipp_kernel ); 2707 } 2708 2709 2710 /* End of file. */ 2711