/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "_cxcore.h"

/****************************************************************************************\
*                                         cvGEMM                                        *
\****************************************************************************************/

icvBLAS_GEMM_32f_t icvBLAS_GEMM_32f_p = 0;
icvBLAS_GEMM_64f_t icvBLAS_GEMM_64f_p = 0;
icvBLAS_GEMM_32fc_t icvBLAS_GEMM_32fc_p = 0;
icvBLAS_GEMM_64fc_t icvBLAS_GEMM_64fc_p = 0;

static void
icvGEMM_CopyBlock( const uchar* src, int src_step,
                   uchar* dst, int dst_step,
                   CvSize size, int pix_size )
{
    int j;
    size.width = size.width * (pix_size / sizeof(int));

    for( ; size.height--; src += src_step, dst += dst_step )
    {
        /* copy four ints per iteration, then finish the tail */
        for( j = 0; j <= size.width - 4; j += 4 )
        {
            int t0 = ((const int*)src)[j];
            int t1 = ((const int*)src)[j+1];
            ((int*)dst)[j] = t0;
            ((int*)dst)[j+1] = t1;
            t0 = ((const int*)src)[j+2];
            t1 = ((const int*)src)[j+3];
            ((int*)dst)[j+2] = t0;
            ((int*)dst)[j+3] = t1;
        }

        for( ; j < size.width; j++ )
            ((int*)dst)[j] = ((const int*)src)[j];
    }
}


static void
icvGEMM_TransposeBlock( const uchar* src, int src_step,
                        uchar* dst, int dst_step,
                        CvSize size, int pix_size )
{
    int i, j;
    for( i = 0; i < size.width; i++, dst += dst_step, src += pix_size )
    {
        const uchar* _src = src;
        switch( pix_size )
        {
        case sizeof(int):
            for( j = 0; j < size.height; j++, _src += src_step )
                ((int*)dst)[j] = ((int*)_src)[0];
            break;
        case sizeof(int)*2:
            for( j = 0; j < size.height*2; j += 2, _src += src_step )
            {
                int t0 = ((int*)_src)[0];
                int t1 = ((int*)_src)[1];
                ((int*)dst)[j] = t0;
                ((int*)dst)[j+1] = t1;
            }
            break;
        case sizeof(int)*4:
            for( j = 0; j < size.height*4; j += 4, _src += src_step )
            {
                int t0 = ((int*)_src)[0];
                int t1 = ((int*)_src)[1];
                ((int*)dst)[j] = t0;
                ((int*)dst)[j+1] = t1;
                t0 = ((int*)_src)[2];
                t1 = ((int*)_src)[3];
                ((int*)dst)[j+2] = t0;
                ((int*)dst)[j+3] = t1;
            }
            break;
        default:
            assert(0);
            return;
        }
    }
}
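
/* The kernels below compute the generalized matrix product
   D = alpha*op(A)*op(B) + beta*op(C), where op() is either identity or
   transposition, selected by the CV_GEMM_A_T/CV_GEMM_B_T/CV_GEMM_C_T bits
   of "flags". icvGEMM_CopyBlock and icvGEMM_TransposeBlock above are used
   by the blocked code path further down to stage tiles of the operands
   into small contiguous buffers so that the inner loops stay
   cache-friendly. */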
#define ICV_DEF_GEMM_SINGLE_MUL( flavor, arrtype, worktype ) \
static CvStatus CV_STDCALL \
icvGEMMSingleMul_##flavor( const arrtype* a_data, size_t a_step, \
                           const arrtype* b_data, size_t b_step, \
                           const arrtype* c_data, size_t c_step, \
                           arrtype* d_data, size_t d_step, \
                           CvSize a_size, CvSize d_size, \
                           double alpha, double beta, int flags ) \
{ \
    int i, j, k, n = a_size.width, m = d_size.width, drows = d_size.height; \
    const arrtype *_a_data = a_data, *_b_data = b_data, *_c_data = c_data; \
    arrtype* a_buf = 0; \
    size_t a_step0, a_step1, c_step0, c_step1, t_step; \
 \
    a_step /= sizeof(a_data[0]); \
    b_step /= sizeof(b_data[0]); \
    c_step /= sizeof(c_data[0]); \
    d_step /= sizeof(d_data[0]); \
    a_step0 = a_step; \
    a_step1 = 1; \
 \
    if( !c_data ) \
        c_step0 = c_step1 = 0; \
    else if( !(flags & CV_GEMM_C_T) ) \
        c_step0 = c_step, c_step1 = 1; \
    else \
        c_step0 = 1, c_step1 = c_step; \
 \
    if( flags & CV_GEMM_A_T ) \
    { \
        CV_SWAP( a_step0, a_step1, t_step ); \
        n = a_size.height; \
        if( a_step > 1 && n > 1 ) \
            a_buf = (arrtype*)cvStackAlloc(n*sizeof(a_data[0])); \
    } \
 \
    if( n == 1 ) /* outer product */ \
    { \
        arrtype* b_buf = 0; \
 \
        if( a_step > 1 ) \
        { \
            a_buf = (arrtype*)cvStackAlloc(drows*sizeof(a_data[0])); \
            for( k = 0; k < drows; k++ ) \
                a_buf[k] = a_data[a_step*k]; \
            a_data = a_buf; \
        } \
 \
        if( b_step > 1 ) \
        { \
            b_buf = (arrtype*)cvStackAlloc(d_size.width*sizeof(b_buf[0])); \
            for( j = 0; j < d_size.width; j++ ) \
                b_buf[j] = b_data[j*b_step]; \
            b_data = b_buf; \
        } \
 \
        for( i = 0; i < drows; i++, _c_data += c_step0, d_data += d_step ) \
        { \
            worktype al = worktype(a_data[i])*alpha; \
            c_data = _c_data; \
            for( j = 0; j <= d_size.width - 2; j += 2, c_data += 2*c_step1 ) \
            { \
                worktype s0 = al*b_data[j]; \
                worktype s1 = al*b_data[j+1]; \
                if( !c_data ) \
                { \
                    d_data[j] = arrtype(s0); \
                    d_data[j+1] = arrtype(s1); \
                } \
                else \
                { \
                    d_data[j] = arrtype(s0 + c_data[0]*beta); \
                    d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta); \
                } \
            } \
 \
            for( ; j < d_size.width; j++, c_data += c_step1 ) \
            { \
                worktype s0 = al*b_data[j]; \
                if( !c_data ) \
                    d_data[j] = arrtype(s0); \
                else \
                    d_data[j] = arrtype(s0 + c_data[0]*beta); \
            } \
        } \
    } \
    else if( flags & CV_GEMM_B_T ) /* A * Bt */ \
    { \
        for( i = 0; i < drows; i++, _a_data += a_step0, \
                                    _c_data += c_step0, d_data += d_step ) \
        { \
            a_data = _a_data; \
            b_data = _b_data; \
            c_data = _c_data; \
 \
            if( a_buf ) \
            { \
                for( k = 0; k < n; k++ ) \
                    a_buf[k] = a_data[a_step1*k]; \
                a_data = a_buf; \
            } \
 \
            for( j = 0; j < d_size.width; j++, b_data += b_step, c_data += c_step1 ) \
            { \
                worktype s0(0), s1(0), s2(0), s3(0); \
 \
                for( k = 0; k <= n - 4; k += 4 ) \
                { \
                    s0 += worktype(a_data[k])*b_data[k]; \
                    s1 += worktype(a_data[k+1])*b_data[k+1]; \
                    s2 += worktype(a_data[k+2])*b_data[k+2]; \
                    s3 += worktype(a_data[k+3])*b_data[k+3]; \
                } \
 \
                for( ; k < n; k++ ) \
                    s0 += worktype(a_data[k])*b_data[k]; \
                s0 = (s0+s1+s2+s3)*alpha; \
 \
                if( !c_data ) \
                    d_data[j] = arrtype(s0); \
                else \
                    d_data[j] = arrtype(s0 + c_data[0]*beta); \
            } \
        } \
    } \
    else if( d_size.width*sizeof(d_data[0]) <= 1600 ) \
    { \
        for( i = 0; i < drows; i++, _a_data += a_step0, \
                                    _c_data += c_step0, d_data += d_step ) \
        { \
            a_data = _a_data, c_data = _c_data; \
 \
            if( a_buf ) \
            { \
                for( k = 0; k < n; k++ ) \
                    a_buf[k] = a_data[a_step1*k]; \
                a_data = a_buf; \
            } \
 \
            for( j = 0; j <= m - 4; j += 4, c_data += 4*c_step1 ) \
            { \
                const arrtype* b = _b_data + j; \
                worktype s0(0), s1(0), s2(0), s3(0); \
 \
                for( k = 0; k < n; k++, b += b_step ) \
                { \
                    worktype a(a_data[k]); \
                    s0 += a * b[0]; s1 += a * b[1]; \
                    s2 += a * b[2]; s3 += a * b[3]; \
                } \
 \
                if( !c_data ) \
                { \
                    d_data[j] = arrtype(s0*alpha); \
                    d_data[j+1] = arrtype(s1*alpha); \
                    d_data[j+2] = arrtype(s2*alpha); \
                    d_data[j+3] = arrtype(s3*alpha); \
                } \
                else \
                { \
                    s0 = s0*alpha; s1 = s1*alpha; \
                    s2 = s2*alpha; s3 = s3*alpha; \
                    d_data[j] = arrtype(s0 + c_data[0]*beta); \
                    d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta); \
                    d_data[j+2] = arrtype(s2 + c_data[c_step1*2]*beta); \
                    d_data[j+3] = arrtype(s3 + c_data[c_step1*3]*beta); \
                } \
            } \
 \
            for( ; j < m; j++, c_data += c_step1 ) \
            { \
                const arrtype* b = _b_data + j; \
                worktype s0(0); \
 \
                for( k = 0; k < n; k++, b += b_step ) \
                    s0 += worktype(a_data[k]) * b[0]; \
 \
                s0 = s0*alpha; \
                if( !c_data ) \
                    d_data[j] = arrtype(s0); \
                else \
                    d_data[j] = arrtype(s0 + c_data[0]*beta); \
            } \
        } \
    } \
    else \
    { \
        worktype* d_buf = (worktype*)cvStackAlloc(m*sizeof(d_buf[0])); \
 \
        for( i = 0; i < drows; i++, _a_data += a_step0, \
                                    _c_data += c_step0, d_data += d_step ) \
        { \
            a_data = _a_data; \
            b_data = _b_data; \
            c_data = _c_data; \
 \
            if( a_buf ) \
            { \
                for( k = 0; k < n; k++ ) \
                    a_buf[k] = _a_data[a_step1*k]; \
                a_data = a_buf; \
            } \
 \
            for( j = 0; j < m; j++ ) \
                d_buf[j] = worktype(0); \
 \
            for( k = 0; k < n; k++, b_data += b_step ) \
            { \
                worktype al(a_data[k]); \
 \
                for( j = 0; j <= m - 4; j += 4 ) \
                { \
                    worktype t0 = d_buf[j] + b_data[j]*al; \
                    worktype t1 = d_buf[j+1] + b_data[j+1]*al; \
                    d_buf[j] = t0; \
                    d_buf[j+1] = t1; \
                    t0 = d_buf[j+2] + b_data[j+2]*al; \
                    t1 = d_buf[j+3] + b_data[j+3]*al; \
                    d_buf[j+2] = t0; \
                    d_buf[j+3] = t1; \
                } \
 \
                for( ; j < m; j++ ) \
                    d_buf[j] += b_data[j]*al; \
            } \
 \
            if( !c_data ) \
                for( j = 0; j < m; j++ ) \
                    d_data[j] = arrtype(d_buf[j]*alpha); \
            else \
                for( j = 0; j < m; j++, c_data += c_step1 ) \
                { \
                    worktype t = d_buf[j]*alpha; \
                    d_data[j] = arrtype(t + c_data[0]*beta); \
                } \
        } \
    } \
    return CV_OK; \
}
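
/* Block kernel: computes one tile of op(A)*op(B) in the higher-precision
   "worktype" accumulator. Bit 16 of "flags" (tested as "do_acc" below)
   tells the kernel to add to the partial sums already stored in d_data
   instead of overwriting them, so a long dot product can be accumulated
   tile by tile across several calls. */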
#define ICV_DEF_GEMM_BLOCK_MUL( flavor, arrtype, worktype ) \
static CvStatus CV_STDCALL \
icvGEMMBlockMul_##flavor( const arrtype* a_data, size_t a_step, \
                          const arrtype* b_data, size_t b_step, \
                          worktype* d_data, size_t d_step, \
                          CvSize a_size, CvSize d_size, int flags ) \
{ \
    int i, j, k, n = a_size.width, m = d_size.width; \
    const arrtype *_a_data = a_data, *_b_data = b_data; \
    arrtype* a_buf = 0; \
    size_t a_step0, a_step1, t_step; \
    int do_acc = flags & 16; \
 \
    a_step /= sizeof(a_data[0]); \
    b_step /= sizeof(b_data[0]); \
    d_step /= sizeof(d_data[0]); \
 \
    a_step0 = a_step; \
    a_step1 = 1; \
 \
    if( flags & CV_GEMM_A_T ) \
    { \
        CV_SWAP( a_step0, a_step1, t_step ); \
        n = a_size.height; \
        a_buf = (arrtype*)cvStackAlloc(n*sizeof(a_data[0])); \
    } \
 \
    if( flags & CV_GEMM_B_T ) \
    { \
        /* second operand is transposed */ \
        for( i = 0; i < d_size.height; i++, _a_data += a_step0, d_data += d_step ) \
        { \
            a_data = _a_data; b_data = _b_data; \
 \
            if( a_buf ) \
            { \
                for( k = 0; k < n; k++ ) \
                    a_buf[k] = a_data[a_step1*k]; \
                a_data = a_buf; \
            } \
 \
            for( j = 0; j < d_size.width; j++, b_data += b_step ) \
            { \
                worktype s0 = do_acc ? d_data[j] : worktype(0), s1(0); \
                for( k = 0; k <= n - 2; k += 2 ) \
                { \
                    s0 += worktype(a_data[k])*b_data[k]; \
                    s1 += worktype(a_data[k+1])*b_data[k+1]; \
                } \
 \
                for( ; k < n; k++ ) \
                    s0 += worktype(a_data[k])*b_data[k]; \
 \
                d_data[j] = s0 + s1; \
            } \
        } \
    } \
    else \
    { \
        for( i = 0; i < d_size.height; i++, _a_data += a_step0, d_data += d_step ) \
        { \
            a_data = _a_data, b_data = _b_data; \
 \
            if( a_buf ) \
            { \
                for( k = 0; k < n; k++ ) \
                    a_buf[k] = a_data[a_step1*k]; \
                a_data = a_buf; \
            } \
 \
            for( j = 0; j <= m - 4; j += 4 ) \
            { \
                worktype s0, s1, s2, s3; \
                const arrtype* b = b_data + j; \
 \
                if( do_acc ) \
                { \
                    s0 = d_data[j]; s1 = d_data[j+1]; \
                    s2 = d_data[j+2]; s3 = d_data[j+3]; \
                } \
                else \
                    s0 = s1 = s2 = s3 = worktype(0); \
 \
                for( k = 0; k < n; k++, b += b_step ) \
                { \
                    worktype a(a_data[k]); \
                    s0 += a * b[0]; s1 += a * b[1]; \
                    s2 += a * b[2]; s3 += a * b[3]; \
                } \
 \
                d_data[j] = s0; d_data[j+1] = s1; \
                d_data[j+2] = s2; d_data[j+3] = s3; \
            } \
 \
            for( ; j < m; j++ ) \
            { \
                const arrtype* b = b_data + j; \
                worktype s0 = do_acc ? d_data[j] : worktype(0); \
 \
                for( k = 0; k < n; k++, b += b_step ) \
                    s0 += worktype(a_data[k]) * b[0]; \
 \
                d_data[j] = s0; \
            } \
        } \
    } \
 \
    return CV_OK; \
}
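
/* Store stage of the blocked path: folds alpha into the accumulated tile
   and beta*op(C) into the result, converting from "worktype" back to the
   matrix element type. */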
#define ICV_DEF_GEMM_STORE( flavor, arrtype, worktype ) \
static CvStatus CV_STDCALL \
icvGEMMStore_##flavor( const arrtype* c_data, size_t c_step, \
                       const worktype* d_buf, size_t d_buf_step, \
                       arrtype* d_data, size_t d_step, CvSize d_size, \
                       double alpha, double beta, int flags ) \
{ \
    const arrtype* _c_data = c_data; \
    int j; \
    size_t c_step0, c_step1; \
 \
    c_step /= sizeof(c_data[0]); \
    d_buf_step /= sizeof(d_buf[0]); \
    d_step /= sizeof(d_data[0]); \
 \
    if( !c_data ) \
        c_step0 = c_step1 = 0; \
    else if( !(flags & CV_GEMM_C_T) ) \
        c_step0 = c_step, c_step1 = 1; \
    else \
        c_step0 = 1, c_step1 = c_step; \
 \
    for( ; d_size.height--; _c_data += c_step0, \
                            d_buf += d_buf_step, d_data += d_step ) \
    { \
        if( _c_data ) \
        { \
            c_data = _c_data; \
            for( j = 0; j <= d_size.width - 4; j += 4, c_data += 4*c_step1 ) \
            { \
                worktype t0 = alpha*d_buf[j]; \
                worktype t1 = alpha*d_buf[j+1]; \
                t0 += beta*worktype(c_data[0]); \
                t1 += beta*worktype(c_data[c_step1]); \
                d_data[j] = arrtype(t0); \
                d_data[j+1] = arrtype(t1); \
                t0 = alpha*d_buf[j+2]; \
                t1 = alpha*d_buf[j+3]; \
                t0 += beta*worktype(c_data[c_step1*2]); \
                t1 += beta*worktype(c_data[c_step1*3]); \
                d_data[j+2] = arrtype(t0); \
                d_data[j+3] = arrtype(t1); \
            } \
            for( ; j < d_size.width; j++, c_data += c_step1 ) \
            { \
                worktype t0 = alpha*d_buf[j]; \
                d_data[j] = arrtype(t0 + beta*c_data[0]); \
            } \
        } \
        else \
        { \
            for( j = 0; j <= d_size.width - 4; j += 4 ) \
            { \
                worktype t0 = alpha*d_buf[j]; \
                worktype t1 = alpha*d_buf[j+1]; \
                d_data[j] = arrtype(t0); \
                d_data[j+1] = arrtype(t1); \
                t0 = alpha*d_buf[j+2]; \
                t1 = alpha*d_buf[j+3]; \
                d_data[j+2] = arrtype(t0); \
                d_data[j+3] = arrtype(t1); \
            } \
            for( ; j < d_size.width; j++ ) \
                d_data[j] = arrtype(alpha*d_buf[j]); \
        } \
    } \
    return CV_OK; \
}
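
/* Instantiate the three kernels for the four supported element types.
   The complex (C2) flavors reuse the same code, relying on the arithmetic
   operators provided by the CvComplex32f/CvComplex64f types. */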
ICV_DEF_GEMM_SINGLE_MUL( 32f_C1R, float, double)
ICV_DEF_GEMM_BLOCK_MUL( 32f_C1R, float, double)
ICV_DEF_GEMM_STORE( 32f_C1R, float, double)

ICV_DEF_GEMM_SINGLE_MUL( 64f_C1R, double, double)
ICV_DEF_GEMM_BLOCK_MUL( 64f_C1R, double, double)
ICV_DEF_GEMM_STORE( 64f_C1R, double, double)

ICV_DEF_GEMM_SINGLE_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
ICV_DEF_GEMM_BLOCK_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
ICV_DEF_GEMM_STORE( 32f_C2R, CvComplex32f, CvComplex64f)

ICV_DEF_GEMM_SINGLE_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
ICV_DEF_GEMM_BLOCK_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
ICV_DEF_GEMM_STORE( 64f_C2R, CvComplex64f, CvComplex64f)

typedef CvStatus (CV_STDCALL *CvGEMMSingleMulFunc)( const void* src1, size_t step1,
                   const void* src2, size_t step2, const void* src3, size_t step3,
                   void* dst, size_t dststep, CvSize srcsize, CvSize dstsize,
                   double alpha, double beta, int flags );

typedef CvStatus (CV_STDCALL *CvGEMMBlockMulFunc)( const void* src1, size_t step1,
                   const void* src2, size_t step2, void* dst, size_t dststep,
                   CvSize srcsize, CvSize dstsize, int flags );

typedef CvStatus (CV_STDCALL *CvGEMMStoreFunc)( const void* src1, size_t step1,
                   const void* src2, size_t step2, void* dst, size_t dststep,
                   CvSize dstsize, double alpha, double beta, int flags );


static void icvInitGEMMTable( CvBigFuncTable* single_mul_tab,
                              CvBigFuncTable* block_mul_tab,
                              CvBigFuncTable* store_tab )
{
    single_mul_tab->fn_2d[CV_32FC1] = (void*)icvGEMMSingleMul_32f_C1R;
    single_mul_tab->fn_2d[CV_64FC1] = (void*)icvGEMMSingleMul_64f_C1R;
    single_mul_tab->fn_2d[CV_32FC2] = (void*)icvGEMMSingleMul_32f_C2R;
    single_mul_tab->fn_2d[CV_64FC2] = (void*)icvGEMMSingleMul_64f_C2R;
    block_mul_tab->fn_2d[CV_32FC1] = (void*)icvGEMMBlockMul_32f_C1R;
    block_mul_tab->fn_2d[CV_64FC1] = (void*)icvGEMMBlockMul_64f_C1R;
    block_mul_tab->fn_2d[CV_32FC2] = (void*)icvGEMMBlockMul_32f_C2R;
    block_mul_tab->fn_2d[CV_64FC2] = (void*)icvGEMMBlockMul_64f_C2R;
    store_tab->fn_2d[CV_32FC1] = (void*)icvGEMMStore_32f_C1R;
    store_tab->fn_2d[CV_64FC1] = (void*)icvGEMMStore_64f_C1R;
    store_tab->fn_2d[CV_32FC2] = (void*)icvGEMMStore_32f_C2R;
    store_tab->fn_2d[CV_64FC2] = (void*)icvGEMMStore_64f_C2R;
}


CV_IMPL void
cvGEMM( const CvArr* Aarr, const CvArr* Barr, double alpha,
        const CvArr* Carr, double beta, CvArr* Darr, int flags )
{
    const int block_lin_size = 128;
    const int block_size = block_lin_size * block_lin_size;

    static CvBigFuncTable single_mul_tab, block_mul_tab, store_tab;
    static int inittab = 0;
    static double zero[] = {0,0,0,0};
    static float zerof[] = {0,0,0,0};

    uchar* buffer = 0;
    int local_alloc = 0;
    uchar* block_buffer = 0;

    CV_FUNCNAME( "cvGEMM" );

    __BEGIN__;

    CvMat *A = (CvMat*)Aarr;
    CvMat *B = (CvMat*)Barr;
    CvMat *C = (CvMat*)Carr;
    CvMat *D = (CvMat*)Darr;
    int len = 0;

    CvMat stub, stub1, stub2, stub3;
    CvSize a_size, d_size;
    int type;

    if( !CV_IS_MAT( A ))
    {
        int coi = 0;
        CV_CALL( A = cvGetMat( A, &stub1, &coi ));

        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }

    if( !CV_IS_MAT( B ))
    {
        int coi = 0;
        CV_CALL( B = cvGetMat( B, &stub2, &coi ));

        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }

    if( !CV_IS_MAT( D ))
    {
        int coi = 0;
        CV_CALL( D = cvGetMat( D, &stub, &coi ));

        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }
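
    /* beta == 0 makes the op(C) term vanish, so C can be ignored entirely */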
    if( beta == 0 )
        C = 0;

    if( C )
    {
        if( !CV_IS_MAT( C ))
        {
            int coi = 0;
            CV_CALL( C = cvGetMat( C, &stub3, &coi ));

            if( coi != 0 )
                CV_ERROR( CV_BadCOI, "" );
        }

        if( !CV_ARE_TYPES_EQ( C, D ))
            CV_ERROR( CV_StsUnmatchedFormats, "" );

        if( ((flags&CV_GEMM_C_T) == 0 && (C->cols != D->cols || C->rows != D->rows)) ||
            ((flags&CV_GEMM_C_T) != 0 && (C->rows != D->cols || C->cols != D->rows)))
            CV_ERROR( CV_StsUnmatchedSizes, "" );

        if( (flags & CV_GEMM_C_T) != 0 && C->data.ptr == D->data.ptr )
        {
            cvTranspose( C, D );
            C = D;
            flags &= ~CV_GEMM_C_T;
        }
    }
    else
    {
        C = &stub3;
        C->data.ptr = 0;
        C->step = 0;
        C->type = CV_MAT_CONT_FLAG;
    }

    type = CV_MAT_TYPE(A->type);
    if( !CV_ARE_TYPES_EQ( A, B ) || !CV_ARE_TYPES_EQ( A, D ) )
        CV_ERROR( CV_StsUnmatchedFormats, "" );

    a_size.width = A->cols;
    a_size.height = A->rows;
    d_size.width = D->cols;
    d_size.height = D->rows;

    switch( flags & (CV_GEMM_A_T|CV_GEMM_B_T) )
    {
    case 0:
        len = B->rows;
        if( a_size.width != len ||
            B->cols != d_size.width ||
            a_size.height != d_size.height )
            CV_ERROR( CV_StsUnmatchedSizes, "" );
        break;
    case 1:
        len = B->rows;
        if( a_size.height != len ||
            B->cols != d_size.width ||
            a_size.width != d_size.height )
            CV_ERROR( CV_StsUnmatchedSizes, "" );
        break;
    case 2:
        len = B->cols;
        if( a_size.width != len ||
            B->rows != d_size.width ||
            a_size.height != d_size.height )
            CV_ERROR( CV_StsUnmatchedSizes, "" );
        break;
    case 3:
        len = B->cols;
        if( a_size.height != len ||
            B->rows != d_size.width ||
            a_size.width != d_size.height )
            CV_ERROR( CV_StsUnmatchedSizes, "" );
        break;
    }
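
    /* Dedicated fast path for the very common small square cases
       (2x2, 3x3 and 4x4, or the matching matrix-vector products):
       fully unrolled, no blocking, no function-table dispatch. */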
    if( flags == 0 && 2 <= len && len <= 4 && (len == d_size.width || len == d_size.height) )
    {
        int i;
        if( type == CV_64F )
        {
            double* d = D->data.db;
            const double *a = A->data.db, *b = B->data.db, *c = C->data.db;
            size_t d_step = D->step/sizeof(d[0]),
                   a_step = A->step/sizeof(a[0]),
                   b_step = B->step/sizeof(b[0]),
                   c_step = C->step/sizeof(c[0]);

            if( !c )
                c = zero;

            switch( len )
            {
            case 2:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step];
                        double t1 = a[0]*b[1] + a[1]*b[b_step+1];
                        d[0] = t0*alpha + c[0]*beta;
                        d[1] = t1*alpha + c[1]*beta;
                    }
                }
                else if( a != d )
                {
                    int c_step0 = 1;
                    if( c == zero )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step];
                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
                        d[0] = t0*alpha + c[0]*beta;
                        d[d_step] = t1*alpha + c[c_step]*beta;
                    }
                }
                else
                    break;
                EXIT;
            case 3:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
                        double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
                        double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
                        d[0] = t0*alpha + c[0]*beta;
                        d[1] = t1*alpha + c[1]*beta;
                        d[2] = t2*alpha + c[2]*beta;
                    }
                }
                else if( a != d )
                {
                    int c_step0 = 1;
                    if( c == zero )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
                        double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];

                        d[0] = t0*alpha + c[0]*beta;
                        d[d_step] = t1*alpha + c[c_step]*beta;
                        d[d_step*2] = t2*alpha + c[c_step*2]*beta;
                    }
                }
                else
                    break;
                EXIT;
            case 4:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
                        double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
                        double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
                        double t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
                        d[0] = t0*alpha + c[0]*beta;
                        d[1] = t1*alpha + c[1]*beta;
                        d[2] = t2*alpha + c[2]*beta;
                        d[3] = t3*alpha + c[3]*beta;
                    }
                }
                else if( d_size.width <= 16 && a != d )
                {
                    int c_step0 = 1;
                    if( c == zero )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
                        double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
                                    a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
                        double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
                                    a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
                        double t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
                                    a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
                        d[0] = t0*alpha + c[0]*beta;
                        d[d_step] = t1*alpha + c[c_step]*beta;
                        d[d_step*2] = t2*alpha + c[c_step*2]*beta;
                        d[d_step*3] = t3*alpha + c[c_step*3]*beta;
                    }
                }
                else
                    break;
                EXIT;
            }
        }
        if( type == CV_32F )
        {
            float* d = D->data.fl;
            const float *a = A->data.fl, *b = B->data.fl, *c = C->data.fl;
            size_t d_step = D->step/sizeof(d[0]),
                   a_step = A->step/sizeof(a[0]),
                   b_step = B->step/sizeof(b[0]),
                   c_step = C->step/sizeof(c[0]);

            if( !c )
                c = zerof;

            switch( len )
            {
            case 2:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step];
                        float t1 = a[0]*b[1] + a[1]*b[b_step+1];
                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[1] = (float)(t1*alpha + c[1]*beta);
                    }
                }
                else if( a != d )
                {
                    int c_step0 = 1;
                    if( c == zerof )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step];
                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
                    }
                }
                else
                    break;
                EXIT;
            case 3:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
                        float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
                        float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[1] = (float)(t1*alpha + c[1]*beta);
                        d[2] = (float)(t2*alpha + c[2]*beta);
                    }
                }
                else if( a != d )
                {
                    int c_step0 = 1;
                    if( c == zerof )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
                        float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];

                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
                        d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
                    }
                }
                else
                    break;
                EXIT;
            case 4:
                if( len == d_size.width && b != d )
                {
                    for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
                        float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
                        float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
                        float t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[1] = (float)(t1*alpha + c[1]*beta);
                        d[2] = (float)(t2*alpha + c[2]*beta);
                        d[3] = (float)(t3*alpha + c[3]*beta);
                    }
                }
                else if( len <= 16 && a != d )
                {
                    int c_step0 = 1;
                    if( c == zerof )
                    {
                        c_step0 = 0;
                        c_step = 1;
                    }

                    for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
                    {
                        float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
                        float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
                                   a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
                        float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
                                   a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
                        float t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
                                   a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
                        d[0] = (float)(t0*alpha + c[0]*beta);
                        d[d_step] = (float)(t1*alpha + c[c_step]*beta);
                        d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
                        d[d_step*3] = (float)(t3*alpha + c[c_step*3]*beta);
                    }
                }
                else
                    break;
                EXIT;
            }
        }
    }
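
    /* General path. Three strategies, tried in order:
       1) a user-registered BLAS plugin (icvBLAS_GEMM_*_p), if the matrices
          are large enough to amortize the call;
       2) a single-pass kernel for small and medium sizes;
       3) a cache-blocked multiplication for everything else, staged through
          the icvGEMMBlockMul/icvGEMMStore pair. */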
    {
        int b_step = B->step;
        CvGEMMSingleMulFunc single_mul_func;
        CvMat tmat, *D0 = D;
        icvBLAS_GEMM_32f_t blas_func = 0;

        if( !inittab )
        {
            icvInitGEMMTable( &single_mul_tab, &block_mul_tab, &store_tab );
            inittab = 1;
        }

        single_mul_func = (CvGEMMSingleMulFunc)single_mul_tab.fn_2d[type];
        if( !single_mul_func )
            CV_ERROR( CV_StsUnsupportedFormat, "" );

        if( D->data.ptr == A->data.ptr || D->data.ptr == B->data.ptr )
        {
            int buf_size = d_size.width*d_size.height*CV_ELEM_SIZE(type);
            if( d_size.width <= CV_MAX_LOCAL_MAT_SIZE )
            {
                buffer = (uchar*)cvStackAlloc( buf_size );
                local_alloc = 1;
            }
            else
                CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));

            tmat = cvMat( d_size.height, d_size.width, type, buffer );
            D = &tmat;
        }

        if( (d_size.width == 1 || len == 1) && !(flags & CV_GEMM_B_T) && CV_IS_MAT_CONT(B->type) )
        {
            b_step = d_size.width == 1 ? 0 : CV_ELEM_SIZE(type);
            flags |= CV_GEMM_B_T;
        }

        if( (d_size.width | d_size.height | len) >= 16 && icvBLAS_GEMM_32f_p != 0 )
        {
            blas_func = type == CV_32FC1 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_32f_p :
                        type == CV_64FC1 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_64f_p :
                        type == CV_32FC2 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_32fc_p :
                        type == CV_64FC2 ? (icvBLAS_GEMM_32f_t)icvBLAS_GEMM_64fc_p : 0;
        }
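
        /* The BLAS plugin follows the Fortran (column-major) convention, so
           the operands are passed in swapped order below: computing B'*A' in
           column-major terms yields D = A*B in row-major storage. */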
        if( blas_func )
        {
            const char* transa = flags & CV_GEMM_A_T ? "t" : "n";
            const char* transb = flags & CV_GEMM_B_T ? "t" : "n";
            int lda, ldb, ldd;

            if( C->data.ptr )
            {
                if( C->data.ptr != D->data.ptr )
                {
                    if( !(flags & CV_GEMM_C_T) )
                        cvCopy( C, D );
                    else
                        cvTranspose( C, D );
                }
            }

            if( CV_MAT_DEPTH(type) == CV_32F )
            {
                CvComplex32f _alpha, _beta;

                lda = A->step/sizeof(float);
                ldb = b_step/sizeof(float);
                ldd = D->step/sizeof(float);
                _alpha.re = (float)alpha;
                _alpha.im = 0;
                _beta.re = C->data.ptr ? (float)beta : 0;
                _beta.im = 0;
                if( CV_MAT_CN(type) == 2 )
                    lda /= 2, ldb /= 2, ldd /= 2;

                blas_func( transb, transa, &d_size.width, &d_size.height, &len,
                           &_alpha, B->data.ptr, &ldb, A->data.ptr, &lda,
                           &_beta, D->data.ptr, &ldd );
            }
            else
            {
                CvComplex64f _alpha, _beta;

                lda = A->step/sizeof(double);
                ldb = b_step/sizeof(double);
                ldd = D->step/sizeof(double);
                _alpha.re = alpha;
                _alpha.im = 0;
                _beta.re = C->data.ptr ? beta : 0;
                _beta.im = 0;
                if( CV_MAT_CN(type) == 2 )
                    lda /= 2, ldb /= 2, ldd /= 2;

                blas_func( transb, transa, &d_size.width, &d_size.height, &len,
                           &_alpha, B->data.ptr, &ldb, A->data.ptr, &lda,
                           &_beta, D->data.ptr, &ldd );
            }
        }
        else if( ((d_size.height <= block_lin_size/2 || d_size.width <= block_lin_size/2) &&
                 len <= 10000) || len <= 10 ||
                 (d_size.width <= block_lin_size && d_size.height <= block_lin_size &&
                 len <= block_lin_size) )
        {
            single_mul_func( A->data.ptr, A->step, B->data.ptr, b_step,
                             C->data.ptr, C->step, D->data.ptr, D->step,
                             a_size, d_size, alpha, beta, flags );
        }
        else
        {
            int is_a_t = flags & CV_GEMM_A_T;
            int is_b_t = flags & CV_GEMM_B_T;
            int elem_size = CV_ELEM_SIZE(type);
            int dk0_1, dk0_2;
            int a_buf_size = 0, b_buf_size, d_buf_size;
            uchar* a_buf = 0;
            uchar* b_buf = 0;
            uchar* d_buf = 0;
            int i, j, k, di = 0, dj = 0, dk = 0;
            int dm0, dn0, dk0;
            int a_step0, a_step1, b_step0, b_step1, c_step0, c_step1;
            int work_elem_size = elem_size << (CV_MAT_DEPTH(type) == CV_32F ? 1 : 0);
            CvGEMMBlockMulFunc block_mul_func = (CvGEMMBlockMulFunc)block_mul_tab.fn_2d[type];
            CvGEMMStoreFunc store_func = (CvGEMMStoreFunc)store_tab.fn_2d[type];

            assert( block_mul_func && store_func );

            if( !is_a_t )
                a_step0 = A->step, a_step1 = elem_size;
            else
                a_step0 = elem_size, a_step1 = A->step;

            if( !is_b_t )
                b_step0 = b_step, b_step1 = elem_size;
            else
                b_step0 = elem_size, b_step1 = b_step;

            if( !C->data.ptr )
            {
                c_step0 = c_step1 = 0;
                flags &= ~CV_GEMM_C_T;
            }
            else if( !(flags & CV_GEMM_C_T) )
                c_step0 = C->step, c_step1 = elem_size;
            else
                c_step0 = elem_size, c_step1 = C->step;

            dm0 = MIN( block_lin_size, d_size.height );
            dn0 = MIN( block_lin_size, d_size.width );
            dk0_1 = block_size / dm0;
            dk0_2 = block_size / dn0;
            dk0 = MAX( dk0_1, dk0_2 );
            dk0 = MIN( dk0, len );
            if( dk0*dm0 > block_size )
                dm0 = block_size / dk0;
            if( dk0*dn0 > block_size )
                dn0 = block_size / dk0;

            dk0_1 = (dn0+dn0/8+2) & -2;
            b_buf_size = (dk0+dk0/8+1)*dk0_1*elem_size;
            d_buf_size = (dk0+dk0/8+1)*dk0_1*work_elem_size;

            if( is_a_t )
            {
                a_buf_size = (dm0+dm0/8+1)*((dk0+dk0/8+2)&-2)*elem_size;
                flags &= ~CV_GEMM_A_T;
            }

            CV_CALL( block_buffer = (uchar*)cvAlloc(a_buf_size + b_buf_size + d_buf_size));
            d_buf = block_buffer;
            b_buf = d_buf + d_buf_size;

            if( is_a_t )
                a_buf = b_buf + b_buf_size;

            for( i = 0; i < d_size.height; i += di )
            {
                di = dm0;
                if( i + di >= d_size.height || 8*(i + di) + di > 8*d_size.height )
                    di = d_size.height - i;

                for( j = 0; j < d_size.width; j += dj )
                {
                    uchar* _d = D->data.ptr + i*D->step + j*elem_size;
                    const uchar* _c = C->data.ptr + i*c_step0 + j*c_step1;
                    int _d_step = D->step;
                    dj = dn0;

                    if( j + dj >= d_size.width || 8*(j + dj) + dj > 8*d_size.width )
                        dj = d_size.width - j;

                    flags &= 15;
                    if( dk0 < len )
                    {
                        _d = d_buf;
                        _d_step = dj*work_elem_size;
                    }

                    for( k = 0; k < len; k += dk )
                    {
                        const uchar* _a = A->data.ptr + i*a_step0 + k*a_step1;
                        int _a_step = A->step;
                        const uchar* _b = B->data.ptr + k*b_step0 + j*b_step1;
                        int _b_step = b_step;
                        CvSize a_bl_size;

                        dk = dk0;
                        if( k + dk >= len || 8*(k + dk) + dk > 8*len )
                            dk = len - k;

                        if( !is_a_t )
                            a_bl_size.width = dk, a_bl_size.height = di;
                        else
                            a_bl_size.width = di, a_bl_size.height = dk;

                        if( a_buf && is_a_t )
                        {
                            int t;
                            _a_step = dk*elem_size;
                            icvGEMM_TransposeBlock( _a, A->step, a_buf, _a_step, a_bl_size, elem_size );
                            CV_SWAP( a_bl_size.width, a_bl_size.height, t );
                            _a = a_buf;
                        }

                        if( dj < d_size.width )
                        {
                            CvSize b_size;
                            if( !is_b_t )
                                b_size.width = dj, b_size.height = dk;
                            else
                                b_size.width = dk, b_size.height = dj;

                            _b_step = b_size.width*elem_size;
                            icvGEMM_CopyBlock( _b, b_step, b_buf, _b_step, b_size, elem_size );
                            _b = b_buf;
                        }

                        if( dk0 < len )
                            block_mul_func( _a, _a_step, _b, _b_step, _d, _d_step,
                                            a_bl_size, cvSize(dj,di), flags );
                        else
                            single_mul_func( _a, _a_step, _b, _b_step, _c, C->step, _d, _d_step,
                                             a_bl_size, cvSize(dj,di), alpha, beta, flags );
                        flags |= 16;
                    }

                    if( dk0 < len )
                        store_func( _c, C->step, _d, _d_step,
                                    D->data.ptr + i*D->step + j*elem_size,
                                    D->step, cvSize(dj,di), alpha, beta, flags );
                }
            }
        }

        if( D0 != D )
            CV_CALL( cvCopy( D, D0 ));
    }

    __END__;

    if( buffer && !local_alloc )
        cvFree( &buffer );
    if( block_buffer )
        cvFree( &block_buffer );
}
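
/* Usage sketch (illustrative only, not part of the library):
   compute D = 2*A*B' + 0.5*C for small single-channel float matrices.

       float a[] = { 1, 2, 3, 4 };
       float b[] = { 5, 6, 7, 8 };
       float c[] = { 1, 0, 0, 1 };
       float d[4];
       CvMat A = cvMat( 2, 2, CV_32FC1, a );
       CvMat B = cvMat( 2, 2, CV_32FC1, b );
       CvMat C = cvMat( 2, 2, CV_32FC1, c );
       CvMat D = cvMat( 2, 2, CV_32FC1, d );
       cvGEMM( &A, &B, 2., &C, 0.5, &D, CV_GEMM_B_T );
*/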
/****************************************************************************************\
*                                       cvTransform                                     *
\****************************************************************************************/

#define ICV_DEF_TRANSFORM_CASE_C1( arrtype, temptype, _ld_, \
                                   _cast_macro1_, _cast_macro2_ ) \
{ \
    for( i = 0; i < size.width; i++, dst += dst_cn ) \
    { \
        const double* _mat = mat; \
        double v0 = _ld_(src[i]); \
        for( k = 0; k < dst_cn; k++, _mat += 2 ) \
        { \
            temptype t0 = _cast_macro1_(_mat[0]*v0 + _mat[1]); \
            dst[k] = _cast_macro2_(t0); \
        } \
    } \
    src += size.width; \
}


#define ICV_DEF_DIAG_TRANSFORM_CASE_C1( arrtype, temptype, _ld_, \
                                        _cast_macro1_, _cast_macro2_ ) \
    for( i = 0; i < size.width; i++ ) \
    { \
        double ft0; \
        temptype t0; \
        ft0 = mat[0]*_ld_(src[i]) + mat[1]; \
        t0 = _cast_macro1_(ft0); \
        dst[i] = _cast_macro2_(t0); \
    }


#define ICV_DEF_TRANSFORM_CASE_C2( arrtype, temptype, _ld_, \
                                   _cast_macro1_, _cast_macro2_ ) \
    if( dst_cn == 2 ) \
    { \
        for( i = 0; i < size.width*2; i += 2 ) \
        { \
            double ft0, ft1; \
            temptype t0, t1; \
            ft0 = mat[0]*_ld_(src[i]) + mat[1]*_ld_(src[i+1]) + mat[2]; \
            ft1 = mat[3]*_ld_(src[i]) + mat[4]*_ld_(src[i+1]) + mat[5]; \
            t0 = _cast_macro1_(ft0); \
            t1 = _cast_macro1_(ft1); \
            dst[i] = _cast_macro2_(t0); \
            dst[i+1] = _cast_macro2_(t1); \
        } \
        src += size.width*2; dst += size.width*2; \
    } \
    else \
        for( i = 0; i < size.width; i++, src += 2, dst += dst_cn ) \
        { \
            const double* _mat = mat; \
            double v0 = _ld_(src[0]), v1 = _ld_(src[1]); \
            for( k = 0; k < dst_cn; k++, _mat += 3 ) \
            { \
                temptype t0 = \
                    _cast_macro1_(_mat[0]*v0 + _mat[1]*v1 + _mat[2]); \
                dst[k] = _cast_macro2_(t0); \
            } \
        }


#define ICV_DEF_DIAG_TRANSFORM_CASE_C2( arrtype, temptype, _ld_, \
                                        _cast_macro1_, _cast_macro2_ ) \
    for( i = 0; i < size.width*2; i += 2 ) \
    { \
        double ft0, ft1; \
        temptype t0, t1; \
        ft0 = mat[0]*_ld_(src[i]) + mat[2]; \
        ft1 = mat[4]*_ld_(src[i+1]) + mat[5]; \
        t0 = _cast_macro1_(ft0); \
        t1 = _cast_macro1_(ft1); \
        dst[i] = _cast_macro2_(t0); \
        dst[i+1] = _cast_macro2_(t1); \
    }
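
/* Each ICV_DEF_TRANSFORM_CASE_C<n> expands into a body that applies the
   dst_cn x (cn+1) matrix "mat" to every n-channel element:
   dst[k] = mat[k][0]*src[0] + ... + mat[k][cn-1]*src[cn-1] + mat[k][cn].
   The DIAG variants handle the special case where the matrix is diagonal
   (apart from the constant column), so each channel is scaled and shifted
   independently. */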
#define ICV_DEF_TRANSFORM_CASE_C3( arrtype, temptype, _ld_, \
                                   _cast_macro1_, _cast_macro2_ ) \
    if( dst_cn == 3 ) \
    { \
        for( i = 0; i < size.width*3; i += 3 ) \
        { \
            double ft0, ft1, ft2; \
            temptype t0, t1, t2; \
            ft0 = mat[0]*_ld_(src[i]) + mat[1]*_ld_(src[i+1]) + \
                  mat[2]*_ld_(src[i+2]) + mat[3]; \
            ft1 = mat[4]*_ld_(src[i]) + mat[5]*_ld_(src[i+1]) + \
                  mat[6]*_ld_(src[i+2]) + mat[7]; \
            ft2 = mat[8]*_ld_(src[i]) + mat[9]*_ld_(src[i+1]) + \
                  mat[10]*_ld_(src[i+2]) + mat[11]; \
            t0 = _cast_macro1_(ft0); \
            t1 = _cast_macro1_(ft1); \
            t2 = _cast_macro1_(ft2); \
            dst[i] = _cast_macro2_(t0); \
            dst[i+1] = _cast_macro2_(t1); \
            dst[i+2] = _cast_macro2_(t2); \
        } \
        src += size.width*3; dst += size.width*3; \
    } \
    else if( dst_cn == 1 ) \
    { \
        for( i = 0; i < size.width; i++, src += 3 ) \
        { \
            temptype t0 = _cast_macro1_(mat[0]*_ld_(src[0]) + \
                mat[1]*_ld_(src[1]) + mat[2]*_ld_(src[2]) + mat[3]); \
            dst[i] = _cast_macro2_(t0); \
        } \
        dst += size.width; \
    } \
    else \
        for( i = 0; i < size.width; i++, src += 3, dst += dst_cn ) \
        { \
            const double* _mat = mat; \
            double v0=_ld_(src[0]), v1=_ld_(src[1]), v2=_ld_(src[2]); \
            for( k = 0; k < dst_cn; k++, _mat += 4 ) \
            { \
                temptype t0 = _cast_macro1_(_mat[0]*v0 + \
                    _mat[1]*v1 + _mat[2]*v2 + _mat[3]); \
                dst[k] = _cast_macro2_(t0); \
            } \
        }


#define ICV_DEF_DIAG_TRANSFORM_CASE_C3( arrtype, temptype, _ld_, \
                                        _cast_macro1_, _cast_macro2_ ) \
    for( i = 0; i < size.width*3; i += 3 ) \
    { \
        double ft0, ft1, ft2; \
        temptype t0, t1, t2; \
        ft0 = mat[0]*_ld_(src[i]) + mat[3]; \
        ft1 = mat[5]*_ld_(src[i+1]) + mat[7]; \
        ft2 = mat[10]*_ld_(src[i+2]) + mat[11]; \
        t0 = _cast_macro1_(ft0); \
        t1 = _cast_macro1_(ft1); \
        t2 = _cast_macro1_(ft2); \
        dst[i] = _cast_macro2_(t0); \
        dst[i+1] = _cast_macro2_(t1); \
        dst[i+2] = _cast_macro2_(t2); \
    }


#define ICV_DEF_TRANSFORM_CASE_C4( arrtype, temptype, _ld_, \
                                   _cast_macro1_, _cast_macro2_ ) \
    for( i = 0; i < size.width; i++, src += 4, dst += dst_cn ) \
    { \
        const double* _mat = mat; \
        double v0 = _ld_(src[0]), v1 = _ld_(src[1]), \
               v2 = _ld_(src[2]), v3 = _ld_(src[3]); \
        for( k = 0; k < dst_cn; k++, _mat += 5 ) \
        { \
            temptype t0 = _cast_macro1_(_mat[0]*v0+_mat[1]*v1+ \
                _mat[2]*v2+_mat[3]*v3+_mat[4]); \
            dst[k] = _cast_macro2_(t0); \
        } \
    }


#define ICV_DEF_DIAG_TRANSFORM_CASE_C4( arrtype, temptype, _ld_, \
                                        _cast_macro1_, _cast_macro2_ ) \
    for( i = 0; i < size.width*4; i += 4 ) \
    { \
        double ft0, ft1; \
        temptype t0, t1; \
        ft0 = mat[0]*_ld_(src[i]) + mat[4]; \
        ft1 = mat[6]*_ld_(src[i+1]) + mat[9]; \
        t0 = _cast_macro1_(ft0); \
        t1 = _cast_macro1_(ft1); \
        dst[i] = _cast_macro2_(t0); \
        dst[i+1] = _cast_macro2_(t1); \
        ft0 = mat[12]*_ld_(src[i+2]) + mat[14]; \
        ft1 = mat[18]*_ld_(src[i+3]) + mat[19]; \
        t0 = _cast_macro1_(ft0); \
        t1 = _cast_macro1_(ft1); \
        dst[i+2] = _cast_macro2_(t0); \
        dst[i+3] = _cast_macro2_(t1); \
    }
#define ICV_DEF_TRANSFORM_FUNC( flavor, arrtype, temptype, _ld_, \
                                _cast_macro1_, _cast_macro2_, cn ) \
static CvStatus CV_STDCALL \
icvTransform_##flavor( const arrtype* src, int srcstep, \
                       arrtype* dst, int dststep, CvSize size, \
                       const double* mat, int dst_cn ) \
{ \
    srcstep = srcstep/sizeof(src[0]) - size.width*cn; \
    dststep = dststep/sizeof(dst[0]) - size.width*dst_cn; \
    for( ; size.height--; src += srcstep, dst += dststep ) \
    { \
        int i, k; \
        ICV_DEF_TRANSFORM_CASE_C##cn( arrtype, temptype, _ld_, \
                                      _cast_macro1_, _cast_macro2_ ) \
    } \
 \
    return CV_OK; \
}


#define ICV_DEF_DIAG_TRANSFORM_FUNC( flavor, arrtype, temptype, _ld_, \
                                     _cast_macro1_, _cast_macro2_, cn ) \
static CvStatus CV_STDCALL \
icvDiagTransform_##flavor( const arrtype* src, int srcstep, \
                           arrtype* dst, int dststep, CvSize size, \
                           const double* mat ) \
{ \
    srcstep /= sizeof(src[0]); \
    dststep /= sizeof(dst[0]); \
    for( ; size.height--; src += srcstep, dst += dststep ) \
    { \
        int i; \
        ICV_DEF_DIAG_TRANSFORM_CASE_C##cn( arrtype, temptype, _ld_, \
                                           _cast_macro1_, _cast_macro2_ ) \
    } \
 \
    return CV_OK; \
}


ICV_DEF_TRANSFORM_FUNC( 8u_C1R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 1 )
ICV_DEF_TRANSFORM_FUNC( 8u_C2R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 2 )
ICV_DEF_TRANSFORM_FUNC( 8u_C3R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 3 )
ICV_DEF_TRANSFORM_FUNC( 8u_C4R, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U, 4 )

ICV_DEF_TRANSFORM_FUNC( 16u_C1R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 1 )
ICV_DEF_TRANSFORM_FUNC( 16u_C2R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 2 )
ICV_DEF_TRANSFORM_FUNC( 16u_C3R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 3 )
ICV_DEF_TRANSFORM_FUNC( 16u_C4R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 4 )

ICV_DEF_TRANSFORM_FUNC( 16s_C1R, short, int, CV_NOP, cvRound, CV_CAST_16S, 1 )
ICV_DEF_TRANSFORM_FUNC( 16s_C2R, short, int, CV_NOP, cvRound, CV_CAST_16S, 2 )
ICV_DEF_TRANSFORM_FUNC( 16s_C3R, short, int, CV_NOP, cvRound, CV_CAST_16S, 3 )
ICV_DEF_TRANSFORM_FUNC( 16s_C4R, short, int, CV_NOP, cvRound, CV_CAST_16S, 4 )

ICV_DEF_TRANSFORM_FUNC( 32s_C1R, int, int, CV_NOP, cvRound, CV_NOP, 1 )
ICV_DEF_TRANSFORM_FUNC( 32s_C2R, int, int, CV_NOP, cvRound, CV_NOP, 2 )
ICV_DEF_TRANSFORM_FUNC( 32s_C3R, int, int, CV_NOP, cvRound, CV_NOP, 3 )
ICV_DEF_TRANSFORM_FUNC( 32s_C4R, int, int, CV_NOP, cvRound, CV_NOP, 4 )

ICV_DEF_TRANSFORM_FUNC( 32f_C1R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 1 )
ICV_DEF_TRANSFORM_FUNC( 32f_C2R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 2 )
ICV_DEF_TRANSFORM_FUNC( 32f_C3R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 3 )
ICV_DEF_TRANSFORM_FUNC( 32f_C4R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 4 )

ICV_DEF_TRANSFORM_FUNC( 64f_C1R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 1 )
ICV_DEF_TRANSFORM_FUNC( 64f_C2R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 2 )
ICV_DEF_TRANSFORM_FUNC( 64f_C3R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 3 )
ICV_DEF_TRANSFORM_FUNC( 64f_C4R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 4 )

ICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C1R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 1 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C2R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 2 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C3R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 3 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16u_C4R, ushort, int, CV_NOP, cvRound, CV_CAST_16U, 4 )

ICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C1R, short, int, CV_NOP, cvRound, CV_CAST_16S, 1 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C2R, short, int, CV_NOP, cvRound, CV_CAST_16S, 2 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C3R, short, int, CV_NOP, cvRound, CV_CAST_16S, 3 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 16s_C4R, short, int, CV_NOP, cvRound, CV_CAST_16S, 4 )

ICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C1R, int, int, CV_NOP, cvRound, CV_NOP, 1 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C2R, int, int, CV_NOP, cvRound, CV_NOP, 2 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C3R, int, int, CV_NOP, cvRound, CV_NOP, 3 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32s_C4R, int, int, CV_NOP, cvRound, CV_NOP, 4 )

ICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C1R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 1 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C2R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 2 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C3R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 3 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 32f_C4R, float, double, CV_NOP, CV_NOP, CV_CAST_32F, 4 )

ICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C1R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 1 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C2R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 2 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C3R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 3 )
ICV_DEF_DIAG_TRANSFORM_FUNC( 64f_C4R, double, double, CV_NOP, CV_NOP, CV_CAST_64F, 4 )
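
/* 8s (signed 8-bit) input is not supported, so its function-table slots are
   zeroed and the dispatcher reports CV_StsUnsupportedFormat. The 8u diagonal
   case never reaches the table either: it is served by the 256-entry LUT
   path built inside cvTransform, so those slots are zeroed as well. */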
#define icvTransform_8s_C1R 0
#define icvTransform_8s_C2R 0
#define icvTransform_8s_C3R 0
#define icvTransform_8s_C4R 0

#define icvDiagTransform_8s_C1R 0
#define icvDiagTransform_8s_C2R 0
#define icvDiagTransform_8s_C3R 0
#define icvDiagTransform_8s_C4R 0

#define icvDiagTransform_8u_C1R 0
#define icvDiagTransform_8u_C2R 0
#define icvDiagTransform_8u_C3R 0
#define icvDiagTransform_8u_C4R 0

CV_DEF_INIT_BIG_FUNC_TAB_2D( Transform, R )
CV_DEF_INIT_BIG_FUNC_TAB_2D( DiagTransform, R )

typedef CvStatus (CV_STDCALL * CvTransformFunc)(
                      const void* src, int srcstep,
                      void* dst, int dststep, CvSize size,
                      const void* mat, int dst_cn );

typedef CvStatus (CV_STDCALL * CvDiagTransformFunc)(
                      const void* src, int srcstep,
                      void* dst, int dststep, CvSize size,
                      const void* mat );

///////////////////// IPP transform functions //////////////////

icvColorTwist_8u_C3R_t icvColorTwist_8u_C3R_p = 0;
icvColorTwist_16u_C3R_t icvColorTwist_16u_C3R_p = 0;
icvColorTwist_16s_C3R_t icvColorTwist_16s_C3R_p = 0;
icvColorTwist_32f_C3R_t icvColorTwist_32f_C3R_p = 0;
icvColorTwist_32f_C4R_t icvColorTwist_32f_C4R_p = 0;

icvColorToGray_8u_C3C1R_t icvColorToGray_8u_C3C1R_p = 0;
icvColorToGray_16u_C3C1R_t icvColorToGray_16u_C3C1R_p = 0;
icvColorToGray_16s_C3C1R_t icvColorToGray_16s_C3C1R_p = 0;
icvColorToGray_32f_C3C1R_t icvColorToGray_32f_C3C1R_p = 0;

icvColorToGray_8u_AC4C1R_t icvColorToGray_8u_AC4C1R_p = 0;
icvColorToGray_16u_AC4C1R_t icvColorToGray_16u_AC4C1R_p = 0;
icvColorToGray_16s_AC4C1R_t icvColorToGray_16s_AC4C1R_p = 0;
icvColorToGray_32f_AC4C1R_t icvColorToGray_32f_AC4C1R_p = 0;

typedef CvStatus (CV_STDCALL * CvColorTwistIPPFunc)( const void* src, int srcstep,
                      void* dst, int dststep, CvSize size, const float* coeffs );

////////////////////////////////////////////////////////////////

CV_IMPL void
cvTransform( const CvArr* srcarr, CvArr* dstarr,
             const CvMat* transmat, const CvMat* shiftvec )
{
    static CvBigFuncTable transform_tab, diag_transform_tab;
    static int inittab = 0;
    CvMat* lut = 0;

    CV_FUNCNAME( "cvTransform" );

    __BEGIN__;

    CvMat srcstub, *src = (CvMat*)srcarr;
    CvMat dststub, *dst = (CvMat*)dstarr;
    CvMat rotstub, *rot = (CvMat*)transmat;
    CvMat shiftstub, *shift = (CvMat*)shiftvec;
    CvSeq *src_seq = 0, *dst_seq = 0;
    CvSeq hdr; // need only one copy of stub header & seqblock (either for src or dst)
    CvSeqBlock block_hdr;
    int i, j, type, cn, dst_cn;
    int coi = 0, coi2 = 0;
    double* buffer = (double*)cvStackAlloc( CV_CN_MAX*(CV_CN_MAX+1)*sizeof(buffer[0]) );

    if( !inittab )
    {
        icvInitTransformRTable( &transform_tab );
        icvInitDiagTransformRTable( &diag_transform_tab );
        inittab = 1;
    }

    if( CV_IS_SEQ( src ))
    {
        src_seq = (CvSeq*)src;
        if( CV_ELEM_SIZE(src_seq->flags) != src_seq->elem_size )
            CV_ERROR( CV_StsUnsupportedFormat, "Unsupported type of sequence elements" );
    }
    else
        CV_CALL( src = cvGetMat( src, &srcstub, &coi ));

    if( CV_IS_SEQ( dst ))
    {
        dst_seq = (CvSeq*)dst;
        if( CV_ELEM_SIZE(dst_seq->flags) != dst_seq->elem_size )
            CV_ERROR( CV_StsUnsupportedFormat, "Unsupported type of sequence elements" );
    }
    else
        CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));

    if( coi != 0 || coi2 != 0 )
        CV_ERROR( CV_BadCOI, "" );

    if( !CV_ARE_DEPTHS_EQ(src, dst) )
        CV_ERROR( CV_StsUnmatchedFormats, "" );

    if( src_seq || dst_seq )
    {
        if( !src_seq )
        {
            if( !CV_IS_MAT_CONT(src->type) || (src->rows != 1 && src->cols != 1) )
                CV_ERROR( CV_StsBadSize, "if either the source or destination is a sequence, "
                          "the other array must be a continuous 1d vector" );
            src_seq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(src->type), sizeof(hdr),
                                               CV_ELEM_SIZE(src->type), src->data.ptr,
                                               src->rows + src->cols - 1, &hdr, &block_hdr );
        }

        if( !dst_seq )
        {
            if( !CV_IS_MAT_CONT(dst->type) || (dst->rows != 1 && dst->cols != 1) )
                CV_ERROR( CV_StsBadSize, "if either the source or destination is a sequence, "
                          "the other array must be a continuous 1d vector" );
            if( dst->rows + dst->cols - 1 != src_seq->total )
                CV_ERROR( CV_StsUnmatchedFormats,
                          "source sequence and destination vector have different sizes" );
            dst_seq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(dst->type), sizeof(hdr),
                                               CV_ELEM_SIZE(dst->type), dst->data.ptr,
                                               dst->rows + dst->cols - 1, &hdr, &block_hdr );
        }
        else if( dst_seq->total != src_seq->total )
        {
            if( dst_seq->total > src_seq->total )
                cvSeqPopMulti( dst_seq, 0, dst_seq->total - src_seq->total );
            else
                cvSeqPushMulti( dst_seq, 0, src_seq->total - dst_seq->total );
        }
    }
    else if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    type = CV_MAT_TYPE( src->type );
    cn = CV_MAT_CN( type );
    dst_cn = CV_MAT_CN( dst->type );

    if( cn > 4 || dst_cn > 4 )
        CV_ERROR( CV_StsOutOfRange, "Both input and output arrays must have at most 4 channels" );

    if( !CV_IS_MAT( rot ))
        CV_CALL( rot = cvGetMat( rot, &rotstub, &coi ));

    if( rot->rows != dst_cn )
        CV_ERROR( CV_StsBadSize,
            "The height of the transmat matrix must be equal to the number of output channels" );

    if( rot->cols == cn + 1 || rot->cols == cn )
    {
        if( CV_MAT_TYPE( rot->type ) == CV_64FC1 )
        {
            for( i = 0; i < dst_cn; i++ )
            {
                buffer[i*(cn+1) + cn] = 0;
                for( j = 0; j < rot->cols; j++ )
                    buffer[i*(cn+1) + j] = ((double*)(rot->data.ptr + rot->step*i))[j];
            }
        }
        else if( CV_MAT_TYPE( rot->type ) == CV_32FC1 )
        {
            for( i = 0; i < dst_cn; i++ )
            {
                buffer[i*(cn+1) + cn] = 0;
                for( j = 0; j < rot->cols; j++ )
                    buffer[i*(cn+1) + j] = ((float*)(rot->data.ptr + rot->step*i))[j];
            }
        }
        else
            CV_ERROR( CV_StsUnsupportedFormat, "Rotation matrix must be 32fC1 or 64fC1" );
    }
    else
        CV_ERROR( CV_StsUnmatchedSizes, "If the source array has <cn> channels, "
            "the transformation matrix must have <cn> x <cn>+1 or <cn> x <cn> size" );
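
    /* "buffer" now holds the dst_cn x (cn+1) transform matrix in row-major
       order, with the last element of each row reserved for the constant
       term; the optional shift vector is folded into that column below. */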
    if( shift )
    {
        if( !CV_IS_MAT( shift ))
            CV_CALL( shift = cvGetMat( shift, &shiftstub, &coi ));

        if( CV_MAT_CN( shift->type ) * shift->cols * shift->rows == dst_cn &&
            (shift->rows == 1 || shift->cols == 1) )
        {
            if( CV_MAT_DEPTH( shift->type ) == CV_64F )
            {
                int step = shift->step ? shift->step/sizeof(double) : 1;
                for( i = 0; i < dst_cn; i++ )
                    buffer[i*(cn+1) + cn] += shift->data.db[i*step];
            }
            else if( CV_MAT_DEPTH( shift->type ) == CV_32F )
            {
                int step = shift->step ? shift->step/sizeof(float) : 1;
                for( i = 0; i < dst_cn; i++ )
                    buffer[i*(cn+1) + cn] += shift->data.fl[i*step];
            }
            else
                CV_ERROR( CV_StsUnsupportedFormat, "Shift vector must be 32f or 64f" );
        }
        else
        {
            CV_ERROR( CV_StsUnmatchedSizes,
                "Shift (if present) must be a 1d vector with the number "
                "of elements equal to the number of channels in the processed array" );
        }
    }

    if( coi != 0 || coi2 != 0 )
        CV_ERROR( CV_BadCOI, "" );

    {
        CvTransformFunc func = (CvTransformFunc)(transform_tab.fn_2d[type]);
        CvDiagTransformFunc diag_func = 0;
        CvLUT_TransformFunc lut_func = 0;
        int diag_transform = 0;
        CvColorTwistIPPFunc ipp_func = 0;
        CvSize size;
        float* ipp_coeffs = (float*)cvStackAlloc( 16*sizeof(ipp_coeffs[0]) );

        if( !func )
            CV_ERROR( CV_StsUnsupportedFormat, "" );

        if( cn == dst_cn )
            ipp_func = type == CV_8UC3 ? icvColorTwist_8u_C3R_p :
                       type == CV_16UC3 ? icvColorTwist_16u_C3R_p :
                       type == CV_16SC3 ? icvColorTwist_16s_C3R_p :
                       type == CV_32FC3 ? icvColorTwist_32f_C3R_p :
                       type == CV_32FC4 && fabs(buffer[4]) < DBL_EPSILON &&
                       fabs(buffer[9]) < DBL_EPSILON && fabs(buffer[14]) < DBL_EPSILON &&
                       fabs(buffer[19]) < DBL_EPSILON ? icvColorTwist_32f_C4R_p : 0;
        else if( dst_cn == 1 && (cn == 3 || cn == 4) &&
                 buffer[0] >= 0 && buffer[1] >= 0 && buffer[2] >= 0 &&
                 buffer[0] + buffer[1] + buffer[2] <= 1.01 &&
                 fabs(buffer[3]) < DBL_EPSILON && (cn == 3 || fabs(buffer[4]) < DBL_EPSILON) )
        {
            if( cn == 3 )
                ipp_func = type == CV_8UC3 ? icvColorToGray_8u_C3C1R_p :
                           type == CV_16UC3 ? icvColorToGray_16u_C3C1R_p :
                           type == CV_16SC3 ? icvColorToGray_16s_C3C1R_p :
                           type == CV_32FC3 ? icvColorToGray_32f_C3C1R_p : 0;
            else
                ipp_func = type == CV_8UC4 ? icvColorToGray_8u_AC4C1R_p :
                           type == CV_16UC4 ? icvColorToGray_16u_AC4C1R_p :
                           type == CV_16SC4 ? icvColorToGray_16s_AC4C1R_p :
                           type == CV_32FC4 ? icvColorToGray_32f_AC4C1R_p : 0;
        }

        if( dst_cn == cn )
        {
            diag_transform = 1;
            for( i = 0; i < dst_cn; i++ )
                for( j = 0; j < cn; j++ )
                {
                    if( i != j && fabs(buffer[i*(cn+1) + j]) > DBL_EPSILON )
                    {
                        diag_transform = 0;
                        break;
                    }
                }

            if( diag_transform )
            {
                if( CV_MAT_DEPTH(type) == CV_8U )
                {
                    /* 8u diagonal transform: precompute a 256-entry lookup table */
                    CV_CALL( lut = cvCreateMat( 1, 256, type ));
                    for( i = 0; i < cn; i++ )
                    {
                        double a = buffer[i*(cn+1) + i], b = buffer[i*(cn+1) + cn];
                        uchar* ltab = lut->data.ptr;
                        for( j = 0; j < 256; j++ )
                        {
                            int t = cvRound(a*j + b);
                            ltab[j*cn + i] = CV_CAST_8U(t);
                        }
                    }
                    lut_func = cn == 1 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C1R :
                               cn == 2 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C2R :
                               cn == 3 ? (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C3R :
                                         (CvLUT_TransformFunc)icvLUT_Transform8u_8u_C4R;
                }
                else
                    diag_func = (CvDiagTransformFunc)(diag_transform_tab.fn_2d[type]);
            }
        }
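
        /* IPP's color-twist functions take the coefficients as a cn x 4
           float array, so the relevant part of "buffer" is repacked below. */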
        if( ipp_func )
        {
            const double* ptr = buffer;

            // fill the cn x 4 ipp_coeffs array
            for( i = 0; i < cn*4; i += 4, ptr += cn+1 )
            {
                float t0 = (float)ptr[0];
                float t1 = (float)ptr[1];
                ipp_coeffs[i] = t0;
                ipp_coeffs[i+1] = t1;
                t0 = (float)ptr[2];
                t1 = (float)ptr[3];
                ipp_coeffs[i+2] = t0;
                ipp_coeffs[i+3] = t1;
            }
        }

        if( !src_seq )
        {
            int srcstep = src->step;
            int dststep = dst->step;
            size = cvGetMatSize( src );

            if( CV_IS_MAT_CONT( src->type & dst->type ))
            {
                size.width *= size.height;
                size.height = 1;
                srcstep = dststep = CV_STUB_STEP;
            }

            if( lut_func )
                lut_func( src->data.ptr, src->step, dst->data.ptr,
                          dst->step, size, lut->data.ptr );
            else if( ipp_func )
            {
                IPPI_CALL( ipp_func( src->data.ptr, srcstep, dst->data.ptr,
                                     dststep, size, ipp_coeffs ));
            }
            else if( diag_transform )
                diag_func( src->data.ptr, src->step, dst->data.ptr,
                           dst->step, size, buffer );
            else
                func( src->data.ptr, src->step, dst->data.ptr,
                      dst->step, size, buffer, dst_cn );
        }
        else
        {
            CvSeqBlock* src_block = src_seq->first;
            CvSeqBlock* dst_block = dst_seq->first;
            int src_idx = 0, dst_idx = 0;
            int src_elem_size = CV_ELEM_SIZE(src_seq->flags);
            int dst_elem_size = CV_ELEM_SIZE(dst_seq->flags);

            for( i = src_seq->total; i > 0; )
            {
                int src_len = src_block->count - src_idx;
                int dst_len = dst_block->count - dst_idx;
                const void* srcptr = src_block->data + src_idx*src_elem_size;
                void* dstptr = dst_block->data + dst_idx*dst_elem_size;
                src_len = MIN(src_len, dst_len);

                if( lut_func )
                    lut_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
                              cvSize( src_len, 1 ), lut->data.ptr );
                else if( ipp_func )
                {
                    IPPI_CALL( ipp_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
                                         cvSize( src_len, 1 ), ipp_coeffs ));
                }
                else if( diag_transform )
                    diag_func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
                               cvSize( src_len, 1 ), buffer );
                else
                    func( srcptr, CV_STUB_STEP, dstptr, CV_STUB_STEP,
                          cvSize( src_len, 1 ), buffer, dst_cn );

                if( (src_idx += src_len) == src_block->count )
                    src_block = src_block->next, src_idx = 0;
                if( (dst_idx += src_len) == dst_block->count )
                    dst_block = dst_block->next, dst_idx = 0;
                i -= src_len;
            }
        }
    }

    __END__;

    cvReleaseMat( &lut );
}
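
/* Usage sketch (illustrative only, not part of the library): convert an
   R,G,B-ordered image to gray via a 1x3 transform matrix,
   dst = 0.299*R + 0.587*G + 0.114*B.

       CvMat* rgb = cvCreateMat( 100, 100, CV_8UC3 );
       CvMat* gray = cvCreateMat( 100, 100, CV_8UC1 );
       double coeffs[] = { 0.299, 0.587, 0.114 };
       CvMat M = cvMat( 1, 3, CV_64FC1, coeffs );
       cvTransform( rgb, gray, &M, 0 ); // no shift vector
       cvReleaseMat( &rgb );
       cvReleaseMat( &gray );
*/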

/****************************************************************************************\
*                                cvPerspectiveTransform                                  *
\****************************************************************************************/

#define ICV_PERSPECTIVE_TRANSFORM_FUNC_2( flavor, arrtype )                 \
static CvStatus CV_STDCALL                                                  \
icvPerspectiveTransform_##flavor##_C2R( const arrtype* src, int srcstep,    \
                                        arrtype* dst, int dststep,          \
                                        CvSize size, const double* mat )    \
{                                                                           \
    int i;                                                                  \
    size.width *= 2;                                                        \
    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                   \
                                                                            \
    for( ; size.height--; src += srcstep, dst += dststep )                  \
    {                                                                       \
        for( i = 0; i < size.width; i += 2 )                                \
        {                                                                   \
            arrtype x = src[i], y = src[i + 1];                             \
            double w = x*mat[6] + y*mat[7] + mat[8];                        \
                                                                            \
            if( fabs(w) > FLT_EPSILON )                                     \
            {                                                               \
                w = 1./w;                                                   \
                dst[i] = (arrtype)((x*mat[0] + y*mat[1] + mat[2]) * w);     \
                dst[i+1] = (arrtype)((x*mat[3] + y*mat[4] + mat[5]) * w);   \
            }                                                               \
            else                                                            \
            {                                                               \
                dst[i] = (arrtype)0;                                        \
                dst[i+1] = (arrtype)0;                                      \
            }                                                               \
        }                                                                   \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}


#define ICV_PERSPECTIVE_TRANSFORM_FUNC_3( flavor, arrtype )                 \
static CvStatus CV_STDCALL                                                  \
icvPerspectiveTransform_##flavor##_C3R( const arrtype* src, int srcstep,    \
                                        arrtype* dst, int dststep,          \
                                        CvSize size, const double* mat )    \
{                                                                           \
    int i;                                                                  \
    size.width *= 3;                                                        \
    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                   \
                                                                            \
    for( ; size.height--; src += srcstep, dst += dststep )                  \
    {                                                                       \
        for( i = 0; i < size.width; i += 3 )                                \
        {                                                                   \
            arrtype x = src[i], y = src[i + 1], z = src[i + 2];             \
            double w = x*mat[12] + y*mat[13] + z*mat[14] + mat[15];         \
                                                                            \
            if( fabs(w) > FLT_EPSILON )                                     \
            {                                                               \
                w = 1./w;                                                   \
                dst[i] = (arrtype)((x*mat[0] + y*mat[1] + z*mat[2] + mat[3]) * w);    \
                dst[i+1] = (arrtype)((x*mat[4] + y*mat[5] + z*mat[6] + mat[7]) * w);  \
                dst[i+2] = (arrtype)((x*mat[8] + y*mat[9] + z*mat[10] + mat[11]) * w);\
            }                                                               \
            else                                                            \
            {                                                               \
                dst[i] = (arrtype)0;                                        \
                dst[i+1] = (arrtype)0;                                      \
                dst[i+2] = (arrtype)0;                                      \
            }                                                               \
        }                                                                   \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}

ICV_PERSPECTIVE_TRANSFORM_FUNC_2( 32f, float )
ICV_PERSPECTIVE_TRANSFORM_FUNC_2( 64f, double )
ICV_PERSPECTIVE_TRANSFORM_FUNC_3( 32f, float )
ICV_PERSPECTIVE_TRANSFORM_FUNC_3( 64f, double )

static void icvInitPerspectiveTransformTable( CvFuncTable* tab2, CvFuncTable* tab3 )
{
    tab2->fn_2d[CV_32F] = (void*)icvPerspectiveTransform_32f_C2R;
    tab2->fn_2d[CV_64F] = (void*)icvPerspectiveTransform_64f_C2R;
    tab3->fn_2d[CV_32F] = (void*)icvPerspectiveTransform_32f_C3R;
    tab3->fn_2d[CV_64F] = (void*)icvPerspectiveTransform_64f_C3R;
}
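
/* The kernels above implement the projective mapping. For the two-channel
   case with a row-major 3x3 matrix m:

       (x, y) -> ( (m[0]*x + m[1]*y + m[2])/w, (m[3]*x + m[4]*y + m[5])/w ),
       w = m[6]*x + m[7]*y + m[8],

   with the output set to zero when |w| <= FLT_EPSILON. The three-channel
   case is identical with a 4x4 matrix. */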
CV_IMPL void
cvPerspectiveTransform( const CvArr* srcarr, CvArr* dstarr, const CvMat* mat )
{
    static CvFuncTable tab[2];
    static int inittab = 0;
    double buffer[16];

    CV_FUNCNAME( "cvPerspectiveTransform" );

    __BEGIN__;

    CvMat sstub, *src = (CvMat*)srcarr;
    CvMat dstub, *dst = (CvMat*)dstarr;
    int i, j, type, cn;
    CvFunc2D_2A1P func = 0;
    CvSize size;

    if( !inittab )
    {
        icvInitPerspectiveTransformTable( &tab[0], &tab[1] );
        inittab = 1;
    }

    if( !CV_IS_MAT( src ))
    {
        int coi = 0;
        CV_CALL( src = cvGetMat( src, &sstub, &coi ));

        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }

    if( !CV_IS_MAT( dst ))
    {
        int coi = 0;
        CV_CALL( dst = cvGetMat( dst, &dstub, &coi ));

        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }

    if( !CV_ARE_TYPES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedFormats, "" );

    if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    type = CV_MAT_TYPE( src->type );
    cn = CV_MAT_CN( type );

    if( cn != 2 && cn != 3 )
        CV_ERROR( CV_BadNumChannels, cvUnsupportedFormat );

    if( !CV_IS_MAT( mat ))
        CV_ERROR( CV_StsBadArg, "Invalid transformation matrix" );

    if( mat->rows != cn + 1 || mat->cols != mat->rows )
        CV_ERROR( CV_StsBadSize,
        "The transformation matrix must be square, of size (cn+1) x (cn+1), "
        "where cn is the number of channels" );

    if( CV_MAT_TYPE( mat->type ) == CV_64FC1 )
    {
        for( i = 0; i <= cn; i++ )
        {
            for( j = 0; j <= cn; j++ )
                buffer[i*(cn+1) + j] = ((double*)(mat->data.ptr + mat->step*i))[j];
        }
    }
    else if( CV_MAT_TYPE( mat->type ) == CV_32FC1 )
    {
        for( i = 0; i <= cn; i++ )
        {
            for( j = 0; j <= cn; j++ )
                buffer[i*(cn+1) + j] = ((float*)(mat->data.ptr + mat->step*i))[j];
        }
    }
    else
    {
        CV_ERROR( CV_StsUnsupportedFormat, "Transformation matrix must be 32fC1 or 64fC1" );
    }

    func = (CvFunc2D_2A1P)tab[cn == 3].fn_2d[CV_MAT_DEPTH(type)];

    if( !func )
        CV_ERROR( CV_StsUnsupportedFormat, "" );

    size = cvGetMatSize( src );

    if( CV_IS_MAT_CONT( src->type & dst->type ))
    {
        size.width *= size.height;
        size.height = 1;
    }

    IPPI_CALL( func( src->data.ptr, src->step, dst->data.ptr, dst->step, size, buffer));

    CV_CHECK_NANS( dst );

    __END__;
}
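
/*
  Usage sketch for cvPerspectiveTransform (illustrative only; H is an
  assumed 3x3 CV_64FC1 homography):

    float pts[8] = { 0,0, 1,0, 1,1, 0,1 };
    float out[8];
    CvMat src = cvMat( 1, 4, CV_32FC2, pts );
    CvMat dst = cvMat( 1, 4, CV_32FC2, out );
    cvPerspectiveTransform( &src, &dst, H );
*/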

/****************************************************************************************\
*                                       cvScaleAdd                                       *
\****************************************************************************************/

#define ICV_DEF_MULADDC_CASE_C1( arrtype, temptype, src1, src2, dst, len )  \
{                                                                           \
    int i;                                                                  \
                                                                            \
    for( i = 0; i <= (len) - 4; i += 4 )                                    \
    {                                                                       \
        temptype t0 = (src1)[i]*s0 + (src2)[i];                             \
        temptype t1 = (src1)[i+1]*s0 + (src2)[i+1];                         \
                                                                            \
        (dst)[i] = (arrtype)t0;                                             \
        (dst)[i+1] = (arrtype)t1;                                           \
                                                                            \
        t0 = (src1)[i+2]*s0 + (src2)[i+2];                                  \
        t1 = (src1)[i+3]*s0 + (src2)[i+3];                                  \
                                                                            \
        (dst)[i+2] = (arrtype)t0;                                           \
        (dst)[i+3] = (arrtype)t1;                                           \
    }                                                                       \
                                                                            \
    for( ; i < (len); i++ )                                                 \
    {                                                                       \
        temptype t0 = (src1)[i]*s0 + (src2)[i];                             \
        (dst)[i] = (arrtype)t0;                                             \
    }                                                                       \
}


#define ICV_DEF_MULADDC_CASE_C2( arrtype, temptype, src1, src2, dst, len )  \
{                                                                           \
    int i;                                                                  \
                                                                            \
    for( i = 0; i <= (len) - 4; i += 4 )                                    \
    {                                                                       \
        temptype t0 = (src1)[i]*s0 - (src1)[i+1]*s1 + (src2)[i];            \
        temptype t1 = (src1)[i]*s1 + (src1)[i+1]*s0 + (src2)[i+1];          \
                                                                            \
        (dst)[i] = (arrtype)t0;                                             \
        (dst)[i+1] = (arrtype)t1;                                           \
                                                                            \
        t0 = (src1)[i+2]*s0 - (src1)[i+3]*s1 + (src2)[i+2];                 \
        t1 = (src1)[i+2]*s1 + (src1)[i+3]*s0 + (src2)[i+3];                 \
                                                                            \
        (dst)[i+2] = (arrtype)t0;                                           \
        (dst)[i+3] = (arrtype)t1;                                           \
    }                                                                       \
                                                                            \
    for( ; i < (len); i += 2 )                                              \
    {                                                                       \
        temptype t0 = (src1)[i]*s0 - (src1)[i+1]*s1 + (src2)[i];            \
        temptype t1 = (src1)[i]*s1 + (src1)[i+1]*s0 + (src2)[i+1];          \
                                                                            \
        (dst)[i] = (arrtype)t0;                                             \
        (dst)[i+1] = (arrtype)t1;                                           \
    }                                                                       \
}
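
/* The C2 kernel treats each two-channel element as a complex number: with
   scale s = s0 + i*s1 and source element z = re + i*im it computes
   z*s + src2 via (re*s0 - im*s1, re*s1 + im*s0), i.e. cvScaleAdd on
   two-channel arrays performs a complex multiply-accumulate. */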

#define ICV_DEF_MULADDS_FUNC( flavor, arrtype, scalartype, entry, cn )      \
static CvStatus CV_STDCALL                                                  \
icvMulAddC_##flavor( const arrtype* src1, int srcstep1,                     \
                     const arrtype* src2, int srcstep2,                     \
                     arrtype* dst, int dststep, CvSize size,                \
                     const scalartype* scalar )                             \
{                                                                           \
    entry(scalartype);                                                      \
    size.width *= (cn);                                                     \
    srcstep1 /= sizeof(src1[0]); srcstep2 /= sizeof(src2[0]);               \
    dststep /= sizeof(dst[0]);                                              \
                                                                            \
    for( ; size.height--; src1+=srcstep1, src2+=srcstep2, dst+=dststep )    \
    {                                                                       \
        ICV_DEF_MULADDC_CASE_C##cn( arrtype, scalartype, src1, src2,        \
                                    dst, size.width )                       \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}


ICV_DEF_MULADDS_FUNC( 32f_C1R, float, double, CV_UN_ENTRY_C1, 1 )
ICV_DEF_MULADDS_FUNC( 32f_C2R, float, double, CV_UN_ENTRY_C2, 2 )
ICV_DEF_MULADDS_FUNC( 64f_C1R, double, double, CV_UN_ENTRY_C1, 1 )
ICV_DEF_MULADDS_FUNC( 64f_C2R, double, double, CV_UN_ENTRY_C2, 2 )


static void
icvInitMulAddCTable( CvBigFuncTable* tab )
{
    tab->fn_2d[CV_32FC1] = (void*)icvMulAddC_32f_C1R;
    tab->fn_2d[CV_32FC2] = (void*)icvMulAddC_32f_C2R;
    tab->fn_2d[CV_64FC1] = (void*)icvMulAddC_64f_C1R;
    tab->fn_2d[CV_64FC2] = (void*)icvMulAddC_64f_C2R;
}


CV_IMPL void
cvScaleAdd( const CvArr* srcarr1, CvScalar scale,
            const CvArr* srcarr2, CvArr* dstarr )
{
    static CvBigFuncTable muladds_tab;
    static int inittab = 0;

    CV_FUNCNAME( "cvScaleAdd" );

    __BEGIN__;

    CvMat stub1, *src1 = (CvMat*)srcarr1;
    CvMat stub2, *src2 = (CvMat*)srcarr2;
    CvMat stub, *dst = (CvMat*)dstarr;
    CvSize size;
    int type;

    if( !CV_IS_MAT( src1 ) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
    {
        int coi1 = 0, coi2 = 0, coi3 = 0;
        CV_CALL( src1 = cvGetMat( src1, &stub1, &coi1 ));
        CV_CALL( src2 = cvGetMat( src2, &stub2, &coi2 ));
        CV_CALL( dst = cvGetMat( dst, &stub, &coi3 ));

        if( coi1 + coi2 + coi3 != 0 )
            CV_ERROR( CV_BadCOI, "" );
    }

    if( !CV_ARE_TYPES_EQ( src1, dst ) || !CV_ARE_TYPES_EQ( src2, dst ))
        CV_ERROR( CV_StsUnmatchedFormats, "" );

    if( !CV_ARE_SIZES_EQ( src1, dst ) || !CV_ARE_SIZES_EQ( src2, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    type = CV_MAT_TYPE( src1->type );
    size = cvGetMatSize( src1 );

    if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
    {
        size.width *= size.height;

        if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
        {
            if( type == CV_32FC1 )
            {
                float* mA = src1->data.fl;
                float* mB = src2->data.fl;
                float* mC = dst->data.fl;

                do
                {
                    mC[size.width - 1] = (float)(mA[size.width - 1]*scale.val[0] +
                                         mB[size.width - 1]);
                }
                while( --size.width );

                EXIT;
            }

            if( type == CV_64FC1 )
            {
                double* mA = src1->data.db;
                double* mB = src2->data.db;
                double* mC = dst->data.db;

                do
                {
                    mC[size.width - 1] = mA[size.width - 1]*scale.val[0] +
                                         mB[size.width - 1];
                }
                while( --size.width );

                EXIT;
            }
        }

        size.height = 1;
    }

    if( !inittab )
    {
        icvInitMulAddCTable( &muladds_tab );
        inittab = 1;
    }

    if( CV_MAT_CN(type) > 2 )
        CV_ERROR( CV_StsOutOfRange, "The function only supports 1- and 2-channel arrays" );

    {
        CvFunc2D_3A1P func = (CvFunc2D_3A1P)(muladds_tab.fn_2d[type]);

        if( !func )
            CV_ERROR( CV_StsUnsupportedFormat, "" );

        IPPI_CALL( func( src1->data.ptr, src1->step, src2->data.ptr, src2->step,
                         dst->data.ptr, dst->step, size, scale.val ));
    }

    CV_CHECK_NANS( dst );

    __END__;
}
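
/*
  Usage sketch for cvScaleAdd (illustrative only; A, B, C are assumed
  same-size, same-type arrays, e.g. CV_32FC1):

    cvScaleAdd( A, cvRealScalar(2.5), B, C );   // C = A*2.5 + B
*/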

/****************************************************************************************\
*                                   cvCalcCovarMatrix                                    *
\****************************************************************************************/

#define ICV_DOT_PRODUCT_CASE( flavor, srctype, avgtype, load_macro )                    \
static CvStatus CV_STDCALL                                                              \
icvDotProductShifted_##flavor##_C1R( const srctype* vec1, int vecstep1,                 \
                                     const srctype* vec2, int vecstep2,                 \
                                     const avgtype* avg, int avgstep,                   \
                                     CvSize size, double* _result )                     \
{                                                                                       \
    double result = 0;                                                                  \
    vecstep1 /= sizeof(vec1[0]); vecstep2 /= sizeof(vec2[0]); avgstep /= sizeof(avg[0]);\
                                                                                        \
    for( ; size.height--; vec1 += vecstep1, vec2 += vecstep2, avg += avgstep )          \
    {                                                                                   \
        int x;                                                                          \
        for( x = 0; x <= size.width - 4; x += 4 )                                       \
            result += (load_macro(vec1[x]) - avg[x])*(load_macro(vec2[x]) - avg[x]) +   \
                (load_macro(vec1[x+1]) - avg[x+1])*(load_macro(vec2[x+1]) - avg[x+1]) + \
                (load_macro(vec1[x+2]) - avg[x+2])*(load_macro(vec2[x+2]) - avg[x+2]) + \
                (load_macro(vec1[x+3]) - avg[x+3])*(load_macro(vec2[x+3]) - avg[x+3]);  \
        for( ; x < size.width; x++ )                                                    \
            result += (load_macro(vec1[x]) - avg[x])*(load_macro(vec2[x]) - avg[x]);    \
    }                                                                                   \
                                                                                        \
    *_result = result;                                                                  \
    return CV_OK;                                                                       \
}


ICV_DOT_PRODUCT_CASE( 8u32f, uchar, float, CV_8TO32F )
ICV_DOT_PRODUCT_CASE( 8u64f, uchar, double, CV_8TO32F )
ICV_DOT_PRODUCT_CASE( 16u32f, ushort, float, CV_NOP )
ICV_DOT_PRODUCT_CASE( 16u64f, ushort, double, CV_NOP )
ICV_DOT_PRODUCT_CASE( 16s32f, short, float, CV_NOP )
ICV_DOT_PRODUCT_CASE( 16s64f, short, double, CV_NOP )
ICV_DOT_PRODUCT_CASE( 32f, float, float, CV_NOP )
ICV_DOT_PRODUCT_CASE( 32f64f, float, double, CV_NOP )
ICV_DOT_PRODUCT_CASE( 64f, double, double, CV_NOP )

static void icvInitDotProductShiftedTable( CvFuncTable* tabfl, CvFuncTable* tabdb )
{
    tabfl->fn_2d[CV_8U] = (void*)icvDotProductShifted_8u32f_C1R;
    tabfl->fn_2d[CV_8S] = 0;
    tabfl->fn_2d[CV_16U] = (void*)icvDotProductShifted_16u32f_C1R;
    tabfl->fn_2d[CV_16S] = (void*)icvDotProductShifted_16s32f_C1R;
    tabfl->fn_2d[CV_32S] = 0;
    tabfl->fn_2d[CV_32F] = (void*)icvDotProductShifted_32f_C1R;
    tabfl->fn_2d[CV_64F] = 0;

    tabdb->fn_2d[CV_8U] = (void*)icvDotProductShifted_8u64f_C1R;
    tabdb->fn_2d[CV_8S] = 0;
    tabdb->fn_2d[CV_16U] = (void*)icvDotProductShifted_16u64f_C1R;
    tabdb->fn_2d[CV_16S] = (void*)icvDotProductShifted_16s64f_C1R;
    tabdb->fn_2d[CV_32S] = 0;
    tabdb->fn_2d[CV_32F] = (void*)icvDotProductShifted_32f64f_C1R;
    tabdb->fn_2d[CV_64F] = (void*)icvDotProductShifted_64f_C1R;
}
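
/* Rank-1 accumulation kernel used for CV_COVAR_NORMAL: it stores the
   centered vector (vec - avg) in tempbuf, then adds the outer product
   t*t^T to the lower triangle of dst; cvCalcCovarMatrix mirrors the
   upper triangle and applies the optional scale afterwards. */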

#define ICV_EXT_PRODUCT_CASE( flavor, srctype, avgtype, load_macro )        \
static CvStatus CV_STDCALL                                                  \
icvExtProductShifted_##flavor##_C1R( const srctype* vec, int vecstep,       \
                                     const avgtype* avg, int avgstep,       \
                                     avgtype* dst, int dststep,             \
                                     CvSize size, avgtype* tempbuf )        \
{                                                                           \
    int x, y, dstsize = size.width * size.height;                           \
                                                                            \
    vecstep /= sizeof(vec[0]); avgstep /= sizeof(avg[0]);                   \
    for( y = 0; y < size.height; y++, vec += vecstep, avg += avgstep )      \
        for( x = 0; x < size.width; x++ )                                   \
            *tempbuf++ = load_macro(vec[x]) - avg[x];                       \
    tempbuf -= dstsize;                                                     \
                                                                            \
    dststep /= sizeof(dst[0]);                                              \
    for( y = 0; y < dstsize; y++, dst += dststep )                          \
    {                                                                       \
        double ty = tempbuf[y];                                             \
        for( x = 0; x <= y - 3; x += 4 )                                    \
        {                                                                   \
            double t0 = dst[x] + ty*tempbuf[x];                             \
            double t1 = dst[x+1] + ty*tempbuf[x+1];                         \
            dst[x] = (avgtype)t0;                                           \
            dst[x+1] = (avgtype)t1;                                         \
            t0 = dst[x+2] + ty*tempbuf[x+2];                                \
            t1 = dst[x+3] + ty*tempbuf[x+3];                                \
            dst[x+2] = (avgtype)t0;                                         \
            dst[x+3] = (avgtype)t1;                                         \
        }                                                                   \
        for( ; x <= y; x++ )                                                \
            dst[x] = (avgtype)(dst[x] + ty*tempbuf[x]);                     \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}

ICV_EXT_PRODUCT_CASE( 8u32f, uchar, float, CV_8TO32F )
ICV_EXT_PRODUCT_CASE( 8u64f, uchar, double, CV_8TO32F )
ICV_EXT_PRODUCT_CASE( 16u32f, ushort, float, CV_NOP )
ICV_EXT_PRODUCT_CASE( 16u64f, ushort, double, CV_NOP )
ICV_EXT_PRODUCT_CASE( 16s32f, short, float, CV_NOP )
ICV_EXT_PRODUCT_CASE( 16s64f, short, double, CV_NOP )
ICV_EXT_PRODUCT_CASE( 32f, float, float, CV_NOP )
ICV_EXT_PRODUCT_CASE( 32f64f, float, double, CV_NOP )
ICV_EXT_PRODUCT_CASE( 64f, double, double, CV_NOP )


static void icvInitExtProductShiftedTable( CvFuncTable* tabfl, CvFuncTable* tabdb )
{
    tabfl->fn_2d[CV_8U] = (void*)icvExtProductShifted_8u32f_C1R;
    tabfl->fn_2d[CV_8S] = 0;
    tabfl->fn_2d[CV_16U] = (void*)icvExtProductShifted_16u32f_C1R;
    tabfl->fn_2d[CV_16S] = (void*)icvExtProductShifted_16s32f_C1R;
    tabfl->fn_2d[CV_32S] = 0;
    tabfl->fn_2d[CV_32F] = (void*)icvExtProductShifted_32f_C1R;
    tabfl->fn_2d[CV_64F] = 0;

    tabdb->fn_2d[CV_8U] = (void*)icvExtProductShifted_8u64f_C1R;
    tabdb->fn_2d[CV_8S] = 0;
    tabdb->fn_2d[CV_16U] = (void*)icvExtProductShifted_16u64f_C1R;
    tabdb->fn_2d[CV_16S] = (void*)icvExtProductShifted_16s64f_C1R;
    tabdb->fn_2d[CV_32S] = 0;
    tabdb->fn_2d[CV_32F] = (void*)icvExtProductShifted_32f64f_C1R;
    tabdb->fn_2d[CV_64F] = (void*)icvExtProductShifted_64f_C1R;
}


typedef struct vec_data
{
    void* ptr;
    int step;
}
vec_data;

CV_IMPL void
cvCalcCovarMatrix( const CvArr** vecarr, int count,
                   CvArr* covarr, CvArr* avgarr, int flags )
{
    static CvFuncTable dot_tab[2];
    static CvFuncTable ext_tab[2];
    static int inittab = 0;
    vec_data* vecdata = 0;
    CvMat *tempvec = 0;

    CV_FUNCNAME( "cvCalcCovarMatrix" );

    __BEGIN__;

    CvMat covstub, *cov = (CvMat*)covarr;
    CvMat avgstub, *avg = (CvMat*)avgarr;
    CvMat vecstub0, *vecmat = 0;
    CvSize srcsize, contsize;
    int srctype = 0, dsttype = 0;
    int i, j;
    int cont_flag, vec_delta = 0, vec_step = 0;
    int is_covar_normal = (flags & CV_COVAR_NORMAL) != 0;
    double scale;

    if( !inittab )
    {
        icvInitDotProductShiftedTable( dot_tab + 0, dot_tab + 1 );
        icvInitExtProductShiftedTable( ext_tab + 0, ext_tab + 1 );
        inittab = 1;
    }

    if( !vecarr )
        CV_ERROR( CV_StsNullPtr, "NULL vec pointer" );

    CV_CALL( cov = cvGetMat( cov, &covstub ));
    CV_CALL( avg = cvGetMat( avg, &avgstub ));

    if( !CV_ARE_TYPES_EQ( cov, avg ))
        CV_ERROR( CV_StsUnmatchedFormats,
        "Covariance matrix and average vector should have the same types" );

    dsttype = CV_MAT_TYPE( cov->type );
    if( dsttype != CV_32FC1 && dsttype != CV_64FC1 )
        CV_ERROR( CV_StsUnsupportedFormat, "Covariance matrix must be 32fC1 or 64fC1" );

    if( cov->rows != cov->cols )
        CV_ERROR( CV_StsBadSize, "Covariance matrix must be square" );

    srcsize = cvGetMatSize( avg );
    contsize.width = srcsize.width * srcsize.height;
    contsize.height = 1;
    cont_flag = avg->type;

    if( flags & (CV_COVAR_ROWS|CV_COVAR_COLS) )
    {
        CV_CALL( vecmat = cvGetMat( vecarr[0], &vecstub0 ));
        srctype = CV_MAT_TYPE(vecmat->type);
        if( flags & CV_COVAR_COLS )
        {
            count = vecmat->cols;
            if( avg->cols != 1 || avg->rows != vecmat->rows )
                CV_ERROR( CV_StsUnmatchedSizes,
                "The number of input vectors does not match the avg vector size" );
            cont_flag = 0;
            vec_delta = CV_ELEM_SIZE(vecmat->type);
            vec_step = vecmat->step;
        }
        else
        {
            count = vecmat->rows;
            if( avg->rows != 1 || avg->cols != vecmat->cols )
                CV_ERROR( CV_StsUnmatchedSizes,
                "The number of input vectors does not match the avg vector size" );
            vec_delta = vecmat->step;
            vec_step = CV_STUB_STEP;
        }

        if( !(flags & CV_COVAR_USE_AVG) )
            CV_CALL( cvReduce( vecmat, avg, -1, CV_REDUCE_AVG ));

        scale = !(flags & CV_COVAR_SCALE) ? 1. : 1./count;

        cvMulTransposed( vecmat, cov,
            ((flags & CV_COVAR_ROWS) != 0) ^ ((flags & CV_COVAR_NORMAL) == 0),
            avg, scale );
        EXIT;
    }

    scale = !(flags & CV_COVAR_SCALE) ? 1. : 1./count;
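
    /* Two result layouts are possible from here on: without CV_COVAR_NORMAL
       the function fills the count x count "scrambled" matrix of pairwise
       dot products of centered vectors (useful for PCA when count is much
       smaller than the vector length); with CV_COVAR_NORMAL it accumulates
       the full covariance of vector elements via rank-1 updates. */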
    if( is_covar_normal )
    {
        if( count <= 0 )
            CV_ERROR( CV_StsBadSize,
            "The number of vectors is zero or negative" );
        if( cov->rows != contsize.width )
            CV_ERROR( CV_StsUnmatchedSizes,
            "The size of input vectors does not match the size of the covariance matrix" );

        CV_CALL( tempvec = cvCreateMat( avg->rows, avg->cols, dsttype ));
    }
    else if( count != cov->rows )
        CV_ERROR( CV_StsUnmatchedSizes,
        "The vector count and covariance matrix size do not match" );

    if( !(flags & (CV_COVAR_ROWS|CV_COVAR_COLS)) )
    {
        if( !(flags & CV_COVAR_USE_AVG) )
            cvZero( avg );

        CV_CALL( vecdata = (vec_data*)cvAlloc( count*sizeof(vecdata[0])));

        for( i = 0; i < count; i++ )
        {
            CvMat vecstub, *vec = (CvMat*)vecarr[i];
            CvMat* temp;

            if( !CV_IS_MAT(vec) )
                CV_CALL( vec = cvGetMat( vec, &vecstub ));

            if( !CV_ARE_SIZES_EQ( vec, avg ))
                CV_ERROR( CV_StsUnmatchedSizes,
                "All input vectors and average vector must have the same size" );

            vecdata[i].ptr = vec->data.ptr;
            vecdata[i].step = vec->step;
            cont_flag &= vec->type;
            temp = vec;
            if( i == 0 )
            {
                srctype = CV_MAT_TYPE( vec->type );
                if( CV_MAT_CN( srctype ) != 1 )
                    CV_ERROR( CV_BadNumChannels, "All vectors must have a single channel" );
                if( srctype != dsttype && !tempvec && !(flags & CV_COVAR_USE_AVG))
                    CV_CALL( tempvec = cvCreateMat( vec->rows, vec->cols, dsttype ));
            }
            else if( CV_MAT_TYPE(vec->type) != srctype )
                CV_ERROR( CV_StsUnmatchedFormats,
                "All input vectors must have the same type" );

            if( !(flags & CV_COVAR_USE_AVG) )
            {
                if( tempvec )
                {
                    temp = tempvec;
                    cvConvert( vec, temp );
                }
                cvAdd( temp, avg, avg );
            }
        }

        if( !(flags & CV_COVAR_USE_AVG) )
            cvScale( avg, avg, 1./count );
    }

    cont_flag = CV_IS_MAT_CONT( cont_flag );
    if( cont_flag )
        srcsize = contsize;

    if( !is_covar_normal )
    {
        CvFunc2D_3A1P dot_func =
            (CvFunc2D_3A1P)dot_tab[dsttype == CV_64FC1].fn_2d[CV_MAT_DEPTH(srctype)];

        if( !dot_func )
            CV_ERROR( CV_StsUnsupportedFormat,
            "The format of input vectors is not supported" );
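
        /* The (i,j) pairs are visited in a zig-zag order: even rows scan j
           upward, odd rows scan j downward, so recently used vectors are
           revisited first, which improves cache behavior for large sets. */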
        for( i = 0; i < count; i++ )
        {
            int a, b, delta;
            if( !(i & 1) )
                a = 0, b = i+1, delta = 1;
            else
                a = i, b = -1, delta = -1;

            for( j = a; j != b; j += delta )
            {
                double result = 0;
                void *v_i, *v_j;
                int step_i, step_j;

                if( !vecmat )
                {
                    v_i = vecdata[i].ptr;
                    v_j = vecdata[j].ptr;
                    step_i = vecdata[i].step;
                    step_j = vecdata[j].step;
                }
                else
                {
                    v_i = vecmat->data.ptr + vec_delta*i;
                    v_j = vecmat->data.ptr + vec_delta*j;
                    step_i = step_j = vec_step;
                }

                dot_func( v_i, step_i, v_j, step_j,
                          avg->data.ptr, avg->step, srcsize, &result );

                if( dsttype == CV_64FC1 )
                {
                    ((double*)(cov->data.ptr + i*cov->step))[j] =
                    ((double*)(cov->data.ptr + j*cov->step))[i] = result*scale;
                }
                else
                {
                    ((float*)(cov->data.ptr + i*cov->step))[j] =
                    ((float*)(cov->data.ptr + j*cov->step))[i] = (float)(result*scale);
                }
            }
        }
    }
    else
    {
        uchar* cov_ptr = cov->data.ptr;
        int cov_step = cov->step;
        int cov_size = cov->rows;
        CvFunc2D_3A1P ext_func =
            (CvFunc2D_3A1P)ext_tab[dsttype == CV_64FC1].fn_2d[CV_MAT_DEPTH(srctype)];
        if( !ext_func )
            CV_ERROR( CV_StsUnsupportedFormat,
            "The format of input vectors is not supported" );

        cvZero( cov );

        for( i = 0; i < count; i++ )
        {
            void* v;
            int vstep;
            if( !vecmat )
            {
                v = vecdata[i].ptr;
                vstep = vecdata[i].step;
            }
            else
            {
                v = vecmat->data.ptr + vec_delta*i;
                vstep = vec_step;
            }

            ext_func( v, vstep, avg->data.ptr, avg->step,
                      cov_ptr, cov_step, srcsize, tempvec->data.ptr );
        }

        if( dsttype == CV_64FC1 )
            for( i = 0; i < cov_size; i++ )
                for( j = 0; j <= i; j++ )
                {
                    double* cov1 = ((double*)(cov_ptr + i*cov_step)) + j;
                    double* cov2 = ((double*)(cov_ptr + j*cov_step)) + i;

                    if( flags & CV_COVAR_SCALE )
                        *cov1 = *cov2 = *cov1*scale;
                    else
                        *cov2 = *cov1;
                }
        else
            for( i = 0; i < cov_size; i++ )
                for( j = 0; j <= i; j++ )
                {
                    float* cov1 = ((float*)(cov_ptr + i*cov_step)) + j;
                    float* cov2 = ((float*)(cov_ptr + j*cov_step)) + i;

                    if( flags & CV_COVAR_SCALE )
                        *cov1 = *cov2 = (float)(*cov1*scale);
                    else
                        *cov2 = *cov1;
                }
    }

    __END__;

    cvFree( &vecdata );
    cvReleaseMat( &tempvec );
}
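
/*
  Usage sketch for cvCalcCovarMatrix (illustrative only; `samples` is an
  assumed CV_32FC1 matrix whose rows are the observations):

    CvMat* cov = cvCreateMat( samples->cols, samples->cols, CV_32FC1 );
    CvMat* avg = cvCreateMat( 1, samples->cols, CV_32FC1 );
    cvCalcCovarMatrix( (const CvArr**)&samples, 0, cov, avg,
                       CV_COVAR_NORMAL | CV_COVAR_ROWS | CV_COVAR_SCALE );
*/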

/****************************************************************************************\
*                                     cvMahalanobis                                      *
\****************************************************************************************/

#define ICV_MAHALANOBIS( flavor, arrtype )                                  \
static CvStatus CV_STDCALL                                                  \
icvMahalanobis_##flavor##_C1R( const arrtype* mat, int matstep,             \
                               const arrtype* vec, int len, double* _result )\
{                                                                           \
    int i, j;                                                               \
    double result = 0;                                                      \
                                                                            \
    matstep /= sizeof(mat[0]);                                              \
    for( i = 0; i < len; i++, mat += matstep )                              \
    {                                                                       \
        double row_sum = 0;                                                 \
        for( j = 0; j <= len - 4; j += 4 )                                  \
            row_sum += vec[j]*mat[j] + vec[j+1]*mat[j+1] +                  \
                       vec[j+2]*mat[j+2] + vec[j+3]*mat[j+3];               \
        for( ; j < len; j++ )                                               \
            row_sum += vec[j]*mat[j];                                       \
        result += row_sum * vec[i];                                         \
    }                                                                       \
    *_result = result;                                                      \
                                                                            \
    return CV_OK;                                                           \
}

ICV_MAHALANOBIS( 32f, float )
ICV_MAHALANOBIS( 64f, double )

static void icvInitMahalanobisTable( CvFuncTable* tab )
{
    tab->fn_2d[CV_32F] = (void*)icvMahalanobis_32f_C1R;
    tab->fn_2d[CV_64F] = (void*)icvMahalanobis_64f_C1R;
}

typedef CvStatus (CV_STDCALL * CvMahalanobisFunc)( const void* mat, int matstep,
                                                   const void* vec, int len,
                                                   double* _result );

CV_IMPL double
cvMahalanobis( const CvArr* srcAarr, const CvArr* srcBarr, CvArr* matarr )
{
    static CvFuncTable mahal_tab;
    static int inittab = 0;
    uchar* buffer = 0;
    int local_alloc = 0;
    double dist = 0;

    CV_FUNCNAME( "cvMahalanobis" );

    __BEGIN__;

    int buf_size, elem_size, len;
    CvMat stubA, *srcA = (CvMat*)srcAarr;
    CvMat stubB, *srcB = (CvMat*)srcBarr;
    CvMat stub, *mat = (CvMat*)matarr;
    CvMat temp;
    CvMahalanobisFunc func;

    if( !inittab )
    {
        icvInitMahalanobisTable( &mahal_tab );
        inittab = 1;
    }

    if( !CV_IS_MAT(srcA) )
        CV_CALL( srcA = cvGetMat( srcA, &stubA ));

    if( !CV_IS_MAT(srcB) )
        CV_CALL( srcB = cvGetMat( srcB, &stubB ));

    if( !CV_IS_MAT(mat) )
        CV_CALL( mat = cvGetMat( mat, &stub ));

    if( srcA->rows != 1 && srcA->cols != 1 )
        CV_ERROR( CV_StsBadSize, "Input matrices must be 1-d vectors" );

    len = srcA->rows + srcA->cols - 1;

    if( !CV_ARE_SIZES_EQ(srcA,srcB) )
        CV_ERROR( CV_StsUnmatchedSizes, "Input vectors have different sizes" );

    if( mat->rows != len || mat->cols != len )
        CV_ERROR( CV_StsUnmatchedSizes,
        "Input vectors and covariance matrix have different sizes" );

    func = (CvMahalanobisFunc)mahal_tab.fn_2d[CV_MAT_DEPTH(srcA->type)];

    if( CV_MAT_CN(srcA->type) > 1 || !func )
        CV_ERROR( CV_StsUnsupportedFormat,
        "Only single-channel floating-point vectors are supported" );

    if( !CV_ARE_TYPES_EQ(srcA,srcB) || !CV_ARE_TYPES_EQ(srcA,mat) )
        CV_ERROR( CV_StsUnmatchedFormats,
        "Input vectors and the covariance matrix must have the same type" );

    elem_size = CV_ELEM_SIZE(srcA->type);
    buf_size = len*elem_size;

    if( buf_size <= CV_MAX_LOCAL_SIZE )
    {
        buffer = (uchar*)cvStackAlloc( buf_size );
        local_alloc = 1;
    }
    else
    {
        CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
    }

    temp = cvMat( srcA->rows, srcA->cols, srcA->type, buffer );
    CV_CALL( cvSub( srcA, srcB, &temp ));

    IPPI_CALL( func( mat->data.ptr, mat->step, temp.data.ptr, len, &dist ));
    dist = sqrt(dist);

    __END__;

    if( buffer && !local_alloc )
        cvFree( &buffer );

    return dist;
}
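
/*
  Usage sketch for cvMahalanobis (illustrative only; v1, v2 and cov are
  assumed to exist; the third argument must be the INVERSE covariance,
  since the function computes sqrt((v1-v2)^T * icov * (v1-v2))):

    CvMat* icov = cvCreateMat( cov->rows, cov->cols, cov->type );
    cvInvert( cov, icov, CV_SVD );
    double dist = cvMahalanobis( v1, v2, icov );
*/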

/****************************************************************************************\
*                                    cvMulTransposed                                     *
\****************************************************************************************/

#define ICV_DEF_MULTRANS_R_FUNC( flavor, srctype, dsttype, load_macro )     \
static CvStatus CV_STDCALL                                                  \
icvMulTransposedR_##flavor( const srctype* src, int srcstep,                \
                            dsttype* dst, int dststep,                      \
                            const dsttype* delta, int deltastep,            \
                            CvSize size, int delta_cols, double scale )     \
{                                                                           \
    int i, j, k;                                                            \
    dsttype* tdst = dst;                                                    \
    dsttype* col_buf = 0;                                                   \
    dsttype* delta_buf = 0;                                                 \
    int local_alloc = 0;                                                    \
    int buf_size = size.height*sizeof(dsttype);                             \
                                                                            \
    if( delta && delta_cols < size.width )                                  \
    {                                                                       \
        assert( delta_cols == 1 );                                          \
        buf_size += 4*buf_size;                                             \
    }                                                                       \
                                                                            \
    if( buf_size <= CV_MAX_LOCAL_SIZE )                                     \
    {                                                                       \
        col_buf = (dsttype*)cvStackAlloc( buf_size );                       \
        local_alloc = 1;                                                    \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        col_buf = (dsttype*)cvAlloc( buf_size );                            \
        if( !col_buf )                                                      \
            return CV_OUTOFMEM_ERR;                                         \
    }                                                                       \
                                                                            \
    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                   \
    deltastep /= sizeof(delta[0]);                                          \
                                                                            \
    if( delta && delta_cols < size.width )                                  \
    {                                                                       \
        delta_buf = col_buf + size.height;                                  \
        for( i = 0; i < size.height; i++ )                                  \
            delta_buf[i*4] = delta_buf[i*4+1] =                             \
                delta_buf[i*4+2] = delta_buf[i*4+3] = delta[i*deltastep];   \
        delta = delta_buf;                                                  \
        deltastep = deltastep ? 4 : 0;                                      \
    }                                                                       \
                                                                            \
    if( !delta )                                                            \
        for( i = 0; i < size.width; i++, tdst += dststep )                  \
        {                                                                   \
            for( k = 0; k < size.height; k++ )                              \
                col_buf[k] = src[k*srcstep+i];                              \
                                                                            \
            for( j = i; j <= size.width - 4; j += 4 )                       \
            {                                                               \
                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;                      \
                const srctype *tsrc = src + j;                              \
                                                                            \
                for( k = 0; k < size.height; k++, tsrc += srcstep )         \
                {                                                           \
                    double a = col_buf[k];                                  \
                    s0 += a * load_macro(tsrc[0]);                          \
                    s1 += a * load_macro(tsrc[1]);                          \
                    s2 += a * load_macro(tsrc[2]);                          \
                    s3 += a * load_macro(tsrc[3]);                          \
                }                                                           \
                                                                            \
                tdst[j] = (dsttype)(s0*scale);                              \
                tdst[j+1] = (dsttype)(s1*scale);                            \
                tdst[j+2] = (dsttype)(s2*scale);                            \
                tdst[j+3] = (dsttype)(s3*scale);                            \
            }                                                               \
                                                                            \
            for( ; j < size.width; j++ )                                    \
            {                                                               \
                double s0 = 0;                                              \
                const srctype *tsrc = src + j;                              \
                                                                            \
                for( k = 0; k < size.height; k++, tsrc += srcstep )         \
                    s0 += col_buf[k] * tsrc[0];                             \
                                                                            \
                tdst[j] = (dsttype)(s0*scale);                              \
            }                                                               \
        }                                                                   \
    else                                                                    \
        for( i = 0; i < size.width; i++, tdst += dststep )                  \
        {                                                                   \
            if( !delta_buf )                                                \
                for( k = 0; k < size.height; k++ )                          \
                    col_buf[k] = load_macro(src[k*srcstep+i]) - delta[k*deltastep+i]; \
            else                                                            \
                for( k = 0; k < size.height; k++ )                          \
                    col_buf[k] = load_macro(src[k*srcstep+i]) - delta_buf[k*deltastep]; \
                                                                            \
            for( j = i; j <= size.width - 4; j += 4 )                       \
            {                                                               \
                double s0 = 0, s1 = 0, s2 = 0, s3 = 0;                      \
                const srctype *tsrc = src + j;                              \
                const dsttype *d = delta_buf ? delta_buf : delta + j;       \
                                                                            \
                for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep ) \
                {                                                           \
                    double a = col_buf[k];                                  \
                    s0 += a * (load_macro(tsrc[0]) - d[0]);                 \
                    s1 += a * (load_macro(tsrc[1]) - d[1]);                 \
                    s2 += a * (load_macro(tsrc[2]) - d[2]);                 \
                    s3 += a * (load_macro(tsrc[3]) - d[3]);                 \
                }                                                           \
                                                                            \
                tdst[j] = (dsttype)(s0*scale);                              \
                tdst[j+1] = (dsttype)(s1*scale);                            \
                tdst[j+2] = (dsttype)(s2*scale);                            \
                tdst[j+3] = (dsttype)(s3*scale);                            \
            }                                                               \
                                                                            \
            for( ; j < size.width; j++ )                                    \
            {                                                               \
                double s0 = 0;                                              \
                const srctype *tsrc = src + j;                              \
                const dsttype *d = delta_buf ? delta_buf : delta + j;       \
                                                                            \
                for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep ) \
                    s0 += col_buf[k] * (load_macro(tsrc[0]) - d[0]);        \
                                                                            \
                tdst[j] = (dsttype)(s0*scale);                              \
            }                                                               \
        }                                                                   \
                                                                            \
    /* fill the lower part of the destination matrix */                     \
    for( i = 1; i < size.width; i++ )                                       \
        for( j = 0; j < i; j++ )                                            \
            dst[dststep*i + j] = dst[dststep*j + i];                        \
                                                                            \
    if( col_buf && !local_alloc )                                           \
        cvFree( &col_buf );                                                 \
                                                                            \
    return CV_NO_ERR;                                                       \
}
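
/* The "R" kernel above computes dst = scale*(src - delta)^T*(src - delta),
   a cols x cols result selected when order != 0; the "L" kernel below
   computes dst = scale*(src - delta)*(src - delta)^T, a rows x rows result
   for order == 0. Each fills one triangle and mirrors it at the end. */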

#define ICV_DEF_MULTRANS_L_FUNC( flavor, srctype, dsttype, load_macro )     \
static CvStatus CV_STDCALL                                                  \
icvMulTransposedL_##flavor( const srctype* src, int srcstep,                \
                            dsttype* dst, int dststep,                      \
                            dsttype* delta, int deltastep,                  \
                            CvSize size, int delta_cols, double scale )     \
{                                                                           \
    int i, j, k;                                                            \
    dsttype* tdst = dst;                                                    \
                                                                            \
    srcstep /= sizeof(src[0]); dststep /= sizeof(dst[0]);                   \
    deltastep /= sizeof(delta[0]);                                          \
                                                                            \
    if( !delta )                                                            \
        for( i = 0; i < size.height; i++, tdst += dststep )                 \
            for( j = i; j < size.height; j++ )                              \
            {                                                               \
                double s = 0;                                               \
                const srctype *tsrc1 = src + i*srcstep;                     \
                const srctype *tsrc2 = src + j*srcstep;                     \
                                                                            \
                for( k = 0; k <= size.width - 4; k += 4 )                   \
                    s += tsrc1[k]*tsrc2[k] + tsrc1[k+1]*tsrc2[k+1] +        \
                         tsrc1[k+2]*tsrc2[k+2] + tsrc1[k+3]*tsrc2[k+3];     \
                for( ; k < size.width; k++ )                                \
                    s += tsrc1[k] * tsrc2[k];                               \
                tdst[j] = (dsttype)(s*scale);                               \
            }                                                               \
    else                                                                    \
    {                                                                       \
        dsttype* row_buf = 0;                                               \
        int local_alloc = 0;                                                \
        int buf_size = size.width*sizeof(dsttype);                          \
        dsttype delta_buf[4];                                               \
        int delta_shift = delta_cols == size.width ? 4 : 0;                 \
                                                                            \
        if( buf_size <= CV_MAX_LOCAL_SIZE )                                 \
        {                                                                   \
            row_buf = (dsttype*)cvStackAlloc( buf_size );                   \
            local_alloc = 1;                                                \
        }                                                                   \
        else                                                                \
        {                                                                   \
            row_buf = (dsttype*)cvAlloc( buf_size );                        \
            if( !row_buf )                                                  \
                return CV_OUTOFMEM_ERR;                                     \
        }                                                                   \
                                                                            \
        for( i = 0; i < size.height; i++, tdst += dststep )                 \
        {                                                                   \
            const srctype *tsrc1 = src + i*srcstep;                         \
            const dsttype *tdelta1 = delta + i*deltastep;                   \
                                                                            \
            if( delta_cols < size.width )                                   \
                for( k = 0; k < size.width; k++ )                           \
                    row_buf[k] = tsrc1[k] - tdelta1[0];                     \
            else                                                            \
                for( k = 0; k < size.width; k++ )                           \
                    row_buf[k] = tsrc1[k] - tdelta1[k];                     \
                                                                            \
            for( j = i; j < size.height; j++ )                              \
            {                                                               \
                double s = 0;                                               \
                const srctype *tsrc2 = src + j*srcstep;                     \
                const dsttype *tdelta2 = delta + j*deltastep;               \
                if( delta_cols < size.width )                               \
                {                                                           \
                    delta_buf[0] = delta_buf[1] =                           \
                        delta_buf[2] = delta_buf[3] = tdelta2[0];           \
                    tdelta2 = delta_buf;                                    \
                }                                                           \
                for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift ) \
                    s += row_buf[k]*(load_macro(tsrc2[k]) - tdelta2[0]) +   \
                         row_buf[k+1]*(load_macro(tsrc2[k+1]) - tdelta2[1]) + \
                         row_buf[k+2]*(load_macro(tsrc2[k+2]) - tdelta2[2]) + \
                         row_buf[k+3]*(load_macro(tsrc2[k+3]) - tdelta2[3]);  \
                for( ; k < size.width; k++, tdelta2++ )                     \
                    s += row_buf[k]*(load_macro(tsrc2[k]) - tdelta2[0]);    \
                tdst[j] = (dsttype)(s*scale);                               \
            }                                                               \
        }                                                                   \
                                                                            \
        if( row_buf && !local_alloc )                                       \
            cvFree( &row_buf );                                             \
    }                                                                       \
                                                                            \
    /* fill the lower part of the destination matrix */                     \
    for( j = 0; j < size.height - 1; j++ )                                  \
        for( i = j; i < size.height; i++ )                                  \
            dst[dststep*i + j] = dst[dststep*j + i];                        \
                                                                            \
    return CV_NO_ERR;                                                       \
}


ICV_DEF_MULTRANS_R_FUNC( 8u32f, uchar, float, CV_8TO32F )
ICV_DEF_MULTRANS_R_FUNC( 8u64f, uchar, double, CV_8TO32F )
ICV_DEF_MULTRANS_R_FUNC( 32f, float, float, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 32f64f, float, double, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 64f, double, double, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 16u32f, ushort, float, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 16u64f, ushort, double, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 16s32f, short, float, CV_NOP )
ICV_DEF_MULTRANS_R_FUNC( 16s64f, short, double, CV_NOP )

ICV_DEF_MULTRANS_L_FUNC( 8u32f, uchar, float, CV_8TO32F )
ICV_DEF_MULTRANS_L_FUNC( 8u64f, uchar, double, CV_8TO32F )
ICV_DEF_MULTRANS_L_FUNC( 32f, float, float, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 32f64f, float, double, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 64f, double, double, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 16u32f, ushort, float, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 16u64f, ushort, double, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 16s32f, short, float, CV_NOP )
ICV_DEF_MULTRANS_L_FUNC( 16s64f, short, double, CV_NOP )


typedef CvStatus (CV_STDCALL * CvMulTransposedFunc)
    ( const void* src, int srcstep,
      void* dst, int dststep, const void* delta,
      int deltastep, CvSize size, int delta_cols, double scale );

CV_IMPL void
cvMulTransposed( const CvArr* srcarr, CvArr* dstarr,
                 int order, const CvArr* deltaarr, double scale )
{
    const int gemm_level = 100; // boundary above which GEMM is faster.
    CvMat* src2 = 0;

    CV_FUNCNAME( "cvMulTransposed" );

    __BEGIN__;

    CvMat sstub, *src = (CvMat*)srcarr;
    CvMat dstub, *dst = (CvMat*)dstarr;
    CvMat deltastub, *delta = (CvMat*)deltaarr;
    int stype, dtype;

    if( !CV_IS_MAT( src ))
        CV_CALL( src = cvGetMat( src, &sstub ));

    if( !CV_IS_MAT( dst ))
        CV_CALL( dst = cvGetMat( dst, &dstub ));

    if( delta )
    {
        if( !CV_IS_MAT( delta ))
            CV_CALL( delta = cvGetMat( delta, &deltastub ));

        if( !CV_ARE_TYPES_EQ( dst, delta ))
            CV_ERROR( CV_StsUnmatchedFormats, "" );

        if( (delta->rows != src->rows && delta->rows != 1) ||
            (delta->cols != src->cols && delta->cols != 1) )
            CV_ERROR( CV_StsUnmatchedSizes, "" );
    }
    else
    {
        delta = &deltastub;
        delta->data.ptr = 0;
        delta->step = 0;
        delta->rows = delta->cols = 0;
    }

    stype = CV_MAT_TYPE( src->type );
    dtype = CV_MAT_TYPE( dst->type );

    if( dst->rows != dst->cols )
        CV_ERROR( CV_StsBadSize, "The destination matrix must be square" );

    if( (order != 0 && src->cols != dst->cols) ||
        (order == 0 && src->rows != dst->rows))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    if( src->data.ptr == dst->data.ptr || (stype == dtype &&
        (dst->cols >= gemm_level && dst->rows >= gemm_level &&
         src->cols >= gemm_level && src->rows >= gemm_level)))
    {
        if( deltaarr )
        {
            CV_CALL( src2 = cvCreateMat( src->rows, src->cols, src->type ));
            cvRepeat( delta, src2 );
            cvSub( src, src2, src2 );
            src = src2;
        }
        cvGEMM( src, src, scale, 0, 0, dst, order == 0 ? CV_GEMM_B_T : CV_GEMM_A_T );
    }
    else
    {
        CvMulTransposedFunc func =
            stype == CV_8U && dtype == CV_32F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_8u32f :
                     (CvMulTransposedFunc)icvMulTransposedL_8u32f) :
            stype == CV_8U && dtype == CV_64F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_8u64f :
                     (CvMulTransposedFunc)icvMulTransposedL_8u64f) :
            stype == CV_16U && dtype == CV_32F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_16u32f :
                     (CvMulTransposedFunc)icvMulTransposedL_16u32f) :
            stype == CV_16U && dtype == CV_64F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_16u64f :
                     (CvMulTransposedFunc)icvMulTransposedL_16u64f) :
            stype == CV_16S && dtype == CV_32F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_16s32f :
                     (CvMulTransposedFunc)icvMulTransposedL_16s32f) :
            stype == CV_16S && dtype == CV_64F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_16s64f :
                     (CvMulTransposedFunc)icvMulTransposedL_16s64f) :
            stype == CV_32F && dtype == CV_32F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_32f :
                     (CvMulTransposedFunc)icvMulTransposedL_32f) :
            stype == CV_32F && dtype == CV_64F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_32f64f :
                     (CvMulTransposedFunc)icvMulTransposedL_32f64f) :
            stype == CV_64F && dtype == CV_64F ?
            (order ? (CvMulTransposedFunc)icvMulTransposedR_64f :
                     (CvMulTransposedFunc)icvMulTransposedL_64f) : 0;

        if( !func )
            CV_ERROR( CV_StsUnsupportedFormat, "" );

        IPPI_CALL( func( src->data.ptr, src->step, dst->data.ptr, dst->step,
                         delta->data.ptr, delta->step, cvGetMatSize( src ),
                         delta->cols, scale ));
    }

    __END__;

    if( src2 )
        cvReleaseMat( &src2 );
}
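
/*
  Usage sketch for cvMulTransposed (illustrative only; A is an assumed
  m x n CV_32FC1 matrix):

    CvMat* AtA = cvCreateMat( A->cols, A->cols, CV_32FC1 );
    cvMulTransposed( A, AtA, 1, 0, 1. );   // order = 1: AtA = A^T*A
*/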

/****************************************************************************************\
*                                      cvDotProduct                                      *
\****************************************************************************************/

#define ICV_DEF_DOT_PROD_FUNC_2D( flavor, arrtype, temptype, sumtype )      \
static CvStatus CV_STDCALL                                                  \
icvDotProduct_##flavor##_C1R( const arrtype* src1, int step1,               \
                              const arrtype* src2, int step2,               \
                              CvSize size, sumtype* _sum )                  \
{                                                                           \
    sumtype sum = 0;                                                        \
    step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]);                     \
                                                                            \
    for( ; size.height--; src1 += step1, src2 += step2 )                    \
    {                                                                       \
        int i;                                                              \
                                                                            \
        for( i = 0; i <= size.width - 4; i += 4 )                           \
        {                                                                   \
            temptype t0 = (temptype)src1[i]*src2[i];                        \
            temptype t1 = (temptype)src1[i+1]*src2[i+1];                    \
            t0 += (temptype)src1[i+2]*src2[i+2];                            \
            t1 += (temptype)src1[i+3]*src2[i+3];                            \
            sum += t0 + t1;                                                 \
        }                                                                   \
                                                                            \
        for( ; i < size.width; i++ )                                        \
        {                                                                   \
            sum += (temptype)src1[i]*src2[i];                               \
        }                                                                   \
    }                                                                       \
                                                                            \
    *_sum = sum;                                                            \
    return CV_OK;                                                           \
}


ICV_DEF_DOT_PROD_FUNC_2D( 8u, uchar, int, int64 )
ICV_DEF_DOT_PROD_FUNC_2D( 16u, ushort, int64, int64 )
ICV_DEF_DOT_PROD_FUNC_2D( 16s, short, int64, int64 )
ICV_DEF_DOT_PROD_FUNC_2D( 32s, int, double, double )
ICV_DEF_DOT_PROD_FUNC_2D( 32f, float, double, double )
ICV_DEF_DOT_PROD_FUNC_2D( 64f, double, double, double )

#define icvDotProduct_8s_C1R 0

CV_DEF_INIT_FUNC_TAB_2D( DotProduct, C1R )
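
/*
  Usage sketch for cvDotProduct, defined below (illustrative only; a and b
  are assumed CvMat* of the same size and type):

    double d = cvDotProduct( a, b );   // multi-channel elements are
                                       // treated as concatenated scalars
*/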

CV_IMPL double
cvDotProduct( const CvArr* srcAarr, const CvArr* srcBarr )
{
    static CvFuncTable tab_2d;
    static int inittab = 0;

    Cv64suf result;
    result.f = 0;

    CV_FUNCNAME( "cvDotProduct" );

    __BEGIN__;

    CvMat stubA, *srcA = (CvMat*)srcAarr;
    CvMat stubB, *srcB = (CvMat*)srcBarr;
    CvSize size;
    int type, depth;
    CvFunc2D_2A1P func;

    if( !inittab )
    {
        icvInitDotProductC1RTable( &tab_2d );
        inittab = 1;
    }

    if( !CV_IS_MAT( srcA ))
    {
        int coi = 0;
        CV_CALL( srcA = cvGetMat( srcA, &stubA, &coi ));
        if( coi != 0 )
            CV_ERROR( CV_BadCOI, "coi is not supported" );
    }

    if( srcBarr == srcAarr )
        srcB = srcA;
    else
    {
        if( !CV_IS_MAT( srcB ))
        {
            int coi = 0;
            CV_CALL( srcB = cvGetMat( srcB, &stubB, &coi ));

            if( coi != 0 )
                CV_ERROR( CV_BadCOI, "coi is not supported" );
        }

        if( !CV_ARE_TYPES_EQ( srcA, srcB ))
            CV_ERROR( CV_StsUnmatchedFormats, "" );

        if( !CV_ARE_SIZES_EQ( srcA, srcB ))
            CV_ERROR( CV_StsUnmatchedSizes, "" );
    }

    type = CV_MAT_TYPE( srcA->type );
    size = cvGetMatSize( srcA );

    size.width *= CV_MAT_CN( type );
    depth = CV_MAT_DEPTH( type );

    if( CV_IS_MAT_CONT( srcA->type & srcB->type ))
    {
        size.width *= size.height;

        if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
        {
            if( depth == CV_32F )
            {
                float* mA = srcA->data.fl;
                float* mB = srcB->data.fl;
                double sum = 0;
                do
                    sum += (double)mA[size.width - 1]*mB[size.width - 1];
                while( --size.width );
                result.f = sum;
                EXIT;
            }

            if( depth == CV_64F )
            {
                double* mA = srcA->data.db;
                double* mB = srcB->data.db;
                double sum = 0;
                do
                    sum += mA[size.width - 1]*mB[size.width - 1];
                while( --size.width );
                result.f = sum;
                EXIT;
            }
        }
        size.height = 1;
    }

    func = (CvFunc2D_2A1P)(tab_2d.fn_2d[depth]);
    if( !func )
        CV_ERROR( CV_StsUnsupportedFormat, "" );

    IPPI_CALL( func( srcA->data.ptr, srcA->step,
                     srcB->data.ptr, srcB->step,
                     size, &result ));

    if( depth < CV_32S )
        result.f = (double)result.i;

    __END__;

    return result.f;
}

/* End of file. */