1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/ 18 19 /***************************************************************** 20 * Lean and mean begins here * 21 *****************************************************************/ 22 23 #include "db_utilities.h" 24 #include "db_feature_detection.h" 25 #ifdef _VERBOSE_ 26 #include <iostream> 27 #endif 28 #include <float.h> 29 30 #define DB_SUB_PIXEL 31 32 #define BORDER 10 // 5 33 34 float** db_AllocStrengthImage_f(float **im,int w,int h) 35 { 36 int i,n,aw; 37 long c,size; 38 float **img,*aim,*p; 39 40 /*Determine number of 124 element chunks needed*/ 41 n=(db_maxi(1,w-6)+123)/124; 42 /*Determine the total allocation width aw*/ 43 aw=n*124+8; 44 /*Allocate*/ 45 size=aw*h+16; 46 *im=new float [size]; 47 /*Clean up*/ 48 p=(*im); 49 for(c=0;c<size;c++) p[c]=0.0; 50 /*Get a 16 byte aligned pointer*/ 51 aim=db_AlignPointer_f(*im,16); 52 /*Allocate pointer table*/ 53 img=new float* [h]; 54 /*Initialize the pointer table*/ 55 for(i=0;i<h;i++) 56 { 57 img[i]=aim+aw*i+1; 58 } 59 60 return(img); 61 } 62 63 void db_FreeStrengthImage_f(float *im,float **img,int h) 64 { 65 delete [] im; 66 delete [] img; 67 } 68 69 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width 70 Memory references occur one pixel outside the subrow*/ 71 inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width) 72 { 73 int c; 74 75 for(c=0;c<chunk_width;c++) 76 { 77 Ix[c]=img[i][j+c-1]-img[i][j+c+1]; 78 Iy[c]=img[i-1][j+c]-img[i+1][j+c]; 79 } 80 } 81 82 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128 83 Memory references occur one pixel outside the subrow*/ 84 inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc) 85 { 86 #ifdef DB_USE_MMX 87 const unsigned char *r1,*r2,*r3; 88 89 r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j; 90 91 _asm 92 { 93 mov esi,16 94 mov eax,r1 95 mov ebx,r2 96 mov ecx,r3 97 mov edx,dxx 98 99 /*Get bitmask into mm7*/ 100 mov edi,7F7F7F7Fh 101 movd mm7,edi 102 punpckldq mm7,mm7 103 104 loopstart: 105 /***************dx part 1-12*********************************/ 106 movq mm0,[eax] /*1 Get upper*/ 107 pxor mm6,mm6 /*2 Set to zero*/ 108 movq mm1,[ecx] /*3 Get lower*/ 109 psrlq mm0,1 /*4 Shift*/ 110 psrlq mm1,1 /*5 Shift*/ 111 pand mm0,mm7 /*6 And*/ 112 movq mm2,[ebx-1] /*13 Get left*/ 113 pand mm1,mm7 /*7 And*/ 114 psubb mm0,mm1 /*8 Subtract*/ 115 pxor mm5,mm5 /*14 Set to zero*/ 116 movq mm1,mm0 /*9 Copy*/ 117 pcmpgtb mm6,mm0 /*10 Create unpack mask*/ 118 movq mm3,[ebx+1] /*15 Get right*/ 119 punpcklbw mm0,mm6 /*11 Unpack low*/ 120 punpckhbw mm1,mm6 /*12 Unpack high*/ 121 /***************dy part 13-24*********************************/ 122 movq mm4,mm0 /*25 Copy dx*/ 123 psrlq mm2,1 /*16 Shift*/ 124 pmullw mm0,mm0 /*26 Multiply dx*dx*/ 125 psrlq mm3,1 /*17 Shift*/ 126 pand mm2,mm7 /*18 And*/ 127 pand mm3,mm7 /*19 And*/ 128 /*Stall*/ 129 psubb mm2,mm3 /*20 Subtract*/ 130 /*Stall*/ 131 movq mm3,mm2 /*21 Copy*/ 132 pcmpgtb mm5,mm2 /*22 Create unpack mask*/ 133 punpcklbw mm2,mm5 /*23 Unpack low*/ 134 /*Stall*/ 135 punpckhbw mm3,mm5 /*24 Unpack high*/ 136 /***************dxx dxy dyy low part 25-49*********************************/ 137 pmullw mm4,mm2 /*27 Multiply dx*dy*/ 138 pmullw mm2,mm2 /*28 Multiply dy*dy*/ 139 pxor mm6,mm6 /*29 Set to zero*/ 140 movq mm5,mm0 /*30 Copy dx*dx*/ 141 pcmpgtw mm6,mm0 /*31 Create unpack mask for dx*dx*/ 142 punpcklwd mm0,mm6 /*32 Unpack dx*dx lows*/ 143 /*Stall*/ 144 punpckhwd mm5,mm6 /*33 Unpack dx*dx highs*/ 145 pxor mm6,mm6 /*36 Set to zero*/ 146 movq [edx],mm0 /*34 Store dx*dx lows*/ 147 movq mm0,mm4 /*37 Copy dx*dy*/ 148 movq [edx+8],mm5 /*35 Store dx*dx highs*/ 149 pcmpgtw mm6,mm4 /*38 Create unpack mask for dx*dy*/ 150 punpcklwd mm4,mm6 /*39 Unpack dx*dy lows*/ 151 /*Stall*/ 152 punpckhwd mm0,mm6 /*40 Unpack dx*dy highs*/ 153 pxor mm6,mm6 /*43 Set to zero*/ 154 movq [edx+512],mm4 /*41 Store dx*dy lows*/ 155 movq mm5,mm2 /*44 Copy dy*dy*/ 156 movq [edx+520],mm0 /*42 Store dx*dy highs*/ 157 pcmpgtw mm6,mm2 /*45 Create unpack mask for dy*dy*/ 158 punpcklwd mm2,mm6 /*46 Unpack dy*dy lows*/ 159 movq mm4,mm1 /*50 Copy dx*/ 160 punpckhwd mm5,mm6 /*47 Unpack dy*dy highs*/ 161 pmullw mm1,mm1 /*51 Multiply dx*dx*/ 162 movq [edx+1024],mm2 /*48 Store dy*dy lows*/ 163 pmullw mm4,mm3 /*52 Multiply dx*dy*/ 164 movq [edx+1032],mm5 /*49 Store dy*dy highs*/ 165 /***************dxx dxy dyy high part 50-79*********************************/ 166 pmullw mm3,mm3 /*53 Multiply dy*dy*/ 167 pxor mm6,mm6 /*54 Set to zero*/ 168 movq mm5,mm1 /*55 Copy dx*dx*/ 169 pcmpgtw mm6,mm1 /*56 Create unpack mask for dx*dx*/ 170 pxor mm2,mm2 /*61 Set to zero*/ 171 punpcklwd mm1,mm6 /*57 Unpack dx*dx lows*/ 172 movq mm0,mm4 /*62 Copy dx*dy*/ 173 punpckhwd mm5,mm6 /*58 Unpack dx*dx highs*/ 174 pcmpgtw mm2,mm4 /*63 Create unpack mask for dx*dy*/ 175 movq [edx+16],mm1 /*59 Store dx*dx lows*/ 176 punpcklwd mm4,mm2 /*64 Unpack dx*dy lows*/ 177 movq [edx+24],mm5 /*60 Store dx*dx highs*/ 178 punpckhwd mm0,mm2 /*65 Unpack dx*dy highs*/ 179 movq [edx+528],mm4 /*66 Store dx*dy lows*/ 180 pxor mm6,mm6 /*68 Set to zero*/ 181 movq [edx+536],mm0 /*67 Store dx*dy highs*/ 182 movq mm5,mm3 /*69 Copy dy*dy*/ 183 pcmpgtw mm6,mm3 /*70 Create unpack mask for dy*dy*/ 184 add eax,8 /*75*/ 185 punpcklwd mm3,mm6 /*71 Unpack dy*dy lows*/ 186 add ebx,8 /*76*/ 187 punpckhwd mm5,mm6 /*72 Unpack dy*dy highs*/ 188 add ecx,8 /*77*/ 189 movq [edx+1040],mm3 /*73 Store dy*dy lows*/ 190 /*Stall*/ 191 movq [edx+1048],mm5 /*74 Store dy*dy highs*/ 192 /*Stall*/ 193 add edx,32 /*78*/ 194 dec esi /*79*/ 195 jnz loopstart 196 197 emms 198 } 199 200 #else 201 int c; 202 int Ix,Iy; 203 204 for(c=0;c<nc;c++) 205 { 206 Ix=(img[i][j+c-1]-img[i][j+c+1])>>1; 207 Iy=(img[i-1][j+c]-img[i+1][j+c])>>1; 208 dxx[c]=Ix*Ix; 209 dxx[c+128]=Ix*Iy; 210 dxx[c+256]=Iy*Iy; 211 } 212 #endif /*DB_USE_MMX*/ 213 } 214 215 /*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/ 216 inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width, 217 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4, 218 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4) 219 { 220 int c; 221 float dx,dy; 222 float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4; 223 224 for(c=0;c<chunk_width;c++) 225 { 226 dx=Ix0[c]; 227 dy=Iy0[c]; 228 Ixx0=dx*dx; 229 Ixy0=dx*dy; 230 Iyy0=dy*dy; 231 232 dx=Ix1[c]; 233 dy=Iy1[c]; 234 Ixx1=dx*dx; 235 Ixy1=dx*dy; 236 Iyy1=dy*dy; 237 238 dx=Ix2[c]; 239 dy=Iy2[c]; 240 Ixx2=dx*dx; 241 Ixy2=dx*dy; 242 Iyy2=dy*dy; 243 244 dx=Ix3[c]; 245 dy=Iy3[c]; 246 Ixx3=dx*dx; 247 Ixy3=dx*dy; 248 Iyy3=dy*dy; 249 250 dx=Ix4[c]; 251 dy=Iy4[c]; 252 Ixx4=dx*dx; 253 Ixy4=dx*dy; 254 Iyy4=dy*dy; 255 256 /*Filter vertically*/ 257 gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4; 258 gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4; 259 gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4; 260 } 261 } 262 263 /*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/ 264 inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc) 265 { 266 #ifdef DB_USE_MMX 267 int c; 268 269 _asm 270 { 271 mov c,64 272 mov eax,d0 273 mov ebx,d1 274 mov ecx,d2 275 mov edx,d3 276 mov edi,d4 277 mov esi,g 278 279 loopstart: 280 /***************dxx part 1-14*********************************/ 281 movq mm0,[eax] /*1 Get dxx0*/ 282 /*Stall*/ 283 movq mm1,[ebx] /*2 Get dxx1*/ 284 /*Stall*/ 285 movq mm2,[ecx] /*5 Get dxx2*/ 286 pslld mm1,2 /*3 Shift dxx1*/ 287 movq mm3,[edx] /*10 Get dxx3*/ 288 paddd mm0,mm1 /*4 Accumulate dxx1*/ 289 movq mm4,[eax+512] /*15 Get dxy0*/ 290 pslld mm2,1 /*6 Shift dxx2 1*/ 291 paddd mm0,mm2 /*7 Accumulate dxx2 1*/ 292 pslld mm2,1 /*8 Shift dxx2 2*/ 293 movq mm5,[ebx+512] /*16 Get dxy1*/ 294 paddd mm0,mm2 /*9 Accumulate dxx2 2*/ 295 pslld mm3,2 /*11 Shift dxx3*/ 296 /*Stall*/ 297 paddd mm0,mm3 /*12 Accumulate dxx3*/ 298 pslld mm5,2 /*17 Shift dxy1*/ 299 paddd mm0,[edi] /*13 Accumulate dxx4*/ 300 paddd mm4,mm5 /*18 Accumulate dxy1*/ 301 movq mm6,[ecx+512] /*19 Get dxy2*/ 302 /*Stall*/ 303 movq [esi],mm0 /*14 Store dxx sums*/ 304 /***************dxy part 15-28*********************************/ 305 pslld mm6,1 /*20 Shift dxy2 1*/ 306 paddd mm4,mm6 /*21 Accumulate dxy2 1*/ 307 pslld mm6,1 /*22 Shift dxy2 2*/ 308 movq mm0,[eax+1024] /*29 Get dyy0*/ 309 paddd mm4,mm6 /*23 Accumulate dxy2 2*/ 310 movq mm7,[edx+512] /*24 Get dxy3*/ 311 pslld mm7,2 /*25 Shift dxy3*/ 312 movq mm1,[ebx+1024] /*30 Get dyy1*/ 313 paddd mm4,mm7 /*26 Accumulate dxy3*/ 314 paddd mm4,[edi+512] /*27 Accumulate dxy4*/ 315 pslld mm1,2 /*31 Shift dyy1*/ 316 movq mm2,[ecx+1024] /*33 Get dyy2*/ 317 paddd mm0,mm1 /*32 Accumulate dyy1*/ 318 movq [esi+512],mm4 /*28 Store dxy sums*/ 319 pslld mm2,1 /*34 Shift dyy2 1*/ 320 /***************dyy part 29-49*********************************/ 321 322 323 movq mm3,[edx+1024] /*38 Get dyy3*/ 324 paddd mm0,mm2 /*35 Accumulate dyy2 1*/ 325 paddd mm0,[edi+1024] /*41 Accumulate dyy4*/ 326 pslld mm2,1 /*36 Shift dyy2 2*/ 327 paddd mm0,mm2 /*37 Accumulate dyy2 2*/ 328 pslld mm3,2 /*39 Shift dyy3*/ 329 paddd mm0,mm3 /*40 Accumulate dyy3*/ 330 add eax,8 /*43*/ 331 add ebx,8 /*44*/ 332 add ecx,8 /*45*/ 333 movq [esi+1024],mm0 /*42 Store dyy sums*/ 334 /*Stall*/ 335 add edx,8 /*46*/ 336 add edi,8 /*47*/ 337 add esi,8 /*48*/ 338 dec c /*49*/ 339 jnz loopstart 340 341 emms 342 } 343 344 #else 345 int c,dd; 346 347 for(c=0;c<nc;c++) 348 { 349 /*Filter vertically*/ 350 dd=d2[c]; 351 g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c]; 352 353 dd=d2[c+128]; 354 g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128]; 355 356 dd=d2[c+256]; 357 g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256]; 358 } 359 #endif /*DB_USE_MMX*/ 360 } 361 362 /*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j 363 and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width 364 and starting at (i,j-2)*/ 365 inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width) 366 { 367 float Gxx,Gxy,Gyy,det,trc; 368 int c; 369 370 for(c=0;c<chunk_width;c++) 371 { 372 Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4]; 373 Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4]; 374 Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4]; 375 376 det=Gxx*Gyy-Gxy*Gxy; 377 trc=Gxx+Gyy; 378 s[i][j+c]=det-0.06f*trc*trc; 379 } 380 } 381 382 /*Filter g of length 128 in place with 14641. Output is shifted two steps 383 and of length 124*/ 384 inline void db_Filter14641_128_i(int *g,int nc) 385 { 386 #ifdef DB_USE_MMX 387 int mask; 388 389 mask=0xFFFFFFFF; 390 _asm 391 { 392 mov esi,31 393 mov eax,g 394 395 /*Get bitmask 00000000FFFFFFFF into mm7*/ 396 movd mm7,mask 397 398 /*Warming iteration one 1-16********************/ 399 movq mm6,[eax] /*1 Load new data*/ 400 paddd mm0,mm6 /*2 Add 1* behind two steps*/ 401 movq mm2,mm6 /*3 Start with 1* in front two steps*/ 402 pslld mm6,1 /*4*/ 403 paddd mm1,mm6 /*5 Add 2* same place*/ 404 pslld mm6,1 /*6*/ 405 paddd mm1,mm6 /*7 Add 4* same place*/ 406 pshufw mm6,mm6,4Eh /*8 Swap the two double-words using bitmask 01001110=4Eh*/ 407 paddd mm1,mm6 /*9 Add 4* swapped*/ 408 movq mm5,mm6 /*10 Copy*/ 409 pand mm6,mm7 /*11 Get low double-word only*/ 410 paddd mm2,mm6 /*12 Add 4* in front one step*/ 411 pxor mm6,mm5 /*13 Get high double-word only*/ 412 paddd mm0,mm6 /*14 Add 4* behind one step*/ 413 movq mm0,mm1 /*15 Shift along*/ 414 movq mm1,mm2 /*16 Shift along*/ 415 /*Warming iteration two 17-32********************/ 416 movq mm4,[eax+8] /*17 Load new data*/ 417 paddd mm0,mm4 /*18 Add 1* behind two steps*/ 418 movq mm2,mm4 /*19 Start with 1* in front two steps*/ 419 pslld mm4,1 /*20*/ 420 paddd mm1,mm4 /*21 Add 2* same place*/ 421 pslld mm4,1 /*22*/ 422 paddd mm1,mm4 /*23 Add 4* same place*/ 423 pshufw mm4,mm4,4Eh /*24 Swap the two double-words using bitmask 01001110=4Eh*/ 424 paddd mm1,mm4 /*25 Add 4* swapped*/ 425 movq mm3,mm4 /*26 Copy*/ 426 pand mm4,mm7 /*27 Get low double-word only*/ 427 paddd mm2,mm4 /*28 Add 4* in front one step*/ 428 pxor mm4,mm3 /*29 Get high double-word only*/ 429 paddd mm0,mm4 /*30 Add 4* behind one step*/ 430 movq mm0,mm1 /*31 Shift along*/ 431 movq mm1,mm2 /*32 Shift along*/ 432 433 /*Loop********************/ 434 loopstart: 435 /*First part of loop 33-47********/ 436 movq mm6,[eax+16] /*33 Load new data*/ 437 /*Stall*/ 438 paddd mm0,mm6 /*34 Add 1* behind two steps*/ 439 movq mm2,mm6 /*35 Start with 1* in front two steps*/ 440 movq mm4,[eax+24] /*48 Load new data*/ 441 pslld mm6,1 /*36*/ 442 paddd mm1,mm6 /*37 Add 2* same place*/ 443 pslld mm6,1 /*38*/ 444 paddd mm1,mm6 /*39 Add 4* same place*/ 445 pshufw mm6,mm6,4Eh /*40 Swap the two double-words using bitmask 01001110=4Eh*/ 446 paddd mm1,mm4 /*49 Add 1* behind two steps*/ 447 movq mm5,mm6 /*41 Copy*/ 448 paddd mm1,mm6 /*42 Add 4* swapped*/ 449 pand mm6,mm7 /*43 Get low double-word only*/ 450 paddd mm2,mm6 /*44 Add 4* in front one step*/ 451 pxor mm6,mm5 /*45 Get high double-word only*/ 452 paddd mm0,mm6 /*46 Add 4* behind one step*/ 453 movq mm6,mm4 /*50a Copy*/ 454 pslld mm4,1 /*51*/ 455 /*Stall*/ 456 movq [eax],mm0 /*47 Store result two steps behind*/ 457 /*Second part of loop 48-66********/ 458 movq mm0,mm6 /*50b Start with 1* in front two steps*/ 459 paddd mm2,mm4 /*52 Add 2* same place*/ 460 pslld mm4,1 /*53*/ 461 paddd mm2,mm4 /*54 Add 4* same place*/ 462 pshufw mm4,mm4,4Eh /*55 Swap the two double-words using bitmask 01001110=4Eh*/ 463 paddd mm2,mm4 /*56 Add 4* swapped*/ 464 movq mm3,mm4 /*57 Copy*/ 465 pand mm4,mm7 /*58 Get low double-word only*/ 466 /*Stall*/ 467 paddd mm0,mm4 /*59 Add 4* in front one step*/ 468 pxor mm4,mm3 /*60 Get high double-word only*/ 469 paddd mm1,mm4 /*61 Add 4* behind one step*/ 470 add eax,16 /*65*/ 471 dec esi /*66*/ 472 /*Stall*/ 473 movq [eax-8],mm1 /*62 Store result two steps behind*/ 474 movq mm1,mm0 /*63 Shift along*/ 475 movq mm0,mm2 /*64 Shift along*/ 476 jnz loopstart 477 478 emms 479 } 480 481 #else 482 int c; 483 484 for(c=0;c<nc-4;c++) 485 { 486 g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4]; 487 } 488 #endif /*DB_USE_MMX*/ 489 } 490 491 /*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s 492 of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought. 493 s should be 16 byte aligned*/ 494 inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc) 495 { 496 float k; 497 498 k=0.06f; 499 500 db_Filter14641_128_i(gxx,nc); 501 db_Filter14641_128_i(gxy,nc); 502 db_Filter14641_128_i(gyy,nc); 503 504 #ifdef DB_USE_SIMD 505 506 507 _asm 508 { 509 mov esi,15 510 mov eax,gxx 511 mov ebx,gxy 512 mov ecx,gyy 513 mov edx,s 514 515 /*broadcast k to all positions of xmm7*/ 516 movss xmm7,k 517 shufps xmm7,xmm7,0 518 519 /*****Warm up 1-10**************************************/ 520 cvtpi2ps xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/ 521 /*Stall*/ 522 cvtpi2ps xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/ 523 movlhps xmm0,xmm0 /*2 Move them to the high double-word*/ 524 cvtpi2ps xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/ 525 movlhps xmm1,xmm1 /*5 Move them to the high double-word*/ 526 cvtpi2ps xmm0,[eax] /*3 Convert two integers into floating point of low double-word*/ 527 movlhps xmm2,xmm2 /*8 Move them to the high double-word*/ 528 cvtpi2ps xmm1,[ebx] /*6 Convert two integers into floating point of low double-word*/ 529 movaps xmm3,xmm0 /*10 Copy Cxx*/ 530 cvtpi2ps xmm2,[ecx] /*9 Convert two integers into floating point of low double-word*/ 531 /*Stall*/ 532 loopstart: 533 /*****First part of loop 11-18***********************/ 534 mulps xmm0,xmm2 /*11 Multiply to get Gxx*Gyy*/ 535 addps xmm2,xmm3 /*12 Add to get Gxx+Gyy*/ 536 cvtpi2ps xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/ 537 mulps xmm1,xmm1 /*13 Multiply to get Gxy*Gxy*/ 538 mulps xmm2,xmm2 /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 539 movlhps xmm4,xmm4 /*20 Move them to the high double-word*/ 540 cvtpi2ps xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/ 541 /*Stall*/ 542 subps xmm0,xmm1 /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/ 543 mulps xmm2,xmm7 /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 544 cvtpi2ps xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/ 545 /*Stall*/ 546 movlhps xmm5,xmm5 /*23 Move them to the high double-word*/ 547 /*Stall*/ 548 cvtpi2ps xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/ 549 subps xmm0,xmm2 /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 550 cvtpi2ps xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/ 551 /*Stall*/ 552 movaps [edx],xmm0 /*18 Store*/ 553 /*****Second part of loop 26-40***********************/ 554 movlhps xmm6,xmm6 /*26 Move them to the high double-word*/ 555 cvtpi2ps xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/ 556 movaps xmm3,xmm4 /*28 Copy Cxx*/ 557 mulps xmm4,xmm6 /*29 Multiply to get Gxx*Gyy*/ 558 addps xmm6,xmm3 /*30 Add to get Gxx+Gyy*/ 559 cvtpi2ps xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/ 560 mulps xmm5,xmm5 /*31 Multiply to get Gxy*Gxy*/ 561 cvtpi2ps xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/ 562 mulps xmm6,xmm6 /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 563 cvtpi2ps xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/ 564 movlhps xmm0,xmm0 /*(2 Next) Move them to the high double-word*/ 565 subps xmm4,xmm5 /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/ 566 movlhps xmm1,xmm1 /*(5 Next) Move them to the high double-word*/ 567 cvtpi2ps xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/ 568 mulps xmm6,xmm7 /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 569 cvtpi2ps xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/ 570 movlhps xmm2,xmm2 /*(8 Next) Move them to the high double-word*/ 571 movaps xmm3,xmm0 /*(10 Next) Copy Cxx*/ 572 add eax,32 /*37*/ 573 subps xmm4,xmm6 /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 574 add ebx,32 /*38*/ 575 cvtpi2ps xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/ 576 /*Stall*/ 577 movaps [edx+16],xmm4 /*36 Store*/ 578 /*Stall*/ 579 add ecx,32 /*39*/ 580 add edx,32 /*40*/ 581 dec esi /*41*/ 582 jnz loopstart 583 584 /****Cool down***************/ 585 mulps xmm0,xmm2 /*Multiply to get Gxx*Gyy*/ 586 addps xmm2,xmm3 /*Add to get Gxx+Gyy*/ 587 mulps xmm1,xmm1 /*Multiply to get Gxy*Gxy*/ 588 mulps xmm2,xmm2 /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/ 589 subps xmm0,xmm1 /*Subtract to get Gxx*Gyy-Gxy*Gxy*/ 590 mulps xmm2,xmm7 /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/ 591 subps xmm0,xmm2 /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/ 592 movaps [edx],xmm0 /*Store*/ 593 } 594 595 #else 596 float Gxx,Gxy,Gyy,det,trc; 597 int c; 598 599 //for(c=0;c<124;c++) 600 for(c=0;c<nc-4;c++) 601 { 602 Gxx=(float)gxx[c]; 603 Gxy=(float)gxy[c]; 604 Gyy=(float)gyy[c]; 605 606 det=Gxx*Gyy-Gxy*Gxy; 607 trc=Gxx+Gyy; 608 s[c]=det-k*trc*trc; 609 } 610 #endif /*DB_USE_SIMD*/ 611 } 612 613 /*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and 614 store it into the corresponding region of s. left and top have to be at least 3 and 615 right and bottom have to be at most width-4,height-4*/ 616 inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom, 617 /*temp should point to at least 618 13*(right-left+5) of allocated memory*/ 619 float *temp) 620 { 621 float *Ix[5],*Iy[5]; 622 float *gxx,*gxy,*gyy; 623 int i,chunk_width,chunk_width_p4; 624 625 chunk_width=right-left+1; 626 chunk_width_p4=chunk_width+4; 627 gxx=temp; 628 gxy=gxx+chunk_width_p4; 629 gyy=gxy+chunk_width_p4; 630 for(i=0;i<5;i++) 631 { 632 Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4); 633 Iy[i]=Ix[i]+chunk_width_p4; 634 } 635 636 /*Fill four rows of the wrap-around derivative buffers*/ 637 for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4); 638 639 /*For each output row*/ 640 for(i=top;i<=bottom;i++) 641 { 642 /*Step the derivative buffers*/ 643 db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4); 644 645 /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/ 646 db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4, 647 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5], 648 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]); 649 650 /*Filter gxx,gxy,gyy horizontally and compute corner response s*/ 651 db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width); 652 } 653 } 654 655 /*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and 656 store it into the corresponding region of s. left and top have to be at least 3 and 657 right and bottom have to be at most width-4,height-4. The left of the region in s should 658 be 16 byte aligned*/ 659 inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom, 660 /*temp should point to at least 661 18*128 of allocated memory*/ 662 int *temp, int nc) 663 { 664 int *Ixx[5],*Ixy[5],*Iyy[5]; 665 int *gxx,*gxy,*gyy; 666 int i; 667 668 gxx=temp; 669 gxy=gxx+128; 670 gyy=gxy+128; 671 for(i=0;i<5;i++) 672 { 673 Ixx[i]=gyy+(3*i+1)*128; 674 Ixy[i]=gyy+(3*i+2)*128; 675 Iyy[i]=gyy+(3*i+3)*128; 676 } 677 678 /*Fill four rows of the wrap-around derivative buffers*/ 679 for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc); 680 681 /*For each output row*/ 682 for(i=top;i<=bottom;i++) 683 { 684 /*Step the derivative buffers*/ 685 db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc); 686 687 /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/ 688 db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc); 689 690 /*Filter gxx,gxy,gyy horizontally and compute corner response s*/ 691 db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc); 692 } 693 694 } 695 696 /*Compute Harris corner strength of img. Strength is returned for the region 697 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the 698 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high 699 for a meaningful result*/ 700 void db_HarrisStrength_f(float **s,const float * const *img,int w,int h, 701 /*temp should point to at least 702 13*(chunk_width+4) of allocated memory*/ 703 float *temp, 704 int chunk_width) 705 { 706 int x,next_x,last,right; 707 708 last=w-4; 709 for(x=3;x<=last;x=next_x) 710 { 711 next_x=x+chunk_width; 712 right=next_x-1; 713 if(right>last) right=last; 714 /*Compute the Harris strength of a chunk*/ 715 db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp); 716 } 717 } 718 719 /*Compute Harris corner strength of img. Strength is returned for the region 720 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the 721 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high 722 for a meaningful result.Moreover, the image should be overallocated by 256 bytes. 723 s[i][3] should by 16 byte aligned for any i*/ 724 void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h, 725 /*temp should point to at least 726 18*128 of allocated memory*/ 727 int *temp) 728 { 729 int x,next_x,last; 730 int nc; 731 732 last=w-4; 733 for(x=3;x<=last;x=next_x) 734 { 735 next_x=x+124; 736 737 // mayban: to revert to the original full chunks state, change the line below to: nc = 128; 738 nc = db_mini(128,last-x+1); 739 //nc = 128; 740 741 /*Compute the Harris strength of a chunk*/ 742 db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc); 743 } 744 } 745 746 inline float db_Max_128Aligned16_f(float *v) 747 { 748 #ifdef DB_USE_SIMD 749 float back; 750 751 _asm 752 { 753 mov eax,v 754 755 /*Chunk1*/ 756 movaps xmm0,[eax] 757 movaps xmm1,[eax+16] 758 movaps xmm2,[eax+32] 759 movaps xmm3,[eax+48] 760 movaps xmm4,[eax+64] 761 movaps xmm5,[eax+80] 762 movaps xmm6,[eax+96] 763 movaps xmm7,[eax+112] 764 765 /*Chunk2*/ 766 maxps xmm0,[eax+128] 767 maxps xmm1,[eax+144] 768 maxps xmm2,[eax+160] 769 maxps xmm3,[eax+176] 770 maxps xmm4,[eax+192] 771 maxps xmm5,[eax+208] 772 maxps xmm6,[eax+224] 773 maxps xmm7,[eax+240] 774 775 /*Chunk3*/ 776 maxps xmm0,[eax+256] 777 maxps xmm1,[eax+272] 778 maxps xmm2,[eax+288] 779 maxps xmm3,[eax+304] 780 maxps xmm4,[eax+320] 781 maxps xmm5,[eax+336] 782 maxps xmm6,[eax+352] 783 maxps xmm7,[eax+368] 784 785 /*Chunk4*/ 786 maxps xmm0,[eax+384] 787 maxps xmm1,[eax+400] 788 maxps xmm2,[eax+416] 789 maxps xmm3,[eax+432] 790 maxps xmm4,[eax+448] 791 maxps xmm5,[eax+464] 792 maxps xmm6,[eax+480] 793 maxps xmm7,[eax+496] 794 795 /*Collect*/ 796 maxps xmm0,xmm1 797 maxps xmm2,xmm3 798 maxps xmm4,xmm5 799 maxps xmm6,xmm7 800 maxps xmm0,xmm2 801 maxps xmm4,xmm6 802 maxps xmm0,xmm4 803 movhlps xmm1,xmm0 804 maxps xmm0,xmm1 805 shufps xmm1,xmm0,1 806 maxps xmm0,xmm1 807 movss back,xmm0 808 } 809 810 return(back); 811 #else 812 float val,max_val; 813 float *p,*stop_p; 814 max_val=v[0]; 815 for(p=v+1,stop_p=v+128;p!=stop_p;) 816 { 817 val= *p++; 818 if(val>max_val) max_val=val; 819 } 820 return(max_val); 821 #endif /*DB_USE_SIMD*/ 822 } 823 824 inline float db_Max_64Aligned16_f(float *v) 825 { 826 #ifdef DB_USE_SIMD 827 float back; 828 829 _asm 830 { 831 mov eax,v 832 833 /*Chunk1*/ 834 movaps xmm0,[eax] 835 movaps xmm1,[eax+16] 836 movaps xmm2,[eax+32] 837 movaps xmm3,[eax+48] 838 movaps xmm4,[eax+64] 839 movaps xmm5,[eax+80] 840 movaps xmm6,[eax+96] 841 movaps xmm7,[eax+112] 842 843 /*Chunk2*/ 844 maxps xmm0,[eax+128] 845 maxps xmm1,[eax+144] 846 maxps xmm2,[eax+160] 847 maxps xmm3,[eax+176] 848 maxps xmm4,[eax+192] 849 maxps xmm5,[eax+208] 850 maxps xmm6,[eax+224] 851 maxps xmm7,[eax+240] 852 853 /*Collect*/ 854 maxps xmm0,xmm1 855 maxps xmm2,xmm3 856 maxps xmm4,xmm5 857 maxps xmm6,xmm7 858 maxps xmm0,xmm2 859 maxps xmm4,xmm6 860 maxps xmm0,xmm4 861 movhlps xmm1,xmm0 862 maxps xmm0,xmm1 863 shufps xmm1,xmm0,1 864 maxps xmm0,xmm1 865 movss back,xmm0 866 } 867 868 return(back); 869 #else 870 float val,max_val; 871 float *p,*stop_p; 872 max_val=v[0]; 873 for(p=v+1,stop_p=v+64;p!=stop_p;) 874 { 875 val= *p++; 876 if(val>max_val) max_val=val; 877 } 878 return(max_val); 879 #endif /*DB_USE_SIMD*/ 880 } 881 882 inline float db_Max_32Aligned16_f(float *v) 883 { 884 #ifdef DB_USE_SIMD 885 float back; 886 887 _asm 888 { 889 mov eax,v 890 891 /*Chunk1*/ 892 movaps xmm0,[eax] 893 movaps xmm1,[eax+16] 894 movaps xmm2,[eax+32] 895 movaps xmm3,[eax+48] 896 movaps xmm4,[eax+64] 897 movaps xmm5,[eax+80] 898 movaps xmm6,[eax+96] 899 movaps xmm7,[eax+112] 900 901 /*Collect*/ 902 maxps xmm0,xmm1 903 maxps xmm2,xmm3 904 maxps xmm4,xmm5 905 maxps xmm6,xmm7 906 maxps xmm0,xmm2 907 maxps xmm4,xmm6 908 maxps xmm0,xmm4 909 movhlps xmm1,xmm0 910 maxps xmm0,xmm1 911 shufps xmm1,xmm0,1 912 maxps xmm0,xmm1 913 movss back,xmm0 914 } 915 916 return(back); 917 #else 918 float val,max_val; 919 float *p,*stop_p; 920 max_val=v[0]; 921 for(p=v+1,stop_p=v+32;p!=stop_p;) 922 { 923 val= *p++; 924 if(val>max_val) max_val=val; 925 } 926 return(max_val); 927 #endif /*DB_USE_SIMD*/ 928 } 929 930 inline float db_Max_16Aligned16_f(float *v) 931 { 932 #ifdef DB_USE_SIMD 933 float back; 934 935 _asm 936 { 937 mov eax,v 938 939 /*Chunk1*/ 940 movaps xmm0,[eax] 941 movaps xmm1,[eax+16] 942 movaps xmm2,[eax+32] 943 movaps xmm3,[eax+48] 944 945 /*Collect*/ 946 maxps xmm0,xmm1 947 maxps xmm2,xmm3 948 maxps xmm0,xmm2 949 movhlps xmm1,xmm0 950 maxps xmm0,xmm1 951 shufps xmm1,xmm0,1 952 maxps xmm0,xmm1 953 movss back,xmm0 954 } 955 956 return(back); 957 #else 958 float val,max_val; 959 float *p,*stop_p; 960 max_val=v[0]; 961 for(p=v+1,stop_p=v+16;p!=stop_p;) 962 { 963 val= *p++; 964 if(val>max_val) max_val=val; 965 } 966 return(max_val); 967 #endif /*DB_USE_SIMD*/ 968 } 969 970 inline float db_Max_8Aligned16_f(float *v) 971 { 972 #ifdef DB_USE_SIMD 973 float back; 974 975 _asm 976 { 977 mov eax,v 978 979 /*Chunk1*/ 980 movaps xmm0,[eax] 981 movaps xmm1,[eax+16] 982 983 /*Collect*/ 984 maxps xmm0,xmm1 985 movhlps xmm1,xmm0 986 maxps xmm0,xmm1 987 shufps xmm1,xmm0,1 988 maxps xmm0,xmm1 989 movss back,xmm0 990 } 991 992 return(back); 993 #else 994 float val,max_val; 995 float *p,*stop_p; 996 max_val=v[0]; 997 for(p=v+1,stop_p=v+8;p!=stop_p;) 998 { 999 val= *p++; 1000 if(val>max_val) max_val=val; 1001 } 1002 return(max_val); 1003 #endif /*DB_USE_SIMD*/ 1004 } 1005 1006 inline float db_Max_Aligned16_f(float *v,int size) 1007 { 1008 float val,max_val; 1009 float *stop_v; 1010 1011 max_val=v[0]; 1012 for(;size>=128;size-=128) 1013 { 1014 val=db_Max_128Aligned16_f(v); 1015 v+=128; 1016 if(val>max_val) max_val=val; 1017 } 1018 if(size&64) 1019 { 1020 val=db_Max_64Aligned16_f(v); 1021 v+=64; 1022 if(val>max_val) max_val=val; 1023 } 1024 if(size&32) 1025 { 1026 val=db_Max_32Aligned16_f(v); 1027 v+=32; 1028 if(val>max_val) max_val=val; 1029 } 1030 if(size&16) 1031 { 1032 val=db_Max_16Aligned16_f(v); 1033 v+=16; 1034 if(val>max_val) max_val=val; 1035 } 1036 if(size&8) 1037 { 1038 val=db_Max_8Aligned16_f(v); 1039 v+=8; 1040 if(val>max_val) max_val=val; 1041 } 1042 if(size&7) 1043 { 1044 for(stop_v=v+(size&7);v!=stop_v;) 1045 { 1046 val= *v++; 1047 if(val>max_val) max_val=val; 1048 } 1049 } 1050 1051 return(max_val); 1052 } 1053 1054 /*Find maximum value of img in the region starting at (left,top) 1055 and with width w and height h. img[left] should be 16 byte aligned*/ 1056 float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h) 1057 { 1058 float val,max_val; 1059 int i,stop_i; 1060 1061 if(w && h) 1062 { 1063 stop_i=top+h; 1064 max_val=img[top][left]; 1065 1066 for(i=top;i<stop_i;i++) 1067 { 1068 val=db_Max_Aligned16_f(img[i]+left,w); 1069 if(val>max_val) max_val=val; 1070 } 1071 return(max_val); 1072 } 1073 return(0.0); 1074 } 1075 1076 inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2) 1077 { 1078 #ifdef DB_USE_SIMD 1079 _asm 1080 { 1081 mov eax,v1 1082 mov ebx,v2 1083 mov ecx,m 1084 1085 /*Chunk1*/ 1086 movaps xmm0,[eax] 1087 movaps xmm1,[eax+16] 1088 movaps xmm2,[eax+32] 1089 movaps xmm3,[eax+48] 1090 movaps xmm4,[eax+64] 1091 movaps xmm5,[eax+80] 1092 movaps xmm6,[eax+96] 1093 movaps xmm7,[eax+112] 1094 maxps xmm0,[ebx] 1095 maxps xmm1,[ebx+16] 1096 maxps xmm2,[ebx+32] 1097 maxps xmm3,[ebx+48] 1098 maxps xmm4,[ebx+64] 1099 maxps xmm5,[ebx+80] 1100 maxps xmm6,[ebx+96] 1101 maxps xmm7,[ebx+112] 1102 movaps [ecx],xmm0 1103 movaps [ecx+16],xmm1 1104 movaps [ecx+32],xmm2 1105 movaps [ecx+48],xmm3 1106 movaps [ecx+64],xmm4 1107 movaps [ecx+80],xmm5 1108 movaps [ecx+96],xmm6 1109 movaps [ecx+112],xmm7 1110 1111 /*Chunk2*/ 1112 movaps xmm0,[eax+128] 1113 movaps xmm1,[eax+144] 1114 movaps xmm2,[eax+160] 1115 movaps xmm3,[eax+176] 1116 movaps xmm4,[eax+192] 1117 movaps xmm5,[eax+208] 1118 movaps xmm6,[eax+224] 1119 movaps xmm7,[eax+240] 1120 maxps xmm0,[ebx+128] 1121 maxps xmm1,[ebx+144] 1122 maxps xmm2,[ebx+160] 1123 maxps xmm3,[ebx+176] 1124 maxps xmm4,[ebx+192] 1125 maxps xmm5,[ebx+208] 1126 maxps xmm6,[ebx+224] 1127 maxps xmm7,[ebx+240] 1128 movaps [ecx+128],xmm0 1129 movaps [ecx+144],xmm1 1130 movaps [ecx+160],xmm2 1131 movaps [ecx+176],xmm3 1132 movaps [ecx+192],xmm4 1133 movaps [ecx+208],xmm5 1134 movaps [ecx+224],xmm6 1135 movaps [ecx+240],xmm7 1136 1137 /*Chunk3*/ 1138 movaps xmm0,[eax+256] 1139 movaps xmm1,[eax+272] 1140 movaps xmm2,[eax+288] 1141 movaps xmm3,[eax+304] 1142 movaps xmm4,[eax+320] 1143 movaps xmm5,[eax+336] 1144 movaps xmm6,[eax+352] 1145 movaps xmm7,[eax+368] 1146 maxps xmm0,[ebx+256] 1147 maxps xmm1,[ebx+272] 1148 maxps xmm2,[ebx+288] 1149 maxps xmm3,[ebx+304] 1150 maxps xmm4,[ebx+320] 1151 maxps xmm5,[ebx+336] 1152 maxps xmm6,[ebx+352] 1153 maxps xmm7,[ebx+368] 1154 movaps [ecx+256],xmm0 1155 movaps [ecx+272],xmm1 1156 movaps [ecx+288],xmm2 1157 movaps [ecx+304],xmm3 1158 movaps [ecx+320],xmm4 1159 movaps [ecx+336],xmm5 1160 movaps [ecx+352],xmm6 1161 movaps [ecx+368],xmm7 1162 1163 /*Chunk4*/ 1164 movaps xmm0,[eax+384] 1165 movaps xmm1,[eax+400] 1166 movaps xmm2,[eax+416] 1167 movaps xmm3,[eax+432] 1168 movaps xmm4,[eax+448] 1169 movaps xmm5,[eax+464] 1170 movaps xmm6,[eax+480] 1171 movaps xmm7,[eax+496] 1172 maxps xmm0,[ebx+384] 1173 maxps xmm1,[ebx+400] 1174 maxps xmm2,[ebx+416] 1175 maxps xmm3,[ebx+432] 1176 maxps xmm4,[ebx+448] 1177 maxps xmm5,[ebx+464] 1178 maxps xmm6,[ebx+480] 1179 maxps xmm7,[ebx+496] 1180 movaps [ecx+384],xmm0 1181 movaps [ecx+400],xmm1 1182 movaps [ecx+416],xmm2 1183 movaps [ecx+432],xmm3 1184 movaps [ecx+448],xmm4 1185 movaps [ecx+464],xmm5 1186 movaps [ecx+480],xmm6 1187 movaps [ecx+496],xmm7 1188 } 1189 #else 1190 int i; 1191 float a,b; 1192 for(i=0;i<128;i++) 1193 { 1194 a=v1[i]; 1195 b=v2[i]; 1196 if(a>=b) m[i]=a; 1197 else m[i]=b; 1198 } 1199 #endif /*DB_USE_SIMD*/ 1200 } 1201 1202 inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2) 1203 { 1204 #ifdef DB_USE_SIMD 1205 _asm 1206 { 1207 mov eax,v1 1208 mov ebx,v2 1209 mov ecx,m 1210 1211 /*Chunk1*/ 1212 movups xmm0,[eax] 1213 movups xmm1,[eax+16] 1214 movups xmm2,[eax+32] 1215 movups xmm3,[eax+48] 1216 movups xmm4,[eax+64] 1217 movups xmm5,[eax+80] 1218 movups xmm6,[eax+96] 1219 movups xmm7,[eax+112] 1220 maxps xmm0,[ebx] 1221 maxps xmm1,[ebx+16] 1222 maxps xmm2,[ebx+32] 1223 maxps xmm3,[ebx+48] 1224 maxps xmm4,[ebx+64] 1225 maxps xmm5,[ebx+80] 1226 maxps xmm6,[ebx+96] 1227 maxps xmm7,[ebx+112] 1228 movaps [ecx],xmm0 1229 movaps [ecx+16],xmm1 1230 movaps [ecx+32],xmm2 1231 movaps [ecx+48],xmm3 1232 movaps [ecx+64],xmm4 1233 movaps [ecx+80],xmm5 1234 movaps [ecx+96],xmm6 1235 movaps [ecx+112],xmm7 1236 1237 /*Chunk2*/ 1238 movups xmm0,[eax+128] 1239 movups xmm1,[eax+144] 1240 movups xmm2,[eax+160] 1241 movups xmm3,[eax+176] 1242 movups xmm4,[eax+192] 1243 movups xmm5,[eax+208] 1244 movups xmm6,[eax+224] 1245 movups xmm7,[eax+240] 1246 maxps xmm0,[ebx+128] 1247 maxps xmm1,[ebx+144] 1248 maxps xmm2,[ebx+160] 1249 maxps xmm3,[ebx+176] 1250 maxps xmm4,[ebx+192] 1251 maxps xmm5,[ebx+208] 1252 maxps xmm6,[ebx+224] 1253 maxps xmm7,[ebx+240] 1254 movaps [ecx+128],xmm0 1255 movaps [ecx+144],xmm1 1256 movaps [ecx+160],xmm2 1257 movaps [ecx+176],xmm3 1258 movaps [ecx+192],xmm4 1259 movaps [ecx+208],xmm5 1260 movaps [ecx+224],xmm6 1261 movaps [ecx+240],xmm7 1262 1263 /*Chunk3*/ 1264 movups xmm0,[eax+256] 1265 movups xmm1,[eax+272] 1266 movups xmm2,[eax+288] 1267 movups xmm3,[eax+304] 1268 movups xmm4,[eax+320] 1269 movups xmm5,[eax+336] 1270 movups xmm6,[eax+352] 1271 movups xmm7,[eax+368] 1272 maxps xmm0,[ebx+256] 1273 maxps xmm1,[ebx+272] 1274 maxps xmm2,[ebx+288] 1275 maxps xmm3,[ebx+304] 1276 maxps xmm4,[ebx+320] 1277 maxps xmm5,[ebx+336] 1278 maxps xmm6,[ebx+352] 1279 maxps xmm7,[ebx+368] 1280 movaps [ecx+256],xmm0 1281 movaps [ecx+272],xmm1 1282 movaps [ecx+288],xmm2 1283 movaps [ecx+304],xmm3 1284 movaps [ecx+320],xmm4 1285 movaps [ecx+336],xmm5 1286 movaps [ecx+352],xmm6 1287 movaps [ecx+368],xmm7 1288 1289 /*Chunk4*/ 1290 movups xmm0,[eax+384] 1291 movups xmm1,[eax+400] 1292 movups xmm2,[eax+416] 1293 movups xmm3,[eax+432] 1294 movups xmm4,[eax+448] 1295 movups xmm5,[eax+464] 1296 movups xmm6,[eax+480] 1297 movups xmm7,[eax+496] 1298 maxps xmm0,[ebx+384] 1299 maxps xmm1,[ebx+400] 1300 maxps xmm2,[ebx+416] 1301 maxps xmm3,[ebx+432] 1302 maxps xmm4,[ebx+448] 1303 maxps xmm5,[ebx+464] 1304 maxps xmm6,[ebx+480] 1305 maxps xmm7,[ebx+496] 1306 movaps [ecx+384],xmm0 1307 movaps [ecx+400],xmm1 1308 movaps [ecx+416],xmm2 1309 movaps [ecx+432],xmm3 1310 movaps [ecx+448],xmm4 1311 movaps [ecx+464],xmm5 1312 movaps [ecx+480],xmm6 1313 movaps [ecx+496],xmm7 1314 } 1315 #else 1316 int i; 1317 float a,b; 1318 for(i=0;i<128;i++) 1319 { 1320 a=v1[i]; 1321 b=v2[i]; 1322 if(a>=b) m[i]=a; 1323 else m[i]=b; 1324 } 1325 #endif /*DB_USE_SIMD*/ 1326 } 1327 1328 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and 1329 stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row. 1330 The input s should be of width at least 128, and exist for 2 pixels outside the specified region. 1331 s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/ 1332 inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom, 1333 /*temp should point to at least 1334 6*132 floats of 16-byte-aligned allocated memory*/ 1335 float *temp) 1336 { 1337 #ifdef DB_USE_SIMD 1338 int i,lm2; 1339 float *two[4]; 1340 float *four,*five; 1341 1342 lm2=left-2; 1343 1344 /*Set pointers to pre-allocated memory*/ 1345 four=temp; 1346 five=four+132; 1347 for(i=0;i<4;i++) 1348 { 1349 two[i]=five+(i+1)*132; 1350 } 1351 1352 /*Set rests of four and five to zero to avoid 1353 floating point exceptions*/ 1354 for(i=129;i<132;i++) 1355 { 1356 four[i]=0.0; 1357 five[i]=0.0; 1358 } 1359 1360 /*Fill three rows of the wrap-around max buffers*/ 1361 for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2); 1362 1363 /*For each output row*/ 1364 for(;i<=bottom;i++) 1365 { 1366 /*Compute max of the lowest pair of rows in the five row window*/ 1367 db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2); 1368 /*Compute max of the lowest and highest pair of rows in the five row window*/ 1369 db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]); 1370 /*Compute max of all rows*/ 1371 db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]); 1372 /*Compute max of 2x5 chunks*/ 1373 db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five); 1374 /*Compute max of pairs of 2x5 chunks*/ 1375 db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five); 1376 /*Compute max of pairs of 5x5 except middle*/ 1377 db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five); 1378 } 1379 1380 #else 1381 int i,j,right; 1382 float sv; 1383 1384 right=left+128; 1385 for(i=top;i<=bottom;i++) for(j=left;j<right;j++) 1386 { 1387 sv=s[i][j]; 1388 1389 if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] && 1390 sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] && 1391 sv>s[ i][j-2] && sv>s[ i][j-1] && sv>s[ i][j+1] && sv>s[ i][j+2] && 1392 sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] && 1393 sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2]) 1394 { 1395 sf[i][j-2]=0.0; 1396 } 1397 else sf[i][j-2]=sv; 1398 } 1399 #endif /*DB_USE_SIMD*/ 1400 } 1401 1402 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and 1403 stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels 1404 outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned. 1405 Top must be at least 3. Reading and writing from and to the input and output images is done 1406 as if the region had a width equal to a multiple of 124. If this is not the case, the images 1407 should be over-allocated and the input cleared for a sufficient region*/ 1408 void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom, 1409 /*temp should point to at least 1410 6*132 floats of 16-byte-aligned allocated memory*/ 1411 float *temp) 1412 { 1413 int x,next_x; 1414 1415 for(x=left;x<=right;x=next_x) 1416 { 1417 next_x=x+124; 1418 db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp); 1419 } 1420 } 1421 1422 /*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp 1423 which should point to space of at least as many positions as there are pixels in the chunk*/ 1424 inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp) 1425 { 1426 int i,j,nr; 1427 float s; 1428 1429 nr=0; 1430 for(i=top;i<=bottom;i++) for(j=left;j<=right;j++) 1431 { 1432 s=strength[i][j]; 1433 1434 if(s>=threshold && 1435 s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] && 1436 s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] && 1437 s>strength[ i][j-2] && s>strength[ i][j-1] && s>strength[ i][j+1] && s>strength[ i][j+2] && 1438 s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] && 1439 s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2]) 1440 { 1441 x_temp[nr]=(double) j; 1442 y_temp[nr]=(double) i; 1443 s_temp[nr]=(double) s; 1444 nr++; 1445 } 1446 } 1447 return(nr); 1448 } 1449 1450 1451 //Sub-pixel accuracy using 2D quadratic interpolation.(YCJ) 1452 inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys) 1453 { 1454 int x = (int) xd; 1455 int y = (int) yd; 1456 1457 float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1]; 1458 float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x]; 1459 float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0; 1460 1461 float denom = (fxx * fyy - fxy * fxy) * (float) 2.0; 1462 1463 xs = xd; 1464 ys = yd; 1465 1466 if ( db_absf(denom) <= FLT_EPSILON ) 1467 { 1468 return; 1469 } 1470 else 1471 { 1472 float fx = strength[y][x+1] - strength[y][x-1]; 1473 float fy = strength[y+1][x] - strength[y-1][x]; 1474 1475 float dx = (fyy * fx - fxy * fy) / denom; 1476 float dy = (fxx * fy - fxy * fx) / denom; 1477 1478 if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 ) 1479 { 1480 return; 1481 } 1482 else 1483 { 1484 xs -= dx; 1485 ys -= dy; 1486 } 1487 } 1488 1489 return; 1490 } 1491 1492 /*Extract corners from the image part from (left,top) to (right,bottom). 1493 Store in x and y, extracting at most satnr corners in each block of size (bw,bh). 1494 The pointer temp_d should point to at least 5*bw*bh positions. 1495 area_factor holds how many corners max to extract per 10000 pixels*/ 1496 void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom, 1497 int bw,int bh,unsigned long area_factor, 1498 float threshold,double *temp_d, 1499 double *x_coord,double *y_coord,int *nr_corners) 1500 { 1501 double *x_temp,*y_temp,*s_temp,*select_temp; 1502 double loc_thresh; 1503 unsigned long bwbh,area,saturation; 1504 int x,next_x,last_x; 1505 int y,next_y,last_y; 1506 int nr,nr_points,i,stop; 1507 1508 bwbh=bw*bh; 1509 x_temp=temp_d; 1510 y_temp=x_temp+bwbh; 1511 s_temp=y_temp+bwbh; 1512 select_temp=s_temp+bwbh; 1513 1514 #ifdef DB_SUB_PIXEL 1515 // subpixel processing may sometimes push the corner ourside the real border 1516 // increasing border size: 1517 left++; 1518 top++; 1519 bottom--; 1520 right--; 1521 #endif /*DB_SUB_PIXEL*/ 1522 1523 nr_points=0; 1524 for(y=top;y<=bottom;y=next_y) 1525 { 1526 next_y=y+bh; 1527 last_y=next_y-1; 1528 if(last_y>bottom) last_y=bottom; 1529 for(x=left;x<=right;x=next_x) 1530 { 1531 next_x=x+bw; 1532 last_x=next_x-1; 1533 if(last_x>right) last_x=right; 1534 1535 area=(last_x-x+1)*(last_y-y+1); 1536 saturation=(area*area_factor)/10000; 1537 nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp); 1538 if(nr) 1539 { 1540 if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp); 1541 else loc_thresh=threshold; 1542 1543 stop=nr_points+saturation; 1544 for(i=0;(i<nr)&&(nr_points<stop);i++) 1545 { 1546 if(s_temp[i]>=loc_thresh) 1547 { 1548 #ifdef DB_SUB_PIXEL 1549 db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]); 1550 #else 1551 x_coord[nr_points]=x_temp[i]; 1552 y_coord[nr_points]=y_temp[i]; 1553 #endif 1554 1555 nr_points++; 1556 } 1557 } 1558 } 1559 } 1560 } 1561 *nr_corners=nr_points; 1562 } 1563 1564 db_CornerDetector_f::db_CornerDetector_f() 1565 { 1566 m_w=0; m_h=0; 1567 } 1568 1569 db_CornerDetector_f::~db_CornerDetector_f() 1570 { 1571 Clean(); 1572 } 1573 1574 void db_CornerDetector_f::Clean() 1575 { 1576 if(m_w!=0) 1577 { 1578 delete [] m_temp_f; 1579 delete [] m_temp_d; 1580 db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h); 1581 } 1582 m_w=0; m_h=0; 1583 } 1584 1585 unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners, 1586 int nr_horizontal_blocks,int nr_vertical_blocks, 1587 double absolute_threshold,double relative_threshold) 1588 { 1589 int chunkwidth=208; 1590 int block_width,block_height; 1591 unsigned long area_factor; 1592 int active_width,active_height; 1593 1594 active_width=db_maxi(1,im_width-10); 1595 active_height=db_maxi(1,im_height-10); 1596 block_width=db_maxi(1,active_width/nr_horizontal_blocks); 1597 block_height=db_maxi(1,active_height/nr_vertical_blocks); 1598 1599 area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/ 1600 (((double)active_width)*((double)active_height))))); 1601 1602 return(Start(im_width,im_height,block_width,block_height,area_factor, 1603 absolute_threshold,relative_threshold,chunkwidth)); 1604 } 1605 1606 unsigned long db_CornerDetector_f::Start(int im_width,int im_height, 1607 int block_width,int block_height,unsigned long area_factor, 1608 double absolute_threshold,double relative_threshold,int chunkwidth) 1609 { 1610 Clean(); 1611 1612 m_w=im_width; 1613 m_h=im_height; 1614 m_cw=chunkwidth; 1615 m_bw=block_width; 1616 m_bh=block_height; 1617 m_area_factor=area_factor; 1618 m_r_thresh=relative_threshold; 1619 m_a_thresh=absolute_threshold; 1620 m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000); 1621 1622 m_temp_f=new float[13*(m_cw+4)]; 1623 m_temp_d=new double[5*m_bw*m_bh]; 1624 m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h); 1625 1626 return(m_max_nr); 1627 } 1628 1629 void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const 1630 { 1631 float max_val,threshold; 1632 1633 db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw); 1634 1635 if(m_r_thresh) 1636 { 1637 max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6); 1638 threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh); 1639 } 1640 else threshold= (float) m_a_thresh; 1641 1642 db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold, 1643 m_temp_d,x_coord,y_coord,nr_corners); 1644 } 1645 1646 db_CornerDetector_u::db_CornerDetector_u() 1647 { 1648 m_w=0; m_h=0; 1649 } 1650 1651 db_CornerDetector_u::~db_CornerDetector_u() 1652 { 1653 Clean(); 1654 } 1655 1656 db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd) 1657 { 1658 Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, 1659 cd.m_a_thresh, cd.m_r_thresh); 1660 } 1661 1662 db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd) 1663 { 1664 if ( this == &cd ) return *this; 1665 1666 Clean(); 1667 1668 Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor, 1669 cd.m_a_thresh, cd.m_r_thresh); 1670 1671 return *this; 1672 } 1673 1674 void db_CornerDetector_u::Clean() 1675 { 1676 if(m_w!=0) 1677 { 1678 delete [] m_temp_i; 1679 delete [] m_temp_d; 1680 db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h); 1681 } 1682 m_w=0; m_h=0; 1683 } 1684 1685 unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners, 1686 int nr_horizontal_blocks,int nr_vertical_blocks, 1687 double absolute_threshold,double relative_threshold) 1688 { 1689 int block_width,block_height; 1690 unsigned long area_factor; 1691 int active_width,active_height; 1692 1693 active_width=db_maxi(1,im_width-10); 1694 active_height=db_maxi(1,im_height-10); 1695 block_width=db_maxi(1,active_width/nr_horizontal_blocks); 1696 block_height=db_maxi(1,active_height/nr_vertical_blocks); 1697 1698 area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/ 1699 (((double)active_width)*((double)active_height))))); 1700 1701 return(Start(im_width,im_height,block_width,block_height,area_factor, 1702 16.0*absolute_threshold,relative_threshold)); 1703 } 1704 1705 unsigned long db_CornerDetector_u::Start(int im_width,int im_height, 1706 int block_width,int block_height,unsigned long area_factor, 1707 double absolute_threshold,double relative_threshold) 1708 { 1709 Clean(); 1710 1711 m_w=im_width; 1712 m_h=im_height; 1713 m_bw=block_width; 1714 m_bh=block_height; 1715 m_area_factor=area_factor; 1716 m_r_thresh=relative_threshold; 1717 m_a_thresh=absolute_threshold; 1718 m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000); 1719 1720 m_temp_i=new int[18*128]; 1721 m_temp_d=new double[5*m_bw*m_bh]; 1722 m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h); 1723 1724 return(m_max_nr); 1725 } 1726 1727 void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners, 1728 const unsigned char * const *msk, unsigned char fgnd) const 1729 { 1730 float max_val,threshold; 1731 1732 db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i); 1733 1734 1735 if(m_r_thresh) 1736 { 1737 max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6); 1738 threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh); 1739 } 1740 else threshold= (float) m_a_thresh; 1741 1742 db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold, 1743 m_temp_d,x_coord,y_coord,nr_corners); 1744 1745 1746 if ( msk ) 1747 { 1748 int nr_corners_mask=0; 1749 1750 for ( int i = 0; i < *nr_corners; ++i) 1751 { 1752 int cor_x = db_roundi(*(x_coord+i)); 1753 int cor_y = db_roundi(*(y_coord+i)); 1754 if ( msk[cor_y][cor_x] == fgnd ) 1755 { 1756 x_coord[nr_corners_mask] = x_coord[i]; 1757 y_coord[nr_corners_mask] = y_coord[i]; 1758 nr_corners_mask++; 1759 } 1760 } 1761 *nr_corners = nr_corners_mask; 1762 } 1763 } 1764 1765 void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) { 1766 if ( m_w!=0 ) 1767 db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh), 1768 m_temp_d,x_coord,y_coord,nr_corners); 1769 } 1770 1771