Home | History | Annotate | Download | only in db_vlvm
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
     18 
     19 /*****************************************************************
     20 *    Lean and mean begins here                                   *
     21 *****************************************************************/
     22 
     23 #include "db_utilities.h"
     24 #include "db_feature_detection.h"
     25 #ifdef _VERBOSE_
     26 #include <iostream>
     27 #endif
     28 #include <float.h>
     29 
     30 #define DB_SUB_PIXEL
     31 
     32 #define BORDER 10 // 5
     33 
     34 float** db_AllocStrengthImage_f(float **im,int w,int h)
     35 {
     36     int i,n,aw;
     37     long c,size;
     38     float **img,*aim,*p;
     39 
     40     /*Determine number of 124 element chunks needed*/
     41     n=(db_maxi(1,w-6)+123)/124;
     42     /*Determine the total allocation width aw*/
     43     aw=n*124+8;
     44     /*Allocate*/
     45     size=aw*h+16;
     46     *im=new float [size];
     47     /*Clean up*/
     48     p=(*im);
     49     for(c=0;c<size;c++) p[c]=0.0;
     50     /*Get a 16 byte aligned pointer*/
     51     aim=db_AlignPointer_f(*im,16);
     52     /*Allocate pointer table*/
     53     img=new float* [h];
     54     /*Initialize the pointer table*/
     55     for(i=0;i<h;i++)
     56     {
     57         img[i]=aim+aw*i+1;
     58     }
     59 
     60     return(img);
     61 }
     62 
     63 void db_FreeStrengthImage_f(float *im,float **img,int h)
     64 {
     65     delete [] im;
     66     delete [] img;
     67 }
     68 
     69 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width
     70 Memory references occur one pixel outside the subrow*/
     71 inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
     72 {
     73     int c;
     74 
     75     for(c=0;c<chunk_width;c++)
     76     {
     77         Ix[c]=img[i][j+c-1]-img[i][j+c+1];
     78         Iy[c]=img[i-1][j+c]-img[i+1][j+c];
     79     }
     80 }
     81 
     82 /*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128
     83 Memory references occur one pixel outside the subrow*/
     84 inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
     85 {
     86 #ifdef DB_USE_MMX
     87     const unsigned char *r1,*r2,*r3;
     88 
     89     r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;
     90 
     91     _asm
     92     {
     93         mov esi,16
     94         mov eax,r1
     95         mov ebx,r2
     96         mov ecx,r3
     97         mov edx,dxx
     98 
     99         /*Get bitmask into mm7*/
    100         mov       edi,7F7F7F7Fh
    101         movd      mm7,edi
    102         punpckldq mm7,mm7
    103 
    104 loopstart:
    105         /***************dx part 1-12*********************************/
    106         movq       mm0,[eax]       /*1 Get upper*/
    107          pxor      mm6,mm6         /*2 Set to zero*/
    108         movq       mm1,[ecx]       /*3 Get lower*/
    109          psrlq     mm0,1           /*4 Shift*/
    110         psrlq      mm1,1           /*5 Shift*/
    111          pand      mm0,mm7         /*6 And*/
    112         movq       mm2,[ebx-1]     /*13 Get left*/
    113          pand      mm1,mm7         /*7 And*/
    114         psubb      mm0,mm1         /*8 Subtract*/
    115          pxor      mm5,mm5         /*14 Set to zero*/
    116         movq       mm1,mm0         /*9 Copy*/
    117          pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
    118         movq       mm3,[ebx+1]     /*15 Get right*/
    119          punpcklbw mm0,mm6         /*11 Unpack low*/
    120         punpckhbw  mm1,mm6         /*12 Unpack high*/
    121         /***************dy part 13-24*********************************/
    122          movq      mm4,mm0         /*25 Copy dx*/
    123         psrlq      mm2,1           /*16 Shift*/
    124          pmullw    mm0,mm0         /*26 Multiply dx*dx*/
    125         psrlq      mm3,1           /*17 Shift*/
    126          pand      mm2,mm7         /*18 And*/
    127         pand       mm3,mm7         /*19 And*/
    128          /*Stall*/
    129         psubb      mm2,mm3         /*20 Subtract*/
    130          /*Stall*/
    131         movq       mm3,mm2         /*21 Copy*/
    132          pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
    133         punpcklbw  mm2,mm5         /*23 Unpack low*/
    134          /*Stall*/
    135         punpckhbw  mm3,mm5         /*24 Unpack high*/
    136         /***************dxx dxy dyy low part 25-49*********************************/
    137          pmullw    mm4,mm2         /*27 Multiply dx*dy*/
    138         pmullw     mm2,mm2         /*28 Multiply dy*dy*/
    139          pxor      mm6,mm6         /*29 Set to zero*/
    140         movq       mm5,mm0         /*30 Copy dx*dx*/
    141          pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
    142         punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
    143          /*Stall*/
    144         punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
    145          pxor      mm6,mm6         /*36 Set to zero*/
    146         movq       [edx],mm0       /*34 Store dx*dx lows*/
    147          movq      mm0,mm4         /*37 Copy dx*dy*/
    148         movq       [edx+8],mm5     /*35 Store dx*dx highs*/
    149          pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
    150         punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
    151          /*Stall*/
    152         punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
    153          pxor      mm6,mm6         /*43 Set to zero*/
    154         movq       [edx+512],mm4   /*41 Store dx*dy lows*/
    155          movq      mm5,mm2         /*44 Copy dy*dy*/
    156         movq       [edx+520],mm0   /*42 Store dx*dy highs*/
    157          pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
    158         punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
    159          movq      mm4,mm1         /*50 Copy dx*/
    160         punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
    161          pmullw    mm1,mm1         /*51 Multiply dx*dx*/
    162         movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
    163          pmullw    mm4,mm3         /*52 Multiply dx*dy*/
    164         movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
    165         /***************dxx dxy dyy high part 50-79*********************************/
    166          pmullw    mm3,mm3         /*53 Multiply dy*dy*/
    167         pxor       mm6,mm6         /*54 Set to zero*/
    168          movq      mm5,mm1         /*55 Copy dx*dx*/
    169         pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
    170          pxor      mm2,mm2         /*61 Set to zero*/
    171         punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
    172          movq      mm0,mm4         /*62 Copy dx*dy*/
    173         punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
    174          pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
    175         movq       [edx+16],mm1    /*59 Store dx*dx lows*/
    176          punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
    177         movq       [edx+24],mm5    /*60 Store dx*dx highs*/
    178          punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
    179         movq       [edx+528],mm4   /*66 Store dx*dy lows*/
    180          pxor      mm6,mm6         /*68 Set to zero*/
    181         movq       [edx+536],mm0   /*67 Store dx*dy highs*/
    182          movq      mm5,mm3         /*69 Copy dy*dy*/
    183         pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
    184          add       eax,8           /*75*/
    185         punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
    186          add       ebx,8           /*76*/
    187         punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
    188          add       ecx,8           /*77*/
    189         movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
    190          /*Stall*/
    191         movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
    192          /*Stall*/
    193         add        edx,32          /*78*/
    194          dec esi                   /*79*/
    195         jnz loopstart
    196 
    197         emms
    198     }
    199 
    200 #else
    201     int c;
    202     int Ix,Iy;
    203 
    204     for(c=0;c<nc;c++)
    205     {
    206         Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
    207         Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
    208         dxx[c]=Ix*Ix;
    209         dxx[c+128]=Ix*Iy;
    210         dxx[c+256]=Iy*Iy;
    211     }
    212 #endif /*DB_USE_MMX*/
    213 }
    214 
    215 /*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/
    216 inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
    217                                  float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
    218                                  float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
    219 {
    220     int c;
    221     float dx,dy;
    222     float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4;
    223 
    224     for(c=0;c<chunk_width;c++)
    225     {
    226         dx=Ix0[c];
    227         dy=Iy0[c];
    228         Ixx0=dx*dx;
    229         Ixy0=dx*dy;
    230         Iyy0=dy*dy;
    231 
    232         dx=Ix1[c];
    233         dy=Iy1[c];
    234         Ixx1=dx*dx;
    235         Ixy1=dx*dy;
    236         Iyy1=dy*dy;
    237 
    238         dx=Ix2[c];
    239         dy=Iy2[c];
    240         Ixx2=dx*dx;
    241         Ixy2=dx*dy;
    242         Iyy2=dy*dy;
    243 
    244         dx=Ix3[c];
    245         dy=Iy3[c];
    246         Ixx3=dx*dx;
    247         Ixy3=dx*dy;
    248         Iyy3=dy*dy;
    249 
    250         dx=Ix4[c];
    251         dy=Iy4[c];
    252         Ixx4=dx*dx;
    253         Ixy4=dx*dy;
    254         Iyy4=dy*dy;
    255 
    256         /*Filter vertically*/
    257         gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4;
    258         gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4;
    259         gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4;
    260     }
    261 }
    262 
    263 /*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
    264 inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
    265 {
    266 #ifdef DB_USE_MMX
    267     int c;
    268 
    269     _asm
    270     {
    271         mov c,64
    272         mov eax,d0
    273         mov ebx,d1
    274         mov ecx,d2
    275         mov edx,d3
    276         mov edi,d4
    277         mov esi,g
    278 
    279 loopstart:
    280         /***************dxx part 1-14*********************************/
    281         movq        mm0,[eax]      /*1 Get dxx0*/
    282          /*Stall*/
    283         movq        mm1,[ebx]      /*2 Get dxx1*/
    284          /*Stall*/
    285         movq        mm2,[ecx]      /*5 Get dxx2*/
    286          pslld      mm1,2          /*3 Shift dxx1*/
    287         movq        mm3,[edx]      /*10 Get dxx3*/
    288          paddd      mm0,mm1        /*4 Accumulate dxx1*/
    289         movq        mm4,[eax+512]  /*15 Get dxy0*/
    290          pslld      mm2,1          /*6 Shift dxx2 1*/
    291         paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
    292          pslld      mm2,1          /*8 Shift dxx2 2*/
    293         movq        mm5,[ebx+512]  /*16 Get dxy1*/
    294          paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
    295         pslld       mm3,2          /*11 Shift dxx3*/
    296          /*Stall*/
    297         paddd       mm0,mm3        /*12 Accumulate dxx3*/
    298          pslld      mm5,2          /*17 Shift dxy1*/
    299         paddd       mm0,[edi]      /*13 Accumulate dxx4*/
    300          paddd      mm4,mm5        /*18 Accumulate dxy1*/
    301         movq        mm6,[ecx+512]  /*19 Get dxy2*/
    302          /*Stall*/
    303         movq        [esi],mm0      /*14 Store dxx sums*/
    304         /***************dxy part 15-28*********************************/
    305          pslld      mm6,1          /*20 Shift dxy2 1*/
    306         paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
    307          pslld      mm6,1          /*22 Shift dxy2 2*/
    308         movq        mm0,[eax+1024] /*29 Get dyy0*/
    309          paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
    310         movq        mm7,[edx+512]  /*24 Get dxy3*/
    311          pslld      mm7,2          /*25 Shift dxy3*/
    312         movq        mm1,[ebx+1024] /*30 Get dyy1*/
    313          paddd      mm4,mm7        /*26 Accumulate dxy3*/
    314         paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
    315          pslld      mm1,2          /*31 Shift dyy1*/
    316         movq        mm2,[ecx+1024] /*33 Get dyy2*/
    317          paddd      mm0,mm1        /*32 Accumulate dyy1*/
    318         movq        [esi+512],mm4  /*28 Store dxy sums*/
    319          pslld      mm2,1          /*34 Shift dyy2 1*/
    320         /***************dyy part 29-49*********************************/
    321 
    322 
    323         movq        mm3,[edx+1024] /*38 Get dyy3*/
    324          paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
    325         paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
    326          pslld      mm2,1          /*36 Shift dyy2 2*/
    327         paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
    328          pslld      mm3,2          /*39 Shift dyy3*/
    329         paddd       mm0,mm3        /*40 Accumulate dyy3*/
    330          add        eax,8           /*43*/
    331         add         ebx,8           /*44*/
    332          add        ecx,8           /*45*/
    333         movq        [esi+1024],mm0 /*42 Store dyy sums*/
    334          /*Stall*/
    335         add         edx,8           /*46*/
    336          add        edi,8           /*47*/
    337         add         esi,8           /*48*/
    338          dec        c               /*49*/
    339         jnz         loopstart
    340 
    341         emms
    342     }
    343 
    344 #else
    345     int c,dd;
    346 
    347     for(c=0;c<nc;c++)
    348     {
    349         /*Filter vertically*/
    350         dd=d2[c];
    351         g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
    352 
    353         dd=d2[c+128];
    354         g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
    355 
    356         dd=d2[c+256];
    357         g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
    358     }
    359 #endif /*DB_USE_MMX*/
    360 }
    361 
    362 /*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j
    363 and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width
    364 and starting at (i,j-2)*/
    365 inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
    366 {
    367     float Gxx,Gxy,Gyy,det,trc;
    368     int c;
    369 
    370     for(c=0;c<chunk_width;c++)
    371     {
    372         Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
    373         Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
    374         Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];
    375 
    376         det=Gxx*Gyy-Gxy*Gxy;
    377         trc=Gxx+Gyy;
    378         s[i][j+c]=det-0.06f*trc*trc;
    379     }
    380 }
    381 
    382 /*Filter g of length 128 in place with 14641. Output is shifted two steps
    383 and of length 124*/
    384 inline void db_Filter14641_128_i(int *g,int nc)
    385 {
    386 #ifdef DB_USE_MMX
    387     int mask;
    388 
    389     mask=0xFFFFFFFF;
    390     _asm
    391     {
    392         mov esi,31
    393         mov eax,g
    394 
    395         /*Get bitmask 00000000FFFFFFFF into mm7*/
    396         movd mm7,mask
    397 
    398         /*Warming iteration one 1-16********************/
    399         movq       mm6,[eax]      /*1 Load new data*/
    400         paddd      mm0,mm6        /*2 Add 1* behind two steps*/
    401         movq       mm2,mm6        /*3 Start with 1* in front two steps*/
    402         pslld      mm6,1          /*4*/
    403         paddd      mm1,mm6        /*5 Add 2* same place*/
    404         pslld      mm6,1          /*6*/
    405         paddd      mm1,mm6        /*7 Add 4* same place*/
    406         pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
    407         paddd      mm1,mm6        /*9 Add 4* swapped*/
    408         movq       mm5,mm6        /*10 Copy*/
    409         pand       mm6,mm7        /*11 Get low double-word only*/
    410         paddd      mm2,mm6        /*12 Add 4* in front one step*/
    411         pxor       mm6,mm5        /*13 Get high double-word only*/
    412         paddd      mm0,mm6        /*14 Add 4* behind one step*/
    413         movq       mm0,mm1        /*15 Shift along*/
    414         movq       mm1,mm2        /*16 Shift along*/
    415         /*Warming iteration two 17-32********************/
    416         movq       mm4,[eax+8]    /*17 Load new data*/
    417         paddd      mm0,mm4        /*18 Add 1* behind two steps*/
    418         movq       mm2,mm4        /*19 Start with 1* in front two steps*/
    419         pslld      mm4,1          /*20*/
    420         paddd      mm1,mm4        /*21 Add 2* same place*/
    421         pslld      mm4,1          /*22*/
    422         paddd      mm1,mm4        /*23 Add 4* same place*/
    423         pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
    424         paddd      mm1,mm4        /*25 Add 4* swapped*/
    425         movq       mm3,mm4        /*26 Copy*/
    426         pand       mm4,mm7        /*27 Get low double-word only*/
    427         paddd      mm2,mm4        /*28 Add 4* in front one step*/
    428         pxor       mm4,mm3        /*29 Get high double-word only*/
    429         paddd      mm0,mm4        /*30 Add 4* behind one step*/
    430         movq       mm0,mm1        /*31 Shift along*/
    431         movq       mm1,mm2        /*32 Shift along*/
    432 
    433         /*Loop********************/
    434 loopstart:
    435         /*First part of loop 33-47********/
    436         movq        mm6,[eax+16]   /*33 Load new data*/
    437          /*Stall*/
    438         paddd       mm0,mm6        /*34 Add 1* behind two steps*/
    439          movq       mm2,mm6        /*35 Start with 1* in front two steps*/
    440         movq        mm4,[eax+24]   /*48 Load new data*/
    441          pslld      mm6,1          /*36*/
    442         paddd       mm1,mm6        /*37 Add 2* same place*/
    443          pslld      mm6,1          /*38*/
    444         paddd       mm1,mm6        /*39 Add 4* same place*/
    445          pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
    446         paddd       mm1,mm4        /*49 Add 1* behind two steps*/
    447          movq       mm5,mm6        /*41 Copy*/
    448         paddd       mm1,mm6        /*42 Add 4* swapped*/
    449          pand       mm6,mm7        /*43 Get low double-word only*/
    450         paddd       mm2,mm6        /*44 Add 4* in front one step*/
    451          pxor       mm6,mm5        /*45 Get high double-word only*/
    452         paddd       mm0,mm6        /*46 Add 4* behind one step*/
    453          movq       mm6,mm4        /*50a Copy*/
    454         pslld       mm4,1          /*51*/
    455          /*Stall*/
    456         movq        [eax],mm0      /*47 Store result two steps behind*/
    457         /*Second part of loop 48-66********/
    458          movq       mm0,mm6        /*50b Start with 1* in front two steps*/
    459         paddd       mm2,mm4        /*52 Add 2* same place*/
    460          pslld      mm4,1          /*53*/
    461         paddd       mm2,mm4        /*54 Add 4* same place*/
    462          pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
    463         paddd       mm2,mm4        /*56 Add 4* swapped*/
    464          movq       mm3,mm4        /*57 Copy*/
    465         pand        mm4,mm7        /*58 Get low double-word only*/
    466          /*Stall*/
    467         paddd       mm0,mm4        /*59 Add 4* in front one step*/
    468          pxor       mm4,mm3        /*60 Get high double-word only*/
    469         paddd       mm1,mm4        /*61 Add 4* behind one step*/
    470          add        eax,16         /*65*/
    471         dec         esi            /*66*/
    472          /*Stall*/
    473         movq        [eax-8],mm1    /*62 Store result two steps behind*/
    474          movq       mm1,mm0        /*63 Shift along*/
    475         movq        mm0,mm2        /*64 Shift along*/
    476         jnz loopstart
    477 
    478         emms
    479     }
    480 
    481 #else
    482     int c;
    483 
    484     for(c=0;c<nc-4;c++)
    485     {
    486         g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
    487     }
    488 #endif /*DB_USE_MMX*/
    489 }
    490 
    491 /*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
    492 of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
    493 s should be 16 byte aligned*/
    494 inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
    495 {
    496     float k;
    497 
    498     k=0.06f;
    499 
    500     db_Filter14641_128_i(gxx,nc);
    501     db_Filter14641_128_i(gxy,nc);
    502     db_Filter14641_128_i(gyy,nc);
    503 
    504 #ifdef DB_USE_SIMD
    505 
    506 
    507     _asm
    508     {
    509         mov esi,15
    510         mov eax,gxx
    511         mov ebx,gxy
    512         mov ecx,gyy
    513         mov edx,s
    514 
    515         /*broadcast k to all positions of xmm7*/
    516         movss   xmm7,k
    517         shufps  xmm7,xmm7,0
    518 
    519         /*****Warm up 1-10**************************************/
    520         cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
    521          /*Stall*/
    522         cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
    523          movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
    524         cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
    525          movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
    526         cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
    527          movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
    528         cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
    529          movaps   xmm3,xmm0    /*10 Copy Cxx*/
    530         cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
    531          /*Stall*/
    532 loopstart:
    533         /*****First part of loop 11-18***********************/
    534         mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
    535          addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
    536         cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
    537          mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
    538         mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
    539          movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
    540         cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
    541          /*Stall*/
    542         subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
    543          mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
    544         cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
    545          /*Stall*/
    546         movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
    547          /*Stall*/
    548         cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
    549          subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
    550         cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
    551          /*Stall*/
    552         movaps    [edx],xmm0    /*18 Store*/
    553         /*****Second part of loop 26-40***********************/
    554          movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
    555         cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
    556          movaps   xmm3,xmm4     /*28 Copy Cxx*/
    557         mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
    558          addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
    559         cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
    560          mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
    561         cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
    562          mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
    563         cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
    564          movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
    565         subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
    566          movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
    567         cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
    568          mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
    569         cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
    570          movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
    571         movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
    572          add      eax,32        /*37*/
    573         subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
    574          add      ebx,32        /*38*/
    575         cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
    576          /*Stall*/
    577         movaps    [edx+16],xmm4 /*36 Store*/
    578          /*Stall*/
    579         add       ecx,32        /*39*/
    580          add      edx,32        /*40*/
    581         dec       esi           /*41*/
    582         jnz loopstart
    583 
    584         /****Cool down***************/
    585         mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
    586         addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
    587         mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
    588         mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
    589         subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
    590         mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
    591         subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
    592         movaps   [edx],xmm0   /*Store*/
    593     }
    594 
    595 #else
    596     float Gxx,Gxy,Gyy,det,trc;
    597     int c;
    598 
    599     //for(c=0;c<124;c++)
    600     for(c=0;c<nc-4;c++)
    601     {
    602         Gxx=(float)gxx[c];
    603         Gxy=(float)gxy[c];
    604         Gyy=(float)gyy[c];
    605 
    606         det=Gxx*Gyy-Gxy*Gxy;
    607         trc=Gxx+Gyy;
    608         s[c]=det-k*trc*trc;
    609     }
    610 #endif /*DB_USE_SIMD*/
    611 }
    612 
    613 /*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
    614 store it into the corresponding region of s. left and top have to be at least 3 and
    615 right and bottom have to be at most width-4,height-4*/
    616 inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
    617                                       /*temp should point to at least
    618                                       13*(right-left+5) of allocated memory*/
    619                                       float *temp)
    620 {
    621     float *Ix[5],*Iy[5];
    622     float *gxx,*gxy,*gyy;
    623     int i,chunk_width,chunk_width_p4;
    624 
    625     chunk_width=right-left+1;
    626     chunk_width_p4=chunk_width+4;
    627     gxx=temp;
    628     gxy=gxx+chunk_width_p4;
    629     gyy=gxy+chunk_width_p4;
    630     for(i=0;i<5;i++)
    631     {
    632         Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
    633         Iy[i]=Ix[i]+chunk_width_p4;
    634     }
    635 
    636     /*Fill four rows of the wrap-around derivative buffers*/
    637     for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
    638 
    639     /*For each output row*/
    640     for(i=top;i<=bottom;i++)
    641     {
    642         /*Step the derivative buffers*/
    643         db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
    644 
    645         /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
    646         db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
    647                                  Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
    648                                  Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
    649 
    650         /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
    651         db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
    652     }
    653 }
    654 
    655 /*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
    656 store it into the corresponding region of s. left and top have to be at least 3 and
    657 right and bottom have to be at most width-4,height-4. The left of the region in s should
    658 be 16 byte aligned*/
    659 inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
    660                                       /*temp should point to at least
    661                                       18*128 of allocated memory*/
    662                                       int *temp, int nc)
    663 {
    664     int *Ixx[5],*Ixy[5],*Iyy[5];
    665     int *gxx,*gxy,*gyy;
    666     int i;
    667 
    668     gxx=temp;
    669     gxy=gxx+128;
    670     gyy=gxy+128;
    671     for(i=0;i<5;i++)
    672     {
    673         Ixx[i]=gyy+(3*i+1)*128;
    674         Ixy[i]=gyy+(3*i+2)*128;
    675         Iyy[i]=gyy+(3*i+3)*128;
    676     }
    677 
    678     /*Fill four rows of the wrap-around derivative buffers*/
    679     for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
    680 
    681     /*For each output row*/
    682     for(i=top;i<=bottom;i++)
    683     {
    684         /*Step the derivative buffers*/
    685         db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
    686 
    687         /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
    688         db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
    689 
    690         /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
    691         db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
    692     }
    693 
    694 }
    695 
    696 /*Compute Harris corner strength of img. Strength is returned for the region
    697 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
    698 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
    699 for a meaningful result*/
    700 void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
    701                                     /*temp should point to at least
    702                                     13*(chunk_width+4) of allocated memory*/
    703                                     float *temp,
    704                                     int chunk_width)
    705 {
    706     int x,next_x,last,right;
    707 
    708     last=w-4;
    709     for(x=3;x<=last;x=next_x)
    710     {
    711         next_x=x+chunk_width;
    712         right=next_x-1;
    713         if(right>last) right=last;
    714         /*Compute the Harris strength of a chunk*/
    715         db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
    716     }
    717 }
    718 
    719 /*Compute Harris corner strength of img. Strength is returned for the region
    720 with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
    721 same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
    722 for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
    723 s[i][3] should by 16 byte aligned for any i*/
    724 void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
    725                                     /*temp should point to at least
    726                                     18*128 of allocated memory*/
    727                                     int *temp)
    728 {
    729     int x,next_x,last;
    730     int nc;
    731 
    732     last=w-4;
    733     for(x=3;x<=last;x=next_x)
    734     {
    735         next_x=x+124;
    736 
    737         // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
    738         nc = db_mini(128,last-x+1);
    739         //nc = 128;
    740 
    741         /*Compute the Harris strength of a chunk*/
    742         db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
    743     }
    744 }
    745 
    746 inline float db_Max_128Aligned16_f(float *v)
    747 {
    748 #ifdef DB_USE_SIMD
    749     float back;
    750 
    751     _asm
    752     {
    753         mov eax,v
    754 
    755         /*Chunk1*/
    756         movaps xmm0,[eax]
    757         movaps xmm1,[eax+16]
    758         movaps xmm2,[eax+32]
    759         movaps xmm3,[eax+48]
    760         movaps xmm4,[eax+64]
    761         movaps xmm5,[eax+80]
    762         movaps xmm6,[eax+96]
    763         movaps xmm7,[eax+112]
    764 
    765         /*Chunk2*/
    766         maxps xmm0,[eax+128]
    767         maxps xmm1,[eax+144]
    768         maxps xmm2,[eax+160]
    769         maxps xmm3,[eax+176]
    770         maxps xmm4,[eax+192]
    771         maxps xmm5,[eax+208]
    772         maxps xmm6,[eax+224]
    773         maxps xmm7,[eax+240]
    774 
    775         /*Chunk3*/
    776         maxps xmm0,[eax+256]
    777         maxps xmm1,[eax+272]
    778         maxps xmm2,[eax+288]
    779         maxps xmm3,[eax+304]
    780         maxps xmm4,[eax+320]
    781         maxps xmm5,[eax+336]
    782         maxps xmm6,[eax+352]
    783         maxps xmm7,[eax+368]
    784 
    785         /*Chunk4*/
    786         maxps xmm0,[eax+384]
    787         maxps xmm1,[eax+400]
    788         maxps xmm2,[eax+416]
    789         maxps xmm3,[eax+432]
    790         maxps xmm4,[eax+448]
    791         maxps xmm5,[eax+464]
    792         maxps xmm6,[eax+480]
    793         maxps xmm7,[eax+496]
    794 
    795         /*Collect*/
    796         maxps   xmm0,xmm1
    797         maxps   xmm2,xmm3
    798         maxps   xmm4,xmm5
    799         maxps   xmm6,xmm7
    800         maxps   xmm0,xmm2
    801         maxps   xmm4,xmm6
    802         maxps   xmm0,xmm4
    803         movhlps xmm1,xmm0
    804         maxps   xmm0,xmm1
    805         shufps  xmm1,xmm0,1
    806         maxps   xmm0,xmm1
    807         movss   back,xmm0
    808     }
    809 
    810     return(back);
    811 #else
    812     float val,max_val;
    813     float *p,*stop_p;
    814     max_val=v[0];
    815     for(p=v+1,stop_p=v+128;p!=stop_p;)
    816     {
    817         val= *p++;
    818         if(val>max_val) max_val=val;
    819     }
    820     return(max_val);
    821 #endif /*DB_USE_SIMD*/
    822 }
    823 
    824 inline float db_Max_64Aligned16_f(float *v)
    825 {
    826 #ifdef DB_USE_SIMD
    827     float back;
    828 
    829     _asm
    830     {
    831         mov eax,v
    832 
    833         /*Chunk1*/
    834         movaps xmm0,[eax]
    835         movaps xmm1,[eax+16]
    836         movaps xmm2,[eax+32]
    837         movaps xmm3,[eax+48]
    838         movaps xmm4,[eax+64]
    839         movaps xmm5,[eax+80]
    840         movaps xmm6,[eax+96]
    841         movaps xmm7,[eax+112]
    842 
    843         /*Chunk2*/
    844         maxps xmm0,[eax+128]
    845         maxps xmm1,[eax+144]
    846         maxps xmm2,[eax+160]
    847         maxps xmm3,[eax+176]
    848         maxps xmm4,[eax+192]
    849         maxps xmm5,[eax+208]
    850         maxps xmm6,[eax+224]
    851         maxps xmm7,[eax+240]
    852 
    853         /*Collect*/
    854         maxps   xmm0,xmm1
    855         maxps   xmm2,xmm3
    856         maxps   xmm4,xmm5
    857         maxps   xmm6,xmm7
    858         maxps   xmm0,xmm2
    859         maxps   xmm4,xmm6
    860         maxps   xmm0,xmm4
    861         movhlps xmm1,xmm0
    862         maxps   xmm0,xmm1
    863         shufps  xmm1,xmm0,1
    864         maxps   xmm0,xmm1
    865         movss   back,xmm0
    866     }
    867 
    868     return(back);
    869 #else
    870     float val,max_val;
    871     float *p,*stop_p;
    872     max_val=v[0];
    873     for(p=v+1,stop_p=v+64;p!=stop_p;)
    874     {
    875         val= *p++;
    876         if(val>max_val) max_val=val;
    877     }
    878     return(max_val);
    879 #endif /*DB_USE_SIMD*/
    880 }
    881 
    882 inline float db_Max_32Aligned16_f(float *v)
    883 {
    884 #ifdef DB_USE_SIMD
    885     float back;
    886 
    887     _asm
    888     {
    889         mov eax,v
    890 
    891         /*Chunk1*/
    892         movaps xmm0,[eax]
    893         movaps xmm1,[eax+16]
    894         movaps xmm2,[eax+32]
    895         movaps xmm3,[eax+48]
    896         movaps xmm4,[eax+64]
    897         movaps xmm5,[eax+80]
    898         movaps xmm6,[eax+96]
    899         movaps xmm7,[eax+112]
    900 
    901         /*Collect*/
    902         maxps   xmm0,xmm1
    903         maxps   xmm2,xmm3
    904         maxps   xmm4,xmm5
    905         maxps   xmm6,xmm7
    906         maxps   xmm0,xmm2
    907         maxps   xmm4,xmm6
    908         maxps   xmm0,xmm4
    909         movhlps xmm1,xmm0
    910         maxps   xmm0,xmm1
    911         shufps  xmm1,xmm0,1
    912         maxps   xmm0,xmm1
    913         movss   back,xmm0
    914     }
    915 
    916     return(back);
    917 #else
    918     float val,max_val;
    919     float *p,*stop_p;
    920     max_val=v[0];
    921     for(p=v+1,stop_p=v+32;p!=stop_p;)
    922     {
    923         val= *p++;
    924         if(val>max_val) max_val=val;
    925     }
    926     return(max_val);
    927 #endif /*DB_USE_SIMD*/
    928 }
    929 
    930 inline float db_Max_16Aligned16_f(float *v)
    931 {
    932 #ifdef DB_USE_SIMD
    933     float back;
    934 
    935     _asm
    936     {
    937         mov eax,v
    938 
    939         /*Chunk1*/
    940         movaps xmm0,[eax]
    941         movaps xmm1,[eax+16]
    942         movaps xmm2,[eax+32]
    943         movaps xmm3,[eax+48]
    944 
    945         /*Collect*/
    946         maxps   xmm0,xmm1
    947         maxps   xmm2,xmm3
    948         maxps   xmm0,xmm2
    949         movhlps xmm1,xmm0
    950         maxps   xmm0,xmm1
    951         shufps  xmm1,xmm0,1
    952         maxps   xmm0,xmm1
    953         movss   back,xmm0
    954     }
    955 
    956     return(back);
    957 #else
    958     float val,max_val;
    959     float *p,*stop_p;
    960     max_val=v[0];
    961     for(p=v+1,stop_p=v+16;p!=stop_p;)
    962     {
    963         val= *p++;
    964         if(val>max_val) max_val=val;
    965     }
    966     return(max_val);
    967 #endif /*DB_USE_SIMD*/
    968 }
    969 
    970 inline float db_Max_8Aligned16_f(float *v)
    971 {
    972 #ifdef DB_USE_SIMD
    973     float back;
    974 
    975     _asm
    976     {
    977         mov eax,v
    978 
    979         /*Chunk1*/
    980         movaps xmm0,[eax]
    981         movaps xmm1,[eax+16]
    982 
    983         /*Collect*/
    984         maxps   xmm0,xmm1
    985         movhlps xmm1,xmm0
    986         maxps   xmm0,xmm1
    987         shufps  xmm1,xmm0,1
    988         maxps   xmm0,xmm1
    989         movss   back,xmm0
    990     }
    991 
    992     return(back);
    993 #else
    994     float val,max_val;
    995     float *p,*stop_p;
    996     max_val=v[0];
    997     for(p=v+1,stop_p=v+8;p!=stop_p;)
    998     {
    999         val= *p++;
   1000         if(val>max_val) max_val=val;
   1001     }
   1002     return(max_val);
   1003 #endif /*DB_USE_SIMD*/
   1004 }
   1005 
   1006 inline float db_Max_Aligned16_f(float *v,int size)
   1007 {
   1008     float val,max_val;
   1009     float *stop_v;
   1010 
   1011     max_val=v[0];
   1012     for(;size>=128;size-=128)
   1013     {
   1014         val=db_Max_128Aligned16_f(v);
   1015         v+=128;
   1016         if(val>max_val) max_val=val;
   1017     }
   1018     if(size&64)
   1019     {
   1020         val=db_Max_64Aligned16_f(v);
   1021         v+=64;
   1022         if(val>max_val) max_val=val;
   1023     }
   1024     if(size&32)
   1025     {
   1026         val=db_Max_32Aligned16_f(v);
   1027         v+=32;
   1028         if(val>max_val) max_val=val;
   1029     }
   1030     if(size&16)
   1031     {
   1032         val=db_Max_16Aligned16_f(v);
   1033         v+=16;
   1034         if(val>max_val) max_val=val;
   1035     }
   1036     if(size&8)
   1037     {
   1038         val=db_Max_8Aligned16_f(v);
   1039         v+=8;
   1040         if(val>max_val) max_val=val;
   1041     }
   1042     if(size&7)
   1043     {
   1044         for(stop_v=v+(size&7);v!=stop_v;)
   1045         {
   1046             val= *v++;
   1047             if(val>max_val) max_val=val;
   1048         }
   1049     }
   1050 
   1051     return(max_val);
   1052 }
   1053 
   1054 /*Find maximum value of img in the region starting at (left,top)
   1055 and with width w and height h. img[left] should be 16 byte aligned*/
   1056 float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
   1057 {
   1058     float val,max_val;
   1059     int i,stop_i;
   1060 
   1061     if(w && h)
   1062     {
   1063         stop_i=top+h;
   1064         max_val=img[top][left];
   1065 
   1066         for(i=top;i<stop_i;i++)
   1067         {
   1068             val=db_Max_Aligned16_f(img[i]+left,w);
   1069             if(val>max_val) max_val=val;
   1070         }
   1071         return(max_val);
   1072     }
   1073     return(0.0);
   1074 }
   1075 
   1076 inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
   1077 {
   1078 #ifdef DB_USE_SIMD
   1079     _asm
   1080     {
   1081         mov eax,v1
   1082         mov ebx,v2
   1083         mov ecx,m
   1084 
   1085         /*Chunk1*/
   1086         movaps xmm0,[eax]
   1087         movaps xmm1,[eax+16]
   1088         movaps xmm2,[eax+32]
   1089         movaps xmm3,[eax+48]
   1090         movaps xmm4,[eax+64]
   1091         movaps xmm5,[eax+80]
   1092         movaps xmm6,[eax+96]
   1093         movaps xmm7,[eax+112]
   1094         maxps  xmm0,[ebx]
   1095         maxps  xmm1,[ebx+16]
   1096         maxps  xmm2,[ebx+32]
   1097         maxps  xmm3,[ebx+48]
   1098         maxps  xmm4,[ebx+64]
   1099         maxps  xmm5,[ebx+80]
   1100         maxps  xmm6,[ebx+96]
   1101         maxps  xmm7,[ebx+112]
   1102         movaps [ecx],xmm0
   1103         movaps [ecx+16],xmm1
   1104         movaps [ecx+32],xmm2
   1105         movaps [ecx+48],xmm3
   1106         movaps [ecx+64],xmm4
   1107         movaps [ecx+80],xmm5
   1108         movaps [ecx+96],xmm6
   1109         movaps [ecx+112],xmm7
   1110 
   1111         /*Chunk2*/
   1112         movaps xmm0,[eax+128]
   1113         movaps xmm1,[eax+144]
   1114         movaps xmm2,[eax+160]
   1115         movaps xmm3,[eax+176]
   1116         movaps xmm4,[eax+192]
   1117         movaps xmm5,[eax+208]
   1118         movaps xmm6,[eax+224]
   1119         movaps xmm7,[eax+240]
   1120         maxps  xmm0,[ebx+128]
   1121         maxps  xmm1,[ebx+144]
   1122         maxps  xmm2,[ebx+160]
   1123         maxps  xmm3,[ebx+176]
   1124         maxps  xmm4,[ebx+192]
   1125         maxps  xmm5,[ebx+208]
   1126         maxps  xmm6,[ebx+224]
   1127         maxps  xmm7,[ebx+240]
   1128         movaps [ecx+128],xmm0
   1129         movaps [ecx+144],xmm1
   1130         movaps [ecx+160],xmm2
   1131         movaps [ecx+176],xmm3
   1132         movaps [ecx+192],xmm4
   1133         movaps [ecx+208],xmm5
   1134         movaps [ecx+224],xmm6
   1135         movaps [ecx+240],xmm7
   1136 
   1137         /*Chunk3*/
   1138         movaps xmm0,[eax+256]
   1139         movaps xmm1,[eax+272]
   1140         movaps xmm2,[eax+288]
   1141         movaps xmm3,[eax+304]
   1142         movaps xmm4,[eax+320]
   1143         movaps xmm5,[eax+336]
   1144         movaps xmm6,[eax+352]
   1145         movaps xmm7,[eax+368]
   1146         maxps  xmm0,[ebx+256]
   1147         maxps  xmm1,[ebx+272]
   1148         maxps  xmm2,[ebx+288]
   1149         maxps  xmm3,[ebx+304]
   1150         maxps  xmm4,[ebx+320]
   1151         maxps  xmm5,[ebx+336]
   1152         maxps  xmm6,[ebx+352]
   1153         maxps  xmm7,[ebx+368]
   1154         movaps [ecx+256],xmm0
   1155         movaps [ecx+272],xmm1
   1156         movaps [ecx+288],xmm2
   1157         movaps [ecx+304],xmm3
   1158         movaps [ecx+320],xmm4
   1159         movaps [ecx+336],xmm5
   1160         movaps [ecx+352],xmm6
   1161         movaps [ecx+368],xmm7
   1162 
   1163         /*Chunk4*/
   1164         movaps xmm0,[eax+384]
   1165         movaps xmm1,[eax+400]
   1166         movaps xmm2,[eax+416]
   1167         movaps xmm3,[eax+432]
   1168         movaps xmm4,[eax+448]
   1169         movaps xmm5,[eax+464]
   1170         movaps xmm6,[eax+480]
   1171         movaps xmm7,[eax+496]
   1172         maxps  xmm0,[ebx+384]
   1173         maxps  xmm1,[ebx+400]
   1174         maxps  xmm2,[ebx+416]
   1175         maxps  xmm3,[ebx+432]
   1176         maxps  xmm4,[ebx+448]
   1177         maxps  xmm5,[ebx+464]
   1178         maxps  xmm6,[ebx+480]
   1179         maxps  xmm7,[ebx+496]
   1180         movaps [ecx+384],xmm0
   1181         movaps [ecx+400],xmm1
   1182         movaps [ecx+416],xmm2
   1183         movaps [ecx+432],xmm3
   1184         movaps [ecx+448],xmm4
   1185         movaps [ecx+464],xmm5
   1186         movaps [ecx+480],xmm6
   1187         movaps [ecx+496],xmm7
   1188     }
   1189 #else
   1190     int i;
   1191     float a,b;
   1192     for(i=0;i<128;i++)
   1193     {
   1194         a=v1[i];
   1195         b=v2[i];
   1196         if(a>=b) m[i]=a;
   1197         else m[i]=b;
   1198     }
   1199 #endif /*DB_USE_SIMD*/
   1200 }
   1201 
   1202 inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
   1203 {
   1204 #ifdef DB_USE_SIMD
   1205     _asm
   1206     {
   1207         mov eax,v1
   1208         mov ebx,v2
   1209         mov ecx,m
   1210 
   1211         /*Chunk1*/
   1212         movups xmm0,[eax]
   1213         movups xmm1,[eax+16]
   1214         movups xmm2,[eax+32]
   1215         movups xmm3,[eax+48]
   1216         movups xmm4,[eax+64]
   1217         movups xmm5,[eax+80]
   1218         movups xmm6,[eax+96]
   1219         movups xmm7,[eax+112]
   1220         maxps  xmm0,[ebx]
   1221         maxps  xmm1,[ebx+16]
   1222         maxps  xmm2,[ebx+32]
   1223         maxps  xmm3,[ebx+48]
   1224         maxps  xmm4,[ebx+64]
   1225         maxps  xmm5,[ebx+80]
   1226         maxps  xmm6,[ebx+96]
   1227         maxps  xmm7,[ebx+112]
   1228         movaps [ecx],xmm0
   1229         movaps [ecx+16],xmm1
   1230         movaps [ecx+32],xmm2
   1231         movaps [ecx+48],xmm3
   1232         movaps [ecx+64],xmm4
   1233         movaps [ecx+80],xmm5
   1234         movaps [ecx+96],xmm6
   1235         movaps [ecx+112],xmm7
   1236 
   1237         /*Chunk2*/
   1238         movups xmm0,[eax+128]
   1239         movups xmm1,[eax+144]
   1240         movups xmm2,[eax+160]
   1241         movups xmm3,[eax+176]
   1242         movups xmm4,[eax+192]
   1243         movups xmm5,[eax+208]
   1244         movups xmm6,[eax+224]
   1245         movups xmm7,[eax+240]
   1246         maxps  xmm0,[ebx+128]
   1247         maxps  xmm1,[ebx+144]
   1248         maxps  xmm2,[ebx+160]
   1249         maxps  xmm3,[ebx+176]
   1250         maxps  xmm4,[ebx+192]
   1251         maxps  xmm5,[ebx+208]
   1252         maxps  xmm6,[ebx+224]
   1253         maxps  xmm7,[ebx+240]
   1254         movaps [ecx+128],xmm0
   1255         movaps [ecx+144],xmm1
   1256         movaps [ecx+160],xmm2
   1257         movaps [ecx+176],xmm3
   1258         movaps [ecx+192],xmm4
   1259         movaps [ecx+208],xmm5
   1260         movaps [ecx+224],xmm6
   1261         movaps [ecx+240],xmm7
   1262 
   1263         /*Chunk3*/
   1264         movups xmm0,[eax+256]
   1265         movups xmm1,[eax+272]
   1266         movups xmm2,[eax+288]
   1267         movups xmm3,[eax+304]
   1268         movups xmm4,[eax+320]
   1269         movups xmm5,[eax+336]
   1270         movups xmm6,[eax+352]
   1271         movups xmm7,[eax+368]
   1272         maxps  xmm0,[ebx+256]
   1273         maxps  xmm1,[ebx+272]
   1274         maxps  xmm2,[ebx+288]
   1275         maxps  xmm3,[ebx+304]
   1276         maxps  xmm4,[ebx+320]
   1277         maxps  xmm5,[ebx+336]
   1278         maxps  xmm6,[ebx+352]
   1279         maxps  xmm7,[ebx+368]
   1280         movaps [ecx+256],xmm0
   1281         movaps [ecx+272],xmm1
   1282         movaps [ecx+288],xmm2
   1283         movaps [ecx+304],xmm3
   1284         movaps [ecx+320],xmm4
   1285         movaps [ecx+336],xmm5
   1286         movaps [ecx+352],xmm6
   1287         movaps [ecx+368],xmm7
   1288 
   1289         /*Chunk4*/
   1290         movups xmm0,[eax+384]
   1291         movups xmm1,[eax+400]
   1292         movups xmm2,[eax+416]
   1293         movups xmm3,[eax+432]
   1294         movups xmm4,[eax+448]
   1295         movups xmm5,[eax+464]
   1296         movups xmm6,[eax+480]
   1297         movups xmm7,[eax+496]
   1298         maxps  xmm0,[ebx+384]
   1299         maxps  xmm1,[ebx+400]
   1300         maxps  xmm2,[ebx+416]
   1301         maxps  xmm3,[ebx+432]
   1302         maxps  xmm4,[ebx+448]
   1303         maxps  xmm5,[ebx+464]
   1304         maxps  xmm6,[ebx+480]
   1305         maxps  xmm7,[ebx+496]
   1306         movaps [ecx+384],xmm0
   1307         movaps [ecx+400],xmm1
   1308         movaps [ecx+416],xmm2
   1309         movaps [ecx+432],xmm3
   1310         movaps [ecx+448],xmm4
   1311         movaps [ecx+464],xmm5
   1312         movaps [ecx+480],xmm6
   1313         movaps [ecx+496],xmm7
   1314     }
   1315 #else
   1316     int i;
   1317     float a,b;
   1318     for(i=0;i<128;i++)
   1319     {
   1320         a=v1[i];
   1321         b=v2[i];
   1322         if(a>=b) m[i]=a;
   1323         else m[i]=b;
   1324     }
   1325 #endif /*DB_USE_SIMD*/
   1326 }
   1327 
   1328 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
   1329 stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
   1330 The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
   1331 s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/
   1332 inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
   1333                                       /*temp should point to at least
   1334                                       6*132 floats of 16-byte-aligned allocated memory*/
   1335                                       float *temp)
   1336 {
   1337 #ifdef DB_USE_SIMD
   1338     int i,lm2;
   1339     float *two[4];
   1340     float *four,*five;
   1341 
   1342     lm2=left-2;
   1343 
   1344     /*Set pointers to pre-allocated memory*/
   1345     four=temp;
   1346     five=four+132;
   1347     for(i=0;i<4;i++)
   1348     {
   1349         two[i]=five+(i+1)*132;
   1350     }
   1351 
   1352     /*Set rests of four and five to zero to avoid
   1353     floating point exceptions*/
   1354     for(i=129;i<132;i++)
   1355     {
   1356         four[i]=0.0;
   1357         five[i]=0.0;
   1358     }
   1359 
   1360     /*Fill three rows of the wrap-around max buffers*/
   1361     for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
   1362 
   1363     /*For each output row*/
   1364     for(;i<=bottom;i++)
   1365     {
   1366         /*Compute max of the lowest pair of rows in the five row window*/
   1367         db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
   1368         /*Compute max of the lowest and highest pair of rows in the five row window*/
   1369         db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
   1370         /*Compute max of all rows*/
   1371         db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
   1372         /*Compute max of 2x5 chunks*/
   1373         db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
   1374         /*Compute max of pairs of 2x5 chunks*/
   1375         db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
   1376         /*Compute max of pairs of 5x5 except middle*/
   1377         db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
   1378     }
   1379 
   1380 #else
   1381     int i,j,right;
   1382     float sv;
   1383 
   1384     right=left+128;
   1385     for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
   1386     {
   1387         sv=s[i][j];
   1388 
   1389         if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
   1390             sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
   1391             sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
   1392             sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
   1393             sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
   1394         {
   1395             sf[i][j-2]=0.0;
   1396         }
   1397         else sf[i][j-2]=sv;
   1398     }
   1399 #endif /*DB_USE_SIMD*/
   1400 }
   1401 
   1402 /*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
   1403 stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
   1404 outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
   1405 Top must be at least 3. Reading and writing from and to the input and output images is done
   1406 as if the region had a width equal to a multiple of 124. If this is not the case, the images
   1407 should be over-allocated and the input cleared for a sufficient region*/
   1408 void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
   1409                                           /*temp should point to at least
   1410                                           6*132 floats of 16-byte-aligned allocated memory*/
   1411                                           float *temp)
   1412 {
   1413     int x,next_x;
   1414 
   1415     for(x=left;x<=right;x=next_x)
   1416     {
   1417         next_x=x+124;
   1418         db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
   1419     }
   1420 }
   1421 
   1422 /*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp
   1423 which should point to space of at least as many positions as there are pixels in the chunk*/
   1424 inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
   1425 {
   1426     int i,j,nr;
   1427     float s;
   1428 
   1429     nr=0;
   1430     for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
   1431     {
   1432         s=strength[i][j];
   1433 
   1434         if(s>=threshold &&
   1435             s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
   1436             s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
   1437             s>strength[  i][j-2] && s>strength[  i][j-1] &&                       s>strength[  i][j+1] && s>strength[  i][j+2] &&
   1438             s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
   1439             s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
   1440         {
   1441             x_temp[nr]=(double) j;
   1442             y_temp[nr]=(double) i;
   1443             s_temp[nr]=(double) s;
   1444             nr++;
   1445         }
   1446     }
   1447     return(nr);
   1448 }
   1449 
   1450 
   1451 //Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
   1452 inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
   1453 {
   1454     int x = (int) xd;
   1455     int y = (int) yd;
   1456 
   1457     float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
   1458     float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
   1459     float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
   1460 
   1461     float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
   1462 
   1463     xs = xd;
   1464     ys = yd;
   1465 
   1466     if ( db_absf(denom) <= FLT_EPSILON )
   1467     {
   1468         return;
   1469     }
   1470     else
   1471     {
   1472         float fx = strength[y][x+1] - strength[y][x-1];
   1473         float fy = strength[y+1][x] - strength[y-1][x];
   1474 
   1475         float dx = (fyy * fx - fxy * fy) / denom;
   1476         float dy = (fxx * fy - fxy * fx) / denom;
   1477 
   1478         if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
   1479         {
   1480             return;
   1481         }
   1482         else
   1483         {
   1484             xs -= dx;
   1485             ys -= dy;
   1486         }
   1487     }
   1488 
   1489     return;
   1490 }
   1491 
   1492 /*Extract corners from the image part from (left,top) to (right,bottom).
   1493 Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
   1494 The pointer temp_d should point to at least 5*bw*bh positions.
   1495 area_factor holds how many corners max to extract per 10000 pixels*/
   1496 void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
   1497                                 int bw,int bh,unsigned long area_factor,
   1498                                 float threshold,double *temp_d,
   1499                                 double *x_coord,double *y_coord,int *nr_corners)
   1500 {
   1501     double *x_temp,*y_temp,*s_temp,*select_temp;
   1502     double loc_thresh;
   1503     unsigned long bwbh,area,saturation;
   1504     int x,next_x,last_x;
   1505     int y,next_y,last_y;
   1506     int nr,nr_points,i,stop;
   1507 
   1508     bwbh=bw*bh;
   1509     x_temp=temp_d;
   1510     y_temp=x_temp+bwbh;
   1511     s_temp=y_temp+bwbh;
   1512     select_temp=s_temp+bwbh;
   1513 
   1514 #ifdef DB_SUB_PIXEL
   1515     // subpixel processing may sometimes push the corner ourside the real border
   1516     // increasing border size:
   1517     left++;
   1518     top++;
   1519     bottom--;
   1520     right--;
   1521 #endif /*DB_SUB_PIXEL*/
   1522 
   1523     nr_points=0;
   1524     for(y=top;y<=bottom;y=next_y)
   1525     {
   1526         next_y=y+bh;
   1527         last_y=next_y-1;
   1528         if(last_y>bottom) last_y=bottom;
   1529         for(x=left;x<=right;x=next_x)
   1530         {
   1531             next_x=x+bw;
   1532             last_x=next_x-1;
   1533             if(last_x>right) last_x=right;
   1534 
   1535             area=(last_x-x+1)*(last_y-y+1);
   1536             saturation=(area*area_factor)/10000;
   1537             nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
   1538             if(nr)
   1539             {
   1540                 if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
   1541                 else loc_thresh=threshold;
   1542 
   1543                 stop=nr_points+saturation;
   1544                 for(i=0;(i<nr)&&(nr_points<stop);i++)
   1545                 {
   1546                     if(s_temp[i]>=loc_thresh)
   1547                     {
   1548                         #ifdef DB_SUB_PIXEL
   1549                                db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
   1550                         #else
   1551                                x_coord[nr_points]=x_temp[i];
   1552                                y_coord[nr_points]=y_temp[i];
   1553                         #endif
   1554 
   1555                         nr_points++;
   1556                     }
   1557                 }
   1558             }
   1559         }
   1560     }
   1561     *nr_corners=nr_points;
   1562 }
   1563 
   1564 db_CornerDetector_f::db_CornerDetector_f()
   1565 {
   1566     m_w=0; m_h=0;
   1567 }
   1568 
   1569 db_CornerDetector_f::~db_CornerDetector_f()
   1570 {
   1571     Clean();
   1572 }
   1573 
   1574 void db_CornerDetector_f::Clean()
   1575 {
   1576     if(m_w!=0)
   1577     {
   1578         delete [] m_temp_f;
   1579         delete [] m_temp_d;
   1580         db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
   1581     }
   1582     m_w=0; m_h=0;
   1583 }
   1584 
   1585 unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
   1586                             int nr_horizontal_blocks,int nr_vertical_blocks,
   1587                             double absolute_threshold,double relative_threshold)
   1588 {
   1589     int chunkwidth=208;
   1590     int block_width,block_height;
   1591     unsigned long area_factor;
   1592     int active_width,active_height;
   1593 
   1594     active_width=db_maxi(1,im_width-10);
   1595     active_height=db_maxi(1,im_height-10);
   1596     block_width=db_maxi(1,active_width/nr_horizontal_blocks);
   1597     block_height=db_maxi(1,active_height/nr_vertical_blocks);
   1598 
   1599     area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
   1600         (((double)active_width)*((double)active_height)))));
   1601 
   1602     return(Start(im_width,im_height,block_width,block_height,area_factor,
   1603         absolute_threshold,relative_threshold,chunkwidth));
   1604 }
   1605 
   1606 unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
   1607                              int block_width,int block_height,unsigned long area_factor,
   1608                              double absolute_threshold,double relative_threshold,int chunkwidth)
   1609 {
   1610     Clean();
   1611 
   1612     m_w=im_width;
   1613     m_h=im_height;
   1614     m_cw=chunkwidth;
   1615     m_bw=block_width;
   1616     m_bh=block_height;
   1617     m_area_factor=area_factor;
   1618     m_r_thresh=relative_threshold;
   1619     m_a_thresh=absolute_threshold;
   1620     m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
   1621 
   1622     m_temp_f=new float[13*(m_cw+4)];
   1623     m_temp_d=new double[5*m_bw*m_bh];
   1624     m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
   1625 
   1626     return(m_max_nr);
   1627 }
   1628 
   1629 void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
   1630 {
   1631     float max_val,threshold;
   1632 
   1633     db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
   1634 
   1635     if(m_r_thresh)
   1636     {
   1637         max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
   1638         threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
   1639     }
   1640     else threshold= (float) m_a_thresh;
   1641 
   1642     db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
   1643         m_temp_d,x_coord,y_coord,nr_corners);
   1644 }
   1645 
   1646 db_CornerDetector_u::db_CornerDetector_u()
   1647 {
   1648     m_w=0; m_h=0;
   1649 }
   1650 
   1651 db_CornerDetector_u::~db_CornerDetector_u()
   1652 {
   1653     Clean();
   1654 }
   1655 
   1656 db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
   1657 {
   1658     Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
   1659         cd.m_a_thresh, cd.m_r_thresh);
   1660 }
   1661 
   1662 db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
   1663 {
   1664     if ( this == &cd ) return *this;
   1665 
   1666     Clean();
   1667 
   1668     Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
   1669         cd.m_a_thresh, cd.m_r_thresh);
   1670 
   1671     return *this;
   1672 }
   1673 
   1674 void db_CornerDetector_u::Clean()
   1675 {
   1676     if(m_w!=0)
   1677     {
   1678         delete [] m_temp_i;
   1679         delete [] m_temp_d;
   1680         db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
   1681     }
   1682     m_w=0; m_h=0;
   1683 }
   1684 
   1685 unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
   1686                             int nr_horizontal_blocks,int nr_vertical_blocks,
   1687                             double absolute_threshold,double relative_threshold)
   1688 {
   1689     int block_width,block_height;
   1690     unsigned long area_factor;
   1691     int active_width,active_height;
   1692 
   1693     active_width=db_maxi(1,im_width-10);
   1694     active_height=db_maxi(1,im_height-10);
   1695     block_width=db_maxi(1,active_width/nr_horizontal_blocks);
   1696     block_height=db_maxi(1,active_height/nr_vertical_blocks);
   1697 
   1698     area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
   1699         (((double)active_width)*((double)active_height)))));
   1700 
   1701     return(Start(im_width,im_height,block_width,block_height,area_factor,
   1702         16.0*absolute_threshold,relative_threshold));
   1703 }
   1704 
   1705 unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
   1706                              int block_width,int block_height,unsigned long area_factor,
   1707                              double absolute_threshold,double relative_threshold)
   1708 {
   1709     Clean();
   1710 
   1711     m_w=im_width;
   1712     m_h=im_height;
   1713     m_bw=block_width;
   1714     m_bh=block_height;
   1715     m_area_factor=area_factor;
   1716     m_r_thresh=relative_threshold;
   1717     m_a_thresh=absolute_threshold;
   1718     m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
   1719 
   1720     m_temp_i=new int[18*128];
   1721     m_temp_d=new double[5*m_bw*m_bh];
   1722     m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
   1723 
   1724     return(m_max_nr);
   1725 }
   1726 
   1727 void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
   1728                                         const unsigned char * const *msk, unsigned char fgnd) const
   1729 {
   1730     float max_val,threshold;
   1731 
   1732     db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
   1733 
   1734 
   1735     if(m_r_thresh)
   1736     {
   1737         max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
   1738         threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
   1739     }
   1740     else threshold= (float) m_a_thresh;
   1741 
   1742     db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
   1743         m_temp_d,x_coord,y_coord,nr_corners);
   1744 
   1745 
   1746     if ( msk )
   1747     {
   1748         int nr_corners_mask=0;
   1749 
   1750         for ( int i = 0; i < *nr_corners; ++i)
   1751         {
   1752             int cor_x = db_roundi(*(x_coord+i));
   1753             int cor_y = db_roundi(*(y_coord+i));
   1754             if ( msk[cor_y][cor_x] == fgnd )
   1755             {
   1756                 x_coord[nr_corners_mask] = x_coord[i];
   1757                 y_coord[nr_corners_mask] = y_coord[i];
   1758                 nr_corners_mask++;
   1759             }
   1760         }
   1761         *nr_corners = nr_corners_mask;
   1762     }
   1763 }
   1764 
   1765 void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
   1766     if ( m_w!=0 )
   1767         db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
   1768             m_temp_d,x_coord,y_coord,nr_corners);
   1769 }
   1770 
   1771