Home | History | Annotate | Download | only in db_vlvm
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*$Id: db_feature_matching.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
     18 
     19 /*****************************************************************
     20 *    Lean and mean begins here                                   *
     21 *****************************************************************/
     22 
     23 #include "db_utilities.h"
     24 #include "db_feature_matching.h"
     25 #ifdef _VERBOSE_
     26 #include <iostream>
     27 #endif
     28 
     29 
     30 int AffineWarpPoint_NN_LUT_x[11][11];
     31 int AffineWarpPoint_NN_LUT_y[11][11];
     32 
     33 float AffineWarpPoint_BL_LUT_x[11][11];
     34 float AffineWarpPoint_BL_LUT_y[11][11];
     35 
     36 
     37 inline float db_SignedSquareNormCorr7x7_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
     38 {
     39     unsigned char *pf,*pg;
     40     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
     41     int xm_f,xm_g;
     42 
     43     xm_f=x_f-3;
     44     xm_g=x_g-3;
     45     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
     46 
     47     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
     48     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     49     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     50     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     51     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     52     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     53     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     54     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     55 
     56     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
     57     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     58     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     59     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     60     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     61     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     62     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     63     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     64 
     65     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
     66     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     67     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     68     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     69     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     70     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     71     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     72     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     73 
     74     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
     75     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     76     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     77     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     78     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     79     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     80     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     81     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     82 
     83     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
     84     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     85     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     86     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     87     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     88     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     89     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     90     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     91 
     92     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
     93     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     94     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     95     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     96     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     97     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     98     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
     99     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    100 
    101     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
    102     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    103     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    104     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    105     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    106     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    107     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    108     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    109 
    110     fg_corr=49.0f*fgsum-fsum*gsum;
    111     den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
    112     if(den!=0.0)
    113     {
    114         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
    115         return(-fg_corr*fg_corr/den);
    116     }
    117     return(0.0);
    118 }
    119 
    120 inline float db_SignedSquareNormCorr9x9_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
    121 {
    122     unsigned char *pf,*pg;
    123     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    124     int xm_f,xm_g;
    125 
    126     xm_f=x_f-4;
    127     xm_g=x_g-4;
    128     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
    129 
    130     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
    131     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    132     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    133     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    134     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    135     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    136     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    137     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    138     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    139     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    140 
    141     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
    142     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    143     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    144     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    145     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    146     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    147     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    148     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    149     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    150     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    151 
    152     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
    153     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    154     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    155     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    156     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    157     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    158     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    159     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    160     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    161     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    162 
    163     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
    164     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    165     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    166     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    167     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    168     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    169     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    170     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    171     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    172     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    173 
    174     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
    175     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    176     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    177     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    178     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    179     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    180     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    181     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    182     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    183     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    184 
    185     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
    186     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    187     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    188     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    189     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    190     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    191     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    192     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    193     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    194     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    195 
    196     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
    197     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    198     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    199     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    200     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    201     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    202     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    203     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    204     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    205     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    206 
    207     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
    208     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    209     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    210     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    211     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    212     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    213     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    214     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    215     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    216     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    217 
    218     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
    219     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    220     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    221     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    222     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    223     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    224     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    225     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    226     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    227     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    228 
    229     fg_corr=81.0f*fgsum-fsum*gsum;
    230     den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
    231     if(den!=0.0)
    232     {
    233         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
    234         return(-fg_corr*fg_corr/den);
    235     }
    236     return(0.0);
    237 }
    238 
    239 inline float db_SignedSquareNormCorr11x11_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
    240 {
    241     unsigned char *pf,*pg;
    242     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
    243     int xm_f,xm_g;
    244 
    245     xm_f=x_f-5;
    246     xm_g=x_g-5;
    247     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
    248 
    249     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
    250     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    251     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    252     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    253     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    254     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    255     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    256     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    257     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    258     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    259     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    260     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    261 
    262     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
    263     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    264     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    265     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    266     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    267     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    268     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    269     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    270     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    271     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    272     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    273     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    274 
    275     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
    276     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    277     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    278     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    279     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    280     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    281     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    282     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    283     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    284     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    285     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    286     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    287 
    288     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
    289     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    290     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    291     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    292     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    293     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    294     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    295     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    296     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    297     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    298     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    299     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    300 
    301     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
    302     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    303     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    304     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    305     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    306     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    307     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    308     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    309     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    310     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    311     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    312     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    313 
    314     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
    315     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    316     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    317     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    318     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    319     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    320     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    321     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    322     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    323     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    324     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    325     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    326 
    327     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
    328     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    329     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    330     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    331     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    332     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    333     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    334     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    335     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    336     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    337     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    338     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    339 
    340     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
    341     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    342     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    343     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    344     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    345     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    346     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    347     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    348     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    349     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    350     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    351     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    352 
    353     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
    354     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    355     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    356     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    357     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    358     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    359     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    360     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    361     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    362     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    363     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    364     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    365 
    366     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
    367     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    368     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    369     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    370     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    371     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    372     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    373     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    374     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    375     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    376     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    377     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    378 
    379     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
    380     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    381     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    382     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    383     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    384     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    385     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    386     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    387     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    388     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    389     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    390     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
    391 
    392     fg_corr=121.0f*fgsum-fsum*gsum;
    393     den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
    394     if(den!=0.0)
    395     {
    396         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
    397         return(-fg_corr*fg_corr/den);
    398     }
    399     return(0.0);
    400 }
    401 
    402 inline void db_SignedSquareNormCorr11x11_Pre_u(unsigned char **f_img,int x_f,int y_f,float *sum,float *recip)
    403 {
    404     unsigned char *pf;
    405     float den;
    406     int f,f2sum,fsum;
    407     int xm_f;
    408 
    409     xm_f=x_f-5;
    410 
    411     pf=f_img[y_f-5]+xm_f;
    412     f= *pf++; f2sum=f*f;  fsum=f;
    413     f= *pf++; f2sum+=f*f; fsum+=f;
    414     f= *pf++; f2sum+=f*f; fsum+=f;
    415     f= *pf++; f2sum+=f*f; fsum+=f;
    416     f= *pf++; f2sum+=f*f; fsum+=f;
    417     f= *pf++; f2sum+=f*f; fsum+=f;
    418     f= *pf++; f2sum+=f*f; fsum+=f;
    419     f= *pf++; f2sum+=f*f; fsum+=f;
    420     f= *pf++; f2sum+=f*f; fsum+=f;
    421     f= *pf++; f2sum+=f*f; fsum+=f;
    422     f= *pf;   f2sum+=f*f; fsum+=f;
    423 
    424     pf=f_img[y_f-4]+xm_f;
    425     f= *pf++; f2sum+=f*f; fsum+=f;
    426     f= *pf++; f2sum+=f*f; fsum+=f;
    427     f= *pf++; f2sum+=f*f; fsum+=f;
    428     f= *pf++; f2sum+=f*f; fsum+=f;
    429     f= *pf++; f2sum+=f*f; fsum+=f;
    430     f= *pf++; f2sum+=f*f; fsum+=f;
    431     f= *pf++; f2sum+=f*f; fsum+=f;
    432     f= *pf++; f2sum+=f*f; fsum+=f;
    433     f= *pf++; f2sum+=f*f; fsum+=f;
    434     f= *pf++; f2sum+=f*f; fsum+=f;
    435     f= *pf;   f2sum+=f*f; fsum+=f;
    436 
    437     pf=f_img[y_f-3]+xm_f;
    438     f= *pf++; f2sum+=f*f; fsum+=f;
    439     f= *pf++; f2sum+=f*f; fsum+=f;
    440     f= *pf++; f2sum+=f*f; fsum+=f;
    441     f= *pf++; f2sum+=f*f; fsum+=f;
    442     f= *pf++; f2sum+=f*f; fsum+=f;
    443     f= *pf++; f2sum+=f*f; fsum+=f;
    444     f= *pf++; f2sum+=f*f; fsum+=f;
    445     f= *pf++; f2sum+=f*f; fsum+=f;
    446     f= *pf++; f2sum+=f*f; fsum+=f;
    447     f= *pf++; f2sum+=f*f; fsum+=f;
    448     f= *pf;   f2sum+=f*f; fsum+=f;
    449 
    450     pf=f_img[y_f-2]+xm_f;
    451     f= *pf++; f2sum+=f*f; fsum+=f;
    452     f= *pf++; f2sum+=f*f; fsum+=f;
    453     f= *pf++; f2sum+=f*f; fsum+=f;
    454     f= *pf++; f2sum+=f*f; fsum+=f;
    455     f= *pf++; f2sum+=f*f; fsum+=f;
    456     f= *pf++; f2sum+=f*f; fsum+=f;
    457     f= *pf++; f2sum+=f*f; fsum+=f;
    458     f= *pf++; f2sum+=f*f; fsum+=f;
    459     f= *pf++; f2sum+=f*f; fsum+=f;
    460     f= *pf++; f2sum+=f*f; fsum+=f;
    461     f= *pf;   f2sum+=f*f; fsum+=f;
    462 
    463     pf=f_img[y_f-1]+xm_f;
    464     f= *pf++; f2sum+=f*f; fsum+=f;
    465     f= *pf++; f2sum+=f*f; fsum+=f;
    466     f= *pf++; f2sum+=f*f; fsum+=f;
    467     f= *pf++; f2sum+=f*f; fsum+=f;
    468     f= *pf++; f2sum+=f*f; fsum+=f;
    469     f= *pf++; f2sum+=f*f; fsum+=f;
    470     f= *pf++; f2sum+=f*f; fsum+=f;
    471     f= *pf++; f2sum+=f*f; fsum+=f;
    472     f= *pf++; f2sum+=f*f; fsum+=f;
    473     f= *pf++; f2sum+=f*f; fsum+=f;
    474     f= *pf;   f2sum+=f*f; fsum+=f;
    475 
    476     pf=f_img[y_f]+xm_f;
    477     f= *pf++; f2sum+=f*f; fsum+=f;
    478     f= *pf++; f2sum+=f*f; fsum+=f;
    479     f= *pf++; f2sum+=f*f; fsum+=f;
    480     f= *pf++; f2sum+=f*f; fsum+=f;
    481     f= *pf++; f2sum+=f*f; fsum+=f;
    482     f= *pf++; f2sum+=f*f; fsum+=f;
    483     f= *pf++; f2sum+=f*f; fsum+=f;
    484     f= *pf++; f2sum+=f*f; fsum+=f;
    485     f= *pf++; f2sum+=f*f; fsum+=f;
    486     f= *pf++; f2sum+=f*f; fsum+=f;
    487     f= *pf;   f2sum+=f*f; fsum+=f;
    488 
    489     pf=f_img[y_f+1]+xm_f;
    490     f= *pf++; f2sum+=f*f; fsum+=f;
    491     f= *pf++; f2sum+=f*f; fsum+=f;
    492     f= *pf++; f2sum+=f*f; fsum+=f;
    493     f= *pf++; f2sum+=f*f; fsum+=f;
    494     f= *pf++; f2sum+=f*f; fsum+=f;
    495     f= *pf++; f2sum+=f*f; fsum+=f;
    496     f= *pf++; f2sum+=f*f; fsum+=f;
    497     f= *pf++; f2sum+=f*f; fsum+=f;
    498     f= *pf++; f2sum+=f*f; fsum+=f;
    499     f= *pf++; f2sum+=f*f; fsum+=f;
    500     f= *pf;   f2sum+=f*f; fsum+=f;
    501 
    502     pf=f_img[y_f+2]+xm_f;
    503     f= *pf++; f2sum+=f*f; fsum+=f;
    504     f= *pf++; f2sum+=f*f; fsum+=f;
    505     f= *pf++; f2sum+=f*f; fsum+=f;
    506     f= *pf++; f2sum+=f*f; fsum+=f;
    507     f= *pf++; f2sum+=f*f; fsum+=f;
    508     f= *pf++; f2sum+=f*f; fsum+=f;
    509     f= *pf++; f2sum+=f*f; fsum+=f;
    510     f= *pf++; f2sum+=f*f; fsum+=f;
    511     f= *pf++; f2sum+=f*f; fsum+=f;
    512     f= *pf++; f2sum+=f*f; fsum+=f;
    513     f= *pf;   f2sum+=f*f; fsum+=f;
    514 
    515     pf=f_img[y_f+3]+xm_f;
    516     f= *pf++; f2sum+=f*f; fsum+=f;
    517     f= *pf++; f2sum+=f*f; fsum+=f;
    518     f= *pf++; f2sum+=f*f; fsum+=f;
    519     f= *pf++; f2sum+=f*f; fsum+=f;
    520     f= *pf++; f2sum+=f*f; fsum+=f;
    521     f= *pf++; f2sum+=f*f; fsum+=f;
    522     f= *pf++; f2sum+=f*f; fsum+=f;
    523     f= *pf++; f2sum+=f*f; fsum+=f;
    524     f= *pf++; f2sum+=f*f; fsum+=f;
    525     f= *pf++; f2sum+=f*f; fsum+=f;
    526     f= *pf;   f2sum+=f*f; fsum+=f;
    527 
    528     pf=f_img[y_f+4]+xm_f;
    529     f= *pf++; f2sum+=f*f; fsum+=f;
    530     f= *pf++; f2sum+=f*f; fsum+=f;
    531     f= *pf++; f2sum+=f*f; fsum+=f;
    532     f= *pf++; f2sum+=f*f; fsum+=f;
    533     f= *pf++; f2sum+=f*f; fsum+=f;
    534     f= *pf++; f2sum+=f*f; fsum+=f;
    535     f= *pf++; f2sum+=f*f; fsum+=f;
    536     f= *pf++; f2sum+=f*f; fsum+=f;
    537     f= *pf++; f2sum+=f*f; fsum+=f;
    538     f= *pf++; f2sum+=f*f; fsum+=f;
    539     f= *pf;   f2sum+=f*f; fsum+=f;
    540 
    541     pf=f_img[y_f+5]+xm_f;
    542     f= *pf++; f2sum+=f*f; fsum+=f;
    543     f= *pf++; f2sum+=f*f; fsum+=f;
    544     f= *pf++; f2sum+=f*f; fsum+=f;
    545     f= *pf++; f2sum+=f*f; fsum+=f;
    546     f= *pf++; f2sum+=f*f; fsum+=f;
    547     f= *pf++; f2sum+=f*f; fsum+=f;
    548     f= *pf++; f2sum+=f*f; fsum+=f;
    549     f= *pf++; f2sum+=f*f; fsum+=f;
    550     f= *pf++; f2sum+=f*f; fsum+=f;
    551     f= *pf++; f2sum+=f*f; fsum+=f;
    552     f= *pf;   f2sum+=f*f; fsum+=f;
    553 
    554     *sum= (float) fsum;
    555     den=(121.0f*f2sum-fsum*fsum);
    556     *recip=(float)(((den!=0.0)?1.0/den:0.0));
    557 }
    558 
    559 inline void db_SignedSquareNormCorr5x5_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
    560 {
    561     float den;
    562     int f2sum,fsum;
    563     int xm_f=x_f-2;
    564 
    565 #ifndef DB_USE_SSE2
    566     const unsigned char *pf;
    567     short f;
    568 
    569     pf=f_img[y_f-2]+xm_f;
    570     f= *pf++; f2sum=f*f; fsum=f; (*patch++)=f;
    571     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    572     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    573     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    574     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    575 
    576     pf=f_img[y_f-1]+xm_f;
    577     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    578     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    579     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    580     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    581     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    582 
    583     pf=f_img[y_f]+xm_f;
    584     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    585     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    586     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    587     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    588     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    589 
    590     pf=f_img[y_f+1]+xm_f;
    591     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    592     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    593     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    594     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    595     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    596 
    597     pf=f_img[y_f+2]+xm_f;
    598     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    599     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    600     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    601     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    602     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    603     //int xwi;
    604     //int ywi;
    605     //f2sum=0;
    606     //fsum=0;
    607     //for (int r=-5;r<=5;r++){
    608     //  ywi=y_f+r;
    609     //  for (int c=-5;c<=5;c++){
    610     //      xwi=x_f+c;
    611     //      f=f_img[ywi][xwi];
    612     //      f2sum+=f*f;
    613     //      fsum+=f;
    614     //      (*patch++)=f;
    615     //  }
    616     //}
    617     (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
    618     (*patch++)=0; (*patch++)=0;
    619 #endif /* DB_USE_SSE2 */
    620 
    621     *sum= (float) fsum;
    622     den=(25.0f*f2sum-fsum*fsum);
    623     *recip= (float)((den!=0.0)?1.0/den:0.0);
    624 }
    625 
    626 inline void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
    627 {
    628     float den;
    629     int f2sum,fsum;
    630     int xm_f=x_f-10;
    631     short f;
    632 
    633     int xwi;
    634     int ywi;
    635     f2sum=0;
    636     fsum=0;
    637     for (int r=-10;r<=10;r++){
    638         ywi=y_f+r;
    639         for (int c=-10;c<=10;c++){
    640             xwi=x_f+c;
    641             f=f_img[ywi][xwi];
    642             f2sum+=f*f;
    643             fsum+=f;
    644             (*patch++)=f;
    645         }
    646     }
    647 
    648     for(int i=442; i<512; i++)
    649         (*patch++)=0;
    650 
    651     *sum= (float) fsum;
    652     den=(441.0f*f2sum-fsum*fsum);
    653     *recip= (float)((den!=0.0)?1.0/den:0.0);
    654 
    655 
    656 }
    657 
    658 /* Lay out the image in the patch, computing norm and
    659 */
    660 inline void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
    661 {
    662     float den;
    663     int f2sum,fsum;
    664     int xm_f=x_f-5;
    665 
    666 #ifndef DB_USE_SSE2
    667     const unsigned char *pf;
    668     short f;
    669 
    670     pf=f_img[y_f-5]+xm_f;
    671     f= *pf++; f2sum=f*f;  fsum=f;  (*patch++)=f;
    672     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    673     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    674     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    675     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    676     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    677     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    678     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    679     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    680     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    681     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    682 
    683     pf=f_img[y_f-4]+xm_f;
    684     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    685     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    686     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    687     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    688     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    689     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    690     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    691     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    692     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    693     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    694     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    695 
    696     pf=f_img[y_f-3]+xm_f;
    697     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    698     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    699     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    700     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    701     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    702     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    703     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    704     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    705     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    706     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    707     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    708 
    709     pf=f_img[y_f-2]+xm_f;
    710     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    711     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    712     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    713     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    714     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    715     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    716     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    717     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    718     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    719     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    720     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    721 
    722     pf=f_img[y_f-1]+xm_f;
    723     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    724     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    725     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    726     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    727     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    728     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    729     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    730     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    731     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    732     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    733     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    734 
    735     pf=f_img[y_f]+xm_f;
    736     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    737     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    738     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    739     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    740     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    741     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    742     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    743     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    744     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    745     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    746     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    747 
    748     pf=f_img[y_f+1]+xm_f;
    749     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    750     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    751     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    752     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    753     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    754     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    755     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    756     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    757     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    758     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    759     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    760 
    761     pf=f_img[y_f+2]+xm_f;
    762     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    763     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    764     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    765     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    766     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    767     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    768     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    769     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    770     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    771     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    772     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    773 
    774     pf=f_img[y_f+3]+xm_f;
    775     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    776     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    777     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    778     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    779     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    780     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    781     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    782     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    783     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    784     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    785     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    786 
    787     pf=f_img[y_f+4]+xm_f;
    788     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    789     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    790     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    791     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    792     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    793     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    794     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    795     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    796     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    797     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    798     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    799 
    800     pf=f_img[y_f+5]+xm_f;
    801     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    802     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    803     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    804     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    805     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    806     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    807     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    808     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    809     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    810     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
    811     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
    812 
    813     //int xwi;
    814     //int ywi;
    815     //f2sum=0;
    816     //fsum=0;
    817     //for (int r=-5;r<=5;r++){
    818     //  ywi=y_f+r;
    819     //  for (int c=-5;c<=5;c++){
    820     //      xwi=x_f+c;
    821     //      f=f_img[ywi][xwi];
    822     //      f2sum+=f*f;
    823     //      fsum+=f;
    824     //      (*patch++)=f;
    825     //  }
    826     //}
    827 
    828     (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
    829     (*patch++)=0; (*patch++)=0;
    830 #else
    831     const unsigned char *pf0 =f_img[y_f-5]+xm_f;
    832     const unsigned char *pf1 =f_img[y_f-4]+xm_f;
    833     const unsigned char *pf2 =f_img[y_f-3]+xm_f;
    834     const unsigned char *pf3 =f_img[y_f-2]+xm_f;
    835     const unsigned char *pf4 =f_img[y_f-1]+xm_f;
    836     const unsigned char *pf5 =f_img[y_f  ]+xm_f;
    837     const unsigned char *pf6 =f_img[y_f+1]+xm_f;
    838     const unsigned char *pf7 =f_img[y_f+2]+xm_f;
    839     const unsigned char *pf8 =f_img[y_f+3]+xm_f;
    840     const unsigned char *pf9 =f_img[y_f+4]+xm_f;
    841     const unsigned char *pf10=f_img[y_f+5]+xm_f;
    842 
    843     /* pixel mask */
    844     const unsigned char pm[16] = {
    845         0xFF,0xFF,
    846         0xFF,0xFF,
    847         0xFF,0xFF,
    848         0,0,0,0,0,
    849         0,0,0,0,0};
    850     const unsigned char * pm_p = pm;
    851 
    852     _asm
    853     {
    854         mov         ecx,patch   /* load patch pointer */
    855         mov         ebx, pm_p   /* load pixel mask pointer */
    856         movdqu      xmm1,[ebx]  /* load pixel mask */
    857 
    858         pxor        xmm5,xmm5   /* set xmm5 to 0 accumulator for sum squares */
    859         pxor        xmm4,xmm4   /* set xmm4 to 0 accumulator for sum */
    860         pxor        xmm0,xmm0   /* set xmm0 to 0 */
    861 
    862         /* row 0 */
    863         mov         eax,pf0     /* load image pointer */
    864         movdqu      xmm7,[eax]  /* load 16 pixels */
    865         movdqa      xmm6,xmm7
    866 
    867         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    868         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    869 
    870         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    871 
    872         movdqa      [ecx+0*22],xmm7 /* move short values to patch */
    873         movdqa      [ecx+0*22+16],xmm6  /* move short values to patch */
    874 
    875         paddusw     xmm4,xmm7   /* accumulate sums */
    876         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    877         paddd       xmm5,xmm7   /* accumulate sum squares */
    878 
    879         paddw       xmm4,xmm6   /* accumulate sums */
    880         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    881         paddd       xmm5,xmm6   /* accumulate sum squares */
    882 
    883         /* row 1 */
    884         mov         eax,pf1     /* load image pointer */
    885         movdqu      xmm7,[eax]  /* load 16 pixels */
    886         movdqa      xmm6,xmm7
    887 
    888         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    889         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    890 
    891         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    892 
    893         movdqu      [ecx+1*22],xmm7 /* move short values to patch */
    894         movdqu      [ecx+1*22+16],xmm6  /* move short values to patch */
    895 
    896         paddusw     xmm4,xmm7   /* accumulate sums */
    897         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    898         paddd       xmm5,xmm7   /* accumulate sum squares */
    899 
    900         paddw       xmm4,xmm6   /* accumulate sums */
    901         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    902         paddd       xmm5,xmm6   /* accumulate sum squares */
    903 
    904         /* row 2 */
    905         mov         eax,pf2     /* load image pointer */
    906         movdqu      xmm7,[eax]  /* load 16 pixels */
    907         movdqa      xmm6,xmm7
    908 
    909         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    910         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    911 
    912         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    913 
    914         movdqu      [ecx+2*22],xmm7 /* move short values to patch */
    915         movdqu      [ecx+2*22+16],xmm6  /* move short values to patch */
    916 
    917         paddusw     xmm4,xmm7   /* accumulate sums */
    918         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    919         paddd       xmm5,xmm7   /* accumulate sum squares */
    920 
    921         paddw       xmm4,xmm6   /* accumulate sums */
    922         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    923         paddd       xmm5,xmm6   /* accumulate sum squares */
    924 
    925         /* row 3 */
    926         mov         eax,pf3     /* load image pointer */
    927         movdqu      xmm7,[eax]  /* load 16 pixels */
    928         movdqa      xmm6,xmm7
    929 
    930         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    931         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    932 
    933         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    934 
    935         movdqu      [ecx+3*22],xmm7 /* move short values to patch */
    936         movdqu      [ecx+3*22+16],xmm6  /* move short values to patch */
    937 
    938         paddusw     xmm4,xmm7   /* accumulate sums */
    939         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    940         paddd       xmm5,xmm7   /* accumulate sum squares */
    941 
    942         paddw       xmm4,xmm6   /* accumulate sums */
    943         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    944         paddd       xmm5,xmm6   /* accumulate sum squares */
    945 
    946         /* row 4 */
    947         mov         eax,pf4     /* load image pointer */
    948         movdqu      xmm7,[eax]  /* load 16 pixels */
    949         movdqa      xmm6,xmm7
    950 
    951         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    952         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    953 
    954         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    955 
    956         movdqu      [ecx+4*22],xmm7 /* move short values to patch */
    957         movdqu      [ecx+4*22+16],xmm6  /* move short values to patch */
    958 
    959         paddusw     xmm4,xmm7   /* accumulate sums */
    960         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    961         paddd       xmm5,xmm7   /* accumulate sum squares */
    962 
    963         paddw       xmm4,xmm6   /* accumulate sums */
    964         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    965         paddd       xmm5,xmm6   /* accumulate sum squares */
    966 
    967         /* row 5 */
    968         mov         eax,pf5     /* load image pointer */
    969         movdqu      xmm7,[eax]  /* load 16 pixels */
    970         movdqa      xmm6,xmm7
    971 
    972         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    973         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    974 
    975         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    976 
    977         movdqu      [ecx+5*22],xmm7 /* move short values to patch */
    978         movdqu      [ecx+5*22+16],xmm6  /* move short values to patch */
    979 
    980         paddusw     xmm4,xmm7   /* accumulate sums */
    981         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
    982         paddd       xmm5,xmm7   /* accumulate sum squares */
    983 
    984         paddw       xmm4,xmm6   /* accumulate sums */
    985         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
    986         paddd       xmm5,xmm6   /* accumulate sum squares */
    987 
    988         /* row 6 */
    989         mov         eax,pf6     /* load image pointer */
    990         movdqu      xmm7,[eax]  /* load 16 pixels */
    991         movdqa      xmm6,xmm7
    992 
    993         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
    994         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
    995 
    996         pand        xmm6,xmm1   /* mask out pixels 12-16 */
    997 
    998         movdqu      [ecx+6*22],xmm7 /* move short values to patch */
    999         movdqu      [ecx+6*22+16],xmm6  /* move short values to patch */
   1000 
   1001         paddusw     xmm4,xmm7   /* accumulate sums */
   1002         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
   1003         paddd       xmm5,xmm7   /* accumulate sum squares */
   1004 
   1005         paddw       xmm4,xmm6   /* accumulate sums */
   1006         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
   1007         paddd       xmm5,xmm6   /* accumulate sum squares */
   1008 
   1009         /* row 7 */
   1010         mov         eax,pf7     /* load image pointer */
   1011         movdqu      xmm7,[eax]  /* load 16 pixels */
   1012         movdqa      xmm6,xmm7
   1013 
   1014         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
   1015         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
   1016 
   1017         pand        xmm6,xmm1   /* mask out pixels 12-16 */
   1018 
   1019         movdqu      [ecx+7*22],xmm7 /* move short values to patch */
   1020         movdqu      [ecx+7*22+16],xmm6  /* move short values to patch */
   1021 
   1022         paddusw     xmm4,xmm7   /* accumulate sums */
   1023         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
   1024         paddd       xmm5,xmm7   /* accumulate sum squares */
   1025 
   1026         paddw       xmm4,xmm6   /* accumulate sums */
   1027         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
   1028         paddd       xmm5,xmm6   /* accumulate sum squares */
   1029 
   1030         /* row 8 */
   1031         mov         eax,pf8     /* load image pointer */
   1032         movdqu      xmm7,[eax]  /* load 16 pixels */
   1033         movdqa      xmm6,xmm7
   1034 
   1035         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
   1036         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
   1037 
   1038         pand        xmm6,xmm1   /* mask out pixels 12-16 */
   1039 
   1040         movdqa      [ecx+8*22],xmm7 /* move short values to patch */
   1041         movdqa      [ecx+8*22+16],xmm6  /* move short values to patch */
   1042 
   1043         paddusw     xmm4,xmm7   /* accumulate sums */
   1044         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
   1045         paddd       xmm5,xmm7   /* accumulate sum squares */
   1046 
   1047         paddw       xmm4,xmm6   /* accumulate sums */
   1048         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
   1049         paddd       xmm5,xmm6   /* accumulate sum squares */
   1050 
   1051         /* row 9 */
   1052         mov         eax,pf9     /* load image pointer */
   1053         movdqu      xmm7,[eax]  /* load 16 pixels */
   1054         movdqa      xmm6,xmm7
   1055 
   1056         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
   1057         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
   1058 
   1059         pand        xmm6,xmm1   /* mask out pixels 12-16 */
   1060 
   1061         movdqu      [ecx+9*22],xmm7 /* move short values to patch */
   1062         movdqu      [ecx+9*22+16],xmm6  /* move short values to patch */
   1063 
   1064         paddusw     xmm4,xmm7   /* accumulate sums */
   1065         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
   1066         paddd       xmm5,xmm7   /* accumulate sum squares */
   1067 
   1068         paddw       xmm4,xmm6   /* accumulate sums */
   1069         pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
   1070         paddd       xmm5,xmm6   /* accumulate sum squares */
   1071 
   1072         /* row 10 */
   1073         mov         eax,pf10    /* load image pointer */
   1074         movdqu      xmm7,[eax]  /* load 16 pixels */
   1075         movdqa      xmm6,xmm7
   1076 
   1077         punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
   1078         punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
   1079 
   1080         pand        xmm6,xmm1   /* mask out pixels 12-16 */
   1081 
   1082         movdqu      [ecx+10*22],xmm7    /* move short values to patch */
   1083         movdqu      [ecx+10*22+16],xmm6 /* move short values to patch */
   1084 
   1085         paddusw     xmm4,xmm7   /* accumulate sums */
   1086         pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
   1087         paddd       xmm5,xmm7   /* accumulate sum squares */
   1088 
   1089         paddw       xmm4,xmm6   /* accumulate sums */
   1090         pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
   1091         paddd       xmm5,xmm6   /* accumulate sum squares */
   1092 
   1093         /* add up the sum squares */
   1094         movhlps     xmm0,xmm5   /* high half to low half */
   1095         paddd       xmm5,xmm0   /* add high to low */
   1096         pshuflw     xmm0,xmm5, 0xE /* reshuffle */
   1097         paddd       xmm5,xmm0   /* add remaining */
   1098         movd        f2sum,xmm5
   1099 
   1100         /* add up the sum */
   1101         movhlps     xmm0,xmm4
   1102         paddw       xmm4,xmm0   /* halves added */
   1103         pshuflw     xmm0,xmm4,0xE
   1104         paddw       xmm4,xmm0   /* quarters added */
   1105         pshuflw     xmm0,xmm4,0x1
   1106         paddw       xmm4,xmm0   /* eighth added */
   1107         movd        fsum, xmm4
   1108 
   1109         emms
   1110     }
   1111 
   1112     fsum = fsum & 0xFFFF;
   1113 
   1114     patch[126] = 0;
   1115     patch[127] = 0;
   1116 #endif /* DB_USE_SSE2 */
   1117 
   1118     *sum= (float) fsum;
   1119     den=(121.0f*f2sum-fsum*fsum);
   1120     *recip= (float)((den!=0.0)?1.0/den:0.0);
   1121 }
   1122 
   1123 void AffineWarpPointOffset(float &r_w,float &c_w,double Hinv[9],int r,int c)
   1124 {
   1125     r_w=(float)(Hinv[3]*c+Hinv[4]*r);
   1126     c_w=(float)(Hinv[0]*c+Hinv[1]*r);
   1127 }
   1128 
   1129 
   1130 
   1131 /*!
   1132 Prewarp the patches with given affine transform. For a given homogeneous point "x", "H*x" is
   1133 the warped point and for any displacement "d" in the warped image resulting in point "y", the
   1134 corresponding point in the original image is given by "Hinv*y", which can be simplified for affine H.
   1135 If "affine" is 1, then nearest neighbor method is used, else if it is 2, then
   1136 bilinear method is used.
   1137  */
   1138 inline void db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(short *patch,const unsigned char * const *f_img,
   1139                                                                     int xi,int yi,float *sum,float *recip,
   1140                                                                     const double Hinv[9],int affine)
   1141 {
   1142     float den;
   1143     short f;
   1144     int f2sum,fsum;
   1145 
   1146     f2sum=0;
   1147     fsum=0;
   1148 
   1149     if (affine==1)
   1150     {
   1151         for (int r=0;r<11;r++){
   1152             for (int c=0;c<11;c++){
   1153                 f=f_img[yi+AffineWarpPoint_NN_LUT_y[r][c]][xi+AffineWarpPoint_NN_LUT_x[r][c]];
   1154                 f2sum+=f*f;
   1155                 fsum+=f;
   1156                 (*patch++)=f;
   1157             }
   1158         }
   1159     }
   1160     else if (affine==2)
   1161     {
   1162         for (int r=0;r<11;r++){
   1163             for (int c=0;c<11;c++){
   1164                 f=db_BilinearInterpolation(yi+AffineWarpPoint_BL_LUT_y[r][c]
   1165                 ,xi+AffineWarpPoint_BL_LUT_x[r][c],f_img);
   1166                 f2sum+=f*f;
   1167                 fsum+=f;
   1168                 (*patch++)=f;
   1169             }
   1170         }
   1171     }
   1172 
   1173 
   1174 
   1175     (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
   1176     (*patch++)=0; (*patch++)=0;
   1177 
   1178     *sum= (float) fsum;
   1179     den=(121.0f*f2sum-fsum*fsum);
   1180     *recip= (float)((den!=0.0)?1.0/den:0.0);
   1181 }
   1182 
   1183 
   1184 inline float db_SignedSquareNormCorr11x11_Post_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g,
   1185                                                 float fsum_gsum,float f_recip_g_recip)
   1186 {
   1187     unsigned char *pf,*pg;
   1188     int fgsum;
   1189     float fg_corr;
   1190     int xm_f,xm_g;
   1191 
   1192     xm_f=x_f-5;
   1193     xm_g=x_g-5;
   1194 
   1195     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
   1196     fgsum=(*pf++)*(*pg++);  fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1197     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1198     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1199     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1200 
   1201     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   1202     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1203     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1204     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1205     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1206 
   1207     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   1208     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1209     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1210     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1211     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1212 
   1213     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   1214     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1215     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1216     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1217     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1218 
   1219     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   1220     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1221     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1222     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1223     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1224 
   1225     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   1226     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1227     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1228     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1229     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1230 
   1231     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   1232     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1233     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1234     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1235     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1236 
   1237     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   1238     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1239     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1240     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1241     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1242 
   1243     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   1244     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1245     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1246     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1247     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1248 
   1249     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   1250     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1251     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1252     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1253     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1254 
   1255     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
   1256     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1257     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1258     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   1259     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   1260 
   1261     fg_corr=121.0f*fgsum-fsum_gsum;
   1262     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   1263     return(-fg_corr*fg_corr*f_recip_g_recip);
   1264 }
   1265 
   1266 float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
   1267 {
   1268     float fgsum,fg_corr;
   1269 
   1270     fgsum= (float) db_ScalarProduct512_s(f_patch,g_patch);
   1271 
   1272     fg_corr=441.0f*fgsum-fsum_gsum;
   1273     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   1274     return(-fg_corr*fg_corr*f_recip_g_recip);
   1275 }
   1276 
   1277 
   1278 float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
   1279 {
   1280     float fgsum,fg_corr;
   1281 
   1282     fgsum= (float) db_ScalarProduct128_s(f_patch,g_patch);
   1283 
   1284     fg_corr=121.0f*fgsum-fsum_gsum;
   1285     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   1286     return(-fg_corr*fg_corr*f_recip_g_recip);
   1287 }
   1288 
   1289 float db_SignedSquareNormCorr5x5Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
   1290 {
   1291     float fgsum,fg_corr;
   1292 
   1293     fgsum= (float) db_ScalarProduct32_s(f_patch,g_patch);
   1294 
   1295     fg_corr=25.0f*fgsum-fsum_gsum;
   1296     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   1297     return(-fg_corr*fg_corr*f_recip_g_recip);
   1298 }
   1299 
   1300 
   1301 inline float db_SignedSquareNormCorr15x15_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
   1302 {
   1303     unsigned char *pf,*pg;
   1304     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
   1305     int xm_f,xm_g;
   1306 
   1307     xm_f=x_f-7;
   1308     xm_g=x_g-7;
   1309     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
   1310 
   1311     pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
   1312     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1313     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1314     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1315     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1316     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1317     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1318     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1319     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1320     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1321     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1322     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1323     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1324     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1325     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1326     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1327 
   1328     pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
   1329     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1330     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1331     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1332     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1333     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1334     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1335     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1336     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1337     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1338     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1339     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1340     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1341     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1342     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1343     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1344 
   1345     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
   1346     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1347     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1348     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1349     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1350     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1351     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1352     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1353     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1354     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1355     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1356     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1357     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1358     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1359     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1360     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1361 
   1362     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   1363     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1364     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1365     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1366     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1367     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1368     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1369     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1370     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1371     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1372     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1373     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1374     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1375     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1376     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1377     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1378 
   1379     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   1380     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1381     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1382     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1383     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1384     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1385     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1386     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1387     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1388     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1389     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1390     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1391     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1392     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1393     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1394     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1395 
   1396     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   1397     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1398     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1399     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1400     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1401     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1402     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1403     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1404     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1405     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1406     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1407     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1408     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1409     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1410     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1411     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1412 
   1413     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   1414     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1415     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1416     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1417     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1418     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1419     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1420     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1421     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1422     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1423     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1424     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1425     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1426     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1427     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1428     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1429 
   1430     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   1431     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1432     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1433     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1434     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1435     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1436     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1437     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1438     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1439     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1440     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1441     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1442     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1443     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1444     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1445     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1446 
   1447     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   1448     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1449     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1450     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1451     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1452     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1453     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1454     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1455     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1456     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1457     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1458     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1459     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1460     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1461     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1462     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1463 
   1464     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   1465     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1466     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1467     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1468     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1469     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1470     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1471     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1472     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1473     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1474     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1475     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1476     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1477     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1478     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1479     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1480 
   1481     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   1482     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1483     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1484     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1485     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1486     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1487     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1488     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1489     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1490     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1491     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1492     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1493     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1494     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1495     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1496     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1497 
   1498     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   1499     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1500     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1501     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1502     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1503     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1504     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1505     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1506     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1507     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1508     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1509     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1510     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1511     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1512     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1513     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1514 
   1515     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
   1516     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1517     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1518     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1519     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1520     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1521     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1522     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1523     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1524     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1525     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1526     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1527     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1528     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1529     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1530     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1531 
   1532     pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
   1533     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1534     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1535     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1536     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1537     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1538     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1539     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1540     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1541     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1542     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1543     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1544     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1545     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1546     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1547     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1548 
   1549     pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
   1550     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1551     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1552     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1553     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1554     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1555     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1556     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1557     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1558     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1559     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1560     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1561     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1562     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1563     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1564     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1565 
   1566     fg_corr=225.0f*fgsum-fsum*gsum;
   1567     den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
   1568     if(den!=0.0)
   1569     {
   1570         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
   1571         return(-fg_corr*fg_corr/den);
   1572     }
   1573     return(0.0);
   1574 }
   1575 
   1576 inline float db_SignedSquareNormCorr7x7_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
   1577 {
   1578     float f,g,*pf,*pg,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
   1579     int xm_f,xm_g;
   1580 
   1581     xm_f=x_f-3;
   1582     xm_g=x_g-3;
   1583     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
   1584 
   1585     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   1586     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1587     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1588     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1589     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1590     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1591     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1592     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1593 
   1594     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   1595     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1596     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1597     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1598     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1599     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1600     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1601     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1602 
   1603     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   1604     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1605     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1606     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1607     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1608     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1609     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1610     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1611 
   1612     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   1613     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1614     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1615     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1616     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1617     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1618     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1619     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1620 
   1621     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   1622     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1623     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1624     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1625     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1626     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1627     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1628     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1629 
   1630     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   1631     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1632     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1633     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1634     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1635     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1636     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1637     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1638 
   1639     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   1640     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1641     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1642     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1643     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1644     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1645     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1646     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1647 
   1648     fg_corr=49.0f*fgsum-fsum*gsum;
   1649     den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
   1650     if(den!=0.0)
   1651     {
   1652         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
   1653         return(-fg_corr*fg_corr/den);
   1654     }
   1655     return(0.0);
   1656 }
   1657 
   1658 inline float db_SignedSquareNormCorr9x9_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
   1659 {
   1660     float f,g,*pf,*pg,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
   1661     int xm_f,xm_g;
   1662 
   1663     xm_f=x_f-4;
   1664     xm_g=x_g-4;
   1665     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
   1666 
   1667     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   1668     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1669     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1670     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1671     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1672     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1673     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1674     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1675     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1676     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1677 
   1678     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   1679     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1680     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1681     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1682     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1683     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1684     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1685     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1686     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1687     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1688 
   1689     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   1690     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1691     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1692     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1693     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1694     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1695     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1696     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1697     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1698     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1699 
   1700     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   1701     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1702     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1703     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1704     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1705     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1706     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1707     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1708     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1709     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1710 
   1711     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   1712     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1713     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1714     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1715     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1716     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1717     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1718     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1719     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1720     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1721 
   1722     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   1723     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1724     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1725     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1726     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1727     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1728     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1729     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1730     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1731     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1732 
   1733     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   1734     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1735     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1736     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1737     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1738     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1739     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1740     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1741     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1742     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1743 
   1744     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   1745     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1746     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1747     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1748     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1749     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1750     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1751     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1752     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1753     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1754 
   1755     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   1756     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1757     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1758     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1759     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1760     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1761     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1762     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1763     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1764     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1765 
   1766     fg_corr=81.0f*fgsum-fsum*gsum;
   1767     den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
   1768     if(den!=0.0)
   1769     {
   1770         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
   1771         return(-fg_corr*fg_corr/den);
   1772     }
   1773     return(0.0);
   1774 }
   1775 
   1776 inline float db_SignedSquareNormCorr11x11_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
   1777 {
   1778     float *pf,*pg;
   1779     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
   1780     int xm_f,xm_g;
   1781 
   1782     xm_f=x_f-5;
   1783     xm_g=x_g-5;
   1784     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
   1785 
   1786     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
   1787     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1788     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1789     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1790     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1791     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1792     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1793     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1794     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1795     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1796     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1797     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1798 
   1799     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   1800     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1801     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1802     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1803     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1804     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1805     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1806     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1807     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1808     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1809     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1810     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1811 
   1812     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   1813     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1814     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1815     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1816     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1817     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1818     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1819     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1820     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1821     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1822     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1823     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1824 
   1825     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   1826     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1827     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1828     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1829     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1830     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1831     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1832     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1833     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1834     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1835     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1836     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1837 
   1838     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   1839     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1840     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1841     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1842     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1843     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1844     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1845     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1846     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1847     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1848     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1849     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1850 
   1851     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   1852     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1853     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1854     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1855     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1856     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1857     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1858     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1859     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1860     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1861     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1862     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1863 
   1864     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   1865     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1866     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1867     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1868     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1869     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1870     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1871     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1872     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1873     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1874     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1875     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1876 
   1877     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   1878     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1879     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1880     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1881     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1882     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1883     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1884     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1885     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1886     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1887     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1888     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1889 
   1890     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   1891     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1892     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1893     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1894     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1895     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1896     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1897     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1898     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1899     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1900     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1901     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1902 
   1903     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   1904     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1905     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1906     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1907     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1908     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1909     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1910     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1911     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1912     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1913     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1914     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1915 
   1916     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
   1917     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1918     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1919     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1920     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1921     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1922     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1923     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1924     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1925     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1926     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1927     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   1928 
   1929     fg_corr=121.0f*fgsum-fsum*gsum;
   1930     den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
   1931     if(den!=0.0)
   1932     {
   1933         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
   1934         return(-fg_corr*fg_corr/den);
   1935     }
   1936     return(0.0);
   1937 }
   1938 
   1939 inline void db_SignedSquareNormCorr11x11_Pre_f(float **f_img,int x_f,int y_f,float *sum,float *recip)
   1940 {
   1941     float *pf,den;
   1942     float f,f2sum,fsum;
   1943     int xm_f;
   1944 
   1945     xm_f=x_f-5;
   1946 
   1947     pf=f_img[y_f-5]+xm_f;
   1948     f= *pf++; f2sum=f*f;  fsum=f;
   1949     f= *pf++; f2sum+=f*f; fsum+=f;
   1950     f= *pf++; f2sum+=f*f; fsum+=f;
   1951     f= *pf++; f2sum+=f*f; fsum+=f;
   1952     f= *pf++; f2sum+=f*f; fsum+=f;
   1953     f= *pf++; f2sum+=f*f; fsum+=f;
   1954     f= *pf++; f2sum+=f*f; fsum+=f;
   1955     f= *pf++; f2sum+=f*f; fsum+=f;
   1956     f= *pf++; f2sum+=f*f; fsum+=f;
   1957     f= *pf++; f2sum+=f*f; fsum+=f;
   1958     f= *pf;   f2sum+=f*f; fsum+=f;
   1959 
   1960     pf=f_img[y_f-4]+xm_f;
   1961     f= *pf++; f2sum+=f*f; fsum+=f;
   1962     f= *pf++; f2sum+=f*f; fsum+=f;
   1963     f= *pf++; f2sum+=f*f; fsum+=f;
   1964     f= *pf++; f2sum+=f*f; fsum+=f;
   1965     f= *pf++; f2sum+=f*f; fsum+=f;
   1966     f= *pf++; f2sum+=f*f; fsum+=f;
   1967     f= *pf++; f2sum+=f*f; fsum+=f;
   1968     f= *pf++; f2sum+=f*f; fsum+=f;
   1969     f= *pf++; f2sum+=f*f; fsum+=f;
   1970     f= *pf++; f2sum+=f*f; fsum+=f;
   1971     f= *pf;   f2sum+=f*f; fsum+=f;
   1972 
   1973     pf=f_img[y_f-3]+xm_f;
   1974     f= *pf++; f2sum+=f*f; fsum+=f;
   1975     f= *pf++; f2sum+=f*f; fsum+=f;
   1976     f= *pf++; f2sum+=f*f; fsum+=f;
   1977     f= *pf++; f2sum+=f*f; fsum+=f;
   1978     f= *pf++; f2sum+=f*f; fsum+=f;
   1979     f= *pf++; f2sum+=f*f; fsum+=f;
   1980     f= *pf++; f2sum+=f*f; fsum+=f;
   1981     f= *pf++; f2sum+=f*f; fsum+=f;
   1982     f= *pf++; f2sum+=f*f; fsum+=f;
   1983     f= *pf++; f2sum+=f*f; fsum+=f;
   1984     f= *pf;   f2sum+=f*f; fsum+=f;
   1985 
   1986     pf=f_img[y_f-2]+xm_f;
   1987     f= *pf++; f2sum+=f*f; fsum+=f;
   1988     f= *pf++; f2sum+=f*f; fsum+=f;
   1989     f= *pf++; f2sum+=f*f; fsum+=f;
   1990     f= *pf++; f2sum+=f*f; fsum+=f;
   1991     f= *pf++; f2sum+=f*f; fsum+=f;
   1992     f= *pf++; f2sum+=f*f; fsum+=f;
   1993     f= *pf++; f2sum+=f*f; fsum+=f;
   1994     f= *pf++; f2sum+=f*f; fsum+=f;
   1995     f= *pf++; f2sum+=f*f; fsum+=f;
   1996     f= *pf++; f2sum+=f*f; fsum+=f;
   1997     f= *pf;   f2sum+=f*f; fsum+=f;
   1998 
   1999     pf=f_img[y_f-1]+xm_f;
   2000     f= *pf++; f2sum+=f*f; fsum+=f;
   2001     f= *pf++; f2sum+=f*f; fsum+=f;
   2002     f= *pf++; f2sum+=f*f; fsum+=f;
   2003     f= *pf++; f2sum+=f*f; fsum+=f;
   2004     f= *pf++; f2sum+=f*f; fsum+=f;
   2005     f= *pf++; f2sum+=f*f; fsum+=f;
   2006     f= *pf++; f2sum+=f*f; fsum+=f;
   2007     f= *pf++; f2sum+=f*f; fsum+=f;
   2008     f= *pf++; f2sum+=f*f; fsum+=f;
   2009     f= *pf++; f2sum+=f*f; fsum+=f;
   2010     f= *pf;   f2sum+=f*f; fsum+=f;
   2011 
   2012     pf=f_img[y_f]+xm_f;
   2013     f= *pf++; f2sum+=f*f; fsum+=f;
   2014     f= *pf++; f2sum+=f*f; fsum+=f;
   2015     f= *pf++; f2sum+=f*f; fsum+=f;
   2016     f= *pf++; f2sum+=f*f; fsum+=f;
   2017     f= *pf++; f2sum+=f*f; fsum+=f;
   2018     f= *pf++; f2sum+=f*f; fsum+=f;
   2019     f= *pf++; f2sum+=f*f; fsum+=f;
   2020     f= *pf++; f2sum+=f*f; fsum+=f;
   2021     f= *pf++; f2sum+=f*f; fsum+=f;
   2022     f= *pf++; f2sum+=f*f; fsum+=f;
   2023     f= *pf;   f2sum+=f*f; fsum+=f;
   2024 
   2025     pf=f_img[y_f+1]+xm_f;
   2026     f= *pf++; f2sum+=f*f; fsum+=f;
   2027     f= *pf++; f2sum+=f*f; fsum+=f;
   2028     f= *pf++; f2sum+=f*f; fsum+=f;
   2029     f= *pf++; f2sum+=f*f; fsum+=f;
   2030     f= *pf++; f2sum+=f*f; fsum+=f;
   2031     f= *pf++; f2sum+=f*f; fsum+=f;
   2032     f= *pf++; f2sum+=f*f; fsum+=f;
   2033     f= *pf++; f2sum+=f*f; fsum+=f;
   2034     f= *pf++; f2sum+=f*f; fsum+=f;
   2035     f= *pf++; f2sum+=f*f; fsum+=f;
   2036     f= *pf;   f2sum+=f*f; fsum+=f;
   2037 
   2038     pf=f_img[y_f+2]+xm_f;
   2039     f= *pf++; f2sum+=f*f; fsum+=f;
   2040     f= *pf++; f2sum+=f*f; fsum+=f;
   2041     f= *pf++; f2sum+=f*f; fsum+=f;
   2042     f= *pf++; f2sum+=f*f; fsum+=f;
   2043     f= *pf++; f2sum+=f*f; fsum+=f;
   2044     f= *pf++; f2sum+=f*f; fsum+=f;
   2045     f= *pf++; f2sum+=f*f; fsum+=f;
   2046     f= *pf++; f2sum+=f*f; fsum+=f;
   2047     f= *pf++; f2sum+=f*f; fsum+=f;
   2048     f= *pf++; f2sum+=f*f; fsum+=f;
   2049     f= *pf;   f2sum+=f*f; fsum+=f;
   2050 
   2051     pf=f_img[y_f+3]+xm_f;
   2052     f= *pf++; f2sum+=f*f; fsum+=f;
   2053     f= *pf++; f2sum+=f*f; fsum+=f;
   2054     f= *pf++; f2sum+=f*f; fsum+=f;
   2055     f= *pf++; f2sum+=f*f; fsum+=f;
   2056     f= *pf++; f2sum+=f*f; fsum+=f;
   2057     f= *pf++; f2sum+=f*f; fsum+=f;
   2058     f= *pf++; f2sum+=f*f; fsum+=f;
   2059     f= *pf++; f2sum+=f*f; fsum+=f;
   2060     f= *pf++; f2sum+=f*f; fsum+=f;
   2061     f= *pf++; f2sum+=f*f; fsum+=f;
   2062     f= *pf;   f2sum+=f*f; fsum+=f;
   2063 
   2064     pf=f_img[y_f+4]+xm_f;
   2065     f= *pf++; f2sum+=f*f; fsum+=f;
   2066     f= *pf++; f2sum+=f*f; fsum+=f;
   2067     f= *pf++; f2sum+=f*f; fsum+=f;
   2068     f= *pf++; f2sum+=f*f; fsum+=f;
   2069     f= *pf++; f2sum+=f*f; fsum+=f;
   2070     f= *pf++; f2sum+=f*f; fsum+=f;
   2071     f= *pf++; f2sum+=f*f; fsum+=f;
   2072     f= *pf++; f2sum+=f*f; fsum+=f;
   2073     f= *pf++; f2sum+=f*f; fsum+=f;
   2074     f= *pf++; f2sum+=f*f; fsum+=f;
   2075     f= *pf;   f2sum+=f*f; fsum+=f;
   2076 
   2077     pf=f_img[y_f+5]+xm_f;
   2078     f= *pf++; f2sum+=f*f; fsum+=f;
   2079     f= *pf++; f2sum+=f*f; fsum+=f;
   2080     f= *pf++; f2sum+=f*f; fsum+=f;
   2081     f= *pf++; f2sum+=f*f; fsum+=f;
   2082     f= *pf++; f2sum+=f*f; fsum+=f;
   2083     f= *pf++; f2sum+=f*f; fsum+=f;
   2084     f= *pf++; f2sum+=f*f; fsum+=f;
   2085     f= *pf++; f2sum+=f*f; fsum+=f;
   2086     f= *pf++; f2sum+=f*f; fsum+=f;
   2087     f= *pf++; f2sum+=f*f; fsum+=f;
   2088     f= *pf;   f2sum+=f*f; fsum+=f;
   2089 
   2090     *sum=fsum;
   2091     den=(121.0f*f2sum-fsum*fsum);
   2092     *recip= (float) ((den!=0.0)?1.0/den:0.0);
   2093 }
   2094 
   2095 inline void db_SignedSquareNormCorr11x11_PreAlign_f(float *patch,const float * const *f_img,int x_f,int y_f,float *sum,float *recip)
   2096 {
   2097     const float *pf;
   2098     float den,f,f2sum,fsum;
   2099     int xm_f;
   2100 
   2101     xm_f=x_f-5;
   2102 
   2103     pf=f_img[y_f-5]+xm_f;
   2104     f= *pf++; f2sum=f*f;  fsum=f;  (*patch++)=f;
   2105     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2106     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2107     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2108     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2109     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2110     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2111     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2112     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2113     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2114     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2115 
   2116     pf=f_img[y_f-4]+xm_f;
   2117     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2118     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2119     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2120     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2121     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2122     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2123     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2124     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2125     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2126     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2127     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2128 
   2129     pf=f_img[y_f-3]+xm_f;
   2130     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2131     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2132     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2133     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2134     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2135     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2136     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2137     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2138     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2139     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2140     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2141 
   2142     pf=f_img[y_f-2]+xm_f;
   2143     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2144     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2145     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2146     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2147     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2148     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2149     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2150     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2151     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2152     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2153     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2154 
   2155     pf=f_img[y_f-1]+xm_f;
   2156     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2157     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2158     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2159     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2160     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2161     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2162     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2163     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2164     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2165     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2166     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2167 
   2168     pf=f_img[y_f]+xm_f;
   2169     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2170     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2171     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2172     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2173     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2174     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2175     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2176     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2177     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2178     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2179     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2180 
   2181     pf=f_img[y_f+1]+xm_f;
   2182     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2183     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2184     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2185     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2186     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2187     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2188     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2189     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2190     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2191     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2192     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2193 
   2194     pf=f_img[y_f+2]+xm_f;
   2195     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2196     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2197     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2198     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2199     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2200     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2201     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2202     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2203     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2204     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2205     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2206 
   2207     pf=f_img[y_f+3]+xm_f;
   2208     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2209     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2210     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2211     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2212     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2213     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2214     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2215     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2216     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2217     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2218     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2219 
   2220     pf=f_img[y_f+4]+xm_f;
   2221     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2222     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2223     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2224     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2225     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2226     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2227     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2228     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2229     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2230     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2231     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2232 
   2233     pf=f_img[y_f+5]+xm_f;
   2234     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2235     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2236     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2237     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2238     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2239     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2240     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2241     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2242     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2243     f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
   2244     f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
   2245 
   2246     (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0;
   2247     (*patch++)=0.0; (*patch++)=0.0;
   2248 
   2249     *sum=fsum;
   2250     den=(121.0f*f2sum-fsum*fsum);
   2251     *recip= (float) ((den!=0.0)?1.0/den:0.0);
   2252 }
   2253 
   2254 inline float db_SignedSquareNormCorr11x11_Post_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g,
   2255                                                 float fsum_gsum,float f_recip_g_recip)
   2256 {
   2257     float *pf,*pg;
   2258     float fgsum,fg_corr;
   2259     int xm_f,xm_g;
   2260 
   2261     xm_f=x_f-5;
   2262     xm_g=x_g-5;
   2263 
   2264     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
   2265     fgsum=(*pf++)*(*pg++);  fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2266     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2267     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2268     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2269 
   2270     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   2271     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2272     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2273     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2274     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2275 
   2276     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   2277     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2278     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2279     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2280     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2281 
   2282     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   2283     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2284     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2285     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2286     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2287 
   2288     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   2289     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2290     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2291     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2292     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2293 
   2294     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   2295     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2296     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2297     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2298     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2299 
   2300     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   2301     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2302     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2303     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2304     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2305 
   2306     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   2307     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2308     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2309     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2310     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2311 
   2312     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   2313     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2314     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2315     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2316     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2317 
   2318     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   2319     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2320     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2321     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2322     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2323 
   2324     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
   2325     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2326     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2327     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
   2328     fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
   2329 
   2330     fg_corr=121.0f*fgsum-fsum_gsum;
   2331     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   2332     return(-fg_corr*fg_corr*f_recip_g_recip);
   2333 }
   2334 
   2335 inline float db_SignedSquareNormCorr11x11Aligned_Post_f(const float *f_patch,const float *g_patch,float fsum_gsum,float f_recip_g_recip)
   2336 {
   2337     float fgsum,fg_corr;
   2338 
   2339     fgsum=db_ScalarProduct128Aligned16_f(f_patch,g_patch);
   2340 
   2341     fg_corr=121.0f*fgsum-fsum_gsum;
   2342     if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
   2343     return(-fg_corr*fg_corr*f_recip_g_recip);
   2344 }
   2345 
   2346 inline float db_SignedSquareNormCorr15x15_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
   2347 {
   2348     float *pf,*pg;
   2349     float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
   2350     int xm_f,xm_g;
   2351 
   2352     xm_f=x_f-7;
   2353     xm_g=x_g-7;
   2354     fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
   2355 
   2356     pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
   2357     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2358     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2359     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2360     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2361     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2362     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2363     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2364     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2365     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2366     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2367     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2368     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2369     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2370     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2371     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2372 
   2373     pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
   2374     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2375     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2376     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2377     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2378     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2379     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2380     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2381     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2382     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2383     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2384     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2385     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2386     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2387     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2388     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2389 
   2390     pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
   2391     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2392     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2393     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2394     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2395     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2396     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2397     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2398     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2399     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2400     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2401     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2402     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2403     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2404     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2405     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2406 
   2407     pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
   2408     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2409     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2410     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2411     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2412     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2413     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2414     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2415     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2416     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2417     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2418     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2419     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2420     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2421     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2422     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2423 
   2424     pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
   2425     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2426     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2427     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2428     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2429     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2430     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2431     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2432     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2433     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2434     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2435     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2436     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2437     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2438     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2439     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2440 
   2441     pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
   2442     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2443     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2444     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2445     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2446     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2447     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2448     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2449     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2450     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2451     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2452     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2453     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2454     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2455     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2456     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2457 
   2458     pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
   2459     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2460     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2461     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2462     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2463     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2464     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2465     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2466     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2467     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2468     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2469     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2470     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2471     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2472     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2473     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2474 
   2475     pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
   2476     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2477     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2478     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2479     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2480     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2481     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2482     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2483     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2484     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2485     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2486     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2487     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2488     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2489     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2490     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2491 
   2492     pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
   2493     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2494     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2495     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2496     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2497     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2498     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2499     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2500     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2501     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2502     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2503     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2504     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2505     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2506     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2507     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2508 
   2509     pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
   2510     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2511     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2512     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2513     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2514     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2515     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2516     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2517     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2518     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2519     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2520     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2521     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2522     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2523     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2524     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2525 
   2526     pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
   2527     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2528     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2529     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2530     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2531     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2532     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2533     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2534     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2535     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2536     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2537     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2538     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2539     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2540     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2541     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2542 
   2543     pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
   2544     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2545     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2546     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2547     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2548     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2549     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2550     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2551     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2552     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2553     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2554     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2555     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2556     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2557     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2558     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2559 
   2560     pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
   2561     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2562     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2563     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2564     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2565     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2566     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2567     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2568     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2569     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2570     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2571     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2572     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2573     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2574     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2575     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2576 
   2577     pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
   2578     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2579     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2580     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2581     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2582     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2583     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2584     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2585     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2586     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2587     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2588     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2589     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2590     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2591     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2592     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2593 
   2594     pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
   2595     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2596     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2597     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2598     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2599     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2600     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2601     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2602     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2603     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2604     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2605     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2606     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2607     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2608     f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2609     f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
   2610 
   2611     fg_corr=225.0f*fgsum-fsum*gsum;
   2612     den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
   2613     if(den!=0.0)
   2614     {
   2615         if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
   2616         return(-fg_corr*fg_corr/den);
   2617     }
   2618     return(0.0);
   2619 }
   2620 
   2621 db_Bucket_f** db_AllocBuckets_f(int nr_h,int nr_v,int bd)
   2622 {
   2623     int i,j;
   2624     db_Bucket_f **bp,*b;
   2625 
   2626     b=new db_Bucket_f [(nr_h+2)*(nr_v+2)];
   2627     bp=new db_Bucket_f* [(nr_v+2)];
   2628     bp=bp+1;
   2629     for(i= -1;i<=nr_v;i++)
   2630     {
   2631         bp[i]=b+1+(nr_h+2)*(i+1);
   2632         for(j= -1;j<=nr_h;j++)
   2633         {
   2634             bp[i][j].ptr=new db_PointInfo_f [bd];
   2635         }
   2636     }
   2637 
   2638     return(bp);
   2639 }
   2640 
   2641 db_Bucket_u** db_AllocBuckets_u(int nr_h,int nr_v,int bd)
   2642 {
   2643     int i,j;
   2644     db_Bucket_u **bp,*b;
   2645 
   2646     b=new db_Bucket_u [(nr_h+2)*(nr_v+2)];
   2647     bp=new db_Bucket_u* [(nr_v+2)];
   2648     bp=bp+1;
   2649     for(i= -1;i<=nr_v;i++)
   2650     {
   2651         bp[i]=b+1+(nr_h+2)*(i+1);
   2652         for(j= -1;j<=nr_h;j++)
   2653         {
   2654             bp[i][j].ptr=new db_PointInfo_u [bd];
   2655         }
   2656     }
   2657 
   2658     return(bp);
   2659 }
   2660 
   2661 void db_FreeBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
   2662 {
   2663     int i,j;
   2664 
   2665     for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
   2666     {
   2667         delete [] bp[i][j].ptr;
   2668     }
   2669     delete [] (bp[-1]-1);
   2670     delete [] (bp-1);
   2671 }
   2672 
   2673 void db_FreeBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
   2674 {
   2675     int i,j;
   2676 
   2677     for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
   2678     {
   2679         delete [] bp[i][j].ptr;
   2680     }
   2681     delete [] (bp[-1]-1);
   2682     delete [] (bp-1);
   2683 }
   2684 
   2685 void db_EmptyBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
   2686 {
   2687     int i,j;
   2688     for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
   2689 }
   2690 
   2691 void db_EmptyBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
   2692 {
   2693     int i,j;
   2694     for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
   2695 }
   2696 
   2697 float* db_FillBuckets_f(float *patch_space,const float * const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners)
   2698 {
   2699     int i,xi,yi,xpos,ypos,nr;
   2700     db_Bucket_f *br;
   2701     db_PointInfo_f *pir;
   2702 
   2703     db_EmptyBuckets_f(bp,nr_h,nr_v);
   2704     for(i=0;i<nr_corners;i++)
   2705     {
   2706         xi=(int) x[i];
   2707         yi=(int) y[i];
   2708         xpos=xi/bw;
   2709         ypos=yi/bh;
   2710         if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
   2711         {
   2712             br=&bp[ypos][xpos];
   2713             nr=br->nr;
   2714             if(nr<bd)
   2715             {
   2716                 pir=&(br->ptr[nr]);
   2717                 pir->x=xi;
   2718                 pir->y=yi;
   2719                 pir->id=i;
   2720                 pir->pir=0;
   2721                 pir->patch=patch_space;
   2722                 br->nr=nr+1;
   2723 
   2724                 db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2725                 patch_space+=128;
   2726             }
   2727         }
   2728     }
   2729     return(patch_space);
   2730 }
   2731 
   2732 short* db_FillBuckets_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,int use_smaller_matching_window, int use_21)
   2733 {
   2734     int i,xi,yi,xpos,ypos,nr;
   2735     db_Bucket_u *br;
   2736     db_PointInfo_u *pir;
   2737 
   2738     db_EmptyBuckets_u(bp,nr_h,nr_v);
   2739     for(i=0;i<nr_corners;i++)
   2740     {
   2741         xi=(int)db_roundi(x[i]);
   2742         yi=(int)db_roundi(y[i]);
   2743         xpos=xi/bw;
   2744         ypos=yi/bh;
   2745         if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
   2746         {
   2747             br=&bp[ypos][xpos];
   2748             nr=br->nr;
   2749             if(nr<bd)
   2750             {
   2751                 pir=&(br->ptr[nr]);
   2752                 pir->x=xi;
   2753                 pir->y=yi;
   2754                 pir->id=i;
   2755                 pir->pir=0;
   2756                 pir->patch=patch_space;
   2757                 br->nr=nr+1;
   2758 
   2759                 if(use_21)
   2760                 {
   2761                     db_SignedSquareNormCorr21x21_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2762                     patch_space+=512;
   2763                 }
   2764                 else
   2765                 {
   2766                 if(!use_smaller_matching_window)
   2767                 {
   2768                     db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2769                     patch_space+=128;
   2770                 }
   2771                 else
   2772                 {
   2773                     db_SignedSquareNormCorr5x5_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2774                     patch_space+=32;
   2775                 }
   2776                 }
   2777             }
   2778         }
   2779     }
   2780     return(patch_space);
   2781 }
   2782 
   2783 
   2784 
   2785 float* db_FillBucketsPrewarped_f(float *patch_space,const float *const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,const double H[9])
   2786 {
   2787     int i,xi,yi,xpos,ypos,nr,wxi,wyi;
   2788     db_Bucket_f *br;
   2789     db_PointInfo_f *pir;
   2790     double xd[2],wx[2];
   2791 
   2792     db_EmptyBuckets_f(bp,nr_h,nr_v);
   2793     for(i=0;i<nr_corners;i++)
   2794     {
   2795         xd[0]=x[i];
   2796         xd[1]=y[i];
   2797         xi=(int) xd[0];
   2798         yi=(int) xd[1];
   2799         db_ImageHomographyInhomogenous(wx,H,xd);
   2800         wxi=(int) wx[0];
   2801         wyi=(int) wx[1];
   2802 
   2803         xpos=((wxi+bw)/bw)-1;
   2804         ypos=((wyi+bh)/bh)-1;
   2805         if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
   2806         {
   2807             br=&bp[ypos][xpos];
   2808             nr=br->nr;
   2809             if(nr<bd)
   2810             {
   2811                 pir=&(br->ptr[nr]);
   2812                 pir->x=wxi;
   2813                 pir->y=wyi;
   2814                 pir->id=i;
   2815                 pir->pir=0;
   2816                 pir->patch=patch_space;
   2817                 br->nr=nr+1;
   2818 
   2819                 db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2820                 patch_space+=128;
   2821             }
   2822         }
   2823     }
   2824     return(patch_space);
   2825 }
   2826 
   2827 short* db_FillBucketsPrewarped_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
   2828                                  int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
   2829                                  int nr_corners,const double H[9])
   2830 {
   2831     int i,xi,yi,xpos,ypos,nr,wxi,wyi;
   2832     db_Bucket_u *br;
   2833     db_PointInfo_u *pir;
   2834     double xd[2],wx[2];
   2835 
   2836     db_EmptyBuckets_u(bp,nr_h,nr_v);
   2837     for(i=0;i<nr_corners;i++)
   2838     {
   2839         xd[0]=x[i];
   2840         xd[1]=y[i];
   2841         xi=(int) db_roundi(xd[0]);
   2842         yi=(int) db_roundi(xd[1]);
   2843         db_ImageHomographyInhomogenous(wx,H,xd);
   2844         wxi=(int) wx[0];
   2845         wyi=(int) wx[1];
   2846 
   2847         xpos=((wxi+bw)/bw)-1;
   2848         ypos=((wyi+bh)/bh)-1;
   2849         if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
   2850         {
   2851             br=&bp[ypos][xpos];
   2852             nr=br->nr;
   2853             if(nr<bd)
   2854             {
   2855                 pir=&(br->ptr[nr]);
   2856                 pir->x=wxi;
   2857                 pir->y=wyi;
   2858                 pir->id=i;
   2859                 pir->pir=0;
   2860                 pir->patch=patch_space;
   2861                 br->nr=nr+1;
   2862 
   2863                 db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
   2864                 patch_space+=128;
   2865             }
   2866         }
   2867     }
   2868     return(patch_space);
   2869 }
   2870 
   2871 
   2872 
   2873 short* db_FillBucketsPrewarpedAffine_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
   2874                                  int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
   2875                                  int nr_corners,const double H[9],const double Hinv[9],const int warpboundsp[4],
   2876                                  int affine)
   2877 {
   2878     int i,xi,yi,xpos,ypos,nr,wxi,wyi;
   2879     db_Bucket_u *br;
   2880     db_PointInfo_u *pir;
   2881     double xd[2],wx[2];
   2882 
   2883     db_EmptyBuckets_u(bp,nr_h,nr_v);
   2884     for(i=0;i<nr_corners;i++)
   2885     {
   2886         xd[0]=x[i];
   2887         xd[1]=y[i];
   2888         xi=(int) db_roundi(xd[0]);
   2889         yi=(int) db_roundi(xd[1]);
   2890         db_ImageHomographyInhomogenous(wx,H,xd);
   2891         wxi=(int) wx[0];
   2892         wyi=(int) wx[1];
   2893 
   2894         xpos=((wxi+bw)/bw)-1;
   2895         ypos=((wyi+bh)/bh)-1;
   2896 
   2897 
   2898         if (xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
   2899         {
   2900             if( xi>warpboundsp[0] && xi<warpboundsp[1] && yi>warpboundsp[2] && yi<warpboundsp[3])
   2901             {
   2902 
   2903                 br=&bp[ypos][xpos];
   2904                 nr=br->nr;
   2905                 if(nr<bd)
   2906                 {
   2907                     pir=&(br->ptr[nr]);
   2908                     pir->x=wxi;
   2909                     pir->y=wyi;
   2910                     pir->id=i;
   2911                     pir->pir=0;
   2912                     pir->patch=patch_space;
   2913                     br->nr=nr+1;
   2914 
   2915                     db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip),Hinv,affine);
   2916                     patch_space+=128;
   2917                 }
   2918             }
   2919         }
   2920     }
   2921     return(patch_space);
   2922 }
   2923 
   2924 
   2925 
   2926 inline void db_MatchPointPair_f(db_PointInfo_f *pir_l,db_PointInfo_f *pir_r,
   2927                             unsigned long kA,unsigned long kB)
   2928 {
   2929     int x_l,y_l,x_r,y_r,xm,ym;
   2930     double score;
   2931 
   2932     x_l=pir_l->x;
   2933     y_l=pir_l->y;
   2934     x_r=pir_r->x;
   2935     y_r=pir_r->y;
   2936     xm=x_l-x_r;
   2937     ym=y_l-y_r;
   2938     /*Check if disparity is within the maximum disparity
   2939     with the formula xm^2*256+ym^2*kA<kB
   2940     where kA=256*w^2/h^2
   2941     and   kB=256*max_disp^2*w^2*/
   2942     if(((xm*xm)<<8)+ym*ym*kA<kB)
   2943     {
   2944         /*Correlate*/
   2945         score=db_SignedSquareNormCorr11x11Aligned_Post_f(pir_l->patch,pir_r->patch,
   2946             (pir_l->sum)*(pir_r->sum),
   2947             (pir_l->recip)*(pir_r->recip));
   2948 
   2949         if((!(pir_l->pir)) || (score>pir_l->s))
   2950         {
   2951             /*Update left corner*/
   2952             pir_l->s=score;
   2953             pir_l->pir=pir_r;
   2954         }
   2955         if((!(pir_r->pir)) || (score>pir_r->s))
   2956         {
   2957             /*Update right corner*/
   2958             pir_r->s=score;
   2959             pir_r->pir=pir_l;
   2960         }
   2961     }
   2962 }
   2963 
   2964 inline void db_MatchPointPair_u(db_PointInfo_u *pir_l,db_PointInfo_u *pir_r,
   2965                             unsigned long kA,unsigned long kB, unsigned int rect_window,bool use_smaller_matching_window, int use_21)
   2966 {
   2967     int xm,ym;
   2968     double score;
   2969     bool compute_score;
   2970 
   2971 
   2972     if( rect_window )
   2973         compute_score = ((unsigned)db_absi(pir_l->x - pir_r->x)<kA && (unsigned)db_absi(pir_l->y - pir_r->y)<kB);
   2974     else
   2975     {   /*Check if disparity is within the maximum disparity
   2976         with the formula xm^2*256+ym^2*kA<kB
   2977         where kA=256*w^2/h^2
   2978         and   kB=256*max_disp^2*w^2*/
   2979         xm= pir_l->x - pir_r->x;
   2980         ym= pir_l->y - pir_r->y;
   2981         compute_score = ((xm*xm)<<8)+ym*ym*kA < kB;
   2982     }
   2983 
   2984     if ( compute_score )
   2985     {
   2986         if(use_21)
   2987         {
   2988             score=db_SignedSquareNormCorr21x21Aligned_Post_s(pir_l->patch,pir_r->patch,
   2989                 (pir_l->sum)*(pir_r->sum),
   2990                 (pir_l->recip)*(pir_r->recip));
   2991         }
   2992         else
   2993         {
   2994         /*Correlate*/
   2995         if(!use_smaller_matching_window)
   2996         {
   2997             score=db_SignedSquareNormCorr11x11Aligned_Post_s(pir_l->patch,pir_r->patch,
   2998                 (pir_l->sum)*(pir_r->sum),
   2999                 (pir_l->recip)*(pir_r->recip));
   3000         }
   3001         else
   3002         {
   3003             score=db_SignedSquareNormCorr5x5Aligned_Post_s(pir_l->patch,pir_r->patch,
   3004                 (pir_l->sum)*(pir_r->sum),
   3005                 (pir_l->recip)*(pir_r->recip));
   3006         }
   3007         }
   3008 
   3009         if((!(pir_l->pir)) || (score>pir_l->s))
   3010         {
   3011             /*Update left corner*/
   3012             pir_l->s=score;
   3013             pir_l->pir=pir_r;
   3014         }
   3015         if((!(pir_r->pir)) || (score>pir_r->s))
   3016         {
   3017             /*Update right corner*/
   3018             pir_r->s=score;
   3019             pir_r->pir=pir_l;
   3020         }
   3021     }
   3022 }
   3023 
   3024 inline void db_MatchPointAgainstBucket_f(db_PointInfo_f *pir_l,db_Bucket_f *b_r,
   3025                                        unsigned long kA,unsigned long kB)
   3026 {
   3027     int p_r,nr;
   3028     db_PointInfo_f *pir_r;
   3029 
   3030     nr=b_r->nr;
   3031     pir_r=b_r->ptr;
   3032     for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_f(pir_l,pir_r+p_r,kA,kB);
   3033 }
   3034 
   3035 inline void db_MatchPointAgainstBucket_u(db_PointInfo_u *pir_l,db_Bucket_u *b_r,
   3036                                        unsigned long kA,unsigned long kB,int rect_window, bool use_smaller_matching_window, int use_21)
   3037 {
   3038     int p_r,nr;
   3039     db_PointInfo_u *pir_r;
   3040 
   3041     nr=b_r->nr;
   3042     pir_r=b_r->ptr;
   3043 
   3044     for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_u(pir_l,pir_r+p_r,kA,kB, rect_window, use_smaller_matching_window, use_21);
   3045 
   3046 }
   3047 
   3048 void db_MatchBuckets_f(db_Bucket_f **bp_l,db_Bucket_f **bp_r,int nr_h,int nr_v,
   3049                      unsigned long kA,unsigned long kB)
   3050 {
   3051     int i,j,k,a,b,br_nr;
   3052     db_Bucket_f *br;
   3053     db_PointInfo_f *pir_l;
   3054 
   3055     /*For all buckets*/
   3056     for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
   3057     {
   3058         br=&bp_l[i][j];
   3059         br_nr=br->nr;
   3060         /*For all points in bucket*/
   3061         for(k=0;k<br_nr;k++)
   3062         {
   3063             pir_l=br->ptr+k;
   3064             for(a=i-1;a<=i+1;a++)
   3065             {
   3066                 for(b=j-1;b<=j+1;b++)
   3067                 {
   3068                     db_MatchPointAgainstBucket_f(pir_l,&bp_r[a][b],kA,kB);
   3069                 }
   3070             }
   3071         }
   3072     }
   3073 }
   3074 
   3075 void db_MatchBuckets_u(db_Bucket_u **bp_l,db_Bucket_u **bp_r,int nr_h,int nr_v,
   3076                      unsigned long kA,unsigned long kB,int rect_window,bool use_smaller_matching_window, int use_21)
   3077 {
   3078     int i,j,k,a,b,br_nr;
   3079     db_Bucket_u *br;
   3080     db_PointInfo_u *pir_l;
   3081 
   3082     /*For all buckets*/
   3083     for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
   3084     {
   3085         br=&bp_l[i][j];
   3086         br_nr=br->nr;
   3087         /*For all points in bucket*/
   3088         for(k=0;k<br_nr;k++)
   3089         {
   3090             pir_l=br->ptr+k;
   3091             for(a=i-1;a<=i+1;a++)
   3092             {
   3093                 for(b=j-1;b<=j+1;b++)
   3094                 {
   3095                     db_MatchPointAgainstBucket_u(pir_l,&bp_r[a][b],kA,kB,rect_window,use_smaller_matching_window, use_21);
   3096                 }
   3097             }
   3098         }
   3099     }
   3100 }
   3101 
   3102 void db_CollectMatches_f(db_Bucket_f **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
   3103 {
   3104     int i,j,k,br_nr;
   3105     unsigned long count;
   3106     db_Bucket_f *br;
   3107     db_PointInfo_f *pir,*pir2;
   3108 
   3109     count=0;
   3110     /*For all buckets*/
   3111     for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
   3112     {
   3113         br=&bp_l[i][j];
   3114         br_nr=br->nr;
   3115         /*For all points in bucket*/
   3116         for(k=0;k<br_nr;k++)
   3117         {
   3118             pir=br->ptr+k;
   3119             pir2=pir->pir;
   3120             if(pir2)
   3121             {
   3122                 /*This point has a best match*/
   3123                 if((pir2->pir)==pir)
   3124                 {
   3125                     /*We have a mutually consistent match*/
   3126                     if(count<target)
   3127                     {
   3128                         id_l[count]=pir->id;
   3129                         id_r[count]=pir2->id;
   3130                         count++;
   3131                     }
   3132                 }
   3133             }
   3134         }
   3135     }
   3136     *nr_matches=count;
   3137 }
   3138 
   3139 void db_CollectMatches_u(db_Bucket_u **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
   3140 {
   3141     int i,j,k,br_nr;
   3142     unsigned long count;
   3143     db_Bucket_u *br;
   3144     db_PointInfo_u *pir,*pir2;
   3145 
   3146     count=0;
   3147     /*For all buckets*/
   3148     for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
   3149     {
   3150         br=&bp_l[i][j];
   3151         br_nr=br->nr;
   3152         /*For all points in bucket*/
   3153         for(k=0;k<br_nr;k++)
   3154         {
   3155             pir=br->ptr+k;
   3156             pir2=pir->pir;
   3157             if(pir2)
   3158             {
   3159                 /*This point has a best match*/
   3160                 if((pir2->pir)==pir)
   3161                 {
   3162                     /*We have a mutually consistent match*/
   3163                     if(count<target)
   3164                     {
   3165                         id_l[count]=pir->id;
   3166                         id_r[count]=pir2->id;
   3167                         count++;
   3168                     }
   3169                 }
   3170             }
   3171         }
   3172     }
   3173     *nr_matches=count;
   3174 }
   3175 
   3176 db_Matcher_f::db_Matcher_f()
   3177 {
   3178     m_w=0; m_h=0;
   3179 }
   3180 
   3181 db_Matcher_f::~db_Matcher_f()
   3182 {
   3183     Clean();
   3184 }
   3185 
   3186 void db_Matcher_f::Clean()
   3187 {
   3188     if(m_w)
   3189     {
   3190         /*Free buckets*/
   3191         db_FreeBuckets_f(m_bp_l,m_nr_h,m_nr_v);
   3192         db_FreeBuckets_f(m_bp_r,m_nr_h,m_nr_v);
   3193         /*Free space for patch layouts*/
   3194         delete [] m_patch_space;
   3195     }
   3196     m_w=0; m_h=0;
   3197 }
   3198 
   3199 unsigned long db_Matcher_f::Init(int im_width,int im_height,double max_disparity,int target_nr_corners)
   3200 {
   3201     Clean();
   3202     m_w=im_width;
   3203     m_h=im_height;
   3204     m_bw=db_maxi(1,(int) (max_disparity*((double)im_width)));
   3205     m_bh=db_maxi(1,(int) (max_disparity*((double)im_height)));
   3206     m_nr_h=1+(im_width-1)/m_bw;
   3207     m_nr_v=1+(im_height-1)/m_bh;
   3208     m_bd=db_maxi(1,(int)(((double)target_nr_corners)*
   3209         max_disparity*max_disparity));
   3210     m_target=target_nr_corners;
   3211     m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
   3212     m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
   3213 
   3214     /*Alloc bucket structure*/
   3215     m_bp_l=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
   3216     m_bp_r=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
   3217 
   3218     /*Alloc 16byte-aligned space for patch layouts*/
   3219     m_patch_space=new float [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
   3220     m_aligned_patch_space=db_AlignPointer_f(m_patch_space,16);
   3221 
   3222     return(m_target);
   3223 }
   3224 
   3225 void db_Matcher_f::Match(const float * const *l_img,const float * const *r_img,
   3226         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
   3227         int *id_l,int *id_r,int *nr_matches,const double H[9])
   3228 {
   3229     float *ps;
   3230 
   3231     /*Insert the corners into bucket structure*/
   3232     ps=db_FillBuckets_f(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l);
   3233     if(H==0) db_FillBuckets_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r);
   3234     else db_FillBucketsPrewarped_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
   3235 
   3236     /*Compute all the necessary match scores*/
   3237     db_MatchBuckets_f(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB);
   3238 
   3239     /*Collect the correspondences*/
   3240     db_CollectMatches_f(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
   3241 }
   3242 
   3243 db_Matcher_u::db_Matcher_u()
   3244 {
   3245     m_w=0; m_h=0;
   3246     m_rect_window = 0;
   3247     m_bw=m_bh=m_nr_h=m_nr_v=m_bd=m_target=0;
   3248     m_bp_l=m_bp_r=0;
   3249     m_patch_space=m_aligned_patch_space=0;
   3250 }
   3251 
   3252 db_Matcher_u::db_Matcher_u(const db_Matcher_u& cm)
   3253 {
   3254     Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
   3255 }
   3256 
   3257 db_Matcher_u& db_Matcher_u::operator= (const db_Matcher_u& cm)
   3258 {
   3259     if ( this == &cm ) return *this;
   3260     Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
   3261     return *this;
   3262 }
   3263 
   3264 
   3265 db_Matcher_u::~db_Matcher_u()
   3266 {
   3267     Clean();
   3268 }
   3269 
   3270 void db_Matcher_u::Clean()
   3271 {
   3272     if(m_w)
   3273     {
   3274         /*Free buckets*/
   3275         db_FreeBuckets_u(m_bp_l,m_nr_h,m_nr_v);
   3276         db_FreeBuckets_u(m_bp_r,m_nr_h,m_nr_v);
   3277         /*Free space for patch layouts*/
   3278         delete [] m_patch_space;
   3279     }
   3280     m_w=0; m_h=0;
   3281 }
   3282 
   3283 
   3284 unsigned long db_Matcher_u::Init(int im_width,int im_height,double max_disparity,int target_nr_corners,
   3285                                  double max_disparity_v, bool use_smaller_matching_window, int use_21)
   3286 {
   3287     Clean();
   3288     m_w=im_width;
   3289     m_h=im_height;
   3290     m_max_disparity=max_disparity;
   3291     m_max_disparity_v=max_disparity_v;
   3292 
   3293     if ( max_disparity_v != DB_DEFAULT_NO_DISPARITY )
   3294     {
   3295         m_rect_window = 1;
   3296 
   3297         m_bw=db_maxi(1,(int)(max_disparity*((double)im_width)));
   3298         m_bh=db_maxi(1,(int)(max_disparity_v*((double)im_height)));
   3299 
   3300         m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity_v));
   3301 
   3302         m_kA=(int)(max_disparity*m_w);
   3303         m_kB=(int)(max_disparity_v*m_h);
   3304 
   3305     } else
   3306     {
   3307         m_bw=(int)db_maxi(1,(int)(max_disparity*((double)im_width)));
   3308         m_bh=(int)db_maxi(1,(int)(max_disparity*((double)im_height)));
   3309 
   3310         m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity));
   3311 
   3312         m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
   3313         m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
   3314     }
   3315 
   3316     m_nr_h=1+(im_width-1)/m_bw;
   3317     m_nr_v=1+(im_height-1)/m_bh;
   3318 
   3319     m_target=target_nr_corners;
   3320 
   3321     /*Alloc bucket structure*/
   3322     m_bp_l=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
   3323     m_bp_r=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
   3324 
   3325     m_use_smaller_matching_window = use_smaller_matching_window;
   3326     m_use_21 = use_21;
   3327 
   3328     if(m_use_21)
   3329     {
   3330         /*Alloc 64byte-aligned space for patch layouts*/
   3331         m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*512+64];
   3332         m_aligned_patch_space=db_AlignPointer_s(m_patch_space,64);
   3333     }
   3334     else
   3335     {
   3336     if(!m_use_smaller_matching_window)
   3337     {
   3338         /*Alloc 16byte-aligned space for patch layouts*/
   3339         m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
   3340         m_aligned_patch_space=db_AlignPointer_s(m_patch_space,16);
   3341     }
   3342     else
   3343     {
   3344         /*Alloc 4byte-aligned space for patch layouts*/
   3345         m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*32+4];
   3346         m_aligned_patch_space=db_AlignPointer_s(m_patch_space,4);
   3347     }
   3348     }
   3349 
   3350     return(m_target);
   3351 }
   3352 
   3353 void db_Matcher_u::Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
   3354         const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
   3355         int *id_l,int *id_r,int *nr_matches,const double H[9],int affine)
   3356 {
   3357     short *ps;
   3358 
   3359     /*Insert the corners into bucket structure*/
   3360     ps=db_FillBuckets_u(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l,m_use_smaller_matching_window,m_use_21);
   3361     if(H==0)
   3362         db_FillBuckets_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,m_use_smaller_matching_window,m_use_21);
   3363     else
   3364     {
   3365         if (affine)
   3366         {
   3367             double Hinv[9];
   3368             db_InvertAffineTransform(Hinv,H);
   3369             float r_w, c_w;
   3370             float stretch_x[2];
   3371             float stretch_y[2];
   3372             AffineWarpPointOffset(r_w,c_w,Hinv, 5,5);
   3373             stretch_x[0]=db_absf(c_w);stretch_y[0]=db_absf(r_w);
   3374             AffineWarpPointOffset(r_w,c_w,Hinv, 5,-5);
   3375             stretch_x[1]=db_absf(c_w);stretch_y[1]=db_absf(r_w);
   3376             int max_stretxh_x=(int) (db_maxd(stretch_x[0],stretch_x[1]));
   3377             int max_stretxh_y=(int) (db_maxd(stretch_y[0],stretch_y[1]));
   3378             int warpbounds[4]={max_stretxh_x,m_w-1-max_stretxh_x,max_stretxh_y,m_h-1-max_stretxh_y};
   3379 
   3380             for (int r=-5;r<=5;r++){
   3381                 for (int c=-5;c<=5;c++){
   3382                     AffineWarpPointOffset(r_w,c_w,Hinv,r,c);
   3383                     AffineWarpPoint_BL_LUT_y[r+5][c+5]=r_w;
   3384                     AffineWarpPoint_BL_LUT_x[r+5][c+5]=c_w;
   3385 
   3386                     AffineWarpPoint_NN_LUT_y[r+5][c+5]=db_roundi(r_w);
   3387                     AffineWarpPoint_NN_LUT_x[r+5][c+5]=db_roundi(c_w);
   3388 
   3389                 }
   3390             }
   3391 
   3392             db_FillBucketsPrewarpedAffine_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,
   3393                 x_r,y_r,nr_r,H,Hinv,warpbounds,affine);
   3394         }
   3395         else
   3396             db_FillBucketsPrewarped_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
   3397     }
   3398 
   3399 
   3400     /*Compute all the necessary match scores*/
   3401     db_MatchBuckets_u(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB, m_rect_window,m_use_smaller_matching_window,m_use_21);
   3402 
   3403     /*Collect the correspondences*/
   3404     db_CollectMatches_u(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
   3405 }
   3406 
   3407 int db_Matcher_u::IsAllocated()
   3408 {
   3409     return (int)(m_w != 0);
   3410 }
   3411