Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 /* ////////////////////////////////////////////////////////////////////
     43 //
     44 //  Geometrical transforms on images and matrices: rotation, zoom etc.
     45 //
     46 // */
     47 
     48 #include "_cv.h"
     49 
     50 
     51 /************** interpolation constants and tables ***************/
     52 
     53 #define ICV_WARP_MUL_ONE_8U(x)  ((x) << ICV_WARP_SHIFT)
     54 #define ICV_WARP_DESCALE_8U(x)  CV_DESCALE((x), ICV_WARP_SHIFT*2)
     55 #define ICV_WARP_CLIP_X(x)      ((unsigned)(x) < (unsigned)ssize.width ? \
     56                                 (x) : (x) < 0 ? 0 : ssize.width - 1)
     57 #define ICV_WARP_CLIP_Y(y)      ((unsigned)(y) < (unsigned)ssize.height ? \
     58                                 (y) : (y) < 0 ? 0 : ssize.height - 1)
     59 
     60 float icvLinearCoeffs[(ICV_LINEAR_TAB_SIZE+1)*2];
     61 
     62 void icvInitLinearCoeffTab()
     63 {
     64     static int inittab = 0;
     65     if( !inittab )
     66     {
     67         for( int i = 0; i <= ICV_LINEAR_TAB_SIZE; i++ )
     68         {
     69             float x = (float)i/ICV_LINEAR_TAB_SIZE;
     70             icvLinearCoeffs[i*2] = x;
     71             icvLinearCoeffs[i*2+1] = 1.f - x;
     72         }
     73 
     74         inittab = 1;
     75     }
     76 }
     77 
     78 
     79 float icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE+1)*2];
     80 
     81 void icvInitCubicCoeffTab()
     82 {
     83     static int inittab = 0;
     84     if( !inittab )
     85     {
     86 #if 0
     87         // classical Mitchell-Netravali filter
     88         const double B = 1./3;
     89         const double C = 1./3;
     90         const double p0 = (6 - 2*B)/6.;
     91         const double p2 = (-18 + 12*B + 6*C)/6.;
     92         const double p3 = (12 - 9*B - 6*C)/6.;
     93         const double q0 = (8*B + 24*C)/6.;
     94         const double q1 = (-12*B - 48*C)/6.;
     95         const double q2 = (6*B + 30*C)/6.;
     96         const double q3 = (-B - 6*C)/6.;
     97 
     98         #define ICV_CUBIC_1(x)  (((x)*p3 + p2)*(x)*(x) + p0)
     99         #define ICV_CUBIC_2(x)  ((((x)*q3 + q2)*(x) + q1)*(x) + q0)
    100 #else
    101         // alternative "sharp" filter
    102         const double A = -0.75;
    103         #define ICV_CUBIC_1(x)  (((A + 2)*(x) - (A + 3))*(x)*(x) + 1)
    104         #define ICV_CUBIC_2(x)  (((A*(x) - 5*A)*(x) + 8*A)*(x) - 4*A)
    105 #endif
    106         for( int i = 0; i <= ICV_CUBIC_TAB_SIZE; i++ )
    107         {
    108             float x = (float)i/ICV_CUBIC_TAB_SIZE;
    109             icvCubicCoeffs[i*2] = (float)ICV_CUBIC_1(x);
    110             x += 1.f;
    111             icvCubicCoeffs[i*2+1] = (float)ICV_CUBIC_2(x);
    112         }
    113 
    114         inittab = 1;
    115     }
    116 }
    117 
    118 
    119 /****************************************************************************************\
    120 *                                         Resize                                         *
    121 \****************************************************************************************/
    122 
    123 static CvStatus CV_STDCALL
    124 icvResize_NN_8u_C1R( const uchar* src, int srcstep, CvSize ssize,
    125                      uchar* dst, int dststep, CvSize dsize, int pix_size )
    126 {
    127     int* x_ofs = (int*)cvStackAlloc( dsize.width * sizeof(x_ofs[0]) );
    128     int pix_size4 = pix_size / sizeof(int);
    129     int x, y, t;
    130 
    131     for( x = 0; x < dsize.width; x++ )
    132     {
    133         t = (ssize.width*x*2 + MIN(ssize.width, dsize.width) - 1)/(dsize.width*2);
    134         t -= t >= ssize.width;
    135         x_ofs[x] = t*pix_size;
    136     }
    137 
    138     for( y = 0; y < dsize.height; y++, dst += dststep )
    139     {
    140         const uchar* tsrc;
    141         t = (ssize.height*y*2 + MIN(ssize.height, dsize.height) - 1)/(dsize.height*2);
    142         t -= t >= ssize.height;
    143         tsrc = src + srcstep*t;
    144 
    145         switch( pix_size )
    146         {
    147         case 1:
    148             for( x = 0; x <= dsize.width - 2; x += 2 )
    149             {
    150                 uchar t0 = tsrc[x_ofs[x]];
    151                 uchar t1 = tsrc[x_ofs[x+1]];
    152 
    153                 dst[x] = t0;
    154                 dst[x+1] = t1;
    155             }
    156 
    157             for( ; x < dsize.width; x++ )
    158                 dst[x] = tsrc[x_ofs[x]];
    159             break;
    160         case 2:
    161             for( x = 0; x < dsize.width; x++ )
    162                 *(ushort*)(dst + x*2) = *(ushort*)(tsrc + x_ofs[x]);
    163             break;
    164         case 3:
    165             for( x = 0; x < dsize.width; x++ )
    166             {
    167                 const uchar* _tsrc = tsrc + x_ofs[x];
    168                 dst[x*3] = _tsrc[0]; dst[x*3+1] = _tsrc[1]; dst[x*3+2] = _tsrc[2];
    169             }
    170             break;
    171         case 4:
    172             for( x = 0; x < dsize.width; x++ )
    173                 *(int*)(dst + x*4) = *(int*)(tsrc + x_ofs[x]);
    174             break;
    175         case 6:
    176             for( x = 0; x < dsize.width; x++ )
    177             {
    178                 const ushort* _tsrc = (const ushort*)(tsrc + x_ofs[x]);
    179                 ushort* _tdst = (ushort*)(dst + x*6);
    180                 _tdst[0] = _tsrc[0]; _tdst[1] = _tsrc[1]; _tdst[2] = _tsrc[2];
    181             }
    182             break;
    183         default:
    184             for( x = 0; x < dsize.width; x++ )
    185                 CV_MEMCPY_INT( dst + x*pix_size, tsrc + x_ofs[x], pix_size4 );
    186         }
    187     }
    188 
    189     return CV_OK;
    190 }
    191 
    192 
    193 typedef struct CvResizeAlpha
    194 {
    195     int idx;
    196     union
    197     {
    198         float alpha;
    199         int ialpha;
    200     };
    201 }
    202 CvResizeAlpha;
    203 
    204 
    205 #define  ICV_DEF_RESIZE_BILINEAR_FUNC( flavor, arrtype, worktype, alpha_field,  \
    206                                        mul_one_macro, descale_macro )           \
    207 static CvStatus CV_STDCALL                                                      \
    208 icvResize_Bilinear_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\
    209                                    arrtype* dst, int dststep, CvSize dsize,     \
    210                                    int cn, int xmax,                            \
    211                                    const CvResizeAlpha* xofs,                   \
    212                                    const CvResizeAlpha* yofs,                   \
    213                                    worktype* buf0, worktype* buf1 )             \
    214 {                                                                               \
    215     int prev_sy0 = -1, prev_sy1 = -1;                                           \
    216     int k, dx, dy;                                                              \
    217                                                                                 \
    218     srcstep /= sizeof(src[0]);                                                  \
    219     dststep /= sizeof(dst[0]);                                                  \
    220     dsize.width *= cn;                                                          \
    221     xmax *= cn;                                                                 \
    222                                                                                 \
    223     for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \
    224     {                                                                           \
    225         worktype fy = yofs[dy].alpha_field, *swap_t;                            \
    226         int sy0 = yofs[dy].idx, sy1 = sy0 + (fy > 0 && sy0 < ssize.height-1);   \
    227                                                                                 \
    228         if( sy0 == prev_sy0 && sy1 == prev_sy1 )                                \
    229             k = 2;                                                              \
    230         else if( sy0 == prev_sy1 )                                              \
    231         {                                                                       \
    232             CV_SWAP( buf0, buf1, swap_t );                                      \
    233             k = 1;                                                              \
    234         }                                                                       \
    235         else                                                                    \
    236             k = 0;                                                              \
    237                                                                                 \
    238         for( ; k < 2; k++ )                                                     \
    239         {                                                                       \
    240             worktype* _buf = k == 0 ? buf0 : buf1;                              \
    241             const arrtype* _src;                                                \
    242             int sy = k == 0 ? sy0 : sy1;                                        \
    243             if( k == 1 && sy1 == sy0 )                                          \
    244             {                                                                   \
    245                 memcpy( buf1, buf0, dsize.width*sizeof(buf0[0]) );              \
    246                 continue;                                                       \
    247             }                                                                   \
    248                                                                                 \
    249             _src = src + sy*srcstep;                                            \
    250             for( dx = 0; dx < xmax; dx++ )                                      \
    251             {                                                                   \
    252                 int sx = xofs[dx].idx;                                          \
    253                 worktype fx = xofs[dx].alpha_field;                             \
    254                 worktype t = _src[sx];                                          \
    255                 _buf[dx] = mul_one_macro(t) + fx*(_src[sx+cn] - t);             \
    256             }                                                                   \
    257                                                                                 \
    258             for( ; dx < dsize.width; dx++ )                                     \
    259                 _buf[dx] = mul_one_macro(_src[xofs[dx].idx]);                   \
    260         }                                                                       \
    261                                                                                 \
    262         prev_sy0 = sy0;                                                         \
    263         prev_sy1 = sy1;                                                         \
    264                                                                                 \
    265         if( sy0 == sy1 )                                                        \
    266             for( dx = 0; dx < dsize.width; dx++ )                               \
    267                 dst[dx] = (arrtype)descale_macro( mul_one_macro(buf0[dx]));     \
    268         else                                                                    \
    269             for( dx = 0; dx < dsize.width; dx++ )                               \
    270                 dst[dx] = (arrtype)descale_macro( mul_one_macro(buf0[dx]) +     \
    271                                                   fy*(buf1[dx] - buf0[dx]));    \
    272     }                                                                           \
    273                                                                                 \
    274     return CV_OK;                                                               \
    275 }
    276 
    277 
    278 typedef struct CvDecimateAlpha
    279 {
    280     int si, di;
    281     float alpha;
    282 }
    283 CvDecimateAlpha;
    284 
    285 
    286 #define  ICV_DEF_RESIZE_AREA_FAST_FUNC( flavor, arrtype, worktype, cast_macro ) \
    287 static CvStatus CV_STDCALL                                                      \
    288 icvResize_AreaFast_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\
    289                               arrtype* dst, int dststep, CvSize dsize, int cn,  \
    290                               const int* ofs, const int* xofs )                 \
    291 {                                                                               \
    292     int dy, dx, k = 0;                                                          \
    293     int scale_x = ssize.width/dsize.width;                                      \
    294     int scale_y = ssize.height/dsize.height;                                    \
    295     int area = scale_x*scale_y;                                                 \
    296     float scale = 1.f/(scale_x*scale_y);                                        \
    297                                                                                 \
    298     srcstep /= sizeof(src[0]);                                                  \
    299     dststep /= sizeof(dst[0]);                                                  \
    300     dsize.width *= cn;                                                          \
    301                                                                                 \
    302     for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \
    303         for( dx = 0; dx < dsize.width; dx++ )                                   \
    304         {                                                                       \
    305             const arrtype* _src = src + dy*scale_y*srcstep + xofs[dx];          \
    306             worktype sum = 0;                                                   \
    307                                                                                 \
    308             for( k = 0; k <= area - 4; k += 4 )                                 \
    309                 sum += _src[ofs[k]] + _src[ofs[k+1]] +                          \
    310                        _src[ofs[k+2]] + _src[ofs[k+3]];                         \
    311                                                                                 \
    312             for( ; k < area; k++ )                                              \
    313                 sum += _src[ofs[k]];                                            \
    314                                                                                 \
    315             dst[dx] = (arrtype)cast_macro( sum*scale );                         \
    316         }                                                                       \
    317                                                                                 \
    318     return CV_OK;                                                               \
    319 }
    320 
    321 
    322 #define  ICV_DEF_RESIZE_AREA_FUNC( flavor, arrtype, load_macro, cast_macro )    \
    323 static CvStatus CV_STDCALL                                                      \
    324 icvResize_Area_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,   \
    325                                arrtype* dst, int dststep, CvSize dsize,         \
    326                                int cn, const CvDecimateAlpha* xofs,             \
    327                                int xofs_count, float* buf, float* sum )         \
    328 {                                                                               \
    329     int k, sy, dx, cur_dy = 0;                                                  \
    330     float scale_y = (float)ssize.height/dsize.height;                           \
    331                                                                                 \
    332     srcstep /= sizeof(src[0]);                                                  \
    333     dststep /= sizeof(dst[0]);                                                  \
    334     dsize.width *= cn;                                                          \
    335                                                                                 \
    336     for( sy = 0; sy < ssize.height; sy++, src += srcstep )                      \
    337     {                                                                           \
    338         if( cn == 1 )                                                           \
    339             for( k = 0; k < xofs_count; k++ )                                   \
    340             {                                                                   \
    341                 int dxn = xofs[k].di;                                           \
    342                 float alpha = xofs[k].alpha;                                    \
    343                 buf[dxn] = buf[dxn] + load_macro(src[xofs[k].si])*alpha;        \
    344             }                                                                   \
    345         else if( cn == 2 )                                                      \
    346             for( k = 0; k < xofs_count; k++ )                                   \
    347             {                                                                   \
    348                 int sxn = xofs[k].si;                                           \
    349                 int dxn = xofs[k].di;                                           \
    350                 float alpha = xofs[k].alpha;                                    \
    351                 float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \
    352                 float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \
    353                 buf[dxn] = t0; buf[dxn+1] = t1;                                 \
    354             }                                                                   \
    355         else if( cn == 3 )                                                      \
    356             for( k = 0; k < xofs_count; k++ )                                   \
    357             {                                                                   \
    358                 int sxn = xofs[k].si;                                           \
    359                 int dxn = xofs[k].di;                                           \
    360                 float alpha = xofs[k].alpha;                                    \
    361                 float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \
    362                 float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \
    363                 float t2 = buf[dxn+2] + load_macro(src[sxn+2])*alpha;           \
    364                 buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;                \
    365             }                                                                   \
    366         else                                                                    \
    367             for( k = 0; k < xofs_count; k++ )                                   \
    368             {                                                                   \
    369                 int sxn = xofs[k].si;                                           \
    370                 int dxn = xofs[k].di;                                           \
    371                 float alpha = xofs[k].alpha;                                    \
    372                 float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \
    373                 float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \
    374                 buf[dxn] = t0; buf[dxn+1] = t1;                                 \
    375                 t0 = buf[dxn+2] + load_macro(src[sxn+2])*alpha;                 \
    376                 t1 = buf[dxn+3] + load_macro(src[sxn+3])*alpha;                 \
    377                 buf[dxn+2] = t0; buf[dxn+3] = t1;                               \
    378             }                                                                   \
    379                                                                                 \
    380         if( (cur_dy + 1)*scale_y <= sy + 1 || sy == ssize.height - 1 )          \
    381         {                                                                       \
    382             float beta = sy + 1 - (cur_dy+1)*scale_y, beta1;                    \
    383             beta = MAX( beta, 0 );                                              \
    384             beta1 = 1 - beta;                                                   \
    385             if( fabs(beta) < 1e-3 )                                             \
    386                 for( dx = 0; dx < dsize.width; dx++ )                           \
    387                 {                                                               \
    388                     dst[dx] = (arrtype)cast_macro(sum[dx] + buf[dx]);           \
    389                     sum[dx] = buf[dx] = 0;                                      \
    390                 }                                                               \
    391             else                                                                \
    392                 for( dx = 0; dx < dsize.width; dx++ )                           \
    393                 {                                                               \
    394                     dst[dx] = (arrtype)cast_macro(sum[dx] + buf[dx]*beta1);     \
    395                     sum[dx] = buf[dx]*beta;                                     \
    396                     buf[dx] = 0;                                                \
    397                 }                                                               \
    398             dst += dststep;                                                     \
    399             cur_dy++;                                                           \
    400         }                                                                       \
    401         else                                                                    \
    402             for( dx = 0; dx < dsize.width; dx += 2 )                            \
    403             {                                                                   \
    404                 float t0 = sum[dx] + buf[dx];                                   \
    405                 float t1 = sum[dx+1] + buf[dx+1];                               \
    406                 sum[dx] = t0; sum[dx+1] = t1;                                   \
    407                 buf[dx] = buf[dx+1] = 0;                                        \
    408             }                                                                   \
    409     }                                                                           \
    410                                                                                 \
    411     return CV_OK;                                                               \
    412 }
    413 
    414 
    415 #define  ICV_DEF_RESIZE_BICUBIC_FUNC( flavor, arrtype, worktype, load_macro,    \
    416                                       cast_macro1, cast_macro2 )                \
    417 static CvStatus CV_STDCALL                                                      \
    418 icvResize_Bicubic_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\
    419                                   arrtype* dst, int dststep, CvSize dsize,      \
    420                                   int cn, int xmin, int xmax,                   \
    421                                   const CvResizeAlpha* xofs, float** buf )      \
    422 {                                                                               \
    423     float scale_y = (float)ssize.height/dsize.height;                           \
    424     int dx, dy, sx, sy, sy2, ify;                                               \
    425     int prev_sy2 = -2;                                                          \
    426                                                                                 \
    427     xmin *= cn; xmax *= cn;                                                     \
    428     dsize.width *= cn;                                                          \
    429     ssize.width *= cn;                                                          \
    430     srcstep /= sizeof(src[0]);                                                  \
    431     dststep /= sizeof(dst[0]);                                                  \
    432                                                                                 \
    433     for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \
    434     {                                                                           \
    435         float w0, w1, w2, w3;                                                   \
    436         float fy, x, sum;                                                       \
    437         float *row, *row0, *row1, *row2, *row3;                                 \
    438         int k1, k = 4;                                                          \
    439                                                                                 \
    440         fy = dy*scale_y;                                                        \
    441         sy = cvFloor(fy);                                                       \
    442         fy -= sy;                                                               \
    443         ify = cvRound(fy*ICV_CUBIC_TAB_SIZE);                                   \
    444         sy2 = sy + 2;                                                           \
    445                                                                                 \
    446         if( sy2 > prev_sy2 )                                                    \
    447         {                                                                       \
    448             int delta = prev_sy2 - sy + 2;                                      \
    449             for( k = 0; k < delta; k++ )                                        \
    450                 CV_SWAP( buf[k], buf[k+4-delta], row );                         \
    451         }                                                                       \
    452                                                                                 \
    453         for( sy += k - 1; k < 4; k++, sy++ )                                    \
    454         {                                                                       \
    455             const arrtype* _src = src + sy*srcstep;                             \
    456                                                                                 \
    457             row = buf[k];                                                       \
    458             if( sy < 0 )                                                        \
    459                 continue;                                                       \
    460             if( sy >= ssize.height )                                            \
    461             {                                                                   \
    462                 assert( k > 0 );                                                \
    463                 memcpy( row, buf[k-1], dsize.width*sizeof(row[0]) );            \
    464                 continue;                                                       \
    465             }                                                                   \
    466                                                                                 \
    467             for( dx = 0; dx < xmin; dx++ )                                      \
    468             {                                                                   \
    469                 int ifx = xofs[dx].ialpha, sx0 = xofs[dx].idx;                  \
    470                 sx = sx0 + cn*2;                                                \
    471                 while( sx >= ssize.width )                                      \
    472                     sx -= cn;                                                   \
    473                 x = load_macro(_src[sx]);                                       \
    474                 sum = x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2 + 1];       \
    475                 if( (unsigned)(sx = sx0 + cn) < (unsigned)ssize.width )         \
    476                     x = load_macro(_src[sx]);                                   \
    477                 sum += x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2];          \
    478                 if( (unsigned)(sx = sx0) < (unsigned)ssize.width )              \
    479                     x = load_macro(_src[sx]);                                   \
    480                 sum += x*icvCubicCoeffs[ifx*2];                                 \
    481                 if( (unsigned)(sx = sx0 - cn) < (unsigned)ssize.width )         \
    482                     x = load_macro(_src[sx]);                                   \
    483                 row[dx] = sum + x*icvCubicCoeffs[ifx*2 + 1];                    \
    484             }                                                                   \
    485                                                                                 \
    486             for( ; dx < xmax; dx++ )                                            \
    487             {                                                                   \
    488                 int ifx = xofs[dx].ialpha;                                      \
    489                 int sx0 = xofs[dx].idx;                                         \
    490                 row[dx] = _src[sx0 - cn]*icvCubicCoeffs[ifx*2 + 1] +            \
    491                     _src[sx0]*icvCubicCoeffs[ifx*2] +                           \
    492                     _src[sx0 + cn]*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] + \
    493                     _src[sx0 + cn*2]*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\
    494             }                                                                   \
    495                                                                                 \
    496             for( ; dx < dsize.width; dx++ )                                     \
    497             {                                                                   \
    498                 int ifx = xofs[dx].ialpha, sx0 = xofs[dx].idx;                  \
    499                 x = load_macro(_src[sx0 - cn]);                                 \
    500                 sum = x*icvCubicCoeffs[ifx*2 + 1];                              \
    501                 if( (unsigned)(sx = sx0) < (unsigned)ssize.width )              \
    502                     x = load_macro(_src[sx]);                                   \
    503                 sum += x*icvCubicCoeffs[ifx*2];                                 \
    504                 if( (unsigned)(sx = sx0 + cn) < (unsigned)ssize.width )         \
    505                     x = load_macro(_src[sx]);                                   \
    506                 sum += x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2];          \
    507                 if( (unsigned)(sx = sx0 + cn*2) < (unsigned)ssize.width )       \
    508                     x = load_macro(_src[sx]);                                   \
    509                 row[dx] = sum + x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1]; \
    510             }                                                                   \
    511                                                                                 \
    512             if( sy == 0 )                                                       \
    513                 for( k1 = 0; k1 < k; k1++ )                                     \
    514                     memcpy( buf[k1], row, dsize.width*sizeof(row[0]));          \
    515         }                                                                       \
    516                                                                                 \
    517         prev_sy2 = sy2;                                                         \
    518                                                                                 \
    519         row0 = buf[0]; row1 = buf[1];                                           \
    520         row2 = buf[2]; row3 = buf[3];                                           \
    521                                                                                 \
    522         w0 = icvCubicCoeffs[ify*2+1];                                           \
    523         w1 = icvCubicCoeffs[ify*2];                                             \
    524         w2 = icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ify)*2];                      \
    525         w3 = icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ify)*2 + 1];                  \
    526                                                                                 \
    527         for( dx = 0; dx < dsize.width; dx++ )                                   \
    528         {                                                                       \
    529             worktype val = cast_macro1( row0[dx]*w0 + row1[dx]*w1 +             \
    530                                         row2[dx]*w2 + row3[dx]*w3 );            \
    531             dst[dx] = cast_macro2(val);                                         \
    532         }                                                                       \
    533     }                                                                           \
    534                                                                                 \
    535     return CV_OK;                                                               \
    536 }
    537 
    538 
    539 ICV_DEF_RESIZE_BILINEAR_FUNC( 8u, uchar, int, ialpha,
    540                               ICV_WARP_MUL_ONE_8U, ICV_WARP_DESCALE_8U )
    541 ICV_DEF_RESIZE_BILINEAR_FUNC( 16u, ushort, float, alpha, CV_NOP, cvRound )
    542 ICV_DEF_RESIZE_BILINEAR_FUNC( 32f, float, float, alpha, CV_NOP, CV_NOP )
    543 
    544 ICV_DEF_RESIZE_BICUBIC_FUNC( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U )
    545 ICV_DEF_RESIZE_BICUBIC_FUNC( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
    546 ICV_DEF_RESIZE_BICUBIC_FUNC( 32f, float, float, CV_NOP, CV_NOP, CV_NOP )
    547 
    548 ICV_DEF_RESIZE_AREA_FAST_FUNC( 8u, uchar, int, cvRound )
    549 ICV_DEF_RESIZE_AREA_FAST_FUNC( 16u, ushort, int, cvRound )
    550 ICV_DEF_RESIZE_AREA_FAST_FUNC( 32f, float, float, CV_NOP )
    551 
    552 ICV_DEF_RESIZE_AREA_FUNC( 8u, uchar, CV_8TO32F, cvRound )
    553 ICV_DEF_RESIZE_AREA_FUNC( 16u, ushort, CV_NOP, cvRound )
    554 ICV_DEF_RESIZE_AREA_FUNC( 32f, float, CV_NOP, CV_NOP )
    555 
    556 
    557 static void icvInitResizeTab( CvFuncTable* bilin_tab,
    558                               CvFuncTable* bicube_tab,
    559                               CvFuncTable* areafast_tab,
    560                               CvFuncTable* area_tab )
    561 {
    562     bilin_tab->fn_2d[CV_8U] = (void*)icvResize_Bilinear_8u_CnR;
    563     bilin_tab->fn_2d[CV_16U] = (void*)icvResize_Bilinear_16u_CnR;
    564     bilin_tab->fn_2d[CV_32F] = (void*)icvResize_Bilinear_32f_CnR;
    565 
    566     bicube_tab->fn_2d[CV_8U] = (void*)icvResize_Bicubic_8u_CnR;
    567     bicube_tab->fn_2d[CV_16U] = (void*)icvResize_Bicubic_16u_CnR;
    568     bicube_tab->fn_2d[CV_32F] = (void*)icvResize_Bicubic_32f_CnR;
    569 
    570     areafast_tab->fn_2d[CV_8U] = (void*)icvResize_AreaFast_8u_CnR;
    571     areafast_tab->fn_2d[CV_16U] = (void*)icvResize_AreaFast_16u_CnR;
    572     areafast_tab->fn_2d[CV_32F] = (void*)icvResize_AreaFast_32f_CnR;
    573 
    574     area_tab->fn_2d[CV_8U] = (void*)icvResize_Area_8u_CnR;
    575     area_tab->fn_2d[CV_16U] = (void*)icvResize_Area_16u_CnR;
    576     area_tab->fn_2d[CV_32F] = (void*)icvResize_Area_32f_CnR;
    577 }
    578 
    579 
    580 typedef CvStatus (CV_STDCALL * CvResizeBilinearFunc)
    581                     ( const void* src, int srcstep, CvSize ssize,
    582                       void* dst, int dststep, CvSize dsize,
    583                       int cn, int xmax, const CvResizeAlpha* xofs,
    584                       const CvResizeAlpha* yofs, float* buf0, float* buf1 );
    585 
    586 typedef CvStatus (CV_STDCALL * CvResizeBicubicFunc)
    587                     ( const void* src, int srcstep, CvSize ssize,
    588                       void* dst, int dststep, CvSize dsize,
    589                       int cn, int xmin, int xmax,
    590                       const CvResizeAlpha* xofs, float** buf );
    591 
    592 typedef CvStatus (CV_STDCALL * CvResizeAreaFastFunc)
    593                     ( const void* src, int srcstep, CvSize ssize,
    594                       void* dst, int dststep, CvSize dsize,
    595                       int cn, const int* ofs, const int *xofs );
    596 
    597 typedef CvStatus (CV_STDCALL * CvResizeAreaFunc)
    598                     ( const void* src, int srcstep, CvSize ssize,
    599                       void* dst, int dststep, CvSize dsize,
    600                       int cn, const CvDecimateAlpha* xofs,
    601                       int xofs_count, float* buf, float* sum );
    602 
    603 
    604 ////////////////////////////////// IPP resize functions //////////////////////////////////
    605 
    606 icvResize_8u_C1R_t icvResize_8u_C1R_p = 0;
    607 icvResize_8u_C3R_t icvResize_8u_C3R_p = 0;
    608 icvResize_8u_C4R_t icvResize_8u_C4R_p = 0;
    609 icvResize_16u_C1R_t icvResize_16u_C1R_p = 0;
    610 icvResize_16u_C3R_t icvResize_16u_C3R_p = 0;
    611 icvResize_16u_C4R_t icvResize_16u_C4R_p = 0;
    612 icvResize_32f_C1R_t icvResize_32f_C1R_p = 0;
    613 icvResize_32f_C3R_t icvResize_32f_C3R_p = 0;
    614 icvResize_32f_C4R_t icvResize_32f_C4R_p = 0;
    615 
    616 typedef CvStatus (CV_STDCALL * CvResizeIPPFunc)
    617 ( const void* src, CvSize srcsize, int srcstep, CvRect srcroi,
    618   void* dst, int dststep, CvSize dstroi,
    619   double xfactor, double yfactor, int interpolation );
    620 
    621 //////////////////////////////////////////////////////////////////////////////////////////
    622 
    623 CV_IMPL void
    624 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
    625 {
    626     static CvFuncTable bilin_tab, bicube_tab, areafast_tab, area_tab;
    627     static int inittab = 0;
    628     void* temp_buf = 0;
    629 
    630     CV_FUNCNAME( "cvResize" );
    631 
    632     __BEGIN__;
    633 
    634     CvMat srcstub, *src = (CvMat*)srcarr;
    635     CvMat dststub, *dst = (CvMat*)dstarr;
    636     CvSize ssize, dsize;
    637     float scale_x, scale_y;
    638     int k, sx, sy, dx, dy;
    639     int type, depth, cn;
    640 
    641     CV_CALL( src = cvGetMat( srcarr, &srcstub ));
    642     CV_CALL( dst = cvGetMat( dstarr, &dststub ));
    643 
    644     if( CV_ARE_SIZES_EQ( src, dst ))
    645     {
    646         CV_CALL( cvCopy( src, dst ));
    647         EXIT;
    648     }
    649 
    650     if( !CV_ARE_TYPES_EQ( src, dst ))
    651         CV_ERROR( CV_StsUnmatchedFormats, "" );
    652 
    653     if( !inittab )
    654     {
    655         icvInitResizeTab( &bilin_tab, &bicube_tab, &areafast_tab, &area_tab );
    656         inittab = 1;
    657     }
    658 
    659     ssize = cvGetMatSize( src );
    660     dsize = cvGetMatSize( dst );
    661     type = CV_MAT_TYPE(src->type);
    662     depth = CV_MAT_DEPTH(type);
    663     cn = CV_MAT_CN(type);
    664     scale_x = (float)ssize.width/dsize.width;
    665     scale_y = (float)ssize.height/dsize.height;
    666 
    667     if( method == CV_INTER_CUBIC &&
    668         (MIN(ssize.width, dsize.width) <= 4 ||
    669         MIN(ssize.height, dsize.height) <= 4) )
    670         method = CV_INTER_LINEAR;
    671 
    672     if( icvResize_8u_C1R_p &&
    673         MIN(ssize.width, dsize.width) > 4 &&
    674         MIN(ssize.height, dsize.height) > 4 )
    675     {
    676         CvResizeIPPFunc ipp_func =
    677             type == CV_8UC1 ? icvResize_8u_C1R_p :
    678             type == CV_8UC3 ? icvResize_8u_C3R_p :
    679             type == CV_8UC4 ? icvResize_8u_C4R_p :
    680             type == CV_16UC1 ? icvResize_16u_C1R_p :
    681             type == CV_16UC3 ? icvResize_16u_C3R_p :
    682             type == CV_16UC4 ? icvResize_16u_C4R_p :
    683             type == CV_32FC1 ? icvResize_32f_C1R_p :
    684             type == CV_32FC3 ? icvResize_32f_C3R_p :
    685             type == CV_32FC4 ? icvResize_32f_C4R_p : 0;
    686         if( ipp_func && (CV_INTER_NN < method && method < CV_INTER_AREA))
    687         {
    688             int srcstep = src->step ? src->step : CV_STUB_STEP;
    689             int dststep = dst->step ? dst->step : CV_STUB_STEP;
    690             IPPI_CALL( ipp_func( src->data.ptr, ssize, srcstep,
    691                                  cvRect(0,0,ssize.width,ssize.height),
    692                                  dst->data.ptr, dststep, dsize,
    693                                  (double)dsize.width/ssize.width,
    694                                  (double)dsize.height/ssize.height, 1 << method ));
    695             EXIT;
    696         }
    697     }
    698 
    699     if( method == CV_INTER_NN )
    700     {
    701         IPPI_CALL( icvResize_NN_8u_C1R( src->data.ptr, src->step, ssize,
    702                                         dst->data.ptr, dst->step, dsize,
    703                                         CV_ELEM_SIZE(src->type)));
    704     }
    705     else if( method == CV_INTER_LINEAR || method == CV_INTER_AREA )
    706     {
    707         if( method == CV_INTER_AREA &&
    708             ssize.width >= dsize.width && ssize.height >= dsize.height )
    709         {
    710             // "area" method for (scale_x > 1 & scale_y > 1)
    711             int iscale_x = cvRound(scale_x);
    712             int iscale_y = cvRound(scale_y);
    713 
    714             if( fabs(scale_x - iscale_x) < DBL_EPSILON &&
    715                 fabs(scale_y - iscale_y) < DBL_EPSILON )
    716             {
    717                 int area = iscale_x*iscale_y;
    718                 int srcstep = src->step / CV_ELEM_SIZE(depth);
    719                 int* ofs = (int*)cvStackAlloc( (area + dsize.width*cn)*sizeof(int) );
    720                 int* xofs = ofs + area;
    721                 CvResizeAreaFastFunc func = (CvResizeAreaFastFunc)areafast_tab.fn_2d[depth];
    722 
    723                 if( !func )
    724                     CV_ERROR( CV_StsUnsupportedFormat, "" );
    725 
    726                 for( sy = 0, k = 0; sy < iscale_y; sy++ )
    727                     for( sx = 0; sx < iscale_x; sx++ )
    728                         ofs[k++] = sy*srcstep + sx*cn;
    729 
    730                 for( dx = 0; dx < dsize.width; dx++ )
    731                 {
    732                     sx = dx*iscale_x*cn;
    733                     for( k = 0; k < cn; k++ )
    734                         xofs[dx*cn + k] = sx + k;
    735                 }
    736 
    737                 IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
    738                                  dst->step, dsize, cn, ofs, xofs ));
    739             }
    740             else
    741             {
    742                 int buf_len = dsize.width*cn + 4, buf_size, xofs_count = 0;
    743                 float scale = 1.f/(scale_x*scale_y);
    744                 float *buf, *sum;
    745                 CvDecimateAlpha* xofs;
    746                 CvResizeAreaFunc func = (CvResizeAreaFunc)area_tab.fn_2d[depth];
    747 
    748                 if( !func || cn > 4 )
    749                     CV_ERROR( CV_StsUnsupportedFormat, "" );
    750 
    751                 buf_size = buf_len*2*sizeof(float) + ssize.width*2*sizeof(CvDecimateAlpha);
    752                 if( buf_size < CV_MAX_LOCAL_SIZE )
    753                     buf = (float*)cvStackAlloc(buf_size);
    754                 else
    755                     CV_CALL( temp_buf = buf = (float*)cvAlloc(buf_size));
    756                 sum = buf + buf_len;
    757                 xofs = (CvDecimateAlpha*)(sum + buf_len);
    758 
    759                 for( dx = 0, k = 0; dx < dsize.width; dx++ )
    760                 {
    761                     float fsx1 = dx*scale_x, fsx2 = fsx1 + scale_x;
    762                     int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
    763 
    764                     assert( (unsigned)sx1 < (unsigned)ssize.width );
    765 
    766                     if( sx1 > fsx1 )
    767                     {
    768                         assert( k < ssize.width*2 );
    769                         xofs[k].di = dx*cn;
    770                         xofs[k].si = (sx1-1)*cn;
    771                         xofs[k++].alpha = (sx1 - fsx1)*scale;
    772                     }
    773 
    774                     for( sx = sx1; sx < sx2; sx++ )
    775                     {
    776                         assert( k < ssize.width*2 );
    777                         xofs[k].di = dx*cn;
    778                         xofs[k].si = sx*cn;
    779                         xofs[k++].alpha = scale;
    780                     }
    781 
    782                     if( fsx2 - sx2 > 1e-3 )
    783                     {
    784                         assert( k < ssize.width*2 );
    785                         assert((unsigned)sx2 < (unsigned)ssize.width );
    786                         xofs[k].di = dx*cn;
    787                         xofs[k].si = sx2*cn;
    788                         xofs[k++].alpha = (fsx2 - sx2)*scale;
    789                     }
    790                 }
    791 
    792                 xofs_count = k;
    793                 memset( sum, 0, buf_len*sizeof(float) );
    794                 memset( buf, 0, buf_len*sizeof(float) );
    795 
    796                 IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
    797                                  dst->step, dsize, cn, xofs, xofs_count, buf, sum ));
    798             }
    799         }
    800         else // true "area" method for the cases (scale_x > 1 & scale_y < 1) and
    801              // (scale_x < 1 & scale_y > 1) is not implemented.
    802              // instead, it is emulated via some variant of bilinear interpolation.
    803         {
    804             float inv_scale_x = (float)dsize.width/ssize.width;
    805             float inv_scale_y = (float)dsize.height/ssize.height;
    806             int xmax = dsize.width, width = dsize.width*cn, buf_size;
    807             float *buf0, *buf1;
    808             CvResizeAlpha *xofs, *yofs;
    809             int area_mode = method == CV_INTER_AREA;
    810             float fx, fy;
    811             CvResizeBilinearFunc func = (CvResizeBilinearFunc)bilin_tab.fn_2d[depth];
    812 
    813             if( !func )
    814                 CV_ERROR( CV_StsUnsupportedFormat, "" );
    815 
    816             buf_size = width*2*sizeof(float) + (width + dsize.height)*sizeof(CvResizeAlpha);
    817             if( buf_size < CV_MAX_LOCAL_SIZE )
    818                 buf0 = (float*)cvStackAlloc(buf_size);
    819             else
    820                 CV_CALL( temp_buf = buf0 = (float*)cvAlloc(buf_size));
    821             buf1 = buf0 + width;
    822             xofs = (CvResizeAlpha*)(buf1 + width);
    823             yofs = xofs + width;
    824 
    825             for( dx = 0; dx < dsize.width; dx++ )
    826             {
    827                 if( !area_mode )
    828                 {
    829                     fx = (float)((dx+0.5)*scale_x - 0.5);
    830                     sx = cvFloor(fx);
    831                     fx -= sx;
    832                 }
    833                 else
    834                 {
    835                     sx = cvFloor(dx*scale_x);
    836                     fx = (dx+1) - (sx+1)*inv_scale_x;
    837                     fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
    838                 }
    839 
    840                 if( sx < 0 )
    841                     fx = 0, sx = 0;
    842 
    843                 if( sx >= ssize.width-1 )
    844                 {
    845                     fx = 0, sx = ssize.width-1;
    846                     if( xmax >= dsize.width )
    847                         xmax = dx;
    848                 }
    849 
    850                 if( depth != CV_8U )
    851                     for( k = 0, sx *= cn; k < cn; k++ )
    852                         xofs[dx*cn + k].idx = sx + k, xofs[dx*cn + k].alpha = fx;
    853                 else
    854                     for( k = 0, sx *= cn; k < cn; k++ )
    855                         xofs[dx*cn + k].idx = sx + k,
    856                         xofs[dx*cn + k].ialpha = CV_FLT_TO_FIX(fx, ICV_WARP_SHIFT);
    857             }
    858 
    859             for( dy = 0; dy < dsize.height; dy++ )
    860             {
    861                 if( !area_mode )
    862                 {
    863                     fy = (float)((dy+0.5)*scale_y - 0.5);
    864                     sy = cvFloor(fy);
    865                     fy -= sy;
    866                     if( sy < 0 )
    867                         sy = 0, fy = 0;
    868                 }
    869                 else
    870                 {
    871                     sy = cvFloor(dy*scale_y);
    872                     fy = (dy+1) - (sy+1)*inv_scale_y;
    873                     fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
    874                 }
    875 
    876                 yofs[dy].idx = sy;
    877                 if( depth != CV_8U )
    878                     yofs[dy].alpha = fy;
    879                 else
    880                     yofs[dy].ialpha = CV_FLT_TO_FIX(fy, ICV_WARP_SHIFT);
    881             }
    882 
    883             IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
    884                              dst->step, dsize, cn, xmax, xofs, yofs, buf0, buf1 ));
    885         }
    886     }
    887     else if( method == CV_INTER_CUBIC )
    888     {
    889         int width = dsize.width*cn, buf_size;
    890         int xmin = dsize.width, xmax = -1;
    891         CvResizeAlpha* xofs;
    892         float* buf[4];
    893         CvResizeBicubicFunc func = (CvResizeBicubicFunc)bicube_tab.fn_2d[depth];
    894 
    895         if( !func )
    896             CV_ERROR( CV_StsUnsupportedFormat, "" );
    897 
    898         buf_size = width*(4*sizeof(float) + sizeof(xofs[0]));
    899         if( buf_size < CV_MAX_LOCAL_SIZE )
    900             buf[0] = (float*)cvStackAlloc(buf_size);
    901         else
    902             CV_CALL( temp_buf = buf[0] = (float*)cvAlloc(buf_size));
    903 
    904         for( k = 1; k < 4; k++ )
    905             buf[k] = buf[k-1] + width;
    906         xofs = (CvResizeAlpha*)(buf[3] + width);
    907 
    908         icvInitCubicCoeffTab();
    909 
    910         for( dx = 0; dx < dsize.width; dx++ )
    911         {
    912             float fx = dx*scale_x;
    913             sx = cvFloor(fx);
    914             fx -= sx;
    915             int ifx = cvRound(fx*ICV_CUBIC_TAB_SIZE);
    916             if( sx-1 >= 0 && xmin > dx )
    917                 xmin = dx;
    918             if( sx+2 < ssize.width )
    919                 xmax = dx + 1;
    920 
    921             // at least one of 4 points should be within the image - to
    922             // be able to set other points to the same value. see the loops
    923             // for( dx = 0; dx < xmin; dx++ ) ... and for( ; dx < width; dx++ ) ...
    924             if( sx < -2 )
    925                 sx = -2;
    926             else if( sx > ssize.width )
    927                 sx = ssize.width;
    928 
    929             for( k = 0; k < cn; k++ )
    930             {
    931                 xofs[dx*cn + k].idx = sx*cn + k;
    932                 xofs[dx*cn + k].ialpha = ifx;
    933             }
    934         }
    935 
    936         IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
    937                          dst->step, dsize, cn, xmin, xmax, xofs, buf ));
    938     }
    939     else
    940         CV_ERROR( CV_StsBadFlag, "Unknown/unsupported interpolation method" );
    941 
    942     __END__;
    943 
    944     cvFree( &temp_buf );
    945 }
    946 
    947 
    948 /****************************************************************************************\
    949 *                                     WarpAffine                                         *
    950 \****************************************************************************************/
    951 
    952 #define ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( flavor, arrtype, worktype,       \
    953             scale_alpha_macro, mul_one_macro, descale_macro, cast_macro )   \
    954 static CvStatus CV_STDCALL                                                  \
    955 icvWarpAffine_Bilinear_##flavor##_CnR(                                      \
    956     const arrtype* src, int step, CvSize ssize,                             \
    957     arrtype* dst, int dststep, CvSize dsize,                                \
    958     const double* matrix, int cn,                                           \
    959     const arrtype* fillval, const int* ofs )                                \
    960 {                                                                           \
    961     int x, y, k;                                                            \
    962     double  A12 = matrix[1], b1 = matrix[2];                                \
    963     double  A22 = matrix[4], b2 = matrix[5];                                \
    964                                                                             \
    965     step /= sizeof(src[0]);                                                 \
    966     dststep /= sizeof(dst[0]);                                              \
    967                                                                             \
    968     for( y = 0; y < dsize.height; y++, dst += dststep )                     \
    969     {                                                                       \
    970         int xs = CV_FLT_TO_FIX( A12*y + b1, ICV_WARP_SHIFT );               \
    971         int ys = CV_FLT_TO_FIX( A22*y + b2, ICV_WARP_SHIFT );               \
    972                                                                             \
    973         for( x = 0; x < dsize.width; x++ )                                  \
    974         {                                                                   \
    975             int ixs = xs + ofs[x*2];                                        \
    976             int iys = ys + ofs[x*2+1];                                      \
    977             worktype a = scale_alpha_macro( ixs & ICV_WARP_MASK );          \
    978             worktype b = scale_alpha_macro( iys & ICV_WARP_MASK );          \
    979             worktype p0, p1;                                                \
    980             ixs >>= ICV_WARP_SHIFT;                                         \
    981             iys >>= ICV_WARP_SHIFT;                                         \
    982                                                                             \
    983             if( (unsigned)ixs < (unsigned)(ssize.width - 1) &&              \
    984                 (unsigned)iys < (unsigned)(ssize.height - 1) )              \
    985             {                                                               \
    986                 const arrtype* ptr = src + step*iys + ixs*cn;               \
    987                                                                             \
    988                 for( k = 0; k < cn; k++ )                                   \
    989                 {                                                           \
    990                     p0 = mul_one_macro(ptr[k]) +                            \
    991                         a * (ptr[k+cn] - ptr[k]);                           \
    992                     p1 = mul_one_macro(ptr[k+step]) +                       \
    993                         a * (ptr[k+cn+step] - ptr[k+step]);                 \
    994                     p0 = descale_macro(mul_one_macro(p0) + b*(p1 - p0));    \
    995                     dst[x*cn+k] = (arrtype)cast_macro(p0);                  \
    996                 }                                                           \
    997             }                                                               \
    998             else if( (unsigned)(ixs+1) < (unsigned)(ssize.width+1) &&       \
    999                      (unsigned)(iys+1) < (unsigned)(ssize.height+1))        \
   1000             {                                                               \
   1001                 int x0 = ICV_WARP_CLIP_X( ixs );                            \
   1002                 int y0 = ICV_WARP_CLIP_Y( iys );                            \
   1003                 int x1 = ICV_WARP_CLIP_X( ixs + 1 );                        \
   1004                 int y1 = ICV_WARP_CLIP_Y( iys + 1 );                        \
   1005                 const arrtype* ptr0, *ptr1, *ptr2, *ptr3;                   \
   1006                                                                             \
   1007                 ptr0 = src + y0*step + x0*cn;                               \
   1008                 ptr1 = src + y0*step + x1*cn;                               \
   1009                 ptr2 = src + y1*step + x0*cn;                               \
   1010                 ptr3 = src + y1*step + x1*cn;                               \
   1011                                                                             \
   1012                 for( k = 0; k < cn; k++ )                                   \
   1013                 {                                                           \
   1014                     p0 = mul_one_macro(ptr0[k]) + a * (ptr1[k] - ptr0[k]);  \
   1015                     p1 = mul_one_macro(ptr2[k]) + a * (ptr3[k] - ptr2[k]);  \
   1016                     p0 = descale_macro( mul_one_macro(p0) + b*(p1 - p0) );  \
   1017                     dst[x*cn+k] = (arrtype)cast_macro(p0);                  \
   1018                 }                                                           \
   1019             }                                                               \
   1020             else if( fillval )                                              \
   1021                 for( k = 0; k < cn; k++ )                                   \
   1022                     dst[x*cn+k] = fillval[k];                               \
   1023         }                                                                   \
   1024     }                                                                       \
   1025                                                                             \
   1026     return CV_OK;                                                           \
   1027 }
   1028 
   1029 
   1030 #define ICV_WARP_SCALE_ALPHA(x) ((x)*(1./(ICV_WARP_MASK+1)))
   1031 
   1032 ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 8u, uchar, int, CV_NOP, ICV_WARP_MUL_ONE_8U,
   1033                                    ICV_WARP_DESCALE_8U, CV_NOP )
   1034 //ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 8u, uchar, double, ICV_WARP_SCALE_ALPHA, CV_NOP,
   1035 //                                   CV_NOP, ICV_WARP_CAST_8U )
   1036 ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 16u, ushort, double, ICV_WARP_SCALE_ALPHA, CV_NOP,
   1037                                    CV_NOP, cvRound )
   1038 ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 32f, float, double, ICV_WARP_SCALE_ALPHA, CV_NOP,
   1039                                    CV_NOP, CV_NOP )
   1040 
   1041 
   1042 typedef CvStatus (CV_STDCALL * CvWarpAffineFunc)(
   1043     const void* src, int srcstep, CvSize ssize,
   1044     void* dst, int dststep, CvSize dsize,
   1045     const double* matrix, int cn,
   1046     const void* fillval, const int* ofs );
   1047 
   1048 static void icvInitWarpAffineTab( CvFuncTable* bilin_tab )
   1049 {
   1050     bilin_tab->fn_2d[CV_8U] = (void*)icvWarpAffine_Bilinear_8u_CnR;
   1051     bilin_tab->fn_2d[CV_16U] = (void*)icvWarpAffine_Bilinear_16u_CnR;
   1052     bilin_tab->fn_2d[CV_32F] = (void*)icvWarpAffine_Bilinear_32f_CnR;
   1053 }
   1054 
   1055 
   1056 /////////////////////////////// IPP warpaffine functions /////////////////////////////////
   1057 
   1058 icvWarpAffineBack_8u_C1R_t icvWarpAffineBack_8u_C1R_p = 0;
   1059 icvWarpAffineBack_8u_C3R_t icvWarpAffineBack_8u_C3R_p = 0;
   1060 icvWarpAffineBack_8u_C4R_t icvWarpAffineBack_8u_C4R_p = 0;
   1061 icvWarpAffineBack_32f_C1R_t icvWarpAffineBack_32f_C1R_p = 0;
   1062 icvWarpAffineBack_32f_C3R_t icvWarpAffineBack_32f_C3R_p = 0;
   1063 icvWarpAffineBack_32f_C4R_t icvWarpAffineBack_32f_C4R_p = 0;
   1064 
   1065 typedef CvStatus (CV_STDCALL * CvWarpAffineBackIPPFunc)
   1066 ( const void* src, CvSize srcsize, int srcstep, CvRect srcroi,
   1067   void* dst, int dststep, CvRect dstroi,
   1068   const double* coeffs, int interpolation );
   1069 
   1070 //////////////////////////////////////////////////////////////////////////////////////////
   1071 
   1072 CV_IMPL void
   1073 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* matrix,
   1074               int flags, CvScalar fillval )
   1075 {
   1076     static CvFuncTable bilin_tab;
   1077     static int inittab = 0;
   1078 
   1079     CV_FUNCNAME( "cvWarpAffine" );
   1080 
   1081     __BEGIN__;
   1082 
   1083     CvMat srcstub, *src = (CvMat*)srcarr;
   1084     CvMat dststub, *dst = (CvMat*)dstarr;
   1085     int k, type, depth, cn, *ofs = 0;
   1086     double src_matrix[6], dst_matrix[6];
   1087     double fillbuf[4];
   1088     int method = flags & 3;
   1089     CvMat srcAb = cvMat( 2, 3, CV_64F, src_matrix ),
   1090           dstAb = cvMat( 2, 3, CV_64F, dst_matrix ),
   1091           A, b, invA, invAb;
   1092     CvWarpAffineFunc func;
   1093     CvSize ssize, dsize;
   1094 
   1095     if( !inittab )
   1096     {
   1097         icvInitWarpAffineTab( &bilin_tab );
   1098         inittab = 1;
   1099     }
   1100 
   1101     CV_CALL( src = cvGetMat( srcarr, &srcstub ));
   1102     CV_CALL( dst = cvGetMat( dstarr, &dststub ));
   1103 
   1104     if( !CV_ARE_TYPES_EQ( src, dst ))
   1105         CV_ERROR( CV_StsUnmatchedFormats, "" );
   1106 
   1107     if( !CV_IS_MAT(matrix) || CV_MAT_CN(matrix->type) != 1 ||
   1108         CV_MAT_DEPTH(matrix->type) < CV_32F || matrix->rows != 2 || matrix->cols != 3 )
   1109         CV_ERROR( CV_StsBadArg,
   1110         "Transformation matrix should be 2x3 floating-point single-channel matrix" );
   1111 
   1112     if( flags & CV_WARP_INVERSE_MAP )
   1113         cvConvertScale( matrix, &dstAb );
   1114     else
   1115     {
   1116         // [R|t] -> [R^-1 | -(R^-1)*t]
   1117         cvConvertScale( matrix, &srcAb );
   1118         cvGetCols( &srcAb, &A, 0, 2 );
   1119         cvGetCol( &srcAb, &b, 2 );
   1120         cvGetCols( &dstAb, &invA, 0, 2 );
   1121         cvGetCol( &dstAb, &invAb, 2 );
   1122         cvInvert( &A, &invA, CV_SVD );
   1123         cvGEMM( &invA, &b, -1, 0, 0, &invAb );
   1124     }
   1125 
   1126     type = CV_MAT_TYPE(src->type);
   1127     depth = CV_MAT_DEPTH(type);
   1128     cn = CV_MAT_CN(type);
   1129     if( cn > 4 )
   1130         CV_ERROR( CV_BadNumChannels, "" );
   1131 
   1132     ssize = cvGetMatSize(src);
   1133     dsize = cvGetMatSize(dst);
   1134 
   1135     if( icvWarpAffineBack_8u_C1R_p && MIN( ssize.width, dsize.width ) >= 4 &&
   1136         MIN( ssize.height, dsize.height ) >= 4 )
   1137     {
   1138         CvWarpAffineBackIPPFunc ipp_func =
   1139             type == CV_8UC1 ? icvWarpAffineBack_8u_C1R_p :
   1140             type == CV_8UC3 ? icvWarpAffineBack_8u_C3R_p :
   1141             type == CV_8UC4 ? icvWarpAffineBack_8u_C4R_p :
   1142             type == CV_32FC1 ? icvWarpAffineBack_32f_C1R_p :
   1143             type == CV_32FC3 ? icvWarpAffineBack_32f_C3R_p :
   1144             type == CV_32FC4 ? icvWarpAffineBack_32f_C4R_p : 0;
   1145 
   1146         if( ipp_func && CV_INTER_NN <= method && method <= CV_INTER_AREA )
   1147         {
   1148             int srcstep = src->step ? src->step : CV_STUB_STEP;
   1149             int dststep = dst->step ? dst->step : CV_STUB_STEP;
   1150             CvRect srcroi = {0, 0, ssize.width, ssize.height};
   1151             CvRect dstroi = {0, 0, dsize.width, dsize.height};
   1152 
   1153             // this is not the most efficient way to fill outliers
   1154             if( flags & CV_WARP_FILL_OUTLIERS )
   1155                 cvSet( dst, fillval );
   1156 
   1157             if( ipp_func( src->data.ptr, ssize, srcstep, srcroi,
   1158                           dst->data.ptr, dststep, dstroi,
   1159                           dstAb.data.db, 1 << method ) >= 0 )
   1160                 EXIT;
   1161         }
   1162     }
   1163 
   1164     cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 );
   1165     ofs = (int*)cvStackAlloc( dst->cols*2*sizeof(ofs[0]) );
   1166     for( k = 0; k < dst->cols; k++ )
   1167     {
   1168         ofs[2*k] = CV_FLT_TO_FIX( dst_matrix[0]*k, ICV_WARP_SHIFT );
   1169         ofs[2*k+1] = CV_FLT_TO_FIX( dst_matrix[3]*k, ICV_WARP_SHIFT );
   1170     }
   1171 
   1172     /*if( method == CV_INTER_LINEAR )*/
   1173     {
   1174         func = (CvWarpAffineFunc)bilin_tab.fn_2d[depth];
   1175         if( !func )
   1176             CV_ERROR( CV_StsUnsupportedFormat, "" );
   1177 
   1178         IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
   1179                          dst->step, dsize, dst_matrix, cn,
   1180                          flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0, ofs ));
   1181     }
   1182 
   1183     __END__;
   1184 }
   1185 
   1186 
   1187 CV_IMPL CvMat*
   1188 cv2DRotationMatrix( CvPoint2D32f center, double angle,
   1189                     double scale, CvMat* matrix )
   1190 {
   1191     CV_FUNCNAME( "cvGetRotationMatrix" );
   1192 
   1193     __BEGIN__;
   1194 
   1195     double m[2][3];
   1196     CvMat M = cvMat( 2, 3, CV_64FC1, m );
   1197     double alpha, beta;
   1198 
   1199     if( !matrix )
   1200         CV_ERROR( CV_StsNullPtr, "" );
   1201 
   1202     angle *= CV_PI/180;
   1203     alpha = cos(angle)*scale;
   1204     beta = sin(angle)*scale;
   1205 
   1206     m[0][0] = alpha;
   1207     m[0][1] = beta;
   1208     m[0][2] = (1-alpha)*center.x - beta*center.y;
   1209     m[1][0] = -beta;
   1210     m[1][1] = alpha;
   1211     m[1][2] = beta*center.x + (1-alpha)*center.y;
   1212 
   1213     cvConvert( &M, matrix );
   1214 
   1215     __END__;
   1216 
   1217     return matrix;
   1218 }
   1219 
   1220 
   1221 /****************************************************************************************\
   1222 *                                    WarpPerspective                                     *
   1223 \****************************************************************************************/
   1224 
   1225 #define ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( flavor, arrtype, load_macro, cast_macro )\
   1226 static CvStatus CV_STDCALL                                                  \
   1227 icvWarpPerspective_Bilinear_##flavor##_CnR(                                 \
   1228     const arrtype* src, int step, CvSize ssize,                             \
   1229     arrtype* dst, int dststep, CvSize dsize,                                \
   1230     const double* matrix, int cn,                                           \
   1231     const arrtype* fillval )                                                \
   1232 {                                                                           \
   1233     int x, y, k;                                                            \
   1234     float A11 = (float)matrix[0], A12 = (float)matrix[1], A13 = (float)matrix[2];\
   1235     float A21 = (float)matrix[3], A22 = (float)matrix[4], A23 = (float)matrix[5];\
   1236     float A31 = (float)matrix[6], A32 = (float)matrix[7], A33 = (float)matrix[8];\
   1237                                                                             \
   1238     step /= sizeof(src[0]);                                                 \
   1239     dststep /= sizeof(dst[0]);                                              \
   1240                                                                             \
   1241     for( y = 0; y < dsize.height; y++, dst += dststep )                     \
   1242     {                                                                       \
   1243         float xs0 = A12*y + A13;                                            \
   1244         float ys0 = A22*y + A23;                                            \
   1245         float ws = A32*y + A33;                                             \
   1246                                                                             \
   1247         for( x = 0; x < dsize.width; x++, xs0 += A11, ys0 += A21, ws += A31 )\
   1248         {                                                                   \
   1249             float inv_ws = 1.f/ws;                                          \
   1250             float xs = xs0*inv_ws;                                          \
   1251             float ys = ys0*inv_ws;                                          \
   1252             int ixs = cvFloor(xs);                                          \
   1253             int iys = cvFloor(ys);                                          \
   1254             float a = xs - ixs;                                             \
   1255             float b = ys - iys;                                             \
   1256             float p0, p1;                                                   \
   1257                                                                             \
   1258             if( (unsigned)ixs < (unsigned)(ssize.width - 1) &&              \
   1259                 (unsigned)iys < (unsigned)(ssize.height - 1) )              \
   1260             {                                                               \
   1261                 const arrtype* ptr = src + step*iys + ixs*cn;               \
   1262                                                                             \
   1263                 for( k = 0; k < cn; k++ )                                   \
   1264                 {                                                           \
   1265                     p0 = load_macro(ptr[k]) +                               \
   1266                         a * (load_macro(ptr[k+cn]) - load_macro(ptr[k]));   \
   1267                     p1 = load_macro(ptr[k+step]) +                          \
   1268                         a * (load_macro(ptr[k+cn+step]) -                   \
   1269                              load_macro(ptr[k+step]));                      \
   1270                     dst[x*cn+k] = (arrtype)cast_macro(p0 + b*(p1 - p0));    \
   1271                 }                                                           \
   1272             }                                                               \
   1273             else if( (unsigned)(ixs+1) < (unsigned)(ssize.width+1) &&       \
   1274                      (unsigned)(iys+1) < (unsigned)(ssize.height+1))        \
   1275             {                                                               \
   1276                 int x0 = ICV_WARP_CLIP_X( ixs );                            \
   1277                 int y0 = ICV_WARP_CLIP_Y( iys );                            \
   1278                 int x1 = ICV_WARP_CLIP_X( ixs + 1 );                        \
   1279                 int y1 = ICV_WARP_CLIP_Y( iys + 1 );                        \
   1280                 const arrtype* ptr0, *ptr1, *ptr2, *ptr3;                   \
   1281                                                                             \
   1282                 ptr0 = src + y0*step + x0*cn;                               \
   1283                 ptr1 = src + y0*step + x1*cn;                               \
   1284                 ptr2 = src + y1*step + x0*cn;                               \
   1285                 ptr3 = src + y1*step + x1*cn;                               \
   1286                                                                             \
   1287                 for( k = 0; k < cn; k++ )                                   \
   1288                 {                                                           \
   1289                     p0 = load_macro(ptr0[k]) +                              \
   1290                         a * (load_macro(ptr1[k]) - load_macro(ptr0[k]));    \
   1291                     p1 = load_macro(ptr2[k]) +                              \
   1292                         a * (load_macro(ptr3[k]) - load_macro(ptr2[k]));    \
   1293                     dst[x*cn+k] = (arrtype)cast_macro(p0 + b*(p1 - p0));    \
   1294                 }                                                           \
   1295             }                                                               \
   1296             else if( fillval )                                              \
   1297                 for( k = 0; k < cn; k++ )                                   \
   1298                     dst[x*cn+k] = fillval[k];                               \
   1299         }                                                                   \
   1300     }                                                                       \
   1301                                                                             \
   1302     return CV_OK;                                                           \
   1303 }
   1304 
   1305 
   1306 #define ICV_WARP_SCALE_ALPHA(x) ((x)*(1./(ICV_WARP_MASK+1)))
   1307 
   1308 ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 8u, uchar, CV_8TO32F, cvRound )
   1309 ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 16u, ushort, CV_NOP, cvRound )
   1310 ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 32f, float, CV_NOP, CV_NOP )
   1311 
   1312 typedef CvStatus (CV_STDCALL * CvWarpPerspectiveFunc)(
   1313     const void* src, int srcstep, CvSize ssize,
   1314     void* dst, int dststep, CvSize dsize,
   1315     const double* matrix, int cn, const void* fillval );
   1316 
   1317 static void icvInitWarpPerspectiveTab( CvFuncTable* bilin_tab )
   1318 {
   1319     bilin_tab->fn_2d[CV_8U] = (void*)icvWarpPerspective_Bilinear_8u_CnR;
   1320     bilin_tab->fn_2d[CV_16U] = (void*)icvWarpPerspective_Bilinear_16u_CnR;
   1321     bilin_tab->fn_2d[CV_32F] = (void*)icvWarpPerspective_Bilinear_32f_CnR;
   1322 }
   1323 
   1324 
   1325 /////////////////////////// IPP warpperspective functions ////////////////////////////////
   1326 
   1327 icvWarpPerspectiveBack_8u_C1R_t icvWarpPerspectiveBack_8u_C1R_p = 0;
   1328 icvWarpPerspectiveBack_8u_C3R_t icvWarpPerspectiveBack_8u_C3R_p = 0;
   1329 icvWarpPerspectiveBack_8u_C4R_t icvWarpPerspectiveBack_8u_C4R_p = 0;
   1330 icvWarpPerspectiveBack_32f_C1R_t icvWarpPerspectiveBack_32f_C1R_p = 0;
   1331 icvWarpPerspectiveBack_32f_C3R_t icvWarpPerspectiveBack_32f_C3R_p = 0;
   1332 icvWarpPerspectiveBack_32f_C4R_t icvWarpPerspectiveBack_32f_C4R_p = 0;
   1333 
   1334 icvWarpPerspective_8u_C1R_t icvWarpPerspective_8u_C1R_p = 0;
   1335 icvWarpPerspective_8u_C3R_t icvWarpPerspective_8u_C3R_p = 0;
   1336 icvWarpPerspective_8u_C4R_t icvWarpPerspective_8u_C4R_p = 0;
   1337 icvWarpPerspective_32f_C1R_t icvWarpPerspective_32f_C1R_p = 0;
   1338 icvWarpPerspective_32f_C3R_t icvWarpPerspective_32f_C3R_p = 0;
   1339 icvWarpPerspective_32f_C4R_t icvWarpPerspective_32f_C4R_p = 0;
   1340 
   1341 typedef CvStatus (CV_STDCALL * CvWarpPerspectiveBackIPPFunc)
   1342 ( const void* src, CvSize srcsize, int srcstep, CvRect srcroi,
   1343   void* dst, int dststep, CvRect dstroi,
   1344   const double* coeffs, int interpolation );
   1345 
   1346 //////////////////////////////////////////////////////////////////////////////////////////
   1347 
   1348 CV_IMPL void
   1349 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr,
   1350                    const CvMat* matrix, int flags, CvScalar fillval )
   1351 {
   1352     static CvFuncTable bilin_tab;
   1353     static int inittab = 0;
   1354 
   1355     CV_FUNCNAME( "cvWarpPerspective" );
   1356 
   1357     __BEGIN__;
   1358 
   1359     CvMat srcstub, *src = (CvMat*)srcarr;
   1360     CvMat dststub, *dst = (CvMat*)dstarr;
   1361     int type, depth, cn;
   1362     int method = flags & 3;
   1363     double src_matrix[9], dst_matrix[9];
   1364     double fillbuf[4];
   1365     CvMat A = cvMat( 3, 3, CV_64F, src_matrix ),
   1366           invA = cvMat( 3, 3, CV_64F, dst_matrix );
   1367     CvWarpPerspectiveFunc func;
   1368     CvSize ssize, dsize;
   1369 
   1370     if( method == CV_INTER_NN || method == CV_INTER_AREA )
   1371         method = CV_INTER_LINEAR;
   1372 
   1373     if( !inittab )
   1374     {
   1375         icvInitWarpPerspectiveTab( &bilin_tab );
   1376         inittab = 1;
   1377     }
   1378 
   1379     CV_CALL( src = cvGetMat( srcarr, &srcstub ));
   1380     CV_CALL( dst = cvGetMat( dstarr, &dststub ));
   1381 
   1382     if( !CV_ARE_TYPES_EQ( src, dst ))
   1383         CV_ERROR( CV_StsUnmatchedFormats, "" );
   1384 
   1385     if( !CV_IS_MAT(matrix) || CV_MAT_CN(matrix->type) != 1 ||
   1386         CV_MAT_DEPTH(matrix->type) < CV_32F || matrix->rows != 3 || matrix->cols != 3 )
   1387         CV_ERROR( CV_StsBadArg,
   1388         "Transformation matrix should be 3x3 floating-point single-channel matrix" );
   1389 
   1390     if( flags & CV_WARP_INVERSE_MAP )
   1391         cvConvertScale( matrix, &invA );
   1392     else
   1393     {
   1394         cvConvertScale( matrix, &A );
   1395         cvInvert( &A, &invA, CV_SVD );
   1396     }
   1397 
   1398     type = CV_MAT_TYPE(src->type);
   1399     depth = CV_MAT_DEPTH(type);
   1400     cn = CV_MAT_CN(type);
   1401     if( cn > 4 )
   1402         CV_ERROR( CV_BadNumChannels, "" );
   1403 
   1404     ssize = cvGetMatSize(src);
   1405     dsize = cvGetMatSize(dst);
   1406 
   1407     if( icvWarpPerspectiveBack_8u_C1R_p )
   1408     {
   1409         CvWarpPerspectiveBackIPPFunc ipp_func =
   1410             type == CV_8UC1 ? icvWarpPerspectiveBack_8u_C1R_p :
   1411             type == CV_8UC3 ? icvWarpPerspectiveBack_8u_C3R_p :
   1412             type == CV_8UC4 ? icvWarpPerspectiveBack_8u_C4R_p :
   1413             type == CV_32FC1 ? icvWarpPerspectiveBack_32f_C1R_p :
   1414             type == CV_32FC3 ? icvWarpPerspectiveBack_32f_C3R_p :
   1415             type == CV_32FC4 ? icvWarpPerspectiveBack_32f_C4R_p : 0;
   1416 
   1417         if( ipp_func && CV_INTER_NN <= method && method <= CV_INTER_AREA &&
   1418             MIN(ssize.width,ssize.height) >= 4 && MIN(dsize.width,dsize.height) >= 4 )
   1419         {
   1420             int srcstep = src->step ? src->step : CV_STUB_STEP;
   1421             int dststep = dst->step ? dst->step : CV_STUB_STEP;
   1422             CvStatus status;
   1423             CvRect srcroi = {0, 0, ssize.width, ssize.height};
   1424             CvRect dstroi = {0, 0, dsize.width, dsize.height};
   1425 
   1426             // this is not the most efficient way to fill outliers
   1427             if( flags & CV_WARP_FILL_OUTLIERS )
   1428                 cvSet( dst, fillval );
   1429 
   1430             status = ipp_func( src->data.ptr, ssize, srcstep, srcroi,
   1431                                dst->data.ptr, dststep, dstroi,
   1432                                invA.data.db, 1 << method );
   1433             if( status >= 0 )
   1434                 EXIT;
   1435 
   1436             ipp_func = type == CV_8UC1 ? icvWarpPerspective_8u_C1R_p :
   1437                 type == CV_8UC3 ? icvWarpPerspective_8u_C3R_p :
   1438                 type == CV_8UC4 ? icvWarpPerspective_8u_C4R_p :
   1439                 type == CV_32FC1 ? icvWarpPerspective_32f_C1R_p :
   1440                 type == CV_32FC3 ? icvWarpPerspective_32f_C3R_p :
   1441                 type == CV_32FC4 ? icvWarpPerspective_32f_C4R_p : 0;
   1442 
   1443             if( ipp_func )
   1444             {
   1445                 if( flags & CV_WARP_INVERSE_MAP )
   1446                     cvInvert( &invA, &A, CV_SVD );
   1447 
   1448                 status = ipp_func( src->data.ptr, ssize, srcstep, srcroi,
   1449                                dst->data.ptr, dststep, dstroi,
   1450                                A.data.db, 1 << method );
   1451                 if( status >= 0 )
   1452                     EXIT;
   1453             }
   1454         }
   1455     }
   1456 
   1457     cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 );
   1458 
   1459     /*if( method == CV_INTER_LINEAR )*/
   1460     {
   1461         func = (CvWarpPerspectiveFunc)bilin_tab.fn_2d[depth];
   1462         if( !func )
   1463             CV_ERROR( CV_StsUnsupportedFormat, "" );
   1464 
   1465         IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr,
   1466                          dst->step, dsize, dst_matrix, cn,
   1467                          flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0 ));
   1468     }
   1469 
   1470     __END__;
   1471 }
   1472 
   1473 
   1474 /* Calculates coefficients of perspective transformation
   1475  * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
   1476  *
   1477  *      c00*xi + c01*yi + c02
   1478  * ui = ---------------------
   1479  *      c20*xi + c21*yi + c22
   1480  *
   1481  *      c10*xi + c11*yi + c12
   1482  * vi = ---------------------
   1483  *      c20*xi + c21*yi + c22
   1484  *
   1485  * Coefficients are calculated by solving linear system:
   1486  * / x0 y0  1  0  0  0 -x0*u0 -y0*u0 \ /c00\ /u0\
   1487  * | x1 y1  1  0  0  0 -x1*u1 -y1*u1 | |c01| |u1|
   1488  * | x2 y2  1  0  0  0 -x2*u2 -y2*u2 | |c02| |u2|
   1489  * | x3 y3  1  0  0  0 -x3*u3 -y3*u3 |.|c10|=|u3|,
   1490  * |  0  0  0 x0 y0  1 -x0*v0 -y0*v0 | |c11| |v0|
   1491  * |  0  0  0 x1 y1  1 -x1*v1 -y1*v1 | |c12| |v1|
   1492  * |  0  0  0 x2 y2  1 -x2*v2 -y2*v2 | |c20| |v2|
   1493  * \  0  0  0 x3 y3  1 -x3*v3 -y3*v3 / \c21/ \v3/
   1494  *
   1495  * where:
   1496  *   cij - matrix coefficients, c22 = 1
   1497  */
   1498 CV_IMPL CvMat*
   1499 cvGetPerspectiveTransform( const CvPoint2D32f* src,
   1500                           const CvPoint2D32f* dst,
   1501                           CvMat* matrix )
   1502 {
   1503     CV_FUNCNAME( "cvGetPerspectiveTransform" );
   1504 
   1505     __BEGIN__;
   1506 
   1507     double a[8][8];
   1508     double b[8], x[9];
   1509 
   1510     CvMat A = cvMat( 8, 8, CV_64FC1, a );
   1511     CvMat B = cvMat( 8, 1, CV_64FC1, b );
   1512     CvMat X = cvMat( 8, 1, CV_64FC1, x );
   1513 
   1514     int i;
   1515 
   1516     if( !src || !dst || !matrix )
   1517         CV_ERROR( CV_StsNullPtr, "" );
   1518 
   1519     for( i = 0; i < 4; ++i )
   1520     {
   1521         a[i][0] = a[i+4][3] = src[i].x;
   1522         a[i][1] = a[i+4][4] = src[i].y;
   1523         a[i][2] = a[i+4][5] = 1;
   1524         a[i][3] = a[i][4] = a[i][5] =
   1525         a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
   1526         a[i][6] = -src[i].x*dst[i].x;
   1527         a[i][7] = -src[i].y*dst[i].x;
   1528         a[i+4][6] = -src[i].x*dst[i].y;
   1529         a[i+4][7] = -src[i].y*dst[i].y;
   1530         b[i] = dst[i].x;
   1531         b[i+4] = dst[i].y;
   1532     }
   1533 
   1534     cvSolve( &A, &B, &X, CV_SVD );
   1535     x[8] = 1;
   1536 
   1537     X = cvMat( 3, 3, CV_64FC1, x );
   1538     cvConvert( &X, matrix );
   1539 
   1540     __END__;
   1541 
   1542     return matrix;
   1543 }
   1544 
   1545 /* Calculates coefficients of affine transformation
   1546  * which maps (xi,yi) to (ui,vi), (i=1,2,3):
   1547  *
   1548  * ui = c00*xi + c01*yi + c02
   1549  *
   1550  * vi = c10*xi + c11*yi + c12
   1551  *
   1552  * Coefficients are calculated by solving linear system:
   1553  * / x0 y0  1  0  0  0 \ /c00\ /u0\
   1554  * | x1 y1  1  0  0  0 | |c01| |u1|
   1555  * | x2 y2  1  0  0  0 | |c02| |u2|
   1556  * |  0  0  0 x0 y0  1 | |c10| |v0|
   1557  * |  0  0  0 x1 y1  1 | |c11| |v1|
   1558  * \  0  0  0 x2 y2  1 / |c12| |v2|
   1559  *
   1560  * where:
   1561  *   cij - matrix coefficients
   1562  */
   1563 CV_IMPL CvMat*
   1564 cvGetAffineTransform( const CvPoint2D32f * src, const CvPoint2D32f * dst, CvMat * map_matrix )
   1565 {
   1566     CV_FUNCNAME( "cvGetAffineTransform" );
   1567 
   1568     __BEGIN__;
   1569 
   1570     CvMat mA, mX, mB;
   1571     double A[6*6];
   1572     double B[6];
   1573 	double x[6];
   1574     int i;
   1575 
   1576     cvInitMatHeader(&mA, 6, 6, CV_64F, A);
   1577     cvInitMatHeader(&mB, 6, 1, CV_64F, B);
   1578 	cvInitMatHeader(&mX, 6, 1, CV_64F, x);
   1579 
   1580 	if( !src || !dst || !map_matrix )
   1581 		CV_ERROR( CV_StsNullPtr, "" );
   1582 
   1583     for( i = 0; i < 3; i++ )
   1584     {
   1585         int j = i*12;
   1586         int k = i*12+6;
   1587         A[j] = A[k+3] = src[i].x;
   1588         A[j+1] = A[k+4] = src[i].y;
   1589         A[j+2] = A[k+5] = 1;
   1590         A[j+3] = A[j+4] = A[j+5] = 0;
   1591         A[k] = A[k+1] = A[k+2] = 0;
   1592         B[i*2] = dst[i].x;
   1593         B[i*2+1] = dst[i].y;
   1594     }
   1595     cvSolve(&mA, &mB, &mX);
   1596 
   1597     mX = cvMat( 2, 3, CV_64FC1, x );
   1598 	cvConvert( &mX, map_matrix );
   1599 
   1600 	__END__;
   1601     return map_matrix;
   1602 }
   1603 
   1604 /****************************************************************************************\
   1605 *                          Generic Geometric Transformation: Remap                       *
   1606 \****************************************************************************************/
   1607 
   1608 #define  ICV_DEF_REMAP_BILINEAR_FUNC( flavor, arrtype, load_macro, cast_macro ) \
   1609 static CvStatus CV_STDCALL                                                      \
   1610 icvRemap_Bilinear_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\
   1611                          arrtype* dst, int dststep, CvSize dsize,           \
   1612                          const float* mapx, int mxstep,                     \
   1613                          const float* mapy, int mystep,                     \
   1614                          int cn, const arrtype* fillval )                   \
   1615 {                                                                           \
   1616     int i, j, k;                                                            \
   1617     ssize.width--;                                                          \
   1618     ssize.height--;                                                         \
   1619                                                                             \
   1620     srcstep /= sizeof(src[0]);                                              \
   1621     dststep /= sizeof(dst[0]);                                              \
   1622     mxstep /= sizeof(mapx[0]);                                              \
   1623     mystep /= sizeof(mapy[0]);                                              \
   1624                                                                             \
   1625     for( i = 0; i < dsize.height; i++, dst += dststep,                      \
   1626                                   mapx += mxstep, mapy += mystep )          \
   1627     {                                                                       \
   1628         for( j = 0; j < dsize.width; j++ )                                  \
   1629         {                                                                   \
   1630             float _x = mapx[j], _y = mapy[j];                               \
   1631             int ix = cvFloor(_x), iy = cvFloor(_y);                         \
   1632                                                                             \
   1633             if( (unsigned)ix < (unsigned)ssize.width &&                     \
   1634                 (unsigned)iy < (unsigned)ssize.height )                     \
   1635             {                                                               \
   1636                 const arrtype* s = src + iy*srcstep + ix*cn;                \
   1637                 _x -= ix; _y -= iy;                                         \
   1638                 for( k = 0; k < cn; k++, s++ )                              \
   1639                 {                                                           \
   1640                     float t0 = load_macro(s[0]), t1 = load_macro(s[srcstep]); \
   1641                     t0 += _x*(load_macro(s[cn]) - t0);                      \
   1642                     t1 += _x*(load_macro(s[srcstep + cn]) - t1);            \
   1643                     dst[j*cn + k] = (arrtype)cast_macro(t0 + _y*(t1 - t0)); \
   1644                 }                                                           \
   1645             }                                                               \
   1646             else if( fillval )                                              \
   1647                 for( k = 0; k < cn; k++ )                                   \
   1648                     dst[j*cn + k] = fillval[k];                             \
   1649         }                                                                   \
   1650     }                                                                       \
   1651                                                                             \
   1652     return CV_OK;                                                           \
   1653 }
   1654 
   1655 
   1656 #define  ICV_DEF_REMAP_BICUBIC_FUNC( flavor, arrtype, worktype,                 \
   1657                                      load_macro, cast_macro1, cast_macro2 )     \
   1658 static CvStatus CV_STDCALL                                                      \
   1659 icvRemap_Bicubic_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize, \
   1660                          arrtype* dst, int dststep, CvSize dsize,               \
   1661                          const float* mapx, int mxstep,                         \
   1662                          const float* mapy, int mystep,                         \
   1663                          int cn, const arrtype* fillval )                       \
   1664 {                                                                               \
   1665     int i, j, k;                                                                \
   1666     ssize.width = MAX( ssize.width - 3, 0 );                                    \
   1667     ssize.height = MAX( ssize.height - 3, 0 );                                  \
   1668                                                                                 \
   1669     srcstep /= sizeof(src[0]);                                                  \
   1670     dststep /= sizeof(dst[0]);                                                  \
   1671     mxstep /= sizeof(mapx[0]);                                                  \
   1672     mystep /= sizeof(mapy[0]);                                                  \
   1673                                                                                 \
   1674     for( i = 0; i < dsize.height; i++, dst += dststep,                          \
   1675                                   mapx += mxstep, mapy += mystep )              \
   1676     {                                                                           \
   1677         for( j = 0; j < dsize.width; j++ )                                      \
   1678         {                                                                       \
   1679             int ix = cvRound(mapx[j]*(1 << ICV_WARP_SHIFT));                    \
   1680             int iy = cvRound(mapy[j]*(1 << ICV_WARP_SHIFT));                    \
   1681             int ifx = ix & ICV_WARP_MASK;                                       \
   1682             int ify = iy & ICV_WARP_MASK;                                       \
   1683             ix >>= ICV_WARP_SHIFT;                                              \
   1684             iy >>= ICV_WARP_SHIFT;                                              \
   1685                                                                                 \
   1686             if( (unsigned)(ix-1) < (unsigned)ssize.width &&                     \
   1687                 (unsigned)(iy-1) < (unsigned)ssize.height )                     \
   1688             {                                                                   \
   1689                 for( k = 0; k < cn; k++ )                                       \
   1690                 {                                                               \
   1691                     const arrtype* s = src + (iy-1)*srcstep + ix*cn + k;        \
   1692                                                                                 \
   1693                     float t0 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \
   1694                                load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \
   1695                                load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\
   1696                                load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\
   1697                                                                                 \
   1698                     s += srcstep;                                               \
   1699                                                                                 \
   1700                     float t1 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \
   1701                                load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \
   1702                                load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\
   1703                                load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\
   1704                                                                                 \
   1705                     s += srcstep;                                               \
   1706                                                                                 \
   1707                     float t2 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \
   1708                                load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \
   1709                                load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\
   1710                                load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\
   1711                                                                                 \
   1712                     s += srcstep;                                               \
   1713                                                                                 \
   1714                     float t3 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \
   1715                                load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \
   1716                                load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\
   1717                                load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\
   1718                                                                                 \
   1719                     worktype t = cast_macro1( t0*icvCubicCoeffs[ify*2 + 1] +    \
   1720                                t1*icvCubicCoeffs[ify*2] +                       \
   1721                                t2*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ify)*2] +  \
   1722                                t3*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ify)*2+1] );\
   1723                                                                                 \
   1724                     dst[j*cn + k] = cast_macro2(t);                             \
   1725                 }                                                               \
   1726             }                                                                   \
   1727             else if( fillval )                                                  \
   1728                 for( k = 0; k < cn; k++ )                                       \
   1729                     dst[j*cn + k] = fillval[k];                                 \
   1730         }                                                                       \
   1731     }                                                                           \
   1732                                                                                 \
   1733     return CV_OK;                                                               \
   1734 }
   1735 
   1736 
   1737 ICV_DEF_REMAP_BILINEAR_FUNC( 8u, uchar, CV_8TO32F, cvRound )
   1738 ICV_DEF_REMAP_BILINEAR_FUNC( 16u, ushort, CV_NOP, cvRound )
   1739 ICV_DEF_REMAP_BILINEAR_FUNC( 32f, float, CV_NOP, CV_NOP )
   1740 
   1741 ICV_DEF_REMAP_BICUBIC_FUNC( 8u, uchar, int, CV_8TO32F, cvRound, CV_FAST_CAST_8U )
   1742 ICV_DEF_REMAP_BICUBIC_FUNC( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
   1743 ICV_DEF_REMAP_BICUBIC_FUNC( 32f, float, float, CV_NOP, CV_NOP, CV_NOP )
   1744 
   1745 typedef CvStatus (CV_STDCALL * CvRemapFunc)(
   1746     const void* src, int srcstep, CvSize ssize,
   1747     void* dst, int dststep, CvSize dsize,
   1748     const float* mapx, int mxstep,
   1749     const float* mapy, int mystep,
   1750     int cn, const void* fillval );
   1751 
   1752 static void icvInitRemapTab( CvFuncTable* bilinear_tab, CvFuncTable* bicubic_tab )
   1753 {
   1754     bilinear_tab->fn_2d[CV_8U] = (void*)icvRemap_Bilinear_8u_CnR;
   1755     bilinear_tab->fn_2d[CV_16U] = (void*)icvRemap_Bilinear_16u_CnR;
   1756     bilinear_tab->fn_2d[CV_32F] = (void*)icvRemap_Bilinear_32f_CnR;
   1757 
   1758     bicubic_tab->fn_2d[CV_8U] = (void*)icvRemap_Bicubic_8u_CnR;
   1759     bicubic_tab->fn_2d[CV_16U] = (void*)icvRemap_Bicubic_16u_CnR;
   1760     bicubic_tab->fn_2d[CV_32F] = (void*)icvRemap_Bicubic_32f_CnR;
   1761 }
   1762 
   1763 
   1764 /******************** IPP remap functions *********************/
   1765 
   1766 typedef CvStatus (CV_STDCALL * CvRemapIPPFunc)(
   1767     const void* src, CvSize srcsize, int srcstep, CvRect srcroi,
   1768     const float* xmap, int xmapstep, const float* ymap, int ymapstep,
   1769     void* dst, int dststep, CvSize dstsize, int interpolation );
   1770 
   1771 icvRemap_8u_C1R_t icvRemap_8u_C1R_p = 0;
   1772 icvRemap_8u_C3R_t icvRemap_8u_C3R_p = 0;
   1773 icvRemap_8u_C4R_t icvRemap_8u_C4R_p = 0;
   1774 
   1775 icvRemap_32f_C1R_t icvRemap_32f_C1R_p = 0;
   1776 icvRemap_32f_C3R_t icvRemap_32f_C3R_p = 0;
   1777 icvRemap_32f_C4R_t icvRemap_32f_C4R_p = 0;
   1778 
   1779 /**************************************************************/
   1780 
   1781 #define CV_REMAP_SHIFT 5
   1782 #define CV_REMAP_MASK ((1 << CV_REMAP_SHIFT) - 1)
   1783 
   1784 #if CV_SSE2 && defined(__GNUC__)
   1785 #define align(x) __attribute__ ((aligned (x)))
   1786 #elif CV_SSE2 && (defined(__ICL) || defined _MSC_VER && _MSC_VER >= 1300)
   1787 #define align(x) __declspec(align(x))
   1788 #else
   1789 #define align(x)
   1790 #endif
   1791 
   1792 static void icvRemapFixedPt_8u( const CvMat* src, CvMat* dst,
   1793     const CvMat* xymap, const CvMat* amap, const uchar* fillval )
   1794 {
   1795     const int TABSZ = 1 << (CV_REMAP_SHIFT*2);
   1796     static ushort align(8) atab[TABSZ][4];
   1797     static int inittab = 0;
   1798 
   1799     int x, y, cols = src->cols, rows = src->rows;
   1800     const uchar* sptr0 = src->data.ptr;
   1801     int sstep = src->step;
   1802     uchar fv0 = fillval[0], fv1 = fillval[1], fv2 = fillval[2], fv3 = fillval[3];
   1803     int cn = CV_MAT_CN(src->type);
   1804 #if CV_SSE2
   1805     const uchar* sptr1 = sptr0 + sstep;
   1806     __m128i br = _mm_set1_epi32((cols-2) + ((rows-2)<<16));
   1807     __m128i xy2ofs = _mm_set1_epi32(1 + (sstep << 16));
   1808     __m128i z = _mm_setzero_si128();
   1809     int align(16) iofs0[4], iofs1[4];
   1810 #endif
   1811 
   1812     if( !inittab )
   1813     {
   1814         for( y = 0; y <= CV_REMAP_MASK; y++ )
   1815             for( x = 0; x <= CV_REMAP_MASK; x++ )
   1816             {
   1817                 int k = (y << CV_REMAP_SHIFT) + x;
   1818                 atab[k][0] = (ushort)((CV_REMAP_MASK+1 - y)*(CV_REMAP_MASK+1 - x));
   1819                 atab[k][1] = (ushort)((CV_REMAP_MASK+1 - y)*x);
   1820                 atab[k][2] = (ushort)(y*(CV_REMAP_MASK+1 - x));
   1821                 atab[k][3] = (ushort)(y*x);
   1822             }
   1823         inittab = 1;
   1824     }
   1825 
   1826     for( y = 0; y < rows; y++ )
   1827     {
   1828         const short* xy = (const short*)(xymap->data.ptr + xymap->step*y);
   1829         const ushort* alpha = (const ushort*)(amap->data.ptr + amap->step*y);
   1830         uchar* dptr = (uchar*)(dst->data.ptr + dst->step*y);
   1831         int x = 0;
   1832 
   1833         if( cn == 1 )
   1834         {
   1835     #if CV_SSE2
   1836             for( ; x <= cols - 8; x += 8 )
   1837             {
   1838                 __m128i xy0 = _mm_load_si128( (const __m128i*)(xy + x*2));
   1839                 __m128i xy1 = _mm_load_si128( (const __m128i*)(xy + x*2 + 8));
   1840                 // 0|0|0|0|... <= x0|y0|x1|y1|... < cols-1|rows-1|cols-1|rows-1|... ?
   1841                 __m128i mask0 = _mm_cmpeq_epi32(_mm_or_si128(_mm_cmpgt_epi16(z, xy0),
   1842                                                 _mm_cmpgt_epi16(xy0,br)), z);
   1843                 __m128i mask1 = _mm_cmpeq_epi32(_mm_or_si128(_mm_cmpgt_epi16(z, xy1),
   1844                                                 _mm_cmpgt_epi16(xy1,br)), z);
   1845                 __m128i ofs0 = _mm_and_si128(_mm_madd_epi16( xy0, xy2ofs ), mask0 );
   1846                 __m128i ofs1 = _mm_and_si128(_mm_madd_epi16( xy1, xy2ofs ), mask1 );
   1847                 unsigned i0, i1;
   1848                 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
   1849                 _mm_store_si128( (__m128i*)iofs0, ofs0 );
   1850                 _mm_store_si128( (__m128i*)iofs1, ofs1 );
   1851                 i0 = *(ushort*)(sptr0 + iofs0[0]) + (*(ushort*)(sptr0 + iofs0[1]) << 16);
   1852                 i1 = *(ushort*)(sptr0 + iofs0[2]) + (*(ushort*)(sptr0 + iofs0[3]) << 16);
   1853                 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
   1854                 i0 = *(ushort*)(sptr1 + iofs0[0]) + (*(ushort*)(sptr1 + iofs0[1]) << 16);
   1855                 i1 = *(ushort*)(sptr1 + iofs0[2]) + (*(ushort*)(sptr1 + iofs0[3]) << 16);
   1856                 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
   1857                 v0 = _mm_unpacklo_epi8(v0, z);
   1858                 v1 = _mm_unpacklo_epi8(v1, z);
   1859 
   1860                 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)atab[alpha[x]]),
   1861                                         _mm_loadl_epi64((__m128i*)atab[alpha[x+1]]));
   1862                 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)atab[alpha[x+2]]),
   1863                                         _mm_loadl_epi64((__m128i*)atab[alpha[x+3]]));
   1864                 b0 = _mm_unpacklo_epi64(a0, a1);
   1865                 b1 = _mm_unpackhi_epi64(a0, a1);
   1866                 v0 = _mm_madd_epi16(v0, b0);
   1867                 v1 = _mm_madd_epi16(v1, b1);
   1868                 v0 = _mm_and_si128(_mm_add_epi32(v0, v1), mask0);
   1869 
   1870                 i0 = *(ushort*)(sptr0 + iofs1[0]) + (*(ushort*)(sptr0 + iofs1[1]) << 16);
   1871                 i1 = *(ushort*)(sptr0 + iofs1[2]) + (*(ushort*)(sptr0 + iofs1[3]) << 16);
   1872                 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
   1873                 i0 = *(ushort*)(sptr1 + iofs1[0]) + (*(ushort*)(sptr1 + iofs1[1]) << 16);
   1874                 i1 = *(ushort*)(sptr1 + iofs1[2]) + (*(ushort*)(sptr1 + iofs1[3]) << 16);
   1875                 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
   1876                 v2 = _mm_unpacklo_epi8(v2, z);
   1877                 v3 = _mm_unpacklo_epi8(v3, z);
   1878 
   1879                 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)atab[alpha[x+4]]),
   1880                                         _mm_loadl_epi64((__m128i*)atab[alpha[x+5]]));
   1881                 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)atab[alpha[x+6]]),
   1882                                         _mm_loadl_epi64((__m128i*)atab[alpha[x+7]]));
   1883                 b0 = _mm_unpacklo_epi64(a0, a1);
   1884                 b1 = _mm_unpackhi_epi64(a0, a1);
   1885                 v2 = _mm_madd_epi16(v2, b0);
   1886                 v3 = _mm_madd_epi16(v3, b1);
   1887                 v2 = _mm_and_si128(_mm_add_epi32(v2, v3), mask1);
   1888 
   1889                 v0 = _mm_srai_epi32(v0, CV_REMAP_SHIFT*2);
   1890                 v2 = _mm_srai_epi32(v2, CV_REMAP_SHIFT*2);
   1891                 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
   1892                 _mm_storel_epi64( (__m128i*)(dptr + x), v0 );
   1893             }
   1894     #endif
   1895 
   1896             for( ; x < cols; x++ )
   1897             {
   1898                 int xi = xy[x*2], yi = xy[x*2+1];
   1899                 if( (unsigned)yi >= (unsigned)(rows - 1) ||
   1900                     (unsigned)xi >= (unsigned)(cols - 1))
   1901                 {
   1902                     dptr[x] = fv0;
   1903                 }
   1904                 else
   1905                 {
   1906                     const uchar* sptr = sptr0 + sstep*yi + xi;
   1907                     const ushort* a = atab[alpha[x]];
   1908                     dptr[x] = (uchar)((sptr[0]*a[0] + sptr[1]*a[1] + sptr[sstep]*a[2] +
   1909                                       sptr[sstep+1]*a[3])>>CV_REMAP_SHIFT*2);
   1910                 }
   1911             }
   1912         }
   1913         else if( cn == 3 )
   1914         {
   1915             for( ; x < cols; x++ )
   1916             {
   1917                 int xi = xy[x*2], yi = xy[x*2+1];
   1918                 if( (unsigned)yi >= (unsigned)(rows - 1) ||
   1919                     (unsigned)xi >= (unsigned)(cols - 1))
   1920                 {
   1921                     dptr[x*3] = fv0; dptr[x*3+1] = fv1; dptr[x*3+2] = fv2;
   1922                 }
   1923                 else
   1924                 {
   1925                     const uchar* sptr = sptr0 + sstep*yi + xi*3;
   1926                     const ushort* a = atab[alpha[x]];
   1927                     int v0, v1, v2;
   1928                     v0 = (sptr[0]*a[0] + sptr[3]*a[1] +
   1929                         sptr[sstep]*a[2] + sptr[sstep+3]*a[3])>>CV_REMAP_SHIFT*2;
   1930                     v1 = (sptr[1]*a[0] + sptr[4]*a[1] +
   1931                         sptr[sstep+1]*a[2] + sptr[sstep+4]*a[3])>>CV_REMAP_SHIFT*2;
   1932                     v2 = (sptr[2]*a[0] + sptr[5]*a[1] +
   1933                         sptr[sstep+2]*a[2] + sptr[sstep+5]*a[3])>>CV_REMAP_SHIFT*2;
   1934                     dptr[x*3] = (uchar)v0; dptr[x*3+1] = (uchar)v1; dptr[x*3+2] = (uchar)v2;
   1935                 }
   1936             }
   1937         }
   1938         else
   1939         {
   1940             assert( cn == 4 );
   1941             for( ; x < cols; x++ )
   1942             {
   1943                 int xi = xy[x*2], yi = xy[x*2+1];
   1944                 if( (unsigned)yi >= (unsigned)(rows - 1) ||
   1945                     (unsigned)xi >= (unsigned)(cols - 1))
   1946                 {
   1947                     dptr[x*4] = fv0; dptr[x*4+1] = fv1;
   1948                     dptr[x*4+2] = fv2; dptr[x*4+3] = fv3;
   1949                 }
   1950                 else
   1951                 {
   1952                     const uchar* sptr = sptr0 + sstep*yi + xi*3;
   1953                     const ushort* a = atab[alpha[x]];
   1954                     int v0, v1;
   1955                     v0 = (sptr[0]*a[0] + sptr[4]*a[1] +
   1956                         sptr[sstep]*a[2] + sptr[sstep+3]*a[3])>>CV_REMAP_SHIFT*2;
   1957                     v1 = (sptr[1]*a[0] + sptr[5]*a[1] +
   1958                         sptr[sstep+1]*a[2] + sptr[sstep+5]*a[3])>>CV_REMAP_SHIFT*2;
   1959                     dptr[x*4] = (uchar)v0; dptr[x*4+1] = (uchar)v1;
   1960                     v0 = (sptr[2]*a[0] + sptr[6]*a[1] +
   1961                         sptr[sstep+2]*a[2] + sptr[sstep+6]*a[3])>>CV_REMAP_SHIFT*2;
   1962                     v1 = (sptr[3]*a[0] + sptr[7]*a[1] +
   1963                         sptr[sstep+3]*a[2] + sptr[sstep+7]*a[3])>>CV_REMAP_SHIFT*2;
   1964                     dptr[x*4+2] = (uchar)v0; dptr[x*4+3] = (uchar)v1;
   1965                 }
   1966             }
   1967         }
   1968     }
   1969 }
   1970 
   1971 
   1972 CV_IMPL void
   1973 cvRemap( const CvArr* srcarr, CvArr* dstarr,
   1974          const CvArr* _mapx, const CvArr* _mapy,
   1975          int flags, CvScalar fillval )
   1976 {
   1977     static CvFuncTable bilinear_tab;
   1978     static CvFuncTable bicubic_tab;
   1979     static int inittab = 0;
   1980 
   1981     CV_FUNCNAME( "cvRemap" );
   1982 
   1983     __BEGIN__;
   1984 
   1985     CvMat srcstub, *src = (CvMat*)srcarr;
   1986     CvMat dststub, *dst = (CvMat*)dstarr;
   1987     CvMat mxstub, *mapx = (CvMat*)_mapx;
   1988     CvMat mystub, *mapy = (CvMat*)_mapy;
   1989     int type, depth, cn;
   1990     bool fltremap;
   1991     int method = flags & 3;
   1992     double fillbuf[4];
   1993     CvSize ssize, dsize;
   1994 
   1995     if( !inittab )
   1996     {
   1997         icvInitRemapTab( &bilinear_tab, &bicubic_tab );
   1998         icvInitLinearCoeffTab();
   1999         icvInitCubicCoeffTab();
   2000         inittab = 1;
   2001     }
   2002 
   2003     CV_CALL( src = cvGetMat( srcarr, &srcstub ));
   2004     CV_CALL( dst = cvGetMat( dstarr, &dststub ));
   2005     CV_CALL( mapx = cvGetMat( mapx, &mxstub ));
   2006     CV_CALL( mapy = cvGetMat( mapy, &mystub ));
   2007 
   2008     if( !CV_ARE_TYPES_EQ( src, dst ))
   2009         CV_ERROR( CV_StsUnmatchedFormats, "" );
   2010 
   2011     if( CV_MAT_TYPE(mapx->type) == CV_16SC1 && CV_MAT_TYPE(mapy->type) == CV_16SC2 )
   2012     {
   2013         CvMat* temp;
   2014         CV_SWAP(mapx, mapy, temp);
   2015     }
   2016 
   2017     if( (CV_MAT_TYPE(mapx->type) != CV_32FC1 || CV_MAT_TYPE(mapy->type) != CV_32FC1) &&
   2018         (CV_MAT_TYPE(mapx->type) != CV_16SC2 || CV_MAT_TYPE(mapy->type) != CV_16SC1))
   2019         CV_ERROR( CV_StsUnmatchedFormats, "Either both map arrays must have 32fC1 type, "
   2020         "or one of them must be 16sC2 and the other one must be 16sC1" );
   2021 
   2022     if( !CV_ARE_SIZES_EQ( mapx, mapy ) || !CV_ARE_SIZES_EQ( mapx, dst ))
   2023         CV_ERROR( CV_StsUnmatchedSizes,
   2024         "Both map arrays and the destination array must have the same size" );
   2025 
   2026     fltremap = CV_MAT_TYPE(mapx->type) == CV_32FC1;
   2027     type = CV_MAT_TYPE(src->type);
   2028     depth = CV_MAT_DEPTH(type);
   2029     cn = CV_MAT_CN(type);
   2030     if( cn > 4 )
   2031         CV_ERROR( CV_BadNumChannels, "" );
   2032 
   2033     ssize = cvGetMatSize(src);
   2034     dsize = cvGetMatSize(dst);
   2035 
   2036     cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 );
   2037 
   2038     if( !fltremap )
   2039     {
   2040         if( CV_MAT_TYPE(src->type) != CV_8UC1 && CV_MAT_TYPE(src->type) != CV_8UC3 &&
   2041             CV_MAT_TYPE(src->type) != CV_8UC4 )
   2042             CV_ERROR( CV_StsUnsupportedFormat,
   2043             "Only 8-bit input/output is supported by the fixed-point variant of cvRemap" );
   2044         icvRemapFixedPt_8u( src, dst, mapx, mapy, (uchar*)fillbuf );
   2045         EXIT;
   2046     }
   2047 
   2048     if( icvRemap_8u_C1R_p )
   2049     {
   2050         CvRemapIPPFunc ipp_func =
   2051             type == CV_8UC1 ? icvRemap_8u_C1R_p :
   2052             type == CV_8UC3 ? icvRemap_8u_C3R_p :
   2053             type == CV_8UC4 ? icvRemap_8u_C4R_p :
   2054             type == CV_32FC1 ? icvRemap_32f_C1R_p :
   2055             type == CV_32FC3 ? icvRemap_32f_C3R_p :
   2056             type == CV_32FC4 ? icvRemap_32f_C4R_p : 0;
   2057 
   2058         if( ipp_func )
   2059         {
   2060             int srcstep = src->step ? src->step : CV_STUB_STEP;
   2061             int dststep = dst->step ? dst->step : CV_STUB_STEP;
   2062             int mxstep = mapx->step ? mapx->step : CV_STUB_STEP;
   2063             int mystep = mapy->step ? mapy->step : CV_STUB_STEP;
   2064             CvStatus status;
   2065             CvRect srcroi = {0, 0, ssize.width, ssize.height};
   2066 
   2067             // this is not the most efficient way to fill outliers
   2068             if( flags & CV_WARP_FILL_OUTLIERS )
   2069                 cvSet( dst, fillval );
   2070 
   2071             status = ipp_func( src->data.ptr, ssize, srcstep, srcroi,
   2072                                mapx->data.fl, mxstep, mapy->data.fl, mystep,
   2073                                dst->data.ptr, dststep, dsize,
   2074                                1 << (method == CV_INTER_NN || method == CV_INTER_LINEAR ||
   2075                                method == CV_INTER_CUBIC ? method : CV_INTER_LINEAR) );
   2076             if( status >= 0 )
   2077                 EXIT;
   2078         }
   2079     }
   2080 
   2081     {
   2082         CvRemapFunc func = method == CV_INTER_CUBIC ?
   2083             (CvRemapFunc)bicubic_tab.fn_2d[depth] :
   2084             (CvRemapFunc)bilinear_tab.fn_2d[depth];
   2085 
   2086         if( !func )
   2087             CV_ERROR( CV_StsUnsupportedFormat, "" );
   2088 
   2089         func( src->data.ptr, src->step, ssize, dst->data.ptr, dst->step, dsize,
   2090               mapx->data.fl, mapx->step, mapy->data.fl, mapy->step,
   2091               cn, flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0 );
   2092     }
   2093 
   2094     __END__;
   2095 }
   2096 
   2097 CV_IMPL void
   2098 cvConvertMaps( const CvArr* arrx, const CvArr* arry,
   2099                CvArr* arrxy, CvArr* arra )
   2100 {
   2101     CV_FUNCNAME( "cvConvertMaps" );
   2102 
   2103     __BEGIN__;
   2104 
   2105     CvMat xstub, *mapx = cvGetMat( arrx, &xstub );
   2106     CvMat ystub, *mapy = cvGetMat( arry, &ystub );
   2107     CvMat xystub, *mapxy = cvGetMat( arrxy, &xystub );
   2108     CvMat astub, *mapa = cvGetMat( arra, &astub );
   2109     int x, y, cols = mapx->cols, rows = mapx->rows;
   2110 
   2111     CV_ASSERT( CV_ARE_SIZES_EQ(mapx, mapy) && CV_ARE_TYPES_EQ(mapx, mapy) &&
   2112         CV_MAT_TYPE(mapx->type) == CV_32FC1 &&
   2113         CV_ARE_SIZES_EQ(mapxy, mapx) && CV_ARE_SIZES_EQ(mapxy, mapa) &&
   2114         CV_MAT_TYPE(mapxy->type) == CV_16SC2 &&
   2115         CV_MAT_TYPE(mapa->type) == CV_16SC1 );
   2116 
   2117     for( y = 0; y < rows; y++ )
   2118     {
   2119         const float* xrow = (const float*)(mapx->data.ptr + mapx->step*y);
   2120         const float* yrow = (const float*)(mapy->data.ptr + mapy->step*y);
   2121         short* xy = (short*)(mapxy->data.ptr + mapxy->step*y);
   2122         short* alpha = (short*)(mapa->data.ptr + mapa->step*y);
   2123 
   2124         for( x = 0; x < cols; x++ )
   2125         {
   2126             int xi = cvRound(xrow[x]*(1 << CV_REMAP_SHIFT));
   2127             int yi = cvRound(yrow[x]*(1 << CV_REMAP_SHIFT));
   2128             xy[x*2] = (short)(xi >> CV_REMAP_SHIFT);
   2129             xy[x*2+1] = (short)(yi >> CV_REMAP_SHIFT);
   2130             alpha[x] = (short)((xi & CV_REMAP_MASK) + ((yi & CV_REMAP_MASK)<<CV_REMAP_SHIFT));
   2131         }
   2132     }
   2133 
   2134     __END__;
   2135 }
   2136 
   2137 
   2138 /****************************************************************************************\
   2139 *                                   Log-Polar Transform                                  *
   2140 \****************************************************************************************/
   2141 
   2142 /* now it is done via Remap; more correct implementation should use
   2143    some super-sampling technique outside of the "fovea" circle */
   2144 CV_IMPL void
   2145 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
   2146             CvPoint2D32f center, double M, int flags )
   2147 {
   2148     CvMat* mapx = 0;
   2149     CvMat* mapy = 0;
   2150     double* exp_tab = 0;
   2151     float* buf = 0;
   2152 
   2153     CV_FUNCNAME( "cvLogPolar" );
   2154 
   2155     __BEGIN__;
   2156 
   2157     CvMat srcstub, *src = (CvMat*)srcarr;
   2158     CvMat dststub, *dst = (CvMat*)dstarr;
   2159     CvSize ssize, dsize;
   2160 
   2161     CV_CALL( src = cvGetMat( srcarr, &srcstub ));
   2162     CV_CALL( dst = cvGetMat( dstarr, &dststub ));
   2163 
   2164     if( !CV_ARE_TYPES_EQ( src, dst ))
   2165         CV_ERROR( CV_StsUnmatchedFormats, "" );
   2166 
   2167     if( M <= 0 )
   2168         CV_ERROR( CV_StsOutOfRange, "M should be >0" );
   2169 
   2170     ssize = cvGetMatSize(src);
   2171     dsize = cvGetMatSize(dst);
   2172 
   2173     CV_CALL( mapx = cvCreateMat( dsize.height, dsize.width, CV_32F ));
   2174     CV_CALL( mapy = cvCreateMat( dsize.height, dsize.width, CV_32F ));
   2175 
   2176     if( !(flags & CV_WARP_INVERSE_MAP) )
   2177     {
   2178         int phi, rho;
   2179 
   2180         CV_CALL( exp_tab = (double*)cvAlloc( dsize.width*sizeof(exp_tab[0])) );
   2181 
   2182         for( rho = 0; rho < dst->width; rho++ )
   2183             exp_tab[rho] = exp(rho/M);
   2184 
   2185         for( phi = 0; phi < dsize.height; phi++ )
   2186         {
   2187             double cp = cos(phi*2*CV_PI/dsize.height);
   2188             double sp = sin(phi*2*CV_PI/dsize.height);
   2189             float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
   2190             float* my = (float*)(mapy->data.ptr + phi*mapy->step);
   2191 
   2192             for( rho = 0; rho < dsize.width; rho++ )
   2193             {
   2194                 double r = exp_tab[rho];
   2195                 double x = r*cp + center.x;
   2196                 double y = r*sp + center.y;
   2197 
   2198                 mx[rho] = (float)x;
   2199                 my[rho] = (float)y;
   2200             }
   2201         }
   2202     }
   2203     else
   2204     {
   2205         int x, y;
   2206         CvMat bufx, bufy, bufp, bufa;
   2207         double ascale = (ssize.width-1)/(2*CV_PI);
   2208 
   2209         CV_CALL( buf = (float*)cvAlloc( 4*dsize.width*sizeof(buf[0]) ));
   2210 
   2211         bufx = cvMat( 1, dsize.width, CV_32F, buf );
   2212         bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
   2213         bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
   2214         bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
   2215 
   2216         for( x = 0; x < dsize.width; x++ )
   2217             bufx.data.fl[x] = (float)x - center.x;
   2218 
   2219         for( y = 0; y < dsize.height; y++ )
   2220         {
   2221             float* mx = (float*)(mapx->data.ptr + y*mapx->step);
   2222             float* my = (float*)(mapy->data.ptr + y*mapy->step);
   2223 
   2224             for( x = 0; x < dsize.width; x++ )
   2225                 bufy.data.fl[x] = (float)y - center.y;
   2226 
   2227 #if 1
   2228             cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
   2229 
   2230             for( x = 0; x < dsize.width; x++ )
   2231                 bufp.data.fl[x] += 1.f;
   2232 
   2233             cvLog( &bufp, &bufp );
   2234 
   2235             for( x = 0; x < dsize.width; x++ )
   2236             {
   2237                 double rho = bufp.data.fl[x]*M;
   2238                 double phi = bufa.data.fl[x]*ascale;
   2239 
   2240                 mx[x] = (float)rho;
   2241                 my[x] = (float)phi;
   2242             }
   2243 #else
   2244             for( x = 0; x < dsize.width; x++ )
   2245             {
   2246                 double xx = bufx.data.fl[x];
   2247                 double yy = bufy.data.fl[x];
   2248 
   2249                 double p = log(sqrt(xx*xx + yy*yy) + 1.)*M;
   2250                 double a = atan2(yy,xx);
   2251                 if( a < 0 )
   2252                     a = 2*CV_PI + a;
   2253                 a *= ascale;
   2254 
   2255                 mx[x] = (float)p;
   2256                 my[x] = (float)a;
   2257             }
   2258 #endif
   2259         }
   2260     }
   2261 
   2262     cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
   2263 
   2264     __END__;
   2265 
   2266     cvFree( &exp_tab );
   2267     cvFree( &buf );
   2268     cvReleaseMat( &mapx );
   2269     cvReleaseMat( &mapy );
   2270 }
   2271 
   2272 /* End of file. */
   2273