      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
      20 //   * Redistributions of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
      23 //   * Redistributions in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 #undef HAVE_IPP
     46 
     47 namespace cv { namespace hal {
     48 
     49 ///////////////////////////////////// ATAN2 ////////////////////////////////////
     50 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
     51 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
     52 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
     53 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
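/*
   The four atan2_p* constants above are the coefficients of an odd, degree-7 polynomial
   approximation of atan(t) for t in [0, 1], pre-multiplied by 180/CV_PI so the kernels
   below produce degrees directly.  A minimal scalar sketch of the first-octant case
   (illustrative only; the helper name below is hypothetical, and the real loops in
   fastAtan2() add the full quadrant handling):

       static float atan_deg_first_octant(float t)   // assumes 0 <= t <= 1
       {
           float t2 = t*t;
           return (((atan2_p7*t2 + atan2_p5)*t2 + atan2_p3)*t2 + atan2_p1)*t;
       }

   For example, atan_deg_first_octant(1.0f) evaluates to roughly 45 degrees.
*/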
     54 
     55 #if CV_NEON
     56 static inline float32x4_t cv_vrecpq_f32(float32x4_t val)
     57 {
     58     float32x4_t reciprocal = vrecpeq_f32(val);
     59     reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
     60     reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
     61     return reciprocal;
     62 }
     63 #endif
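/*
   cv_vrecpq_f32() above refines NEON's coarse reciprocal estimate (vrecpeq_f32, roughly
   8 significant bits) with two Newton-Raphson steps: vrecpsq_f32(val, r) evaluates
   (2 - val*r), so each refinement line computes r_{n+1} = r_n * (2 - val*r_n), roughly
   doubling the number of correct bits per step.  Scalar sketch of one lane (the initial
   estimate below stands in for vrecpe and is an assumption, not library code):

       float r = 1.0f / val;                // imagine a coarse estimate here
       r = r * (2.0f - val * r);            // first Newton-Raphson step
       r = r * (2.0f - val * r);            // second step: close to full float precision
*/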
     64 
     65 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
     66 {
     67     int i = 0;
     68     float scale = angleInDegrees ? 1 : (float)(CV_PI/180);
     69 
     70 #ifdef HAVE_TEGRA_OPTIMIZATION
     71     if (tegra::useTegra() && tegra::FastAtan2_32f(Y, X, angle, len, scale))
     72         return;
     73 #endif
     74 
     75 #if CV_SSE2
     76     Cv32suf iabsmask; iabsmask.i = 0x7fffffff;
     77     __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f);
     78     __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f);
     79     __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale);
     80     __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3);
     81     __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7);
     82 
     83     for( ; i <= len - 4; i += 4 )
     84     {
     85         __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i);
     86         __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask);
     87         __m128 mask = _mm_cmplt_ps(ax, ay);
     88         __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay);
     89         __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps));
     90         __m128 c2 = _mm_mul_ps(c, c);
     91         __m128 a = _mm_mul_ps(c2, p7);
     92         a = _mm_mul_ps(_mm_add_ps(a, p5), c2);
     93         a = _mm_mul_ps(_mm_add_ps(a, p3), c2);
     94         a = _mm_mul_ps(_mm_add_ps(a, p1), c);
     95 
     96         __m128 b = _mm_sub_ps(_90, a);
     97         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
     98 
     99         b = _mm_sub_ps(_180, a);
    100         mask = _mm_cmplt_ps(x, z);
    101         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
    102 
    103         b = _mm_sub_ps(_360, a);
    104         mask = _mm_cmplt_ps(y, z);
    105         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
    106 
    107         a = _mm_mul_ps(a, scale4);
    108         _mm_storeu_ps(angle + i, a);
    109     }
    110 #elif CV_NEON
    111     float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON);
    112     float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f);
    113     float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale);
    114     float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3);
    115     float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 = vdupq_n_f32(atan2_p7);
    116 
    117     for( ; i <= len - 4; i += 4 )
    118     {
    119         float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i);
    120         float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y);
    121         float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay);
    122         float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps)));
    123         float32x4_t c2 = vmulq_f32(c, c);
    124         float32x4_t a = vmulq_f32(c2, p7);
    125         a = vmulq_f32(vaddq_f32(a, p5), c2);
    126         a = vmulq_f32(vaddq_f32(a, p3), c2);
    127         a = vmulq_f32(vaddq_f32(a, p1), c);
    128 
    129         a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a));
    130         a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a);
    131         a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a);
    132 
    133         vst1q_f32(angle + i, vmulq_f32(a, scale4));
    134     }
    135 #endif
    136 
    137     for( ; i < len; i++ )
    138     {
    139         float x = X[i], y = Y[i];
    140         float ax = std::abs(x), ay = std::abs(y);
    141         float a, c, c2;
    142         if( ax >= ay )
    143         {
    144             c = ay/(ax + (float)DBL_EPSILON);
    145             c2 = c*c;
    146             a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
    147         }
    148         else
    149         {
    150             c = ax/(ay + (float)DBL_EPSILON);
    151             c2 = c*c;
    152             a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
    153         }
    154         if( x < 0 )
    155             a = 180.f - a;
    156         if( y < 0 )
    157             a = 360.f - a;
    158         angle[i] = (float)(a*scale);
    159     }
    160 }
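/*
   Usage sketch for fastAtan2() (buffer contents are made up for illustration):

       float yv[3] = { 0.f, 1.f, -1.f };
       float xv[3] = { 1.f, 1.f,  0.f };
       float ang[3];
       cv::hal::fastAtan2(yv, xv, ang, 3, true);   // degrees: ~0, ~45, ~270

   The SSE2 path performs the quadrant selection branchlessly with the
   a ^ ((a ^ b) & mask) idiom, which yields a where mask is all zeros and b where mask is
   all ones; the NEON path uses vbslq_f32 for the same selection.
*/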
    161 
    162 
    163 void magnitude(const float* x, const float* y, float* mag, int len)
    164 {
    165 #if defined HAVE_IPP
    166     CV_IPP_CHECK()
    167     {
    168         IppStatus status = ippsMagnitude_32f(x, y, mag, len);
    169         if (status >= 0)
    170         {
    171             CV_IMPL_ADD(CV_IMPL_IPP);
    172             return;
    173         }
    174         setIppErrorStatus();
    175     }
    176 #endif
    177 
    178     int i = 0;
    179 
    180 #if CV_SIMD128
    181     for( ; i <= len - 8; i += 8 )
    182     {
    183         v_float32x4 x0 = v_load(x + i), x1 = v_load(x + i + 4);
    184         v_float32x4 y0 = v_load(y + i), y1 = v_load(y + i + 4);
    185         x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
    186         x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
    187         v_store(mag + i, x0);
    188         v_store(mag + i + 4, x1);
    189     }
    190 #endif
    191 
    192     for( ; i < len; i++ )
    193     {
    194         float x0 = x[i], y0 = y[i];
    195         mag[i] = std::sqrt(x0*x0 + y0*y0);
    196     }
    197 }
    198 
    199 void magnitude(const double* x, const double* y, double* mag, int len)
    200 {
    201 #if defined(HAVE_IPP)
    202     CV_IPP_CHECK()
    203     {
    204         IppStatus status = ippsMagnitude_64f(x, y, mag, len);
    205         if (status >= 0)
    206         {
    207             CV_IMPL_ADD(CV_IMPL_IPP);
    208             return;
    209         }
    210         setIppErrorStatus();
    211     }
    212 #endif
    213 
    214     int i = 0;
    215 
    216 #if CV_SIMD128_64F
    217     for( ; i <= len - 4; i += 4 )
    218     {
    219         v_float64x2 x0 = v_load(x + i), x1 = v_load(x + i + 2);
    220         v_float64x2 y0 = v_load(y + i), y1 = v_load(y + i + 2);
    221         x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
    222         x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
    223         v_store(mag + i, x0);
    224         v_store(mag + i + 2, x1);
    225     }
    226 #endif
    227 
    228     for( ; i < len; i++ )
    229     {
    230         double x0 = x[i], y0 = y[i];
    231         mag[i] = std::sqrt(x0*x0 + y0*y0);
    232     }
    233 }
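/*
   Both magnitude() overloads compute sqrt(x*x + y*y) element-wise, with x*x + y*y
   expressed as v_muladd(x, x, y*y) in the SIMD loops; unlike std::hypot there is no
   guard against intermediate overflow or underflow.  Usage sketch (made-up data):

       double xs[2] = { 3.0, 5.0 }, ys[2] = { 4.0, 12.0 }, m[2];
       cv::hal::magnitude(xs, ys, m, 2);   // m = { 5.0, 13.0 }
*/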
    234 
    235 
    236 void invSqrt(const float* src, float* dst, int len)
    237 {
    238 #if defined(HAVE_IPP)
    239     CV_IPP_CHECK()
    240     {
    241         if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
    242         {
    243             CV_IMPL_ADD(CV_IMPL_IPP);
    244             return;
    245         }
    246         setIppErrorStatus();
    247     }
    248 #endif
    249 
    250     int i = 0;
    251 
    252 #if CV_SIMD128
    253     for( ; i <= len - 8; i += 8 )
    254     {
    255         v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4);
    256         t0 = v_invsqrt(t0);
    257         t1 = v_invsqrt(t1);
    258         v_store(dst + i, t0); v_store(dst + i + 4, t1);
    259     }
    260 #endif
    261 
    262     for( ; i < len; i++ )
    263         dst[i] = 1/std::sqrt(src[i]);
    264 }
    265 
    266 
    267 void invSqrt(const double* src, double* dst, int len)
    268 {
    269     int i = 0;
    270 
    271 #if CV_SSE2
    272     __m128d v_1 = _mm_set1_pd(1.0);
    273     for ( ; i <= len - 2; i += 2)
    274         _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i))));
    275 #endif
    276 
    277     for( ; i < len; i++ )
    278         dst[i] = 1/std::sqrt(src[i]);
    279 }
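/*
   invSqrt() computes 1/sqrt(src[i]) per element.  The float overload uses the universal
   intrinsic v_invsqrt, while the double overload divides 1.0 by _mm_sqrt_pd, i.e. an
   exact reciprocal square root rather than a fast estimate.  Usage sketch:

       double in[2] = { 4.0, 0.25 }, out[2];
       cv::hal::invSqrt(in, out, 2);   // out = { 0.5, 2.0 }
*/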
    280 
    281 
    282 void sqrt(const float* src, float* dst, int len)
    283 {
    284 #if defined(HAVE_IPP)
    285     CV_IPP_CHECK()
    286     {
    287         if (ippsSqrt_32f_A21(src, dst, len) >= 0)
    288         {
    289             CV_IMPL_ADD(CV_IMPL_IPP);
    290             return;
    291         }
    292         setIppErrorStatus();
    293     }
    294 #endif
    295 
    296     int i = 0;
    297 
    298 #if CV_SIMD128
    299     for( ; i <= len - 8; i += 8 )
    300     {
    301         v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4);
    302         t0 = v_sqrt(t0);
    303         t1 = v_sqrt(t1);
    304         v_store(dst + i, t0); v_store(dst + i + 4, t1);
    305     }
    306 #endif
    307 
    308     for( ; i < len; i++ )
    309         dst[i] = std::sqrt(src[i]);
    310 }
    311 
    312 
    313 void sqrt(const double* src, double* dst, int len)
    314 {
    315 #if defined(HAVE_IPP)
    316     CV_IPP_CHECK()
    317     {
    318         if (ippsSqrt_64f_A50(src, dst, len) >= 0)
    319         {
    320             CV_IMPL_ADD(CV_IMPL_IPP);
    321             return;
    322         }
    323         setIppErrorStatus();
    324     }
    325 #endif
    326 
    327     int i = 0;
    328 
    329 #if CV_SIMD128_64F
    330     for( ; i <= len - 4; i += 4 )
    331     {
    332         v_float64x2 t0 = v_load(src + i), t1 = v_load(src + i + 2);
    333         t0 = v_sqrt(t0);
    334         t1 = v_sqrt(t1);
    335         v_store(dst + i, t0); v_store(dst + i + 2, t1);
    336     }
    337 #endif
    338 
    339     for( ; i < len; i++ )
    340         dst[i] = std::sqrt(src[i]);
    341 }
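/*
   The sqrt() overloads are element-wise square roots; each SIMD loop consumes two vectors
   per iteration (8 floats or 4 doubles) and leaves any shorter tail to the scalar loop.
   Usage sketch:

       float in[3] = { 9.f, 2.f, 16.f }, out[3];
       cv::hal::sqrt(in, out, 3);   // out ~ { 3.f, 1.4142f, 4.f }
*/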
    342 
    343 ////////////////////////////////////// EXP /////////////////////////////////////
    344 
    345 typedef union
    346 {
    347     struct {
    348 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ )
    349         int hi;
    350         int lo;
    351 #else
    352         int lo;
    353         int hi;
    354 #endif
    355     } i;
    356     double d;
    357 }
    358 DBLINT;
    359 
    360 #define EXPTAB_SCALE 6
    361 #define EXPTAB_MASK  ((1 << EXPTAB_SCALE) - 1)
    362 
    363 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
    364 
    365 static const double expTab[] = {
    366     1.0 * EXPPOLY_32F_A0,
    367     1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
    368     1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
    369     1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
    370     1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
    371     1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
    372     1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
    373     1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
    374     1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
    375     1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
    376     1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
    377     1.126521618608241899794798643787 * EXPPOLY_32F_A0,
    378     1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
    379     1.151189229952982705817759635202 * EXPPOLY_32F_A0,
    380     1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
    381     1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
    382     1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
    383     1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
    384     1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
    385     1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
    386     1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
    387     1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
    388     1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
    389     1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
    390     1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
    391     1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
    392     1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
    393     1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
    394     1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
    395     1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
    396     1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
    397     1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
    398     1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
    399     1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
    400     1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
    401     1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
    402     1.476826145939499311386907480374 * EXPPOLY_32F_A0,
    403     1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
    404     1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
    405     1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
    406     1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
    407     1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
    408     1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
    409     1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
    410     1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
    411     1.628027421857347766848218522014 * EXPPOLY_32F_A0,
    412     1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
    413     1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
    414     1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
    415     1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
    416     1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
    417     1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
    418     1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
    419     1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
    420     1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
    421     1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
    422     1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
    423     1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
    424     1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
    425     1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
    426     1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
    427     1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
    428     1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
    429     1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
    430 };
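/*
   expTab[j] holds 2^(j/64) * EXPPOLY_32F_A0 for j = 0..63 (EXPTAB_SCALE == 6 gives a
   64-entry table); for instance expTab[32] / EXPPOLY_32F_A0 == sqrt(2).  The
   EXPPOLY_32F_A0 factor baked into the table cancels against the same factor divided out
   of the polynomial coefficients in the exp() kernels below, so the product
   expTab[j] * EXPPOLY(t) approximates 2^(j/64) * 2^t directly.
*/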
    431 
    432 
     433 // the code below uses _mm_cast* intrinsics, which are not available on VS2005
    434 #if (defined _MSC_VER && _MSC_VER < 1500) || \
    435 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
    436 #undef CV_SSE2
    437 #define CV_SSE2 0
    438 #endif
    439 
    440 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
    441 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
    442 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
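/*
   Range reduction shared by both exp() overloads below (this restates what the code does):

       z = x * exp_prescale          // exp_prescale = 64 * log2(e)
       n = cvRound(z)                // nearest integer, so |z - n| <= 0.5
       t = (z - n) * exp_postscale   // exp_postscale = 1/64, hence |t| <= 1/128

   and then

       exp(x) = 2^(n/64) * 2^t
              = 2^(n >> EXPTAB_SCALE)            // built by writing the biased exponent
                                                 // (n >> 6) + 127 or + 1023 into buf[]
              * expTab[n & EXPTAB_MASK]          // 2^((n & 63)/64) * EXPPOLY_32F_A0
              * EXPPOLY(t)                       // polynomial for 2^t / EXPPOLY_32F_A0

   Inputs with a very large exponent are first clamped to +/- exp_max_val so that the
   rounding and shift arithmetic cannot overflow.
*/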
    443 
    444 void exp( const float *_x, float *y, int n )
    445 {
    446     static const float
    447     A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
    448     A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
    449     A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
    450     A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
    451 
    452 #undef EXPPOLY
    453 #define EXPPOLY(x)  \
    454 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)
    455 
    456     int i = 0;
    457     const Cv32suf* x = (const Cv32suf*)_x;
    458     Cv32suf buf[4];
    459 
    460 #if CV_SSE2
    461     if( n >= 8 )
    462     {
    463         static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
    464         static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale);
    465         static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
    466         static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
    467 
    468         static const __m128 mA1 = _mm_set1_ps(A1);
    469         static const __m128 mA2 = _mm_set1_ps(A2);
    470         static const __m128 mA3 = _mm_set1_ps(A3);
    471         static const __m128 mA4 = _mm_set1_ps(A4);
    472         bool y_aligned = (size_t)(void*)y % 16 == 0;
    473 
    474         ushort CV_DECL_ALIGNED(16) tab_idx[8];
    475 
    476         for( ; i <= n - 8; i += 8 )
    477         {
    478             __m128 xf0, xf1;
    479             xf0 = _mm_loadu_ps(&x[i].f);
    480             xf1 = _mm_loadu_ps(&x[i+4].f);
    481             __m128i xi0, xi1, xi2, xi3;
    482 
    483             xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4);
    484             xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4);
    485 
    486             __m128d xd0 = _mm_cvtps_pd(xf0);
    487             __m128d xd2 = _mm_cvtps_pd(_mm_movehl_ps(xf0, xf0));
    488             __m128d xd1 = _mm_cvtps_pd(xf1);
    489             __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1));
    490 
    491             xd0 = _mm_mul_pd(xd0, prescale2);
    492             xd2 = _mm_mul_pd(xd2, prescale2);
    493             xd1 = _mm_mul_pd(xd1, prescale2);
    494             xd3 = _mm_mul_pd(xd3, prescale2);
    495 
    496             xi0 = _mm_cvtpd_epi32(xd0);
    497             xi2 = _mm_cvtpd_epi32(xd2);
    498 
    499             xi1 = _mm_cvtpd_epi32(xd1);
    500             xi3 = _mm_cvtpd_epi32(xd3);
    501 
    502             xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0));
    503             xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2));
    504             xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1));
    505             xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3));
    506 
    507             xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2));
    508             xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3));
    509 
    510             xf0 = _mm_mul_ps(xf0, postscale4);
    511             xf1 = _mm_mul_ps(xf1, postscale4);
    512 
    513             xi0 = _mm_unpacklo_epi64(xi0, xi2);
    514             xi1 = _mm_unpacklo_epi64(xi1, xi3);
    515             xi0 = _mm_packs_epi32(xi0, xi1);
    516 
    517             _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
    518 
    519             xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
    520             xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
    521             xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
    522             xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
    523             xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
    524 
    525             __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
    526             __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
    527             __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5]));
    528             __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7]));
    529 
    530             __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
    531             __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3));
    532 
    533             yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23)));
    534             yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23)));
    535 
    536             __m128 zf0 = _mm_add_ps(xf0, mA1);
    537             __m128 zf1 = _mm_add_ps(xf1, mA1);
    538 
    539             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2);
    540             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2);
    541 
    542             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3);
    543             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3);
    544 
    545             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4);
    546             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4);
    547 
    548             zf0 = _mm_mul_ps(zf0, yf0);
    549             zf1 = _mm_mul_ps(zf1, yf1);
    550 
    551             if( y_aligned )
    552             {
    553                 _mm_store_ps(y + i, zf0);
    554                 _mm_store_ps(y + i + 4, zf1);
    555             }
    556             else
    557             {
    558                 _mm_storeu_ps(y + i, zf0);
    559                 _mm_storeu_ps(y + i + 4, zf1);
    560             }
    561         }
    562     }
    563     else
    564 #endif
    565         for( ; i <= n - 4; i += 4 )
    566         {
    567             double x0 = x[i].f * exp_prescale;
    568             double x1 = x[i + 1].f * exp_prescale;
    569             double x2 = x[i + 2].f * exp_prescale;
    570             double x3 = x[i + 3].f * exp_prescale;
    571             int val0, val1, val2, val3, t;
    572 
    573             if( ((x[i].i >> 23) & 255) > 127 + 10 )
    574                 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
    575 
    576             if( ((x[i+1].i >> 23) & 255) > 127 + 10 )
    577                 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val;
    578 
    579             if( ((x[i+2].i >> 23) & 255) > 127 + 10 )
    580                 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val;
    581 
    582             if( ((x[i+3].i >> 23) & 255) > 127 + 10 )
    583                 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val;
    584 
    585             val0 = cvRound(x0);
    586             val1 = cvRound(x1);
    587             val2 = cvRound(x2);
    588             val3 = cvRound(x3);
    589 
    590             x0 = (x0 - val0)*exp_postscale;
    591             x1 = (x1 - val1)*exp_postscale;
    592             x2 = (x2 - val2)*exp_postscale;
    593             x3 = (x3 - val3)*exp_postscale;
    594 
    595             t = (val0 >> EXPTAB_SCALE) + 127;
    596             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
    597             buf[0].i = t << 23;
    598 
    599             t = (val1 >> EXPTAB_SCALE) + 127;
    600             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
    601             buf[1].i = t << 23;
    602 
    603             t = (val2 >> EXPTAB_SCALE) + 127;
    604             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
    605             buf[2].i = t << 23;
    606 
    607             t = (val3 >> EXPTAB_SCALE) + 127;
    608             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
    609             buf[3].i = t << 23;
    610 
    611             x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
    612             x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
    613 
    614             y[i] = (float)x0;
    615             y[i + 1] = (float)x1;
    616 
    617             x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
    618             x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
    619 
    620             y[i + 2] = (float)x2;
    621             y[i + 3] = (float)x3;
    622         }
    623 
    624     for( ; i < n; i++ )
    625     {
    626         double x0 = x[i].f * exp_prescale;
    627         int val0, t;
    628 
    629         if( ((x[i].i >> 23) & 255) > 127 + 10 )
    630             x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
    631 
    632         val0 = cvRound(x0);
    633         t = (val0 >> EXPTAB_SCALE) + 127;
    634         t = !(t & ~255) ? t : t < 0 ? 0 : 255;
    635 
    636         buf[0].i = t << 23;
    637         x0 = (x0 - val0)*exp_postscale;
    638 
    639         y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0));
    640     }
    641 }
    642 
    643 void exp( const double *_x, double *y, int n )
    644 {
    645     static const double
    646     A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
    647     A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
    648     A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
    649     A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
    650     A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
    651     A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
    652 
    653 #undef EXPPOLY
    654 #define EXPPOLY(x)  (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5)
    655 
    656     int i = 0;
    657     Cv64suf buf[4];
    658     const Cv64suf* x = (const Cv64suf*)_x;
    659 
    660 #if CV_SSE2
    661     static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
    662     static const __m128d postscale2 = _mm_set1_pd(exp_postscale);
    663     static const __m128d maxval2 = _mm_set1_pd(exp_max_val);
    664     static const __m128d minval2 = _mm_set1_pd(-exp_max_val);
    665 
    666     static const __m128d mA0 = _mm_set1_pd(A0);
    667     static const __m128d mA1 = _mm_set1_pd(A1);
    668     static const __m128d mA2 = _mm_set1_pd(A2);
    669     static const __m128d mA3 = _mm_set1_pd(A3);
    670     static const __m128d mA4 = _mm_set1_pd(A4);
    671     static const __m128d mA5 = _mm_set1_pd(A5);
    672 
    673     int CV_DECL_ALIGNED(16) tab_idx[4];
    674 
    675     for( ; i <= n - 4; i += 4 )
    676     {
    677         __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f);
    678         __m128i xi0, xi1;
    679         xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2);
    680         xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2);
    681         xf0 = _mm_mul_pd(xf0, prescale2);
    682         xf1 = _mm_mul_pd(xf1, prescale2);
    683 
    684         xi0 = _mm_cvtpd_epi32(xf0);
    685         xi1 = _mm_cvtpd_epi32(xf1);
    686         xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2);
    687         xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2);
    688 
    689         xi0 = _mm_unpacklo_epi64(xi0, xi1);
    690         _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK)));
    691 
    692         xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023));
    693         xi0 = _mm_packs_epi32(xi0, xi0);
    694         xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
    695         xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047));
    696         xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
    697         xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128());
    698         xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128());
    699 
    700         __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
    701         __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
    702         yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52)));
    703         yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52)));
    704 
    705         __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1);
    706         __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1);
    707 
    708         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2);
    709         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2);
    710 
    711         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3);
    712         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3);
    713 
    714         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4);
    715         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4);
    716 
    717         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5);
    718         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5);
    719 
    720         zf0 = _mm_mul_pd(zf0, yf0);
    721         zf1 = _mm_mul_pd(zf1, yf1);
    722 
    723         _mm_storeu_pd(y + i, zf0);
    724         _mm_storeu_pd(y + i + 2, zf1);
    725     }
    726 #endif
    727     for( ; i <= n - 4; i += 4 )
    728     {
    729         double x0 = x[i].f * exp_prescale;
    730         double x1 = x[i + 1].f * exp_prescale;
    731         double x2 = x[i + 2].f * exp_prescale;
    732         double x3 = x[i + 3].f * exp_prescale;
    733 
    734         double y0, y1, y2, y3;
    735         int val0, val1, val2, val3, t;
    736 
    737         t = (int)(x[i].i >> 52);
    738         if( (t & 2047) > 1023 + 10 )
    739             x0 = t < 0 ? -exp_max_val : exp_max_val;
    740 
    741         t = (int)(x[i+1].i >> 52);
    742         if( (t & 2047) > 1023 + 10 )
    743             x1 = t < 0 ? -exp_max_val : exp_max_val;
    744 
    745         t = (int)(x[i+2].i >> 52);
    746         if( (t & 2047) > 1023 + 10 )
    747             x2 = t < 0 ? -exp_max_val : exp_max_val;
    748 
    749         t = (int)(x[i+3].i >> 52);
    750         if( (t & 2047) > 1023 + 10 )
    751             x3 = t < 0 ? -exp_max_val : exp_max_val;
    752 
    753         val0 = cvRound(x0);
    754         val1 = cvRound(x1);
    755         val2 = cvRound(x2);
    756         val3 = cvRound(x3);
    757 
    758         x0 = (x0 - val0)*exp_postscale;
    759         x1 = (x1 - val1)*exp_postscale;
    760         x2 = (x2 - val2)*exp_postscale;
    761         x3 = (x3 - val3)*exp_postscale;
    762 
    763         t = (val0 >> EXPTAB_SCALE) + 1023;
    764         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
    765         buf[0].i = (int64)t << 52;
    766 
    767         t = (val1 >> EXPTAB_SCALE) + 1023;
    768         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
    769         buf[1].i = (int64)t << 52;
    770 
    771         t = (val2 >> EXPTAB_SCALE) + 1023;
    772         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
    773         buf[2].i = (int64)t << 52;
    774 
    775         t = (val3 >> EXPTAB_SCALE) + 1023;
    776         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
    777         buf[3].i = (int64)t << 52;
    778 
    779         y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
    780         y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
    781 
    782         y[i] = y0;
    783         y[i + 1] = y1;
    784 
    785         y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
    786         y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
    787 
    788         y[i + 2] = y2;
    789         y[i + 3] = y3;
    790     }
    791 
    792     for( ; i < n; i++ )
    793     {
    794         double x0 = x[i].f * exp_prescale;
    795         int val0, t;
    796 
    797         t = (int)(x[i].i >> 52);
    798         if( (t & 2047) > 1023 + 10 )
    799             x0 = t < 0 ? -exp_max_val : exp_max_val;
    800 
    801         val0 = cvRound(x0);
    802         t = (val0 >> EXPTAB_SCALE) + 1023;
    803         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
    804 
    805         buf[0].i = (int64)t << 52;
    806         x0 = (x0 - val0)*exp_postscale;
    807 
    808         y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
    809     }
    810 }
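/*
   Usage sketch for the exp() overloads (illustrative values):

       double in[3] = { 0.0, 1.0, -1.0 }, out[3];
       cv::hal::exp(in, out, 3);   // out ~ { 1.0, 2.71828, 0.36788 }

   The (x[i].i >> 23) and (x[i].i >> 52) exponent checks in the scalar paths replace
   inputs of very large magnitude with +/- exp_max_val before rounding, so the conversion
   to int in the range reduction cannot overflow.
*/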
    811 
    812 #undef EXPTAB_SCALE
    813 #undef EXPTAB_MASK
    814 #undef EXPPOLY_32F_A0
    815 
    816 /////////////////////////////////////////// LOG ///////////////////////////////////////
    817 
    818 #define LOGTAB_SCALE    8
    819 #define LOGTAB_MASK         ((1 << LOGTAB_SCALE) - 1)
    820 #define LOGTAB_MASK2        ((1 << (20 - LOGTAB_SCALE)) - 1)
    821 #define LOGTAB_MASK2_32F    ((1 << (23 - LOGTAB_SCALE)) - 1)
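/*
   icvLogTab below stores 256 interleaved pairs: for j = 0..255, entry 2*j is ln(1 + j/256)
   and entry 2*j + 1 is 1/(1 + j/256) (e.g. the second pair is ln(257/256) and 256/257).
   LOGTAB_MASK extracts the top LOGTAB_SCALE mantissa bits used as the index j, while
   LOGTAB_MASK2 / LOGTAB_MASK2_32F mask the mantissa bits that remain below the index for
   doubles (high 20-bit word) and floats respectively.  The log kernels that follow this
   excerpt presumably combine the tabulated ln(1 + j/256) with the exponent contribution
   and a short polynomial in the residual ratio.
*/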
    822 
    823 static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
    824     0.0000000000000000000000000000000000000000,    1.000000000000000000000000000000000000000,
    825     .00389864041565732288852075271279318258166,    .9961089494163424124513618677042801556420,
    826     .00778214044205494809292034119607706088573,    .9922480620155038759689922480620155038760,
    827     .01165061721997527263705585198749759001657,    .9884169884169884169884169884169884169884,
    828     .01550418653596525274396267235488267033361,    .9846153846153846153846153846153846153846,
    829     .01934296284313093139406447562578250654042,    .9808429118773946360153256704980842911877,
    830     .02316705928153437593630670221500622574241,    .9770992366412213740458015267175572519084,
    831     .02697658769820207233514075539915211265906,    .9733840304182509505703422053231939163498,
    832     .03077165866675368732785500469617545604706,    .9696969696969696969696969696969696969697,
    833     .03455238150665972812758397481047722976656,    .9660377358490566037735849056603773584906,
    834     .03831886430213659461285757856785494368522,    .9624060150375939849624060150375939849624,
    835     .04207121392068705056921373852674150839447,    .9588014981273408239700374531835205992509,
    836     .04580953603129420126371940114040626212953,    .9552238805970149253731343283582089552239,
    837     .04953393512227662748292900118940451648088,    .9516728624535315985130111524163568773234,
    838     .05324451451881227759255210685296333394944,    .9481481481481481481481481481481481481481,
    839     .05694137640013842427411105973078520037234,    .9446494464944649446494464944649446494465,
    840     .06062462181643483993820353816772694699466,    .9411764705882352941176470588235294117647,
    841     .06429435070539725460836422143984236754475,    .9377289377289377289377289377289377289377,
    842     .06795066190850773679699159401934593915938,    .9343065693430656934306569343065693430657,
    843     .07159365318700880442825962290953611955044,    .9309090909090909090909090909090909090909,
    844     .07522342123758751775142172846244648098944,    .9275362318840579710144927536231884057971,
    845     .07884006170777602129362549021607264876369,    .9241877256317689530685920577617328519856,
    846     .08244366921107458556772229485432035289706,    .9208633093525179856115107913669064748201,
    847     .08603433734180314373940490213499288074675,    .9175627240143369175627240143369175627240,
    848     .08961215868968712416897659522874164395031,    .9142857142857142857142857142857142857143,
    849     .09317722485418328259854092721070628613231,    .9110320284697508896797153024911032028470,
    850     .09672962645855109897752299730200320482256,    .9078014184397163120567375886524822695035,
    851     .10026945316367513738597949668474029749630,    .9045936395759717314487632508833922261484,
    852     .10379679368164355934833764649738441221420,    .9014084507042253521126760563380281690141,
    853     .10731173578908805021914218968959175981580,    .8982456140350877192982456140350877192982,
    854     .11081436634029011301105782649756292812530,    .8951048951048951048951048951048951048951,
    855     .11430477128005862852422325204315711744130,    .8919860627177700348432055749128919860627,
    856     .11778303565638344185817487641543266363440,    .8888888888888888888888888888888888888889,
    857     .12124924363286967987640707633545389398930,    .8858131487889273356401384083044982698962,
    858     .12470347850095722663787967121606925502420,    .8827586206896551724137931034482758620690,
    859     .12814582269193003360996385708858724683530,    .8797250859106529209621993127147766323024,
    860     .13157635778871926146571524895989568904040,    .8767123287671232876712328767123287671233,
    861     .13499516453750481925766280255629681050780,    .8737201365187713310580204778156996587031,
    862     .13840232285911913123754857224412262439730,    .8707482993197278911564625850340136054422,
    863     .14179791186025733629172407290752744302150,    .8677966101694915254237288135593220338983,
    864     .14518200984449788903951628071808954700830,    .8648648648648648648648648648648648648649,
    865     .14855469432313711530824207329715136438610,    .8619528619528619528619528619528619528620,
    866     .15191604202584196858794030049466527998450,    .8590604026845637583892617449664429530201,
    867     .15526612891112392955683674244937719777230,    .8561872909698996655518394648829431438127,
    868     .15860503017663857283636730244325008243330,    .8533333333333333333333333333333333333333,
    869     .16193282026931324346641360989451641216880,    .8504983388704318936877076411960132890365,
    870     .16524957289530714521497145597095368430010,    .8476821192052980132450331125827814569536,
    871     .16855536102980664403538924034364754334090,    .8448844884488448844884488448844884488449,
    872     .17185025692665920060697715143760433420540,    .8421052631578947368421052631578947368421,
    873     .17513433212784912385018287750426679849630,    .8393442622950819672131147540983606557377,
    874     .17840765747281828179637841458315961062910,    .8366013071895424836601307189542483660131,
    875     .18167030310763465639212199675966985523700,    .8338762214983713355048859934853420195440,
    876     .18492233849401198964024217730184318497780,    .8311688311688311688311688311688311688312,
    877     .18816383241818296356839823602058459073300,    .8284789644012944983818770226537216828479,
    878     .19139485299962943898322009772527962923050,    .8258064516129032258064516129032258064516,
    879     .19461546769967164038916962454095482826240,    .8231511254019292604501607717041800643087,
    880     .19782574332991986754137769821682013571260,    .8205128205128205128205128205128205128205,
    881     .20102574606059073203390141770796617493040,    .8178913738019169329073482428115015974441,
    882     .20421554142869088876999228432396193966280,    .8152866242038216560509554140127388535032,
    883     .20739519434607056602715147164417430758480,    .8126984126984126984126984126984126984127,
    884     .21056476910734961416338251183333341032260,    .8101265822784810126582278481012658227848,
    885     .21372432939771812687723695489694364368910,    .8075709779179810725552050473186119873817,
    886     .21687393830061435506806333251006435602900,    .8050314465408805031446540880503144654088,
    887     .22001365830528207823135744547471404075630,    .8025078369905956112852664576802507836991,
    888     .22314355131420973710199007200571941211830,    .8000000000000000000000000000000000000000,
    889     .22626367865045338145790765338460914790630,    .7975077881619937694704049844236760124611,
    890     .22937410106484582006380890106811420992010,    .7950310559006211180124223602484472049689,
    891     .23247487874309405442296849741978803649550,    .7925696594427244582043343653250773993808,
    892     .23556607131276688371634975283086532726890,    .7901234567901234567901234567901234567901,
    893     .23864773785017498464178231643018079921600,    .7876923076923076923076923076923076923077,
    894     .24171993688714515924331749374687206000090,    .7852760736196319018404907975460122699387,
    895     .24478272641769091566565919038112042471760,    .7828746177370030581039755351681957186544,
    896     .24783616390458124145723672882013488560910,    .7804878048780487804878048780487804878049,
    897     .25088030628580937353433455427875742316250,    .7781155015197568389057750759878419452888,
    898     .25391520998096339667426946107298135757450,    .7757575757575757575757575757575757575758,
    899     .25694093089750041913887912414793390780680,    .7734138972809667673716012084592145015106,
    900     .25995752443692604627401010475296061486000,    .7710843373493975903614457831325301204819,
    901     .26296504550088134477547896494797896593800,    .7687687687687687687687687687687687687688,
    902     .26596354849713793599974565040611196309330,    .7664670658682634730538922155688622754491,
    903     .26895308734550393836570947314612567424780,    .7641791044776119402985074626865671641791,
    904     .27193371548364175804834985683555714786050,    .7619047619047619047619047619047619047619,
    905     .27490548587279922676529508862586226314300,    .7596439169139465875370919881305637982196,
    906     .27786845100345625159121709657483734190480,    .7573964497041420118343195266272189349112,
    907     .28082266290088775395616949026589281857030,    .7551622418879056047197640117994100294985,
    908     .28376817313064456316240580235898960381750,    .7529411764705882352941176470588235294118,
    909     .28670503280395426282112225635501090437180,    .7507331378299120234604105571847507331378,
    910     .28963329258304265634293983566749375313530,    .7485380116959064327485380116959064327485,
    911     .29255300268637740579436012922087684273730,    .7463556851311953352769679300291545189504,
    912     .29546421289383584252163927885703742504130,    .7441860465116279069767441860465116279070,
    913     .29836697255179722709783618483925238251680,    .7420289855072463768115942028985507246377,
    914     .30126133057816173455023545102449133992200,    .7398843930635838150289017341040462427746,
    915     .30414733546729666446850615102448500692850,    .7377521613832853025936599423631123919308,
    916     .30702503529491181888388950937951449304830,    .7356321839080459770114942528735632183908,
    917     .30989447772286465854207904158101882785550,    .7335243553008595988538681948424068767908,
    918     .31275571000389684739317885942000430077330,    .7314285714285714285714285714285714285714,
    919     .31560877898630329552176476681779604405180,    .7293447293447293447293447293447293447293,
    920     .31845373111853458869546784626436419785030,    .7272727272727272727272727272727272727273,
    921     .32129061245373424782201254856772720813750,    .7252124645892351274787535410764872521246,
    922     .32411946865421192853773391107097268104550,    .7231638418079096045197740112994350282486,
    923     .32694034499585328257253991068864706903700,    .7211267605633802816901408450704225352113,
    924     .32975328637246797969240219572384376078850,    .7191011235955056179775280898876404494382,
    925     .33255833730007655635318997155991382896900,    .7170868347338935574229691876750700280112,
    926     .33535554192113781191153520921943709254280,    .7150837988826815642458100558659217877095,
    927     .33814494400871636381467055798566434532400,    .7130919220055710306406685236768802228412,
    928     .34092658697059319283795275623560883104800,    .7111111111111111111111111111111111111111,
    929     .34370051385331840121395430287520866841080,    .7091412742382271468144044321329639889197,
    930     .34646676734620857063262633346312213689100,    .7071823204419889502762430939226519337017,
    931     .34922538978528827602332285096053965389730,    .7052341597796143250688705234159779614325,
    932     .35197642315717814209818925519357435405250,    .7032967032967032967032967032967032967033,
    933     .35471990910292899856770532096561510115850,    .7013698630136986301369863013698630136986,
    934     .35745588892180374385176833129662554711100,    .6994535519125683060109289617486338797814,
    935     .36018440357500774995358483465679455548530,    .6975476839237057220708446866485013623978,
    936     .36290549368936841911903457003063522279280,    .6956521739130434782608695652173913043478,
    937     .36561919956096466943762379742111079394830,    .6937669376693766937669376693766937669377,
    938     .36832556115870762614150635272380895912650,    .6918918918918918918918918918918918918919,
    939     .37102461812787262962487488948681857436900,    .6900269541778975741239892183288409703504,
    940     .37371640979358405898480555151763837784530,    .6881720430107526881720430107526881720430,
    941     .37640097516425302659470730759494472295050,    .6863270777479892761394101876675603217158,
    942     .37907835293496944251145919224654790014030,    .6844919786096256684491978609625668449198,
    943     .38174858149084833769393299007788300514230,    .6826666666666666666666666666666666666667,
    944     .38441169891033200034513583887019194662580,    .6808510638297872340425531914893617021277,
    945     .38706774296844825844488013899535872042180,    .6790450928381962864721485411140583554377,
    946     .38971675114002518602873692543653305619950,    .6772486772486772486772486772486772486772,
    947     .39235876060286384303665840889152605086580,    .6754617414248021108179419525065963060686,
    948     .39499380824086893770896722344332374632350,    .6736842105263157894736842105263157894737,
    949     .39762193064713846624158577469643205404280,    .6719160104986876640419947506561679790026,
    950     .40024316412701266276741307592601515352730,    .6701570680628272251308900523560209424084,
    951     .40285754470108348090917615991202183067800,    .6684073107049608355091383812010443864230,
    952     .40546510810816432934799991016916465014230,    .6666666666666666666666666666666666666667,
    953     .40806588980822172674223224930756259709600,    .6649350649350649350649350649350649350649,
    954     .41065992498526837639616360320360399782650,    .6632124352331606217616580310880829015544,
    955     .41324724855021932601317757871584035456180,    .6614987080103359173126614987080103359173,
    956     .41582789514371093497757669865677598863850,    .6597938144329896907216494845360824742268,
    957     .41840189913888381489925905043492093682300,    .6580976863753213367609254498714652956298,
    958     .42096929464412963239894338585145305842150,    .6564102564102564102564102564102564102564,
    959     .42353011550580327293502591601281892508280,    .6547314578005115089514066496163682864450,
    960     .42608439531090003260516141381231136620050,    .6530612244897959183673469387755102040816,
    961     .42863216738969872610098832410585600882780,    .6513994910941475826972010178117048346056,
    962     .43117346481837132143866142541810404509300,    .6497461928934010152284263959390862944162,
    963     .43370832042155937902094819946796633303180,    .6481012658227848101265822784810126582278,
    964     .43623676677491801667585491486534010618930,    .6464646464646464646464646464646464646465,
    965     .43875883620762790027214350629947148263450,    .6448362720403022670025188916876574307305,
    966     .44127456080487520440058801796112675219780,    .6432160804020100502512562814070351758794,
    967     .44378397241030093089975139264424797147500,    .6416040100250626566416040100250626566416,
    968     .44628710262841947420398014401143882423650,    .6400000000000000000000000000000000000000,
    969     .44878398282700665555822183705458883196130,    .6384039900249376558603491271820448877805,
    970     .45127464413945855836729492693848442286250,    .6368159203980099502487562189054726368159,
    971     .45375911746712049854579618113348260521900,    .6352357320099255583126550868486352357320,
    972     .45623743348158757315857769754074979573500,    .6336633663366336633663366336633663366337,
    973     .45870962262697662081833982483658473938700,    .6320987654320987654320987654320987654321,
    974     .46117571512217014895185229761409573256980,    .6305418719211822660098522167487684729064,
    975     .46363574096303250549055974261136725544930,    .6289926289926289926289926289926289926290,
    976     .46608972992459918316399125615134835243230,    .6274509803921568627450980392156862745098,
    977     .46853771156323925639597405279346276074650,    .6259168704156479217603911980440097799511,
    978     .47097971521879100631480241645476780831830,    .6243902439024390243902439024390243902439,
    979     .47341577001667212165614273544633761048330,    .6228710462287104622871046228710462287105,
    980     .47584590486996386493601107758877333253630,    .6213592233009708737864077669902912621359,
    981     .47827014848147025860569669930555392056700,    .6198547215496368038740920096852300242131,
    982     .48068852934575190261057286988943815231330,    .6183574879227053140096618357487922705314,
    983     .48310107575113581113157579238759353756900,    .6168674698795180722891566265060240963855,
    984     .48550781578170076890899053978500887751580,    .6153846153846153846153846153846153846154,
    985     .48790877731923892879351001283794175833480,    .6139088729016786570743405275779376498801,
    986     .49030398804519381705802061333088204264650,    .6124401913875598086124401913875598086124,
    987     .49269347544257524607047571407747454941280,    .6109785202863961813842482100238663484487,
    988     .49507726679785146739476431321236304938800,    .6095238095238095238095238095238095238095,
    989     .49745538920281889838648226032091770321130,    .6080760095011876484560570071258907363420,
    990     .49982786955644931126130359189119189977650,    .6066350710900473933649289099526066350711,
    991     .50219473456671548383667413872899487614650,    .6052009456264775413711583924349881796690,
    992     .50455601075239520092452494282042607665050,    .6037735849056603773584905660377358490566,
    993     .50691172444485432801997148999362252652650,    .6023529411764705882352941176470588235294,
    994     .50926190178980790257412536448100581765150,    .6009389671361502347417840375586854460094,
    995     .51160656874906207391973111953120678663250,    .5995316159250585480093676814988290398126,
    996     .51394575110223428282552049495279788970950,    .5981308411214953271028037383177570093458,
    997     .51627947444845445623684554448118433356300,    .5967365967365967365967365967365967365967,
    998     .51860776420804555186805373523384332656850,    .5953488372093023255813953488372093023256,
    999     .52093064562418522900344441950437612831600,    .5939675174013921113689095127610208816705,
   1000     .52324814376454775732838697877014055848100,    .5925925925925925925925925925925925925926,
   1001     .52556028352292727401362526507000438869000,    .5912240184757505773672055427251732101617,
   1002     .52786708962084227803046587723656557500350,    .5898617511520737327188940092165898617512,
   1003     .53016858660912158374145519701414741575700,    .5885057471264367816091954022988505747126,
   1004     .53246479886947173376654518506256863474850,    .5871559633027522935779816513761467889908,
   1005     .53475575061602764748158733709715306758900,    .5858123569794050343249427917620137299771,
   1006     .53704146589688361856929077475797384977350,    .5844748858447488584474885844748858447489,
   1007     .53932196859560876944783558428753167390800,    .5831435079726651480637813211845102505695,
   1008     .54159728243274429804188230264117009937750,    .5818181818181818181818181818181818181818,
   1009     .54386743096728351609669971367111429572100,    .5804988662131519274376417233560090702948,
   1010     .54613243759813556721383065450936555862450,    .5791855203619909502262443438914027149321,
   1011     .54839232556557315767520321969641372561450,    .5778781038374717832957110609480812641084,
   1012     .55064711795266219063194057525834068655950,    .5765765765765765765765765765765765765766,
   1013     .55289683768667763352766542084282264113450,    .5752808988764044943820224719101123595506,
   1014     .55514150754050151093110798683483153581600,    .5739910313901345291479820627802690582960,
   1015     .55738115013400635344709144192165695130850,    .5727069351230425055928411633109619686801,
   1016     .55961578793542265941596269840374588966350,    .5714285714285714285714285714285714285714,
   1017     .56184544326269181269140062795486301183700,    .5701559020044543429844097995545657015590,
   1018     .56407013828480290218436721261241473257550,    .5688888888888888888888888888888888888889,
   1019     .56628989502311577464155334382667206227800,    .5676274944567627494456762749445676274945,
   1020     .56850473535266865532378233183408156037350,    .5663716814159292035398230088495575221239,
   1021     .57071468100347144680739575051120482385150,    .5651214128035320088300220750551876379691,
   1022     .57291975356178548306473885531886480748650,    .5638766519823788546255506607929515418502,
   1023     .57511997447138785144460371157038025558000,    .5626373626373626373626373626373626373626,
   1024     .57731536503482350219940144597785547375700,    .5614035087719298245614035087719298245614,
   1025     .57950594641464214795689713355386629700650,    .5601750547045951859956236323851203501094,
   1026     .58169173963462239562716149521293118596100,    .5589519650655021834061135371179039301310,
   1027     .58387276558098266665552955601015128195300,    .5577342047930283224400871459694989106754,
   1028     .58604904500357812846544902640744112432000,    .5565217391304347826086956521739130434783,
   1029     .58822059851708596855957011939608491957200,    .5553145336225596529284164859002169197397,
   1030     .59038744660217634674381770309992134571100,    .5541125541125541125541125541125541125541,
   1031     .59254960960667157898740242671919986605650,    .5529157667386609071274298056155507559395,
   1032     .59470710774669277576265358220553025603300,    .5517241379310344827586206896551724137931,
   1033     .59685996110779382384237123915227130055450,    .5505376344086021505376344086021505376344,
   1034     .59900818964608337768851242799428291618800,    .5493562231759656652360515021459227467811,
   1035     .60115181318933474940990890900138765573500,    .5481798715203426124197002141327623126338,
   1036     .60329085143808425240052883964381180703650,    .5470085470085470085470085470085470085470,
   1037     .60542532396671688843525771517306566238400,    .5458422174840085287846481876332622601279,
   1038     .60755525022454170969155029524699784815300,    .5446808510638297872340425531914893617021,
   1039     .60968064953685519036241657886421307921400,    .5435244161358811040339702760084925690021,
   1040     .61180154110599282990534675263916142284850,    .5423728813559322033898305084745762711864,
   1041     .61391794401237043121710712512140162289150,    .5412262156448202959830866807610993657505,
   1042     .61602987721551394351138242200249806046500,    .5400843881856540084388185654008438818565,
   1043     .61813735955507864705538167982012964785100,    .5389473684210526315789473684210526315789,
   1044     .62024040975185745772080281312810257077200,    .5378151260504201680672268907563025210084,
   1045     .62233904640877868441606324267922900617100,    .5366876310272536687631027253668763102725,
   1046     .62443328801189346144440150965237990021700,    .5355648535564853556485355648535564853556,
   1047     .62652315293135274476554741340805776417250,    .5344467640918580375782881002087682672234,
   1048     .62860865942237409420556559780379757285100,    .5333333333333333333333333333333333333333,
   1049     .63068982562619868570408243613201193511500,    .5322245322245322245322245322245322245322,
   1050     .63276666957103777644277897707070223987100,    .5311203319502074688796680497925311203320,
   1051     .63483920917301017716738442686619237065300,    .5300207039337474120082815734989648033126,
   1052     .63690746223706917739093569252872839570050,    .5289256198347107438016528925619834710744,
   1053     .63897144645792069983514238629140891134750,    .5278350515463917525773195876288659793814,
   1054     .64103117942093124081992527862894348800200,    .5267489711934156378600823045267489711934,
   1055     .64308667860302726193566513757104985415950,    .5256673511293634496919917864476386036961,
   1056     .64513796137358470073053240412264131009600,    .5245901639344262295081967213114754098361,
   1057     .64718504499530948859131740391603671014300,    .5235173824130879345603271983640081799591,
   1058     .64922794662510974195157587018911726772800,    .5224489795918367346938775510204081632653,
   1059     .65126668331495807251485530287027359008800,    .5213849287169042769857433808553971486762,
   1060     .65330127201274557080523663898929953575150,    .5203252032520325203252032520325203252033,
   1061     .65533172956312757406749369692988693714150,    .5192697768762677484787018255578093306288,
   1062     .65735807270835999727154330685152672231200,    .5182186234817813765182186234817813765182,
   1063     .65938031808912778153342060249997302889800,    .5171717171717171717171717171717171717172,
   1064     .66139848224536490484126716182800009846700,    .5161290322580645161290322580645161290323,
   1065     .66341258161706617713093692145776003599150,    .5150905432595573440643863179074446680080,
   1066     .66542263254509037562201001492212526500250,    .5140562248995983935742971887550200803213,
   1067     .66742865127195616370414654738851822912700,    .5130260521042084168336673346693386773547,
   1068     .66943065394262923906154583164607174694550,    .5120000000000000000000000000000000000000,
   1069     .67142865660530226534774556057527661323550,    .5109780439121756487025948103792415169661,
   1070     .67342267521216669923234121597488410770900,    .5099601593625498007968127490039840637450,
   1071     .67541272562017662384192817626171745359900,    .5089463220675944333996023856858846918489,
   1072     .67739882359180603188519853574689477682100,    .5079365079365079365079365079365079365079,
   1073     .67938098479579733801614338517538271844400,    .5069306930693069306930693069306930693069,
   1074     .68135922480790300781450241629499942064300,    .5059288537549407114624505928853754940711,
   1075     .68333355911162063645036823800182901322850,    .5049309664694280078895463510848126232742,
   1076     .68530400309891936760919861626462079584600,    .5039370078740157480314960629921259842520,
   1077     .68727057207096020619019327568821609020250,    .5029469548133595284872298624754420432220,
   1078     .68923328123880889251040571252815425395950,    .5019607843137254901960784313725490196078,
   1079     .69314718055994530941723212145818, 5.0e-01,
   1080 };
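/* Editorial note, added for readability: icvLogTab stores pairs of doubles, one pair
 * per mantissa value m_j = 1 + j/256 (steps of 1/256, consistent with
 * LOGTAB_SCALE == 8).  The even slot of pair j holds ln(m_j) and the odd slot holds
 * 1/m_j -- e.g. the pair (.57291975..., .56387665...) above matches ln(1.7734375) and
 * 1/1.7734375 for j = 198.  The final pair is (ln 2, 0.5), i.e. the values for m = 2
 * rather than for m = 1 + 255/256; the shift[] / "h == 510" logic in the functions
 * below compensates for that substitution. */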
   1081 
   1082 
   1083 
   1084 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
   1085 static const double ln_2 = 0.69314718055994530941723212145818;
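/* Editorial sketch (not part of the original source): the two log() overloads below
 * share one decomposition.  An IEEE-754 input x = 2^E * m with m in [1,2) is split so
 * that the top LOGTAB_SCALE mantissa bits select a table pair (ln m_j, 1/m_j) and the
 * remaining mantissa bits form a small residual r; then
 *
 *     log(x) = E*ln_2 + ln(m_j) + log1p(r / m_j),
 *
 * where the last term is evaluated by the short LOGPOLY polynomial.  The scalar sketch
 * below assumes LOGTAB_SCALE == 8, LOGTAB_MASK == 255 and
 * LOGTAB_MASK2_32F == (1 << 15) - 1, which is what the table spacing and the bit
 * twiddling in the code suggest; those macros are defined earlier in this file and are
 * not repeated here. */
#if 0   /* illustrative only -- single-precision path, one element at a time */
static float log32f_sketch( float v )
{
    Cv32suf u; u.f = v;                                   // view the float as raw bits
    double yv = (((u.i >> 23) & 0xff) - 127) * ln_2;      // E * ln 2
    int h = (u.i >> (23 - 8 - 1)) & (255*2);              // even table index 2*j
    u.i = (u.i & ((1 << 15) - 1)) | (127 << 23);          // keep low mantissa bits: 1 + r
    yv += icvLogTab[h];                                   // + ln(m_j)
    double t = (u.f - 1.) * icvLogTab[h + 1];             // r / m_j
    if( h == 510 )                                        // last pair stores (ln 2, 0.5)
        t -= 1./512;                                      // same correction as shift[]
    return (float)(yv + ((t/3 - 0.5)*t + 1)*t);           // + log1p(t) ~ t - t^2/2 + t^3/3
}
#endif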
   1086 
   1087 void log( const float *_x, float *y, int n )
   1088 {
   1089     static const float shift[] = { 0, -1.f/512 };
   1090     static const float
   1091     A0 = 0.3333333333333333333333333f,
   1092     A1 = -0.5f,
   1093     A2 = 1.f;
   1094 
   1095 #undef LOGPOLY
   1096 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x))
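    /* LOGPOLY(x) expands to x - x^2/2 + x^3/3, the degree-3 series for log(1+x).
     * After the LOGTAB_TRANSLATE scaling (and the shift[] correction) |x| stays
     * around 2^-8 or below, so the truncated terms are negligible at float precision. */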
   1097 
   1098     int i = 0;
   1099     Cv32suf buf[4];
   1100     const int* x = (const int*)_x;
   1101 
   1102 #if CV_SSE2
   1103     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
   1104     static const __m128 _1_4 = _mm_set1_ps(1.f);
   1105     static const __m128 shift4 = _mm_set1_ps(-1.f/512);
   1106 
   1107     static const __m128 mA0 = _mm_set1_ps(A0);
   1108     static const __m128 mA1 = _mm_set1_ps(A1);
   1109     static const __m128 mA2 = _mm_set1_ps(A2);
   1110 
   1111     int CV_DECL_ALIGNED(16) idx[4];
   1112 
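    /* Vector path, four floats per iteration: extract the biased exponents
     * (srli 23, mask 0xff, subtract 127) for the E*ln_2 term, form the even table
     * indices from the top mantissa bits, gather the (ln m_j, 1/m_j) pairs through the
     * aligned idx[] scratch array, rebuild the residual mantissa as a float in
     * [1, 1 + 2^-8), and apply the same cubic polynomial as LOGPOLY.  The cmpeq
     * against 510 selects the -1/512 shift for the last table pair, mirroring shift[]
     * in the scalar code. */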
   1113     for( ; i <= n - 4; i += 4 )
   1114     {
   1115         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
   1116         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127));
   1117         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
   1118         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2);
   1119 
   1120         __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), _mm_set1_epi32(127 << 23));
   1121 
   1122         h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2));
   1123         _mm_store_si128((__m128i*)idx, h0);
   1124         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
   1125 
   1126         __m128d t0, t1, t2, t3, t4;
   1127         t0 = _mm_load_pd(icvLogTab + idx[0]);
   1128         t2 = _mm_load_pd(icvLogTab + idx[1]);
   1129         t1 = _mm_unpackhi_pd(t0, t2);
   1130         t0 = _mm_unpacklo_pd(t0, t2);
   1131         t2 = _mm_load_pd(icvLogTab + idx[2]);
   1132         t4 = _mm_load_pd(icvLogTab + idx[3]);
   1133         t3 = _mm_unpackhi_pd(t2, t4);
   1134         t2 = _mm_unpacklo_pd(t2, t4);
   1135 
   1136         yd0 = _mm_add_pd(yd0, t0);
   1137         yd1 = _mm_add_pd(yd1, t2);
   1138 
   1139         __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
   1140 
   1141         __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4);
   1142         xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3)));
   1143         xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4));
   1144 
   1145         __m128 zf0 = _mm_mul_ps(xf0, mA0);
   1146         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0);
   1147         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0);
   1148         yf0 = _mm_add_ps(yf0, zf0);
   1149 
   1150         _mm_storeu_ps(y + i, yf0);
   1151     }
   1152 #endif
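    /* When CV_SSE2 is enabled the vector loop above has already consumed every full
     * group of four, so the unrolled loop below runs only in non-SSE2 builds; the
     * final per-element loop handles the 0-3 leftover values in either case. */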
   1153     for( ; i <= n - 4; i += 4 )
   1154     {
   1155         double x0, x1, x2, x3;
   1156         double y0, y1, y2, y3;
   1157         int h0, h1, h2, h3;
   1158 
   1159         h0 = x[i];
   1160         h1 = x[i+1];
   1161         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
   1162         buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23);
   1163 
   1164         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
   1165         y1 = (((h1 >> 23) & 0xff) - 127) * ln_2;
   1166 
   1167         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1168         h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1169 
   1170         y0 += icvLogTab[h0];
   1171         y1 += icvLogTab[h1];
   1172 
   1173         h2 = x[i+2];
   1174         h3 = x[i+3];
   1175 
   1176         x0 = LOGTAB_TRANSLATE( buf[0].f, h0 );
   1177         x1 = LOGTAB_TRANSLATE( buf[1].f, h1 );
   1178 
   1179         buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23);
   1180         buf[3].i = (h3 & LOGTAB_MASK2_32F) | (127 << 23);
   1181 
   1182         y2 = (((h2 >> 23) & 0xff) - 127) * ln_2;
   1183         y3 = (((h3 >> 23) & 0xff) - 127) * ln_2;
   1184 
   1185         h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1186         h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1187 
   1188         y2 += icvLogTab[h2];
   1189         y3 += icvLogTab[h3];
   1190 
   1191         x2 = LOGTAB_TRANSLATE( buf[2].f, h2 );
   1192         x3 = LOGTAB_TRANSLATE( buf[3].f, h3 );
   1193 
   1194         x0 += shift[h0 == 510];
   1195         x1 += shift[h1 == 510];
   1196         y0 += LOGPOLY( x0 );
   1197         y1 += LOGPOLY( x1 );
   1198 
   1199         y[i] = (float) y0;
   1200         y[i + 1] = (float) y1;
   1201 
   1202         x2 += shift[h2 == 510];
   1203         x3 += shift[h3 == 510];
   1204         y2 += LOGPOLY( x2 );
   1205         y3 += LOGPOLY( x3 );
   1206 
   1207         y[i + 2] = (float) y2;
   1208         y[i + 3] = (float) y3;
   1209     }
   1210 
   1211     for( ; i < n; i++ )
   1212     {
   1213         int h0 = x[i];
   1214         double y0;
   1215         float x0;
   1216 
   1217         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
   1218 
   1219         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
   1220         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1221 
   1222         y0 += icvLogTab[h0];
   1223         x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 );
   1224         x0 += shift[h0 == 510];
   1225         y0 += LOGPOLY( x0 );
   1226 
   1227         y[i] = (float)y0;
   1228     }
   1229 }
   1230 
   1231 void log( const double *x, double *y, int n )
   1232 {
   1233     static const double shift[] = { 0, -1./512 };
   1234     static const double
   1235     A7 = 1.0,
   1236     A6 = -0.5,
   1237     A5 = 0.333333333333333314829616256247390992939472198486328125,
   1238     A4 = -0.25,
   1239     A3 = 0.2,
   1240     A2 = -0.1666666666666666574148081281236954964697360992431640625,
   1241     A1 = 0.1428571428571428769682682968777953647077083587646484375,
   1242     A0 = -0.125;
   1243 
   1244 #undef LOGPOLY
   1245 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\
   1246 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \
   1247 (((A1*xq + A3)*xq + A5)*xq + A7)*(x))
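    /* LOGPOLY(x,k) first applies shift[k] to x, then evaluates
     * x - x^2/2 + x^3/3 - ... - x^8/8 (the degree-8 series for log(1+x)) as two
     * Horner chains in xq = x*x, one for the odd and one for the even powers, which
     * shortens the dependency chain compared with a single chain in x. */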
   1248 
   1249     int i = 0;
   1250     DBLINT buf[4];
   1251     DBLINT *X = (DBLINT *) x;
   1252 
   1253 #if CV_SSE2
   1254     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
   1255     static const __m128d _1_2 = _mm_set1_pd(1.);
   1256     static const __m128d shift2 = _mm_set1_pd(-1./512);
   1257 
   1258     static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff);
   1259     static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0);
   1260 
   1261     static const __m128d mA0 = _mm_set1_pd(A0);
   1262     static const __m128d mA1 = _mm_set1_pd(A1);
   1263     static const __m128d mA2 = _mm_set1_pd(A2);
   1264     static const __m128d mA3 = _mm_set1_pd(A3);
   1265     static const __m128d mA4 = _mm_set1_pd(A4);
   1266     static const __m128d mA5 = _mm_set1_pd(A5);
   1267     static const __m128d mA6 = _mm_set1_pd(A6);
   1268     static const __m128d mA7 = _mm_set1_pd(A7);
   1269 
   1270     int CV_DECL_ALIGNED(16) idx[4];
   1271 
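    /* Vector path, four doubles per iteration: the unpacklo/unpackhi shuffle packs the
     * high 32-bit words of the four inputs into one register so the 11-bit exponents
     * (srli 20, mask 2047, subtract 1023) and the table indices can be extracted with
     * 32-bit integer ops; the (ln m_j, 1/m_j) pairs are then gathered through idx[] as
     * in the float path, and the same degree-8 series is evaluated as a single Horner
     * chain over mA0..mA7. */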
   1272     for( ; i <= n - 4; i += 4 )
   1273     {
   1274         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
   1275         __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2));
   1276 
   1277         __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2));
   1278         __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2));
   1279 
   1280         h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1));
   1281 
   1282         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20),
   1283                                                   _mm_set1_epi32(2047)), _mm_set1_epi32(1023));
   1284         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
   1285         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2);
   1286 
   1287         h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2));
   1288         _mm_store_si128((__m128i*)idx, h0);
   1289         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
   1290 
   1291         __m128d t0, t1, t2, t3, t4;
   1292         t0 = _mm_load_pd(icvLogTab + idx[0]);
   1293         t2 = _mm_load_pd(icvLogTab + idx[1]);
   1294         t1 = _mm_unpackhi_pd(t0, t2);
   1295         t0 = _mm_unpacklo_pd(t0, t2);
   1296         t2 = _mm_load_pd(icvLogTab + idx[2]);
   1297         t4 = _mm_load_pd(icvLogTab + idx[3]);
   1298         t3 = _mm_unpackhi_pd(t2, t4);
   1299         t2 = _mm_unpacklo_pd(t2, t4);
   1300 
   1301         yd0 = _mm_add_pd(yd0, t0);
   1302         yd1 = _mm_add_pd(yd1, t2);
   1303 
   1304         xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1);
   1305         xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3);
   1306 
   1307         xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2));
   1308         xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2));
   1309 
   1310         __m128d zd0 = _mm_mul_pd(xd0, mA0);
   1311         __m128d zd1 = _mm_mul_pd(xd1, mA0);
   1312         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0);
   1313         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1);
   1314         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0);
   1315         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1);
   1316         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0);
   1317         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1);
   1318         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0);
   1319         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1);
   1320         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0);
   1321         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1);
   1322         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0);
   1323         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1);
   1324         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0);
   1325         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1);
   1326 
   1327         yd0 = _mm_add_pd(yd0, zd0);
   1328         yd1 = _mm_add_pd(yd1, zd1);
   1329 
   1330         _mm_storeu_pd(y + i, yd0);
   1331         _mm_storeu_pd(y + i + 2, yd1);
   1332     }
   1333 #endif
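    /* As in the single-precision routine, the unrolled loop below is effectively the
     * non-SSE2 implementation, and the per-element loop at the end picks up any
     * remaining values. */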
   1334     for( ; i <= n - 4; i += 4 )
   1335     {
   1336         double xq;
   1337         double x0, x1, x2, x3;
   1338         double y0, y1, y2, y3;
   1339         int h0, h1, h2, h3;
   1340 
   1341         h0 = X[i].i.lo;
   1342         h1 = X[i + 1].i.lo;
   1343         buf[0].i.lo = h0;
   1344         buf[1].i.lo = h1;
   1345 
   1346         h0 = X[i].i.hi;
   1347         h1 = X[i + 1].i.hi;
   1348         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
   1349         buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20);
   1350 
   1351         y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
   1352         y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2;
   1353 
   1354         h2 = X[i + 2].i.lo;
   1355         h3 = X[i + 3].i.lo;
   1356         buf[2].i.lo = h2;
   1357         buf[3].i.lo = h3;
   1358 
   1359         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1360         h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1361 
   1362         y0 += icvLogTab[h0];
   1363         y1 += icvLogTab[h1];
   1364 
   1365         h2 = X[i + 2].i.hi;
   1366         h3 = X[i + 3].i.hi;
   1367 
   1368         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
   1369         x1 = LOGTAB_TRANSLATE( buf[1].d, h1 );
   1370 
   1371         buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20);
   1372         buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20);
   1373 
   1374         y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2;
   1375         y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2;
   1376 
   1377         h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1378         h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1379 
   1380         y2 += icvLogTab[h2];
   1381         y3 += icvLogTab[h3];
   1382 
   1383         x2 = LOGTAB_TRANSLATE( buf[2].d, h2 );
   1384         x3 = LOGTAB_TRANSLATE( buf[3].d, h3 );
   1385 
   1386         y0 += LOGPOLY( x0, h0 == 510 );
   1387         y1 += LOGPOLY( x1, h1 == 510 );
   1388 
   1389         y[i] = y0;
   1390         y[i + 1] = y1;
   1391 
   1392         y2 += LOGPOLY( x2, h2 == 510 );
   1393         y3 += LOGPOLY( x3, h3 == 510 );
   1394 
   1395         y[i + 2] = y2;
   1396         y[i + 3] = y3;
   1397     }
   1398 
   1399     for( ; i < n; i++ )
   1400     {
   1401         int h0 = X[i].i.hi;
   1402         double xq;
   1403         double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
   1404 
   1405         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
   1406         buf[0].i.lo = X[i].i.lo;
   1407         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
   1408 
   1409         y0 += icvLogTab[h0];
   1410         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
   1411         y0 += LOGPOLY( x0, h0 == 510 );
   1412         y[i] = y0;
   1413     }
   1414 }
   1415 
   1416 }}  // cv::hal
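/* Editorial usage sketch (not part of the original source, and the demo function name
 * is ours): a minimal way to exercise the two overloads above and compare against the
 * C library.  The header that declares cv::hal::log differs between OpenCV versions,
 * so no include for it is shown; inputs are kept strictly positive because the code
 * above does not special-case zero, negative or non-finite values. */
#if 0
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int hal_log_demo()
{
    const int n = 1000;
    std::vector<float>  xf(n), yf(n);
    std::vector<double> xd(n), yd(n);
    for( int i = 0; i < n; i++ )
    {
        xf[i] = 0.001f + 0.01f*i;            // strictly positive test values
        xd[i] = 0.001  + 0.01 *i;
    }
    cv::hal::log(xf.data(), yf.data(), n);   // single-precision overload
    cv::hal::log(xd.data(), yd.data(), n);   // double-precision overload

    double maxErr = 0;
    for( int i = 0; i < n; i++ )
        maxErr = std::max(maxErr, std::fabs(yd[i] - std::log(xd[i])));
    printf("max abs error vs std::log (double): %g\n", maxErr);
    return 0;
}
#endif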
   1417