Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 /* ////////////////////////////////////////////////////////////////////
     44 //
     45 //  Filling CvMat/IplImage instances with random numbers
     46 //
     47 // */
     48 
     49 #include "precomp.hpp"
     50 
     51 #if defined WIN32 || defined WINCE
     52     #include <windows.h>
     53     #undef small
     54     #undef min
     55     #undef max
     56     #undef abs
     57 #else
     58     #include <pthread.h>
     59 #endif
     60 
     61 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
     62     #include "emmintrin.h"
     63 #endif
     64 
     65 namespace cv
     66 {
     67 
     68 ///////////////////////////// Functions Declaration //////////////////////////////////////
     69 
     70 /*
     71    Multiply-with-carry generator is used here:
     72    temp = ( A*X(n) + carry )
     73    X(n+1) = temp mod (2^32)
     74    carry = temp / (2^32)
     75 */
     76 
     77 #define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
     78 
     79 /***************************************************************************************\
     80 *                           Pseudo-Random Number Generators (PRNGs)                     *
     81 \***************************************************************************************/
     82 
     83 template<typename T> static void
     84 randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
     85 {
     86     uint64 temp = *state;
     87     int i;
     88 
     89     if( !small_flag )
     90     {
     91         for( i = 0; i <= len - 4; i += 4 )
     92         {
     93             int t0, t1;
     94 
     95             temp = RNG_NEXT(temp);
     96             t0 = ((int)temp & p[i][0]) + p[i][1];
     97             temp = RNG_NEXT(temp);
     98             t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
     99             arr[i] = saturate_cast<T>(t0);
    100             arr[i+1] = saturate_cast<T>(t1);
    101 
    102             temp = RNG_NEXT(temp);
    103             t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
    104             temp = RNG_NEXT(temp);
    105             t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
    106             arr[i+2] = saturate_cast<T>(t0);
    107             arr[i+3] = saturate_cast<T>(t1);
    108         }
    109     }
    110     else
    111     {
    112         for( i = 0; i <= len - 4; i += 4 )
    113         {
    114             int t0, t1, t;
    115             temp = RNG_NEXT(temp);
    116             t = (int)temp;
    117             t0 = (t & p[i][0]) + p[i][1];
    118             t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
    119             arr[i] = saturate_cast<T>(t0);
    120             arr[i+1] = saturate_cast<T>(t1);
    121 
    122             t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
    123             t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
    124             arr[i+2] = saturate_cast<T>(t0);
    125             arr[i+3] = saturate_cast<T>(t1);
    126         }
    127     }
    128 
    129     for( ; i < len; i++ )
    130     {
    131         int t0;
    132         temp = RNG_NEXT(temp);
    133 
    134         t0 = ((int)temp & p[i][0]) + p[i][1];
    135         arr[i] = saturate_cast<T>(t0);
    136     }
    137 
    138     *state = temp;
    139 }
    140 
    141 struct DivStruct
    142 {
    143     unsigned d;
    144     unsigned M;
    145     int sh1, sh2;
    146     int delta;
    147 };
    148 
    149 template<typename T> static void
    150 randi_( T* arr, int len, uint64* state, const DivStruct* p )
    151 {
    152     uint64 temp = *state;
    153     int i = 0;
    154     unsigned t0, t1, v0, v1;
    155 
    156     for( i = 0; i <= len - 4; i += 4 )
    157     {
    158         temp = RNG_NEXT(temp);
    159         t0 = (unsigned)temp;
    160         temp = RNG_NEXT(temp);
    161         t1 = (unsigned)temp;
    162         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
    163         v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
    164         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
    165         v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
    166         v0 = t0 - v0*p[i].d + p[i].delta;
    167         v1 = t1 - v1*p[i+1].d + p[i+1].delta;
    168         arr[i] = saturate_cast<T>((int)v0);
    169         arr[i+1] = saturate_cast<T>((int)v1);
    170 
    171         temp = RNG_NEXT(temp);
    172         t0 = (unsigned)temp;
    173         temp = RNG_NEXT(temp);
    174         t1 = (unsigned)temp;
    175         v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
    176         v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
    177         v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
    178         v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
    179         v0 = t0 - v0*p[i+2].d + p[i+2].delta;
    180         v1 = t1 - v1*p[i+3].d + p[i+3].delta;
    181         arr[i+2] = saturate_cast<T>((int)v0);
    182         arr[i+3] = saturate_cast<T>((int)v1);
    183     }
    184 
    185     for( ; i < len; i++ )
    186     {
    187         temp = RNG_NEXT(temp);
    188         t0 = (unsigned)temp;
    189         v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
    190         v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
    191         v0 = t0 - v0*p[i].d + p[i].delta;
    192         arr[i] = saturate_cast<T>((int)v0);
    193     }
    194 
    195     *state = temp;
    196 }
    197 
    198 
    199 #define DEF_RANDI_FUNC(suffix, type) \
    200 static void randBits_##suffix(type* arr, int len, uint64* state, \
    201                               const Vec2i* p, bool small_flag) \
    202 { randBits_(arr, len, state, p, small_flag); } \
    203 \
    204 static void randi_##suffix(type* arr, int len, uint64* state, \
    205                            const DivStruct* p, bool ) \
    206 { randi_(arr, len, state, p); }
    207 
    208 DEF_RANDI_FUNC(8u, uchar)
    209 DEF_RANDI_FUNC(8s, schar)
    210 DEF_RANDI_FUNC(16u, ushort)
    211 DEF_RANDI_FUNC(16s, short)
    212 DEF_RANDI_FUNC(32s, int)
    213 
    214 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
    215 {
    216     uint64 temp = *state;
    217     int i = 0;
    218 
    219     for( ; i <= len - 4; i += 4 )
    220     {
    221         float f[4];
    222         f[0] = (float)(int)(temp = RNG_NEXT(temp));
    223         f[1] = (float)(int)(temp = RNG_NEXT(temp));
    224         f[2] = (float)(int)(temp = RNG_NEXT(temp));
    225         f[3] = (float)(int)(temp = RNG_NEXT(temp));
    226 
    227         // handwritten SSE is required not for performance but for numerical stability!
    228         // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
    229         // while 64-bit compilers generate single precision SIMD instructions
    230         // so manual vectorisation forces all compilers to the single precision
    231 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
    232         __m128 q0 = _mm_loadu_ps((const float*)(p + i));
    233         __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
    234 
    235         __m128 q01l = _mm_unpacklo_ps(q0, q1);
    236         __m128 q01h = _mm_unpackhi_ps(q0, q1);
    237 
    238         __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
    239         __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
    240 
    241         _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
    242 #else
    243         arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
    244         arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
    245         arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
    246         arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
    247 #endif
    248     }
    249 
    250     for( ; i < len; i++ )
    251     {
    252         temp = RNG_NEXT(temp);
    253 #if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
    254         _mm_store_ss(arr + i, _mm_add_ss(
    255                 _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
    256                 _mm_set_ss(p[i][1]))
    257                 );
    258 #else
    259         arr[i] = (int)temp*p[i][0] + p[i][1];
    260 #endif
    261     }
    262 
    263     *state = temp;
    264 }
    265 
    266 
    267 static void
    268 randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
    269 {
    270     uint64 temp = *state;
    271     int64 v = 0;
    272     int i;
    273 
    274     for( i = 0; i <= len - 4; i += 4 )
    275     {
    276         double f0, f1;
    277 
    278         temp = RNG_NEXT(temp);
    279         v = (temp >> 32)|(temp << 32);
    280         f0 = v*p[i][0] + p[i][1];
    281         temp = RNG_NEXT(temp);
    282         v = (temp >> 32)|(temp << 32);
    283         f1 = v*p[i+1][0] + p[i+1][1];
    284         arr[i] = f0; arr[i+1] = f1;
    285 
    286         temp = RNG_NEXT(temp);
    287         v = (temp >> 32)|(temp << 32);
    288         f0 = v*p[i+2][0] + p[i+2][1];
    289         temp = RNG_NEXT(temp);
    290         v = (temp >> 32)|(temp << 32);
    291         f1 = v*p[i+3][0] + p[i+3][1];
    292         arr[i+2] = f0; arr[i+3] = f1;
    293     }
    294 
    295     for( ; i < len; i++ )
    296     {
    297         temp = RNG_NEXT(temp);
    298         v = (temp >> 32)|(temp << 32);
    299         arr[i] = v*p[i][0] + p[i][1];
    300     }
    301 
    302     *state = temp;
    303 }
    304 
    305 typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
    306 
    307 
    308 static RandFunc randTab[][8] =
    309 {
    310     {
    311         (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
    312         (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
    313     },
    314     {
    315         (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
    316         (RandFunc)randBits_32s, 0, 0, 0
    317     }
    318 };
    319 
    320 /*
    321    The code below implements the algorithm described in
    322    "The Ziggurat Method for Generating Random Variables"
    323    by Marsaglia and Tsang, Journal of Statistical Software.
    324 */
    325 static void
    326 randn_0_1_32f( float* arr, int len, uint64* state )
    327 {
    328     const float r = 3.442620f; // The start of the right tail
    329     const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
    330     static unsigned kn[128];
    331     static float wn[128], fn[128];
    332     uint64 temp = *state;
    333     static bool initialized=false;
    334     int i;
    335 
    336     if( !initialized )
    337     {
    338         const double m1 = 2147483648.0;
    339         double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
    340 
    341         // Set up the tables
    342         double q = vn/std::exp(-.5*dn*dn);
    343         kn[0] = (unsigned)((dn/q)*m1);
    344         kn[1] = 0;
    345 
    346         wn[0] = (float)(q/m1);
    347         wn[127] = (float)(dn/m1);
    348 
    349         fn[0] = 1.f;
    350         fn[127] = (float)std::exp(-.5*dn*dn);
    351 
    352         for(i=126;i>=1;i--)
    353         {
    354             dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
    355             kn[i+1] = (unsigned)((dn/tn)*m1);
    356             tn = dn;
    357             fn[i] = (float)std::exp(-.5*dn*dn);
    358             wn[i] = (float)(dn/m1);
    359         }
    360         initialized = true;
    361     }
    362 
    363     for( i = 0; i < len; i++ )
    364     {
    365         float x, y;
    366         for(;;)
    367         {
    368             int hz = (int)temp;
    369             temp = RNG_NEXT(temp);
    370             int iz = hz & 127;
    371             x = hz*wn[iz];
    372             if( (unsigned)std::abs(hz) < kn[iz] )
    373                 break;
    374             if( iz == 0) // iz==0, handles the base strip
    375             {
    376                 do
    377                 {
    378                     x = (unsigned)temp*rng_flt;
    379                     temp = RNG_NEXT(temp);
    380                     y = (unsigned)temp*rng_flt;
    381                     temp = RNG_NEXT(temp);
    382                     x = (float)(-std::log(x+FLT_MIN)*0.2904764);
    383                     y = (float)-std::log(y+FLT_MIN);
    384                 }	// .2904764 is 1/r
    385                 while( y + y < x*x );
    386                 x = hz > 0 ? r + x : -r - x;
    387                 break;
    388             }
    389             // iz > 0, handle the wedges of other strips
    390             y = (unsigned)temp*rng_flt;
    391             temp = RNG_NEXT(temp);
    392             if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
    393                 break;
    394         }
    395         arr[i] = x;
    396     }
    397     *state = temp;
    398 }
    399 
    400 
    401 double RNG::gaussian(double sigma)
    402 {
    403     float temp;
    404     randn_0_1_32f( &temp, 1, &state );
    405     return temp*sigma;
    406 }
    407 
    408 
    409 template<typename T, typename PT> static void
    410 randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
    411 {
    412     int i, j, k;
    413     if( !stdmtx )
    414     {
    415         if( cn == 1 )
    416         {
    417             PT b = mean[0], a = stddev[0];
    418             for( i = 0; i < len; i++ )
    419                 dst[i] = saturate_cast<T>(src[i]*a + b);
    420         }
    421         else
    422         {
    423             for( i = 0; i < len; i++, src += cn, dst += cn )
    424                 for( k = 0; k < cn; k++ )
    425                     dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
    426         }
    427     }
    428     else
    429     {
    430         for( i = 0; i < len; i++, src += cn, dst += cn )
    431         {
    432             for( j = 0; j < cn; j++ )
    433             {
    434                 PT s = mean[j];
    435                 for( k = 0; k < cn; k++ )
    436                     s += src[k]*stddev[j*cn + k];
    437                 dst[j] = saturate_cast<T>(s);
    438             }
    439         }
    440     }
    441 }
    442 
    443 static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
    444                             const float* mean, const float* stddev, bool stdmtx )
    445 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    446 
    447 static void randnScale_8s( const float* src, schar* dst, int len, int cn,
    448                             const float* mean, const float* stddev, bool stdmtx )
    449 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    450 
    451 static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
    452                              const float* mean, const float* stddev, bool stdmtx )
    453 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    454 
    455 static void randnScale_16s( const float* src, short* dst, int len, int cn,
    456                              const float* mean, const float* stddev, bool stdmtx )
    457 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    458 
    459 static void randnScale_32s( const float* src, int* dst, int len, int cn,
    460                              const float* mean, const float* stddev, bool stdmtx )
    461 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    462 
    463 static void randnScale_32f( const float* src, float* dst, int len, int cn,
    464                              const float* mean, const float* stddev, bool stdmtx )
    465 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    466 
    467 static void randnScale_64f( const float* src, double* dst, int len, int cn,
    468                              const double* mean, const double* stddev, bool stdmtx )
    469 { randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
    470 
    471 typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
    472                                const uchar*, const uchar*, bool);
    473 
    474 static RandnScaleFunc randnScaleTab[] =
    475 {
    476     (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
    477     (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
    478     (RandnScaleFunc)randnScale_64f, 0
    479 };
    480 
    481 void RNG::fill( InputOutputArray _mat, int disttype,
    482                 InputArray _param1arg, InputArray _param2arg, bool saturateRange )
    483 {
    484     Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
    485     int depth = mat.depth(), cn = mat.channels();
    486     AutoBuffer<double> _parambuf;
    487     int j, k, fast_int_mode = 0, smallFlag = 1;
    488     RandFunc func = 0;
    489     RandnScaleFunc scaleFunc = 0;
    490 
    491     CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
    492               (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
    493                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
    494     CV_Assert( _param2.channels() == 1 &&
    495                (((_param2.rows == 1 || _param2.cols == 1) &&
    496                 (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
    497                 (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
    498                 (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
    499 
    500     Vec2i* ip = 0;
    501     Vec2d* dp = 0;
    502     Vec2f* fp = 0;
    503     DivStruct* ds = 0;
    504     uchar* mean = 0;
    505     uchar* stddev = 0;
    506     bool stdmtx = false;
    507     int n1 = (int)_param1.total();
    508     int n2 = (int)_param2.total();
    509 
    510     if( disttype == UNIFORM )
    511     {
    512         _parambuf.allocate(cn*8 + n1 + n2);
    513         double* parambuf = _parambuf;
    514         double* p1 = _param1.ptr<double>();
    515         double* p2 = _param2.ptr<double>();
    516 
    517         if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
    518         {
    519             Mat tmp(_param1.size(), CV_64F, parambuf);
    520             _param1.convertTo(tmp, CV_64F);
    521             p1 = parambuf;
    522             if( n1 < cn )
    523                 for( j = n1; j < cn; j++ )
    524                     p1[j] = p1[j-n1];
    525         }
    526 
    527         if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
    528         {
    529             Mat tmp(_param2.size(), CV_64F, parambuf + cn);
    530             _param2.convertTo(tmp, CV_64F);
    531             p2 = parambuf + cn;
    532             if( n2 < cn )
    533                 for( j = n2; j < cn; j++ )
    534                     p2[j] = p2[j-n2];
    535         }
    536 
    537         if( depth <= CV_32S )
    538         {
    539             ip = (Vec2i*)(parambuf + cn*2);
    540             for( j = 0, fast_int_mode = 1; j < cn; j++ )
    541             {
    542                 double a = std::min(p1[j], p2[j]);
    543                 double b = std::max(p1[j], p2[j]);
    544                 if( saturateRange )
    545                 {
    546                     a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. :
    547                             depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
    548                     b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
    549                             depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
    550                 }
    551                 ip[j][1] = cvCeil(a);
    552                 int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
    553                 double diff = b - a;
    554 
    555                 fast_int_mode &= diff <= 4294967296. && (idiff & (idiff+1)) == 0;
    556                 if( fast_int_mode )
    557                     smallFlag &= idiff <= 255;
    558                 else
    559                 {
    560                     if( diff > INT_MAX )
    561                         ip[j][0] = INT_MAX;
    562                     if( a < INT_MIN/2 )
    563                         ip[j][1] = INT_MIN/2;
    564                 }
    565             }
    566 
    567             if( !fast_int_mode )
    568             {
    569                 ds = (DivStruct*)(ip + cn);
    570                 for( j = 0; j < cn; j++ )
    571                 {
    572                     ds[j].delta = ip[j][1];
    573                     unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
    574                     int l = 0;
    575                     while(((uint64)1 << l) < d)
    576                         l++;
    577                     ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
    578                     ds[j].sh1 = std::min(l, 1);
    579                     ds[j].sh2 = std::max(l - 1, 0);
    580                 }
    581             }
    582 
    583             func = randTab[fast_int_mode][depth];
    584         }
    585         else
    586         {
    587             double scale = depth == CV_64F ?
    588                 5.4210108624275221700372640043497e-20 : // 2**-64
    589                 2.3283064365386962890625e-10;           // 2**-32
    590             double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
    591 
    592             // for each channel i compute such dparam[0][i] & dparam[1][i],
    593             // so that a signed 32/64-bit integer X is transformed to
    594             // the range [param1.val[i], param2.val[i]) using
    595             // dparam[1][i]*X + dparam[0][i]
    596             if( depth == CV_32F )
    597             {
    598                 fp = (Vec2f*)(parambuf + cn*2);
    599                 for( j = 0; j < cn; j++ )
    600                 {
    601                     fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
    602                     fp[j][1] = (float)((p2[j] + p1[j])*0.5);
    603                 }
    604             }
    605             else
    606             {
    607                 dp = (Vec2d*)(parambuf + cn*2);
    608                 for( j = 0; j < cn; j++ )
    609                 {
    610                     dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
    611                     dp[j][1] = ((p2[j] + p1[j])*0.5);
    612                 }
    613             }
    614 
    615             func = randTab[0][depth];
    616         }
    617         CV_Assert( func != 0 );
    618     }
    619     else if( disttype == CV_RAND_NORMAL )
    620     {
    621         _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
    622         double* parambuf = _parambuf;
    623 
    624         int ptype = depth == CV_64F ? CV_64F : CV_32F;
    625         int esz = (int)CV_ELEM_SIZE(ptype);
    626 
    627         if( _param1.isContinuous() && _param1.type() == ptype )
    628             mean = _param1.ptr();
    629         else
    630         {
    631             Mat tmp(_param1.size(), ptype, parambuf);
    632             _param1.convertTo(tmp, ptype);
    633             mean = (uchar*)parambuf;
    634         }
    635 
    636         if( n1 < cn )
    637             for( j = n1*esz; j < cn*esz; j++ )
    638                 mean[j] = mean[j - n1*esz];
    639 
    640         if( _param2.isContinuous() && _param2.type() == ptype )
    641             stddev = _param2.ptr();
    642         else
    643         {
    644             Mat tmp(_param2.size(), ptype, parambuf + cn);
    645             _param2.convertTo(tmp, ptype);
    646             stddev = (uchar*)(parambuf + cn);
    647         }
    648 
    649         if( n1 < cn )
    650             for( j = n1*esz; j < cn*esz; j++ )
    651                 stddev[j] = stddev[j - n1*esz];
    652 
    653         stdmtx = _param2.rows == cn && _param2.cols == cn;
    654         scaleFunc = randnScaleTab[depth];
    655         CV_Assert( scaleFunc != 0 );
    656     }
    657     else
    658         CV_Error( CV_StsBadArg, "Unknown distribution type" );
    659 
    660     const Mat* arrays[] = {&mat, 0};
    661     uchar* ptr;
    662     NAryMatIterator it(arrays, &ptr);
    663     int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
    664     size_t esz = mat.elemSize();
    665     AutoBuffer<double> buf;
    666     uchar* param = 0;
    667     float* nbuf = 0;
    668 
    669     if( disttype == UNIFORM )
    670     {
    671         buf.allocate(blockSize*cn*4);
    672         param = (uchar*)(double*)buf;
    673 
    674         if( ip )
    675         {
    676             if( ds )
    677             {
    678                 DivStruct* p = (DivStruct*)param;
    679                 for( j = 0; j < blockSize*cn; j += cn )
    680                     for( k = 0; k < cn; k++ )
    681                         p[j + k] = ds[k];
    682             }
    683             else
    684             {
    685                 Vec2i* p = (Vec2i*)param;
    686                 for( j = 0; j < blockSize*cn; j += cn )
    687                     for( k = 0; k < cn; k++ )
    688                         p[j + k] = ip[k];
    689             }
    690         }
    691         else if( fp )
    692         {
    693             Vec2f* p = (Vec2f*)param;
    694             for( j = 0; j < blockSize*cn; j += cn )
    695                 for( k = 0; k < cn; k++ )
    696                     p[j + k] = fp[k];
    697         }
    698         else
    699         {
    700             Vec2d* p = (Vec2d*)param;
    701             for( j = 0; j < blockSize*cn; j += cn )
    702                 for( k = 0; k < cn; k++ )
    703                     p[j + k] = dp[k];
    704         }
    705     }
    706     else
    707     {
    708         buf.allocate((blockSize*cn+1)/2);
    709         nbuf = (float*)(double*)buf;
    710     }
    711 
    712     for( size_t i = 0; i < it.nplanes; i++, ++it )
    713     {
    714         for( j = 0; j < total; j += blockSize )
    715         {
    716             int len = std::min(total - j, blockSize);
    717 
    718             if( disttype == CV_RAND_UNI )
    719                 func( ptr, len*cn, &state, param, smallFlag != 0 );
    720             else
    721             {
    722                 randn_0_1_32f(nbuf, len*cn, &state);
    723                 scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
    724             }
    725             ptr += len*esz;
    726         }
    727     }
    728 }
    729 
    730 }
    731 
    732 cv::RNG& cv::theRNG()
    733 {
    734     return getCoreTlsData().get()->rng;
    735 }
    736 
    737 void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
    738 {
    739     theRNG().fill(dst, RNG::UNIFORM, low, high);
    740 }
    741 
    742 void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
    743 {
    744     theRNG().fill(dst, RNG::NORMAL, mean, stddev);
    745 }
    746 
    747 namespace cv
    748 {
    749 
    750 template<typename T> static void
    751 randShuffle_( Mat& _arr, RNG& rng, double )
    752 {
    753     unsigned sz = (unsigned)_arr.total();
    754     if( _arr.isContinuous() )
    755     {
    756         T* arr = _arr.ptr<T>();
    757         for( unsigned i = 0; i < sz; i++ )
    758         {
    759             unsigned j = (unsigned)rng % sz;
    760             std::swap( arr[j], arr[i] );
    761         }
    762     }
    763     else
    764     {
    765         CV_Assert( _arr.dims <= 2 );
    766         uchar* data = _arr.ptr();
    767         size_t step = _arr.step;
    768         int rows = _arr.rows;
    769         int cols = _arr.cols;
    770         for( int i0 = 0; i0 < rows; i0++ )
    771         {
    772             T* p = _arr.ptr<T>(i0);
    773             for( int j0 = 0; j0 < cols; j0++ )
    774             {
    775                 unsigned k1 = (unsigned)rng % sz;
    776                 int i1 = (int)(k1 / cols);
    777                 int j1 = (int)(k1 - (unsigned)i1*(unsigned)cols);
    778                 std::swap( p[j0], ((T*)(data + step*i1))[j1] );
    779             }
    780         }
    781     }
    782 }
    783 
    784 typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );
    785 
    786 }
    787 
    788 void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
    789 {
    790     RandShuffleFunc tab[] =
    791     {
    792         0,
    793         randShuffle_<uchar>, // 1
    794         randShuffle_<ushort>, // 2
    795         randShuffle_<Vec<uchar,3> >, // 3
    796         randShuffle_<int>, // 4
    797         0,
    798         randShuffle_<Vec<ushort,3> >, // 6
    799         0,
    800         randShuffle_<Vec<int,2> >, // 8
    801         0, 0, 0,
    802         randShuffle_<Vec<int,3> >, // 12
    803         0, 0, 0,
    804         randShuffle_<Vec<int,4> >, // 16
    805         0, 0, 0, 0, 0, 0, 0,
    806         randShuffle_<Vec<int,6> >, // 24
    807         0, 0, 0, 0, 0, 0, 0,
    808         randShuffle_<Vec<int,8> > // 32
    809     };
    810 
    811     Mat dst = _dst.getMat();
    812     RNG& rng = _rng ? *_rng : theRNG();
    813     CV_Assert( dst.elemSize() <= 32 );
    814     RandShuffleFunc func = tab[dst.elemSize()];
    815     CV_Assert( func != 0 );
    816     func( dst, rng, iterFactor );
    817 }
    818 
    819 CV_IMPL void
    820 cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
    821 {
    822     cv::Mat mat = cv::cvarrToMat(arr);
    823     // !!! this will only work for current 64-bit MWC RNG !!!
    824     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    825     rng.fill(mat, disttype == CV_RAND_NORMAL ?
    826         cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
    827 }
    828 
    829 CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
    830 {
    831     cv::Mat dst = cv::cvarrToMat(arr);
    832     cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    833     cv::randShuffle( dst, iter_factor, &rng );
    834 }
    835 
    836 // Mersenne Twister random number generator.
    837 // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
    838 
    839 /*
    840    A C-program for MT19937, with initialization improved 2002/1/26.
    841    Coded by Takuji Nishimura and Makoto Matsumoto.
    842 
    843    Before using, initialize the state by using init_genrand(seed)
    844    or init_by_array(init_key, key_length).
    845 
    846    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
    847    All rights reserved.
    848 
    849    Redistribution and use in source and binary forms, with or without
    850    modification, are permitted provided that the following conditions
    851    are met:
    852 
    853      1. Redistributions of source code must retain the above copyright
    854         notice, this list of conditions and the following disclaimer.
    855 
    856      2. Redistributions in binary form must reproduce the above copyright
    857         notice, this list of conditions and the following disclaimer in the
    858         documentation and/or other materials provided with the distribution.
    859 
    860      3. The names of its contributors may not be used to endorse or promote
    861         products derived from this software without specific prior written
    862         permission.
    863 
    864    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    865    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    866    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    867    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
    868    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    869    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    870    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    871    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    872    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    873    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    874    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    875 
    876 
    877    Any feedback is very welcome.
    878    http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
    879    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
    880 */
    881 
    882 cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
    883 
    884 cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
    885 
    886 void cv::RNG_MT19937::seed(unsigned s)
    887 {
    888     state[0]= s;
    889     for (mti = 1; mti < N; mti++)
    890     {
    891         /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
    892         state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
    893     }
    894 }
    895 
    896 unsigned cv::RNG_MT19937::next()
    897 {
    898     /* mag01[x] = x * MATRIX_A  for x=0,1 */
    899     static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
    900 
    901     const unsigned UPPER_MASK = 0x80000000U;
    902     const unsigned LOWER_MASK = 0x7fffffffU;
    903 
    904     /* generate N words at one time */
    905     if (mti >= N)
    906     {
    907         int kk = 0;
    908 
    909         for (; kk < N - M; ++kk)
    910         {
    911             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
    912             state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
    913         }
    914 
    915         for (; kk < N - 1; ++kk)
    916         {
    917             unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
    918             state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
    919         }
    920 
    921         unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
    922         state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
    923 
    924         mti = 0;
    925     }
    926 
    927     unsigned y = state[mti++];
    928 
    929     /* Tempering */
    930     y ^= (y >> 11);
    931     y ^= (y <<  7) & 0x9d2c5680U;
    932     y ^= (y << 15) & 0xefc60000U;
    933     y ^= (y >> 18);
    934 
    935     return y;
    936 }
    937 
    938 cv::RNG_MT19937::operator unsigned() { return next(); }
    939 
    940 cv::RNG_MT19937::operator int() { return (int)next();}
    941 
    942 cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
    943 
    944 cv::RNG_MT19937::operator double()
    945 {
    946     unsigned a = next() >> 5;
    947     unsigned b = next() >> 6;
    948     return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
    949 }
    950 
    951 int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
    952 
    953 float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
    954 
    955 double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
    956 
    957 unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
    958 
    959 unsigned cv::RNG_MT19937::operator ()() { return next(); }
    960 
    961 /* End of file. */
    962