Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //            Intel License Agreement
     11 //        For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "precomp.hpp"
     43 #include "opencl_kernels_imgproc.hpp"
     44 
     45 namespace cv
     46 {
     47 
     48 ////////////////// Helper functions //////////////////////
     49 
     50 static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t)*8 - 2);
     51 
     52 static void
     53 calcHistLookupTables_8u( const Mat& hist, const SparseMat& shist,
     54                          int dims, const float** ranges, const double* uniranges,
     55                          bool uniform, bool issparse, std::vector<size_t>& _tab )
     56 {
     57     const int low = 0, high = 256;
     58     int i, j;
     59     _tab.resize((high-low)*dims);
     60     size_t* tab = &_tab[0];
     61 
     62     if( uniform )
     63     {
     64         for( i = 0; i < dims; i++ )
     65         {
     66             double a = uniranges[i*2];
     67             double b = uniranges[i*2+1];
     68             int sz = !issparse ? hist.size[i] : shist.size(i);
     69             size_t step = !issparse ? hist.step[i] : 1;
     70 
     71             for( j = low; j < high; j++ )
     72             {
     73                 int idx = cvFloor(j*a + b);
     74                 size_t written_idx;
     75                 if( (unsigned)idx < (unsigned)sz )
     76                     written_idx = idx*step;
     77                 else
     78                     written_idx = OUT_OF_RANGE;
     79 
     80                 tab[i*(high - low) + j - low] = written_idx;
     81             }
     82         }
     83     }
     84     else
     85     {
     86         for( i = 0; i < dims; i++ )
     87         {
     88             int limit = std::min(cvCeil(ranges[i][0]), high);
     89             int idx = -1, sz = !issparse ? hist.size[i] : shist.size(i);
     90             size_t written_idx = OUT_OF_RANGE;
     91             size_t step = !issparse ? hist.step[i] : 1;
     92 
     93             for(j = low;;)
     94             {
     95                 for( ; j < limit; j++ )
     96                     tab[i*(high - low) + j - low] = written_idx;
     97 
     98                 if( (unsigned)(++idx) < (unsigned)sz )
     99                 {
    100                     limit = std::min(cvCeil(ranges[i][idx+1]), high);
    101                     written_idx = idx*step;
    102                 }
    103                 else
    104                 {
    105                     for( ; j < high; j++ )
    106                         tab[i*(high - low) + j - low] = OUT_OF_RANGE;
    107                     break;
    108                 }
    109             }
    110         }
    111     }
    112 }
    113 
    114 
    115 static void histPrepareImages( const Mat* images, int nimages, const int* channels,
    116                                const Mat& mask, int dims, const int* histSize,
    117                                const float** ranges, bool uniform,
    118                                std::vector<uchar*>& ptrs, std::vector<int>& deltas,
    119                                Size& imsize, std::vector<double>& uniranges )
    120 {
    121     int i, j, c;
    122     CV_Assert( channels != 0 || nimages == dims );
    123 
    124     imsize = images[0].size();
    125     int depth = images[0].depth(), esz1 = (int)images[0].elemSize1();
    126     bool isContinuous = true;
    127 
    128     ptrs.resize(dims + 1);
    129     deltas.resize((dims + 1)*2);
    130 
    131     for( i = 0; i < dims; i++ )
    132     {
    133         if(!channels)
    134         {
    135             j = i;
    136             c = 0;
    137             CV_Assert( images[j].channels() == 1 );
    138         }
    139         else
    140         {
    141             c = channels[i];
    142             CV_Assert( c >= 0 );
    143             for( j = 0; j < nimages; c -= images[j].channels(), j++ )
    144                 if( c < images[j].channels() )
    145                     break;
    146             CV_Assert( j < nimages );
    147         }
    148 
    149         CV_Assert( images[j].size() == imsize && images[j].depth() == depth );
    150         if( !images[j].isContinuous() )
    151             isContinuous = false;
    152         ptrs[i] = images[j].data + c*esz1;
    153         deltas[i*2] = images[j].channels();
    154         deltas[i*2+1] = (int)(images[j].step/esz1 - imsize.width*deltas[i*2]);
    155     }
    156 
    157     if( !mask.empty() )
    158     {
    159         CV_Assert( mask.size() == imsize && mask.channels() == 1 );
    160         isContinuous = isContinuous && mask.isContinuous();
    161         ptrs[dims] = mask.data;
    162         deltas[dims*2] = 1;
    163         deltas[dims*2 + 1] = (int)(mask.step/mask.elemSize1());
    164     }
    165 
    166 #ifndef HAVE_TBB
    167     if( isContinuous )
    168     {
    169         imsize.width *= imsize.height;
    170         imsize.height = 1;
    171     }
    172 #endif
    173 
    174     if( !ranges )
    175     {
    176         CV_Assert( depth == CV_8U );
    177 
    178         uniranges.resize( dims*2 );
    179         for( i = 0; i < dims; i++ )
    180         {
    181             uniranges[i*2] = histSize[i]/256.;
    182             uniranges[i*2+1] = 0;
    183         }
    184     }
    185     else if( uniform )
    186     {
    187         uniranges.resize( dims*2 );
    188         for( i = 0; i < dims; i++ )
    189         {
    190             CV_Assert( ranges[i] && ranges[i][0] < ranges[i][1] );
    191             double low = ranges[i][0], high = ranges[i][1];
    192             double t = histSize[i]/(high - low);
    193             uniranges[i*2] = t;
    194             uniranges[i*2+1] = -t*low;
    195         }
    196     }
    197     else
    198     {
    199         for( i = 0; i < dims; i++ )
    200         {
    201             size_t n = histSize[i];
    202             for(size_t k = 0; k < n; k++ )
    203                 CV_Assert( ranges[i][k] < ranges[i][k+1] );
    204         }
    205     }
    206 }
    207 
    208 
    209 ////////////////////////////////// C A L C U L A T E    H I S T O G R A M ////////////////////////////////////
    210 #ifdef HAVE_TBB
    211 enum {one = 1, two, three}; // array elements number
    212 
    213 template<typename T>
    214 class calcHist1D_Invoker
    215 {
    216 public:
    217     calcHist1D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    218                         Mat& hist, const double* _uniranges, int sz, int dims,
    219                         Size& imageSize )
    220         : mask_(_ptrs[dims]),
    221           mstep_(_deltas[dims*2 + 1]),
    222           imageWidth_(imageSize.width),
    223           histogramSize_(hist.size()), histogramType_(hist.type()),
    224           globalHistogram_((tbb::atomic<int>*)hist.data)
    225     {
    226         p_[0] = ((T**)&_ptrs[0])[0];
    227         step_[0] = (&_deltas[0])[1];
    228         d_[0] = (&_deltas[0])[0];
    229         a_[0] = (&_uniranges[0])[0];
    230         b_[0] = (&_uniranges[0])[1];
    231         size_[0] = sz;
    232     }
    233 
    234     void operator()( const BlockedRange& range ) const
    235     {
    236         T* p0 = p_[0] + range.begin() * (step_[0] + imageWidth_*d_[0]);
    237         uchar* mask = mask_ + range.begin()*mstep_;
    238 
    239         for( int row = range.begin(); row < range.end(); row++, p0 += step_[0] )
    240         {
    241             if( !mask_ )
    242             {
    243                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0] )
    244                 {
    245                     int idx = cvFloor(*p0*a_[0] + b_[0]);
    246                     if( (unsigned)idx < (unsigned)size_[0] )
    247                     {
    248                         globalHistogram_[idx].fetch_and_add(1);
    249                     }
    250                 }
    251             }
    252             else
    253             {
    254                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0] )
    255                 {
    256                     if( mask[x] )
    257                     {
    258                         int idx = cvFloor(*p0*a_[0] + b_[0]);
    259                         if( (unsigned)idx < (unsigned)size_[0] )
    260                         {
    261                             globalHistogram_[idx].fetch_and_add(1);
    262                         }
    263                     }
    264                 }
    265                 mask += mstep_;
    266             }
    267         }
    268     }
    269 
    270 private:
    271     calcHist1D_Invoker operator=(const calcHist1D_Invoker&);
    272 
    273     T* p_[one];
    274     uchar* mask_;
    275     int step_[one];
    276     int d_[one];
    277     int mstep_;
    278     double a_[one];
    279     double b_[one];
    280     int size_[one];
    281     int imageWidth_;
    282     Size histogramSize_;
    283     int histogramType_;
    284     tbb::atomic<int>* globalHistogram_;
    285 };
    286 
    287 template<typename T>
    288 class calcHist2D_Invoker
    289 {
    290 public:
    291     calcHist2D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    292                         Mat& hist, const double* _uniranges, const int* size,
    293                         int dims, Size& imageSize, size_t* hstep )
    294         : mask_(_ptrs[dims]),
    295           mstep_(_deltas[dims*2 + 1]),
    296           imageWidth_(imageSize.width),
    297           histogramSize_(hist.size()), histogramType_(hist.type()),
    298           globalHistogram_(hist.data)
    299     {
    300         p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1];
    301         step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3];
    302         d_[0] = (&_deltas[0])[0];    d_[1] = (&_deltas[0])[2];
    303         a_[0] = (&_uniranges[0])[0]; a_[1] = (&_uniranges[0])[2];
    304         b_[0] = (&_uniranges[0])[1]; b_[1] = (&_uniranges[0])[3];
    305         size_[0] = size[0];          size_[1] = size[1];
    306         hstep_[0] = hstep[0];
    307     }
    308 
    309     void operator()(const BlockedRange& range) const
    310     {
    311         T* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
    312         T* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
    313         uchar* mask = mask_ + range.begin()*mstep_;
    314 
    315         for( int row = range.begin(); row < range.end(); row++, p0 += step_[0], p1 += step_[1] )
    316         {
    317             if( !mask_ )
    318             {
    319                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
    320                 {
    321                     int idx0 = cvFloor(*p0*a_[0] + b_[0]);
    322                     int idx1 = cvFloor(*p1*a_[1] + b_[1]);
    323                     if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] )
    324                         ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0) )[idx1].fetch_and_add(1);
    325                 }
    326             }
    327             else
    328             {
    329                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
    330                 {
    331                     if( mask[x] )
    332                     {
    333                         int idx0 = cvFloor(*p0*a_[0] + b_[0]);
    334                         int idx1 = cvFloor(*p1*a_[1] + b_[1]);
    335                         if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] )
    336                             ((tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0))[idx1].fetch_and_add(1);
    337                     }
    338                 }
    339                 mask += mstep_;
    340             }
    341         }
    342     }
    343 
    344 private:
    345     calcHist2D_Invoker operator=(const calcHist2D_Invoker&);
    346 
    347     T* p_[two];
    348     uchar* mask_;
    349     int step_[two];
    350     int d_[two];
    351     int mstep_;
    352     double a_[two];
    353     double b_[two];
    354     int size_[two];
    355     const int imageWidth_;
    356     size_t hstep_[one];
    357     Size histogramSize_;
    358     int histogramType_;
    359     uchar* globalHistogram_;
    360 };
    361 
    362 
    363 template<typename T>
    364 class calcHist3D_Invoker
    365 {
    366 public:
    367     calcHist3D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    368                         Size imsize, Mat& hist, const double* uniranges, int _dims,
    369                         size_t* hstep, int* size )
    370         : mask_(_ptrs[_dims]),
    371           mstep_(_deltas[_dims*2 + 1]),
    372           imageWidth_(imsize.width),
    373           globalHistogram_(hist.data)
    374     {
    375         p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1]; p_[2] = ((T**)&_ptrs[0])[2];
    376         step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; step_[2] = (&_deltas[0])[5];
    377         d_[0] = (&_deltas[0])[0];    d_[1] = (&_deltas[0])[2];    d_[2] = (&_deltas[0])[4];
    378         a_[0] = uniranges[0];        a_[1] = uniranges[2];        a_[2] = uniranges[4];
    379         b_[0] = uniranges[1];        b_[1] = uniranges[3];        b_[2] = uniranges[5];
    380         size_[0] = size[0];          size_[1] = size[1];          size_[2] = size[2];
    381         hstep_[0] = hstep[0];        hstep_[1] = hstep[1];
    382     }
    383 
    384     void operator()( const BlockedRange& range ) const
    385     {
    386         T* p0 = p_[0] + range.begin()*(imageWidth_*d_[0] + step_[0]);
    387         T* p1 = p_[1] + range.begin()*(imageWidth_*d_[1] + step_[1]);
    388         T* p2 = p_[2] + range.begin()*(imageWidth_*d_[2] + step_[2]);
    389         uchar* mask = mask_ + range.begin()*mstep_;
    390 
    391         for( int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] )
    392         {
    393             if( !mask_ )
    394             {
    395                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
    396                 {
    397                     int idx0 = cvFloor(*p0*a_[0] + b_[0]);
    398                     int idx1 = cvFloor(*p1*a_[1] + b_[1]);
    399                     int idx2 = cvFloor(*p2*a_[2] + b_[2]);
    400                     if( (unsigned)idx0 < (unsigned)size_[0] &&
    401                             (unsigned)idx1 < (unsigned)size_[1] &&
    402                             (unsigned)idx2 < (unsigned)size_[2] )
    403                     {
    404                         ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1);
    405                     }
    406                 }
    407             }
    408             else
    409             {
    410                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
    411                 {
    412                     if( mask[x] )
    413                     {
    414                         int idx0 = cvFloor(*p0*a_[0] + b_[0]);
    415                         int idx1 = cvFloor(*p1*a_[1] + b_[1]);
    416                         int idx2 = cvFloor(*p2*a_[2] + b_[2]);
    417                         if( (unsigned)idx0 < (unsigned)size_[0] &&
    418                                 (unsigned)idx1 < (unsigned)size_[1] &&
    419                                 (unsigned)idx2 < (unsigned)size_[2] )
    420                         {
    421                             ( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1);
    422                         }
    423                     }
    424                 }
    425                 mask += mstep_;
    426             }
    427         }
    428     }
    429 
    430     static bool isFit( const Mat& histogram, const Size imageSize )
    431     {
    432         return ( imageSize.width * imageSize.height >= 320*240
    433                  && histogram.total() >= 8*8*8 );
    434     }
    435 
    436 private:
    437     calcHist3D_Invoker operator=(const calcHist3D_Invoker&);
    438 
    439     T* p_[three];
    440     uchar* mask_;
    441     int step_[three];
    442     int d_[three];
    443     const int mstep_;
    444     double a_[three];
    445     double b_[three];
    446     int size_[three];
    447     int imageWidth_;
    448     size_t hstep_[two];
    449     uchar* globalHistogram_;
    450 };
    451 
    452 class CalcHist1D_8uInvoker
    453 {
    454 public:
    455     CalcHist1D_8uInvoker( const std::vector<uchar*>& ptrs, const std::vector<int>& deltas,
    456                           Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab,
    457                           tbb::mutex* lock )
    458         : mask_(ptrs[dims]),
    459           mstep_(deltas[dims*2 + 1]),
    460           imageWidth_(imsize.width),
    461           imageSize_(imsize),
    462           histSize_(hist.size()), histType_(hist.type()),
    463           tab_((size_t*)&tab[0]),
    464           histogramWriteLock_(lock),
    465           globalHistogram_(hist.data)
    466     {
    467         p_[0] = (&ptrs[0])[0];
    468         step_[0] = (&deltas[0])[1];
    469         d_[0] = (&deltas[0])[0];
    470     }
    471 
    472     void operator()( const BlockedRange& range ) const
    473     {
    474         int localHistogram[256] = { 0, };
    475         uchar* mask = mask_;
    476         uchar* p0 = p_[0];
    477         int x;
    478         tbb::mutex::scoped_lock lock;
    479 
    480         if( !mask_ )
    481         {
    482             int n = (imageWidth_ - 4) / 4 + 1;
    483             int tail = imageWidth_ - n*4;
    484 
    485             int xN = 4*n;
    486             p0 += (xN*d_[0] + tail*d_[0] + step_[0]) * range.begin();
    487         }
    488         else
    489         {
    490             p0 += (imageWidth_*d_[0] + step_[0]) * range.begin();
    491             mask += mstep_*range.begin();
    492         }
    493 
    494         for( int i = range.begin(); i < range.end(); i++, p0 += step_[0] )
    495         {
    496             if( !mask_ )
    497             {
    498                 if( d_[0] == 1 )
    499                 {
    500                     for( x = 0; x <= imageWidth_ - 4; x += 4 )
    501                     {
    502                         int t0 = p0[x], t1 = p0[x+1];
    503                         localHistogram[t0]++; localHistogram[t1]++;
    504                         t0 = p0[x+2]; t1 = p0[x+3];
    505                         localHistogram[t0]++; localHistogram[t1]++;
    506                     }
    507                     p0 += x;
    508                 }
    509                 else
    510                 {
    511                     for( x = 0; x <= imageWidth_ - 4; x += 4 )
    512                     {
    513                         int t0 = p0[0], t1 = p0[d_[0]];
    514                         localHistogram[t0]++; localHistogram[t1]++;
    515                         p0 += d_[0]*2;
    516                         t0 = p0[0]; t1 = p0[d_[0]];
    517                         localHistogram[t0]++; localHistogram[t1]++;
    518                         p0 += d_[0]*2;
    519                     }
    520                 }
    521 
    522                 for( ; x < imageWidth_; x++, p0 += d_[0] )
    523                 {
    524                     localHistogram[*p0]++;
    525                 }
    526             }
    527             else
    528             {
    529                 for( x = 0; x < imageWidth_; x++, p0 += d_[0] )
    530                 {
    531                     if( mask[x] )
    532                     {
    533                         localHistogram[*p0]++;
    534                     }
    535                 }
    536                 mask += mstep_;
    537             }
    538         }
    539 
    540         lock.acquire(*histogramWriteLock_);
    541         for(int i = 0; i < 256; i++ )
    542         {
    543             size_t hidx = tab_[i];
    544             if( hidx < OUT_OF_RANGE )
    545             {
    546                 *(int*)((globalHistogram_ + hidx)) += localHistogram[i];
    547             }
    548         }
    549         lock.release();
    550     }
    551 
    552     static bool isFit( const Mat& histogram, const Size imageSize )
    553     {
    554         return ( histogram.total() >= 8
    555                 && imageSize.width * imageSize.height >= 160*120 );
    556     }
    557 
    558 private:
    559     uchar* p_[one];
    560     uchar* mask_;
    561     int mstep_;
    562     int step_[one];
    563     int d_[one];
    564     int imageWidth_;
    565     Size imageSize_;
    566     Size histSize_;
    567     int histType_;
    568     size_t* tab_;
    569     tbb::mutex* histogramWriteLock_;
    570     uchar* globalHistogram_;
    571 };
    572 
    573 class CalcHist2D_8uInvoker
    574 {
    575 public:
    576     CalcHist2D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    577                           Size imsize, Mat& hist, int dims, const std::vector<size_t>& _tab,
    578                           tbb::mutex* lock )
    579         : mask_(_ptrs[dims]),
    580           mstep_(_deltas[dims*2 + 1]),
    581           imageWidth_(imsize.width),
    582           histSize_(hist.size()), histType_(hist.type()),
    583           tab_((size_t*)&_tab[0]),
    584           histogramWriteLock_(lock),
    585           globalHistogram_(hist.data)
    586     {
    587         p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1];
    588         step_[0] = (&_deltas[0])[1];    step_[1] = (&_deltas[0])[3];
    589         d_[0] = (&_deltas[0])[0];       d_[1] = (&_deltas[0])[2];
    590     }
    591 
    592     void operator()( const BlockedRange& range ) const
    593     {
    594         uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
    595         uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
    596         uchar* mask = mask_ + range.begin()*mstep_;
    597 
    598         Mat localHist = Mat::zeros(histSize_, histType_);
    599         uchar* localHistData = localHist.data;
    600         tbb::mutex::scoped_lock lock;
    601 
    602         for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1])
    603         {
    604             if( !mask_ )
    605             {
    606                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
    607                 {
    608                     size_t idx = tab_[*p0] + tab_[*p1 + 256];
    609                     if( idx < OUT_OF_RANGE )
    610                     {
    611                         ++*(int*)(localHistData + idx);
    612                     }
    613                 }
    614             }
    615             else
    616             {
    617                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
    618                 {
    619                     size_t idx;
    620                     if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256]) < OUT_OF_RANGE )
    621                     {
    622                         ++*(int*)(localHistData + idx);
    623                     }
    624                 }
    625                 mask += mstep_;
    626             }
    627         }
    628 
    629         lock.acquire(*histogramWriteLock_);
    630         for(int i = 0; i < histSize_.width*histSize_.height; i++)
    631         {
    632             ((int*)globalHistogram_)[i] += ((int*)localHistData)[i];
    633         }
    634         lock.release();
    635     }
    636 
    637     static bool isFit( const Mat& histogram, const Size imageSize )
    638     {
    639         return ( (histogram.total() > 4*4 &&  histogram.total() <= 116*116
    640                   && imageSize.width * imageSize.height >= 320*240)
    641                  || (histogram.total() > 116*116 && imageSize.width * imageSize.height >= 1280*720) );
    642     }
    643 
    644 private:
    645     uchar* p_[two];
    646     uchar* mask_;
    647     int step_[two];
    648     int d_[two];
    649     int mstep_;
    650     int imageWidth_;
    651     Size histSize_;
    652     int histType_;
    653     size_t* tab_;
    654     tbb::mutex* histogramWriteLock_;
    655     uchar* globalHistogram_;
    656 };
    657 
    658 class CalcHist3D_8uInvoker
    659 {
    660 public:
    661     CalcHist3D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    662                           Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab )
    663         : mask_(_ptrs[dims]),
    664           mstep_(_deltas[dims*2 + 1]),
    665           histogramSize_(hist.size.p), histogramType_(hist.type()),
    666           imageWidth_(imsize.width),
    667           tab_((size_t*)&tab[0]),
    668           globalHistogram_(hist.data)
    669     {
    670         p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1]; p_[2] = (uchar*)(&_ptrs[0])[2];
    671         step_[0] = (&_deltas[0])[1];    step_[1] = (&_deltas[0])[3];    step_[2] = (&_deltas[0])[5];
    672         d_[0] = (&_deltas[0])[0];       d_[1] = (&_deltas[0])[2];       d_[2] = (&_deltas[0])[4];
    673     }
    674 
    675     void operator()( const BlockedRange& range ) const
    676     {
    677         uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
    678         uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
    679         uchar* p2 = p_[2] + range.begin()*(step_[2] + imageWidth_*d_[2]);
    680         uchar* mask = mask_ + range.begin()*mstep_;
    681 
    682         for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] )
    683         {
    684             if( !mask_ )
    685             {
    686                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
    687                 {
    688                     size_t idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512];
    689                     if( idx < OUT_OF_RANGE )
    690                     {
    691                         ( *(tbb::atomic<int>*)(globalHistogram_ + idx) ).fetch_and_add(1);
    692                     }
    693                 }
    694             }
    695             else
    696             {
    697                 for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
    698                 {
    699                     size_t idx;
    700                     if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512]) < OUT_OF_RANGE )
    701                     {
    702                         (*(tbb::atomic<int>*)(globalHistogram_ + idx)).fetch_and_add(1);
    703                     }
    704                 }
    705                 mask += mstep_;
    706             }
    707         }
    708     }
    709 
    710     static bool isFit( const Mat& histogram, const Size imageSize )
    711     {
    712         return ( histogram.total() >= 128*128*128
    713                  && imageSize.width * imageSize.width >= 320*240 );
    714     }
    715 
    716 private:
    717     uchar* p_[three];
    718     uchar* mask_;
    719     int mstep_;
    720     int step_[three];
    721     int d_[three];
    722     int* histogramSize_;
    723     int histogramType_;
    724     int imageWidth_;
    725     size_t* tab_;
    726     uchar* globalHistogram_;
    727 };
    728 
    729 static void
    730 callCalcHist2D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    731                    Size imsize, Mat& hist, int dims,  std::vector<size_t>& _tab )
    732 {
    733     int grainSize = imsize.height / tbb::task_scheduler_init::default_num_threads();
    734     tbb::mutex histogramWriteLock;
    735 
    736     CalcHist2D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock);
    737     parallel_for(BlockedRange(0, imsize.height, grainSize), body);
    738 }
    739 
    740 static void
    741 callCalcHist3D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    742                    Size imsize, Mat& hist, int dims,  std::vector<size_t>& _tab )
    743 {
    744     CalcHist3D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab);
    745     parallel_for(BlockedRange(0, imsize.height), body);
    746 }
    747 #endif
    748 
    749 template<typename T> static void
    750 calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    751            Size imsize, Mat& hist, int dims, const float** _ranges,
    752            const double* _uniranges, bool uniform )
    753 {
    754     T** ptrs = (T**)&_ptrs[0];
    755     const int* deltas = &_deltas[0];
    756     uchar* H = hist.ptr();
    757     int i, x;
    758     const uchar* mask = _ptrs[dims];
    759     int mstep = _deltas[dims*2 + 1];
    760     int size[CV_MAX_DIM];
    761     size_t hstep[CV_MAX_DIM];
    762 
    763     for( i = 0; i < dims; i++ )
    764     {
    765         size[i] = hist.size[i];
    766         hstep[i] = hist.step[i];
    767     }
    768 
    769     if( uniform )
    770     {
    771         const double* uniranges = &_uniranges[0];
    772 
    773         if( dims == 1 )
    774         {
    775 #ifdef HAVE_TBB
    776             calcHist1D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size[0], dims, imsize);
    777             parallel_for(BlockedRange(0, imsize.height), body);
    778 #else
    779             double a = uniranges[0], b = uniranges[1];
    780             int sz = size[0], d0 = deltas[0], step0 = deltas[1];
    781             const T* p0 = (const T*)ptrs[0];
    782 
    783             for( ; imsize.height--; p0 += step0, mask += mstep )
    784             {
    785                 if( !mask )
    786                     for( x = 0; x < imsize.width; x++, p0 += d0 )
    787                     {
    788                         int idx = cvFloor(*p0*a + b);
    789                         if( (unsigned)idx < (unsigned)sz )
    790                             ((int*)H)[idx]++;
    791                     }
    792                 else
    793                     for( x = 0; x < imsize.width; x++, p0 += d0 )
    794                         if( mask[x] )
    795                         {
    796                             int idx = cvFloor(*p0*a + b);
    797                             if( (unsigned)idx < (unsigned)sz )
    798                                 ((int*)H)[idx]++;
    799                         }
    800             }
    801 #endif //HAVE_TBB
    802             return;
    803         }
    804         else if( dims == 2 )
    805         {
    806 #ifdef HAVE_TBB
    807             calcHist2D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size, dims, imsize, hstep);
    808             parallel_for(BlockedRange(0, imsize.height), body);
    809 #else
    810             double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3];
    811             int sz0 = size[0], sz1 = size[1];
    812             int d0 = deltas[0], step0 = deltas[1],
    813                 d1 = deltas[2], step1 = deltas[3];
    814             size_t hstep0 = hstep[0];
    815             const T* p0 = (const T*)ptrs[0];
    816             const T* p1 = (const T*)ptrs[1];
    817 
    818             for( ; imsize.height--; p0 += step0, p1 += step1, mask += mstep )
    819             {
    820                 if( !mask )
    821                     for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
    822                     {
    823                         int idx0 = cvFloor(*p0*a0 + b0);
    824                         int idx1 = cvFloor(*p1*a1 + b1);
    825                         if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 )
    826                             ((int*)(H + hstep0*idx0))[idx1]++;
    827                     }
    828                 else
    829                     for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
    830                         if( mask[x] )
    831                         {
    832                             int idx0 = cvFloor(*p0*a0 + b0);
    833                             int idx1 = cvFloor(*p1*a1 + b1);
    834                             if( (unsigned)idx0 < (unsigned)sz0 && (unsigned)idx1 < (unsigned)sz1 )
    835                                 ((int*)(H + hstep0*idx0))[idx1]++;
    836                         }
    837             }
    838 #endif //HAVE_TBB
    839             return;
    840         }
    841         else if( dims == 3 )
    842         {
    843 #ifdef HAVE_TBB
    844             if( calcHist3D_Invoker<T>::isFit(hist, imsize) )
    845             {
    846                 calcHist3D_Invoker<T> body(_ptrs, _deltas, imsize, hist, uniranges, dims, hstep, size);
    847                 parallel_for(BlockedRange(0, imsize.height), body);
    848                 return;
    849             }
    850 #endif
    851             double a0 = uniranges[0], b0 = uniranges[1],
    852                    a1 = uniranges[2], b1 = uniranges[3],
    853                    a2 = uniranges[4], b2 = uniranges[5];
    854             int sz0 = size[0], sz1 = size[1], sz2 = size[2];
    855             int d0 = deltas[0], step0 = deltas[1],
    856                 d1 = deltas[2], step1 = deltas[3],
    857                 d2 = deltas[4], step2 = deltas[5];
    858             size_t hstep0 = hstep[0], hstep1 = hstep[1];
    859             const T* p0 = (const T*)ptrs[0];
    860             const T* p1 = (const T*)ptrs[1];
    861             const T* p2 = (const T*)ptrs[2];
    862 
    863             for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, mask += mstep )
    864             {
    865                 if( !mask )
    866                     for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
    867                     {
    868                         int idx0 = cvFloor(*p0*a0 + b0);
    869                         int idx1 = cvFloor(*p1*a1 + b1);
    870                         int idx2 = cvFloor(*p2*a2 + b2);
    871                         if( (unsigned)idx0 < (unsigned)sz0 &&
    872                             (unsigned)idx1 < (unsigned)sz1 &&
    873                             (unsigned)idx2 < (unsigned)sz2 )
    874                             ((int*)(H + hstep0*idx0 + hstep1*idx1))[idx2]++;
    875                     }
    876                 else
    877                     for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
    878                         if( mask[x] )
    879                         {
    880                             int idx0 = cvFloor(*p0*a0 + b0);
    881                             int idx1 = cvFloor(*p1*a1 + b1);
    882                             int idx2 = cvFloor(*p2*a2 + b2);
    883                             if( (unsigned)idx0 < (unsigned)sz0 &&
    884                                (unsigned)idx1 < (unsigned)sz1 &&
    885                                (unsigned)idx2 < (unsigned)sz2 )
    886                                 ((int*)(H + hstep0*idx0 + hstep1*idx1))[idx2]++;
    887                         }
    888             }
    889         }
    890         else
    891         {
    892             for( ; imsize.height--; mask += mstep )
    893             {
    894                 if( !mask )
    895                     for( x = 0; x < imsize.width; x++ )
    896                     {
    897                         uchar* Hptr = H;
    898                         for( i = 0; i < dims; i++ )
    899                         {
    900                             int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]);
    901                             if( (unsigned)idx >= (unsigned)size[i] )
    902                                 break;
    903                             ptrs[i] += deltas[i*2];
    904                             Hptr += idx*hstep[i];
    905                         }
    906 
    907                         if( i == dims )
    908                             ++*((int*)Hptr);
    909                         else
    910                             for( ; i < dims; i++ )
    911                                 ptrs[i] += deltas[i*2];
    912                     }
    913                 else
    914                     for( x = 0; x < imsize.width; x++ )
    915                     {
    916                         uchar* Hptr = H;
    917                         i = 0;
    918                         if( mask[x] )
    919                             for( ; i < dims; i++ )
    920                             {
    921                                 int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]);
    922                                 if( (unsigned)idx >= (unsigned)size[i] )
    923                                     break;
    924                                 ptrs[i] += deltas[i*2];
    925                                 Hptr += idx*hstep[i];
    926                             }
    927 
    928                         if( i == dims )
    929                             ++*((int*)Hptr);
    930                         else
    931                             for( ; i < dims; i++ )
    932                                 ptrs[i] += deltas[i*2];
    933                     }
    934                 for( i = 0; i < dims; i++ )
    935                     ptrs[i] += deltas[i*2 + 1];
    936             }
    937         }
    938     }
    939     else
    940     {
    941         // non-uniform histogram
    942         const float* ranges[CV_MAX_DIM];
    943         for( i = 0; i < dims; i++ )
    944             ranges[i] = &_ranges[i][0];
    945 
    946         for( ; imsize.height--; mask += mstep )
    947         {
    948             for( x = 0; x < imsize.width; x++ )
    949             {
    950                 uchar* Hptr = H;
    951                 i = 0;
    952 
    953                 if( !mask || mask[x] )
    954                     for( ; i < dims; i++ )
    955                     {
    956                         float v = (float)*ptrs[i];
    957                         const float* R = ranges[i];
    958                         int idx = -1, sz = size[i];
    959 
    960                         while( v >= R[idx+1] && ++idx < sz )
    961                             ; // nop
    962 
    963                         if( (unsigned)idx >= (unsigned)sz )
    964                             break;
    965 
    966                         ptrs[i] += deltas[i*2];
    967                         Hptr += idx*hstep[i];
    968                     }
    969 
    970                 if( i == dims )
    971                     ++*((int*)Hptr);
    972                 else
    973                     for( ; i < dims; i++ )
    974                         ptrs[i] += deltas[i*2];
    975             }
    976 
    977             for( i = 0; i < dims; i++ )
    978                 ptrs[i] += deltas[i*2 + 1];
    979         }
    980     }
    981 }
    982 
    983 
    984 static void
    985 calcHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    986              Size imsize, Mat& hist, int dims, const float** _ranges,
    987              const double* _uniranges, bool uniform )
    988 {
    989     uchar** ptrs = &_ptrs[0];
    990     const int* deltas = &_deltas[0];
    991     uchar* H = hist.ptr();
    992     int x;
    993     const uchar* mask = _ptrs[dims];
    994     int mstep = _deltas[dims*2 + 1];
    995     std::vector<size_t> _tab;
    996 
    997     calcHistLookupTables_8u( hist, SparseMat(), dims, _ranges, _uniranges, uniform, false, _tab );
    998     const size_t* tab = &_tab[0];
    999 
   1000     if( dims == 1 )
   1001     {
   1002 #ifdef HAVE_TBB
   1003         if( CalcHist1D_8uInvoker::isFit(hist, imsize) )
   1004         {
   1005             int treadsNumber = tbb::task_scheduler_init::default_num_threads();
   1006             int grainSize = imsize.height/treadsNumber;
   1007             tbb::mutex histogramWriteLock;
   1008 
   1009             CalcHist1D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock);
   1010             parallel_for(BlockedRange(0, imsize.height, grainSize), body);
   1011             return;
   1012         }
   1013 #endif
   1014         int d0 = deltas[0], step0 = deltas[1];
   1015         int matH[256] = { 0, };
   1016         const uchar* p0 = (const uchar*)ptrs[0];
   1017 
   1018         for( ; imsize.height--; p0 += step0, mask += mstep )
   1019         {
   1020             if( !mask )
   1021             {
   1022                 if( d0 == 1 )
   1023                 {
   1024                     for( x = 0; x <= imsize.width - 4; x += 4 )
   1025                     {
   1026                         int t0 = p0[x], t1 = p0[x+1];
   1027                         matH[t0]++; matH[t1]++;
   1028                         t0 = p0[x+2]; t1 = p0[x+3];
   1029                         matH[t0]++; matH[t1]++;
   1030                     }
   1031                     p0 += x;
   1032                 }
   1033                 else
   1034                     for( x = 0; x <= imsize.width - 4; x += 4 )
   1035                     {
   1036                         int t0 = p0[0], t1 = p0[d0];
   1037                         matH[t0]++; matH[t1]++;
   1038                         p0 += d0*2;
   1039                         t0 = p0[0]; t1 = p0[d0];
   1040                         matH[t0]++; matH[t1]++;
   1041                         p0 += d0*2;
   1042                     }
   1043 
   1044                 for( ; x < imsize.width; x++, p0 += d0 )
   1045                     matH[*p0]++;
   1046             }
   1047             else
   1048                 for( x = 0; x < imsize.width; x++, p0 += d0 )
   1049                     if( mask[x] )
   1050                         matH[*p0]++;
   1051         }
   1052 
   1053         for(int i = 0; i < 256; i++ )
   1054         {
   1055             size_t hidx = tab[i];
   1056             if( hidx < OUT_OF_RANGE )
   1057                 *(int*)(H + hidx) += matH[i];
   1058         }
   1059     }
   1060     else if( dims == 2 )
   1061     {
   1062 #ifdef HAVE_TBB
   1063         if( CalcHist2D_8uInvoker::isFit(hist, imsize) )
   1064         {
   1065             callCalcHist2D_8u(_ptrs, _deltas, imsize, hist, dims, _tab);
   1066             return;
   1067         }
   1068 #endif
   1069         int d0 = deltas[0], step0 = deltas[1],
   1070             d1 = deltas[2], step1 = deltas[3];
   1071         const uchar* p0 = (const uchar*)ptrs[0];
   1072         const uchar* p1 = (const uchar*)ptrs[1];
   1073 
   1074         for( ; imsize.height--; p0 += step0, p1 += step1, mask += mstep )
   1075         {
   1076             if( !mask )
   1077                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
   1078                 {
   1079                     size_t idx = tab[*p0] + tab[*p1 + 256];
   1080                     if( idx < OUT_OF_RANGE )
   1081                         ++*(int*)(H + idx);
   1082                 }
   1083             else
   1084                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
   1085                 {
   1086                     size_t idx;
   1087                     if( mask[x] && (idx = tab[*p0] + tab[*p1 + 256]) < OUT_OF_RANGE )
   1088                         ++*(int*)(H + idx);
   1089                 }
   1090         }
   1091     }
   1092     else if( dims == 3 )
   1093     {
   1094 #ifdef HAVE_TBB
   1095         if( CalcHist3D_8uInvoker::isFit(hist, imsize) )
   1096         {
   1097             callCalcHist3D_8u(_ptrs, _deltas, imsize, hist, dims, _tab);
   1098             return;
   1099         }
   1100 #endif
   1101         int d0 = deltas[0], step0 = deltas[1],
   1102             d1 = deltas[2], step1 = deltas[3],
   1103             d2 = deltas[4], step2 = deltas[5];
   1104 
   1105         const uchar* p0 = (const uchar*)ptrs[0];
   1106         const uchar* p1 = (const uchar*)ptrs[1];
   1107         const uchar* p2 = (const uchar*)ptrs[2];
   1108 
   1109         for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, mask += mstep )
   1110         {
   1111             if( !mask )
   1112                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
   1113                 {
   1114                     size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512];
   1115                     if( idx < OUT_OF_RANGE )
   1116                         ++*(int*)(H + idx);
   1117                 }
   1118             else
   1119                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
   1120                 {
   1121                     size_t idx;
   1122                     if( mask[x] && (idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]) < OUT_OF_RANGE )
   1123                         ++*(int*)(H + idx);
   1124                 }
   1125         }
   1126     }
   1127     else
   1128     {
   1129         for( ; imsize.height--; mask += mstep )
   1130         {
   1131             if( !mask )
   1132                 for( x = 0; x < imsize.width; x++ )
   1133                 {
   1134                     uchar* Hptr = H;
   1135                     int i = 0;
   1136                     for( ; i < dims; i++ )
   1137                     {
   1138                         size_t idx = tab[*ptrs[i] + i*256];
   1139                         if( idx >= OUT_OF_RANGE )
   1140                             break;
   1141                         Hptr += idx;
   1142                         ptrs[i] += deltas[i*2];
   1143                     }
   1144 
   1145                     if( i == dims )
   1146                         ++*((int*)Hptr);
   1147                     else
   1148                         for( ; i < dims; i++ )
   1149                             ptrs[i] += deltas[i*2];
   1150                 }
   1151             else
   1152                 for( x = 0; x < imsize.width; x++ )
   1153                 {
   1154                     uchar* Hptr = H;
   1155                     int i = 0;
   1156                     if( mask[x] )
   1157                         for( ; i < dims; i++ )
   1158                         {
   1159                             size_t idx = tab[*ptrs[i] + i*256];
   1160                             if( idx >= OUT_OF_RANGE )
   1161                                 break;
   1162                             Hptr += idx;
   1163                             ptrs[i] += deltas[i*2];
   1164                         }
   1165 
   1166                     if( i == dims )
   1167                         ++*((int*)Hptr);
   1168                     else
   1169                         for( ; i < dims; i++ )
   1170                             ptrs[i] += deltas[i*2];
   1171                 }
   1172             for(int i = 0; i < dims; i++ )
   1173                 ptrs[i] += deltas[i*2 + 1];
   1174         }
   1175     }
   1176 }
   1177 
   1178 #ifdef HAVE_IPP
   1179 
   1180 class IPPCalcHistInvoker :
   1181     public ParallelLoopBody
   1182 {
   1183 public:
   1184     IPPCalcHistInvoker(const Mat & _src, Mat & _hist, AutoBuffer<Ipp32s> & _levels, Ipp32s _histSize, Ipp32s _low, Ipp32s _high, bool * _ok) :
   1185         ParallelLoopBody(), src(&_src), hist(&_hist), levels(&_levels), histSize(_histSize), low(_low), high(_high), ok(_ok)
   1186     {
   1187         *ok = true;
   1188     }
   1189 
   1190     virtual void operator() (const Range & range) const
   1191     {
   1192         Mat phist(hist->size(), hist->type(), Scalar::all(0));
   1193 
   1194         IppStatus status = ippiHistogramEven_8u_C1R(
   1195             src->ptr(range.start), (int)src->step, ippiSize(src->cols, range.end - range.start),
   1196             phist.ptr<Ipp32s>(), (Ipp32s *)*levels, histSize, low, high);
   1197 
   1198         if (status < 0)
   1199         {
   1200             *ok = false;
   1201             return;
   1202         }
   1203         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   1204 
   1205         for (int i = 0; i < histSize; ++i)
   1206             CV_XADD((int *)(hist->data + i * hist->step), *(int *)(phist.data + i * phist.step));
   1207     }
   1208 
   1209 private:
   1210     const Mat * src;
   1211     Mat * hist;
   1212     AutoBuffer<Ipp32s> * levels;
   1213     Ipp32s histSize, low, high;
   1214     bool * ok;
   1215 
   1216     const IPPCalcHistInvoker & operator = (const IPPCalcHistInvoker & );
   1217 };
   1218 
   1219 #endif
   1220 
   1221 }
   1222 
   1223 void cv::calcHist( const Mat* images, int nimages, const int* channels,
   1224                    InputArray _mask, OutputArray _hist, int dims, const int* histSize,
   1225                    const float** ranges, bool uniform, bool accumulate )
   1226 {
   1227     Mat mask = _mask.getMat();
   1228 
   1229     CV_Assert(dims > 0 && histSize);
   1230 
   1231     const uchar* const histdata = _hist.getMat().ptr();
   1232     _hist.create(dims, histSize, CV_32F);
   1233     Mat hist = _hist.getMat(), ihist = hist;
   1234     ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK)|CV_32S;
   1235 
   1236 #ifdef HAVE_IPP
   1237     CV_IPP_CHECK()
   1238     {
   1239         if (nimages == 1 && images[0].type() == CV_8UC1 && dims == 1 && channels &&
   1240                 channels[0] == 0 && mask.empty() && images[0].dims <= 2 &&
   1241                 !accumulate && uniform)
   1242         {
   1243             ihist.setTo(Scalar::all(0));
   1244             AutoBuffer<Ipp32s> levels(histSize[0] + 1);
   1245 
   1246             bool ok = true;
   1247             const Mat & src = images[0];
   1248             int nstripes = std::min<int>(8, static_cast<int>(src.total() / (1 << 16)));
   1249 #ifdef HAVE_CONCURRENCY
   1250             nstripes = 1;
   1251 #endif
   1252             IPPCalcHistInvoker invoker(src, ihist, levels, histSize[0] + 1, (Ipp32s)ranges[0][0], (Ipp32s)ranges[0][1], &ok);
   1253             Range range(0, src.rows);
   1254             parallel_for_(range, invoker, nstripes);
   1255 
   1256             if (ok)
   1257             {
   1258                 ihist.convertTo(hist, CV_32F);
   1259                 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   1260                 return;
   1261             }
   1262             setIppErrorStatus();
   1263         }
   1264     }
   1265 #endif
   1266 
   1267     if( !accumulate || histdata != hist.data )
   1268         hist = Scalar(0.);
   1269     else
   1270         hist.convertTo(ihist, CV_32S);
   1271 
   1272     std::vector<uchar*> ptrs;
   1273     std::vector<int> deltas;
   1274     std::vector<double> uniranges;
   1275     Size imsize;
   1276 
   1277     CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
   1278     histPrepareImages( images, nimages, channels, mask, dims, hist.size, ranges,
   1279                        uniform, ptrs, deltas, imsize, uniranges );
   1280     const double* _uniranges = uniform ? &uniranges[0] : 0;
   1281 
   1282     int depth = images[0].depth();
   1283 
   1284     if( depth == CV_8U )
   1285         calcHist_8u(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform );
   1286     else if( depth == CV_16U )
   1287         calcHist_<ushort>(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform );
   1288     else if( depth == CV_32F )
   1289         calcHist_<float>(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform );
   1290     else
   1291         CV_Error(CV_StsUnsupportedFormat, "");
   1292 
   1293     ihist.convertTo(hist, CV_32F);
   1294 }
   1295 
   1296 
   1297 namespace cv
   1298 {
   1299 
   1300 template<typename T> static void
   1301 calcSparseHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1302                  Size imsize, SparseMat& hist, int dims, const float** _ranges,
   1303                  const double* _uniranges, bool uniform )
   1304 {
   1305     T** ptrs = (T**)&_ptrs[0];
   1306     const int* deltas = &_deltas[0];
   1307     int i, x;
   1308     const uchar* mask = _ptrs[dims];
   1309     int mstep = _deltas[dims*2 + 1];
   1310     const int* size = hist.hdr->size;
   1311     int idx[CV_MAX_DIM];
   1312 
   1313     if( uniform )
   1314     {
   1315         const double* uniranges = &_uniranges[0];
   1316 
   1317         for( ; imsize.height--; mask += mstep )
   1318         {
   1319             for( x = 0; x < imsize.width; x++ )
   1320             {
   1321                 i = 0;
   1322                 if( !mask || mask[x] )
   1323                     for( ; i < dims; i++ )
   1324                     {
   1325                         idx[i] = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]);
   1326                         if( (unsigned)idx[i] >= (unsigned)size[i] )
   1327                             break;
   1328                         ptrs[i] += deltas[i*2];
   1329                     }
   1330 
   1331                 if( i == dims )
   1332                     ++*(int*)hist.ptr(idx, true);
   1333                 else
   1334                     for( ; i < dims; i++ )
   1335                         ptrs[i] += deltas[i*2];
   1336             }
   1337             for( i = 0; i < dims; i++ )
   1338                 ptrs[i] += deltas[i*2 + 1];
   1339         }
   1340     }
   1341     else
   1342     {
   1343         // non-uniform histogram
   1344         const float* ranges[CV_MAX_DIM];
   1345         for( i = 0; i < dims; i++ )
   1346             ranges[i] = &_ranges[i][0];
   1347 
   1348         for( ; imsize.height--; mask += mstep )
   1349         {
   1350             for( x = 0; x < imsize.width; x++ )
   1351             {
   1352                 i = 0;
   1353 
   1354                 if( !mask || mask[x] )
   1355                     for( ; i < dims; i++ )
   1356                     {
   1357                         float v = (float)*ptrs[i];
   1358                         const float* R = ranges[i];
   1359                         int j = -1, sz = size[i];
   1360 
   1361                         while( v >= R[j+1] && ++j < sz )
   1362                             ; // nop
   1363 
   1364                         if( (unsigned)j >= (unsigned)sz )
   1365                             break;
   1366                         ptrs[i] += deltas[i*2];
   1367                         idx[i] = j;
   1368                     }
   1369 
   1370                 if( i == dims )
   1371                     ++*(int*)hist.ptr(idx, true);
   1372                 else
   1373                     for( ; i < dims; i++ )
   1374                         ptrs[i] += deltas[i*2];
   1375             }
   1376 
   1377             for( i = 0; i < dims; i++ )
   1378                 ptrs[i] += deltas[i*2 + 1];
   1379         }
   1380     }
   1381 }
   1382 
   1383 
   1384 static void
   1385 calcSparseHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1386                    Size imsize, SparseMat& hist, int dims, const float** _ranges,
   1387                    const double* _uniranges, bool uniform )
   1388 {
   1389     uchar** ptrs = (uchar**)&_ptrs[0];
   1390     const int* deltas = &_deltas[0];
   1391     int x;
   1392     const uchar* mask = _ptrs[dims];
   1393     int mstep = _deltas[dims*2 + 1];
   1394     int idx[CV_MAX_DIM];
   1395     std::vector<size_t> _tab;
   1396 
   1397     calcHistLookupTables_8u( Mat(), hist, dims, _ranges, _uniranges, uniform, true, _tab );
   1398     const size_t* tab = &_tab[0];
   1399 
   1400     for( ; imsize.height--; mask += mstep )
   1401     {
   1402         for( x = 0; x < imsize.width; x++ )
   1403         {
   1404             int i = 0;
   1405             if( !mask || mask[x] )
   1406                 for( ; i < dims; i++ )
   1407                 {
   1408                     size_t hidx = tab[*ptrs[i] + i*256];
   1409                     if( hidx >= OUT_OF_RANGE )
   1410                         break;
   1411                     ptrs[i] += deltas[i*2];
   1412                     idx[i] = (int)hidx;
   1413                 }
   1414 
   1415             if( i == dims )
   1416                 ++*(int*)hist.ptr(idx,true);
   1417             else
   1418                 for( ; i < dims; i++ )
   1419                     ptrs[i] += deltas[i*2];
   1420         }
   1421         for(int i = 0; i < dims; i++ )
   1422             ptrs[i] += deltas[i*2 + 1];
   1423     }
   1424 }
   1425 
   1426 
   1427 static void calcHist( const Mat* images, int nimages, const int* channels,
   1428                       const Mat& mask, SparseMat& hist, int dims, const int* histSize,
   1429                       const float** ranges, bool uniform, bool accumulate, bool keepInt )
   1430 {
   1431     size_t i, N;
   1432 
   1433     if( !accumulate )
   1434         hist.create(dims, histSize, CV_32F);
   1435     else
   1436     {
   1437         SparseMatIterator it = hist.begin();
   1438         for( i = 0, N = hist.nzcount(); i < N; i++, ++it )
   1439         {
   1440             Cv32suf* val = (Cv32suf*)it.ptr;
   1441             val->i = cvRound(val->f);
   1442         }
   1443     }
   1444 
   1445     std::vector<uchar*> ptrs;
   1446     std::vector<int> deltas;
   1447     std::vector<double> uniranges;
   1448     Size imsize;
   1449 
   1450     CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
   1451     histPrepareImages( images, nimages, channels, mask, dims, hist.hdr->size, ranges,
   1452                        uniform, ptrs, deltas, imsize, uniranges );
   1453     const double* _uniranges = uniform ? &uniranges[0] : 0;
   1454 
   1455     int depth = images[0].depth();
   1456     if( depth == CV_8U )
   1457         calcSparseHist_8u(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform );
   1458     else if( depth == CV_16U )
   1459         calcSparseHist_<ushort>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform );
   1460     else if( depth == CV_32F )
   1461         calcSparseHist_<float>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, uniform );
   1462     else
   1463         CV_Error(CV_StsUnsupportedFormat, "");
   1464 
   1465     if( !keepInt )
   1466     {
   1467         SparseMatIterator it = hist.begin();
   1468         for( i = 0, N = hist.nzcount(); i < N; i++, ++it )
   1469         {
   1470             Cv32suf* val = (Cv32suf*)it.ptr;
   1471             val->f = (float)val->i;
   1472         }
   1473     }
   1474 }
   1475 
   1476 #ifdef HAVE_OPENCL
   1477 
   1478 enum
   1479 {
   1480     BINS = 256
   1481 };
   1482 
   1483 static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32S)
   1484 {
   1485     const ocl::Device & dev = ocl::Device::getDefault();
   1486     int compunits = dev.maxComputeUnits();
   1487     size_t wgs = dev.maxWorkGroupSize();
   1488     Size size = _src.size();
   1489     bool use16 = size.width % 16 == 0 && _src.offset() % 16 == 0 && _src.step() % 16 == 0;
   1490     int kercn = dev.isAMD() && use16 ? 16 : std::min(4, ocl::predictOptimalVectorWidth(_src));
   1491 
   1492     ocl::Kernel k1("calculate_histogram", ocl::imgproc::histogram_oclsrc,
   1493                    format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D kercn=%d -D T=%s%s",
   1494                           BINS, compunits, wgs, kercn,
   1495                           kercn == 4 ? "int" : ocl::typeToStr(CV_8UC(kercn)),
   1496                           _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
   1497     if (k1.empty())
   1498         return false;
   1499 
   1500     _hist.create(BINS, 1, ddepth);
   1501     UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1),
   1502             hist = _hist.getUMat();
   1503 
   1504     k1.args(ocl::KernelArg::ReadOnly(src),
   1505             ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total());
   1506 
   1507     size_t globalsize = compunits * wgs;
   1508     if (!k1.run(1, &globalsize, &wgs, false))
   1509         return false;
   1510 
   1511     char cvt[40];
   1512     ocl::Kernel k2("merge_histogram", ocl::imgproc::histogram_oclsrc,
   1513                    format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D convertToHT=%s -D HT=%s",
   1514                           BINS, compunits, (int)wgs, ocl::convertTypeStr(CV_32S, ddepth, 1, cvt),
   1515                           ocl::typeToStr(ddepth)));
   1516     if (k2.empty())
   1517         return false;
   1518 
   1519     k2.args(ocl::KernelArg::PtrReadOnly(ghist),
   1520             ocl::KernelArg::WriteOnlyNoSize(hist));
   1521 
   1522     return k2.run(1, &wgs, &wgs, false);
   1523 }
   1524 
   1525 static bool ocl_calcHist(InputArrayOfArrays images, OutputArray hist)
   1526 {
   1527     std::vector<UMat> v;
   1528     images.getUMatVector(v);
   1529 
   1530     return ocl_calcHist1(v[0], hist, CV_32F);
   1531 }
   1532 
   1533 #endif
   1534 
   1535 }
   1536 
   1537 void cv::calcHist( const Mat* images, int nimages, const int* channels,
   1538                InputArray _mask, SparseMat& hist, int dims, const int* histSize,
   1539                const float** ranges, bool uniform, bool accumulate )
   1540 {
   1541     Mat mask = _mask.getMat();
   1542     calcHist( images, nimages, channels, mask, hist, dims, histSize,
   1543               ranges, uniform, accumulate, false );
   1544 }
   1545 
   1546 
   1547 void cv::calcHist( InputArrayOfArrays images, const std::vector<int>& channels,
   1548                    InputArray mask, OutputArray hist,
   1549                    const std::vector<int>& histSize,
   1550                    const std::vector<float>& ranges,
   1551                    bool accumulate )
   1552 {
   1553     CV_OCL_RUN(images.total() == 1 && channels.size() == 1 && images.channels(0) == 1 &&
   1554                channels[0] == 0 && images.isUMatVector() && mask.empty() && !accumulate &&
   1555                histSize.size() == 1 && histSize[0] == BINS && ranges.size() == 2 &&
   1556                ranges[0] == 0 && ranges[1] == BINS,
   1557                ocl_calcHist(images, hist))
   1558 
   1559     int i, dims = (int)histSize.size(), rsz = (int)ranges.size(), csz = (int)channels.size();
   1560     int nimages = (int)images.total();
   1561 
   1562     CV_Assert(nimages > 0 && dims > 0);
   1563     CV_Assert(rsz == dims*2 || (rsz == 0 && images.depth(0) == CV_8U));
   1564     CV_Assert(csz == 0 || csz == dims);
   1565     float* _ranges[CV_MAX_DIM];
   1566     if( rsz > 0 )
   1567     {
   1568         for( i = 0; i < rsz/2; i++ )
   1569             _ranges[i] = (float*)&ranges[i*2];
   1570     }
   1571 
   1572     AutoBuffer<Mat> buf(nimages);
   1573     for( i = 0; i < nimages; i++ )
   1574         buf[i] = images.getMat(i);
   1575 
   1576     calcHist(&buf[0], nimages, csz ? &channels[0] : 0,
   1577             mask, hist, dims, &histSize[0], rsz ? (const float**)_ranges : 0,
   1578             true, accumulate);
   1579 }
   1580 
   1581 
   1582 /////////////////////////////////////// B A C K   P R O J E C T ////////////////////////////////////
   1583 
   1584 namespace cv
   1585 {
   1586 
   1587 template<typename T, typename BT> static void
   1588 calcBackProj_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1589                Size imsize, const Mat& hist, int dims, const float** _ranges,
   1590                const double* _uniranges, float scale, bool uniform )
   1591 {
   1592     T** ptrs = (T**)&_ptrs[0];
   1593     const int* deltas = &_deltas[0];
   1594     const uchar* H = hist.ptr();
   1595     int i, x;
   1596     BT* bproj = (BT*)_ptrs[dims];
   1597     int bpstep = _deltas[dims*2 + 1];
   1598     int size[CV_MAX_DIM];
   1599     size_t hstep[CV_MAX_DIM];
   1600 
   1601     for( i = 0; i < dims; i++ )
   1602     {
   1603         size[i] = hist.size[i];
   1604         hstep[i] = hist.step[i];
   1605     }
   1606 
   1607     if( uniform )
   1608     {
   1609         const double* uniranges = &_uniranges[0];
   1610 
   1611         if( dims == 1 )
   1612         {
   1613             double a = uniranges[0], b = uniranges[1];
   1614             int sz = size[0], d0 = deltas[0], step0 = deltas[1];
   1615             const T* p0 = (const T*)ptrs[0];
   1616 
   1617             for( ; imsize.height--; p0 += step0, bproj += bpstep )
   1618             {
   1619                 for( x = 0; x < imsize.width; x++, p0 += d0 )
   1620                 {
   1621                     int idx = cvFloor(*p0*a + b);
   1622                     bproj[x] = (unsigned)idx < (unsigned)sz ? saturate_cast<BT>(((const float*)H)[idx]*scale) : 0;
   1623                 }
   1624             }
   1625         }
   1626         else if( dims == 2 )
   1627         {
   1628             double a0 = uniranges[0], b0 = uniranges[1],
   1629                    a1 = uniranges[2], b1 = uniranges[3];
   1630             int sz0 = size[0], sz1 = size[1];
   1631             int d0 = deltas[0], step0 = deltas[1],
   1632                 d1 = deltas[2], step1 = deltas[3];
   1633             size_t hstep0 = hstep[0];
   1634             const T* p0 = (const T*)ptrs[0];
   1635             const T* p1 = (const T*)ptrs[1];
   1636 
   1637             for( ; imsize.height--; p0 += step0, p1 += step1, bproj += bpstep )
   1638             {
   1639                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
   1640                 {
   1641                     int idx0 = cvFloor(*p0*a0 + b0);
   1642                     int idx1 = cvFloor(*p1*a1 + b1);
   1643                     bproj[x] = (unsigned)idx0 < (unsigned)sz0 &&
   1644                                (unsigned)idx1 < (unsigned)sz1 ?
   1645                         saturate_cast<BT>(((const float*)(H + hstep0*idx0))[idx1]*scale) : 0;
   1646                 }
   1647             }
   1648         }
   1649         else if( dims == 3 )
   1650         {
   1651             double a0 = uniranges[0], b0 = uniranges[1],
   1652                    a1 = uniranges[2], b1 = uniranges[3],
   1653                    a2 = uniranges[4], b2 = uniranges[5];
   1654             int sz0 = size[0], sz1 = size[1], sz2 = size[2];
   1655             int d0 = deltas[0], step0 = deltas[1],
   1656                 d1 = deltas[2], step1 = deltas[3],
   1657                 d2 = deltas[4], step2 = deltas[5];
   1658             size_t hstep0 = hstep[0], hstep1 = hstep[1];
   1659             const T* p0 = (const T*)ptrs[0];
   1660             const T* p1 = (const T*)ptrs[1];
   1661             const T* p2 = (const T*)ptrs[2];
   1662 
   1663             for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, bproj += bpstep )
   1664             {
   1665                 for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
   1666                 {
   1667                     int idx0 = cvFloor(*p0*a0 + b0);
   1668                     int idx1 = cvFloor(*p1*a1 + b1);
   1669                     int idx2 = cvFloor(*p2*a2 + b2);
   1670                     bproj[x] = (unsigned)idx0 < (unsigned)sz0 &&
   1671                                (unsigned)idx1 < (unsigned)sz1 &&
   1672                                (unsigned)idx2 < (unsigned)sz2 ?
   1673                         saturate_cast<BT>(((const float*)(H + hstep0*idx0 + hstep1*idx1))[idx2]*scale) : 0;
   1674                 }
   1675             }
   1676         }
   1677         else
   1678         {
   1679             for( ; imsize.height--; bproj += bpstep )
   1680             {
   1681                 for( x = 0; x < imsize.width; x++ )
   1682                 {
   1683                     const uchar* Hptr = H;
   1684                     for( i = 0; i < dims; i++ )
   1685                     {
   1686                         int idx = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]);
   1687                         if( (unsigned)idx >= (unsigned)size[i] || (_ranges && *ptrs[i] >= _ranges[i][1]))
   1688                             break;
   1689                         ptrs[i] += deltas[i*2];
   1690                         Hptr += idx*hstep[i];
   1691                     }
   1692 
   1693                     if( i == dims )
   1694                         bproj[x] = saturate_cast<BT>(*(const float*)Hptr*scale);
   1695                     else
   1696                     {
   1697                         bproj[x] = 0;
   1698                         for( ; i < dims; i++ )
   1699                             ptrs[i] += deltas[i*2];
   1700                     }
   1701                 }
   1702                 for( i = 0; i < dims; i++ )
   1703                     ptrs[i] += deltas[i*2 + 1];
   1704             }
   1705         }
   1706     }
   1707     else
   1708     {
   1709         // non-uniform histogram
   1710         const float* ranges[CV_MAX_DIM];
   1711         for( i = 0; i < dims; i++ )
   1712             ranges[i] = &_ranges[i][0];
   1713 
   1714         for( ; imsize.height--; bproj += bpstep )
   1715         {
   1716             for( x = 0; x < imsize.width; x++ )
   1717             {
   1718                 const uchar* Hptr = H;
   1719                 for( i = 0; i < dims; i++ )
   1720                 {
   1721                     float v = (float)*ptrs[i];
   1722                     const float* R = ranges[i];
   1723                     int idx = -1, sz = size[i];
   1724 
   1725                     while( v >= R[idx+1] && ++idx < sz )
   1726                         ; // nop
   1727 
   1728                     if( (unsigned)idx >= (unsigned)sz )
   1729                         break;
   1730 
   1731                     ptrs[i] += deltas[i*2];
   1732                     Hptr += idx*hstep[i];
   1733                 }
   1734 
   1735                 if( i == dims )
   1736                     bproj[x] = saturate_cast<BT>(*(const float*)Hptr*scale);
   1737                 else
   1738                 {
   1739                     bproj[x] = 0;
   1740                     for( ; i < dims; i++ )
   1741                         ptrs[i] += deltas[i*2];
   1742                 }
   1743             }
   1744 
   1745             for( i = 0; i < dims; i++ )
   1746                 ptrs[i] += deltas[i*2 + 1];
   1747         }
   1748     }
   1749 }
   1750 
   1751 
   1752 static void
   1753 calcBackProj_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1754                  Size imsize, const Mat& hist, int dims, const float** _ranges,
   1755                  const double* _uniranges, float scale, bool uniform )
   1756 {
   1757     uchar** ptrs = &_ptrs[0];
   1758     const int* deltas = &_deltas[0];
   1759     const uchar* H = hist.ptr();
   1760     int i, x;
   1761     uchar* bproj = _ptrs[dims];
   1762     int bpstep = _deltas[dims*2 + 1];
   1763     std::vector<size_t> _tab;
   1764 
   1765     calcHistLookupTables_8u( hist, SparseMat(), dims, _ranges, _uniranges, uniform, false, _tab );
   1766     const size_t* tab = &_tab[0];
   1767 
   1768     if( dims == 1 )
   1769     {
   1770         int d0 = deltas[0], step0 = deltas[1];
   1771         uchar matH[256] = {0};
   1772         const uchar* p0 = (const uchar*)ptrs[0];
   1773 
   1774         for( i = 0; i < 256; i++ )
   1775         {
   1776             size_t hidx = tab[i];
   1777             if( hidx < OUT_OF_RANGE )
   1778                 matH[i] = saturate_cast<uchar>(*(float*)(H + hidx)*scale);
   1779         }
   1780 
   1781         for( ; imsize.height--; p0 += step0, bproj += bpstep )
   1782         {
   1783             if( d0 == 1 )
   1784             {
   1785                 for( x = 0; x <= imsize.width - 4; x += 4 )
   1786                 {
   1787                     uchar t0 = matH[p0[x]], t1 = matH[p0[x+1]];
   1788                     bproj[x] = t0; bproj[x+1] = t1;
   1789                     t0 = matH[p0[x+2]]; t1 = matH[p0[x+3]];
   1790                     bproj[x+2] = t0; bproj[x+3] = t1;
   1791                 }
   1792                 p0 += x;
   1793             }
   1794             else
   1795                 for( x = 0; x <= imsize.width - 4; x += 4 )
   1796                 {
   1797                     uchar t0 = matH[p0[0]], t1 = matH[p0[d0]];
   1798                     bproj[x] = t0; bproj[x+1] = t1;
   1799                     p0 += d0*2;
   1800                     t0 = matH[p0[0]]; t1 = matH[p0[d0]];
   1801                     bproj[x+2] = t0; bproj[x+3] = t1;
   1802                     p0 += d0*2;
   1803                 }
   1804 
   1805             for( ; x < imsize.width; x++, p0 += d0 )
   1806                 bproj[x] = matH[*p0];
   1807         }
   1808     }
   1809     else if( dims == 2 )
   1810     {
   1811         int d0 = deltas[0], step0 = deltas[1],
   1812             d1 = deltas[2], step1 = deltas[3];
   1813         const uchar* p0 = (const uchar*)ptrs[0];
   1814         const uchar* p1 = (const uchar*)ptrs[1];
   1815 
   1816         for( ; imsize.height--; p0 += step0, p1 += step1, bproj += bpstep )
   1817         {
   1818             for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1 )
   1819             {
   1820                 size_t idx = tab[*p0] + tab[*p1 + 256];
   1821                 bproj[x] = idx < OUT_OF_RANGE ? saturate_cast<uchar>(*(const float*)(H + idx)*scale) : 0;
   1822             }
   1823         }
   1824     }
   1825     else if( dims == 3 )
   1826     {
   1827         int d0 = deltas[0], step0 = deltas[1],
   1828         d1 = deltas[2], step1 = deltas[3],
   1829         d2 = deltas[4], step2 = deltas[5];
   1830         const uchar* p0 = (const uchar*)ptrs[0];
   1831         const uchar* p1 = (const uchar*)ptrs[1];
   1832         const uchar* p2 = (const uchar*)ptrs[2];
   1833 
   1834         for( ; imsize.height--; p0 += step0, p1 += step1, p2 += step2, bproj += bpstep )
   1835         {
   1836             for( x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2 )
   1837             {
   1838                 size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512];
   1839                 bproj[x] = idx < OUT_OF_RANGE ? saturate_cast<uchar>(*(const float*)(H + idx)*scale) : 0;
   1840             }
   1841         }
   1842     }
   1843     else
   1844     {
   1845         for( ; imsize.height--; bproj += bpstep )
   1846         {
   1847             for( x = 0; x < imsize.width; x++ )
   1848             {
   1849                 const uchar* Hptr = H;
   1850                 for( i = 0; i < dims; i++ )
   1851                 {
   1852                     size_t idx = tab[*ptrs[i] + i*256];
   1853                     if( idx >= OUT_OF_RANGE )
   1854                         break;
   1855                     ptrs[i] += deltas[i*2];
   1856                     Hptr += idx;
   1857                 }
   1858 
   1859                 if( i == dims )
   1860                     bproj[x] = saturate_cast<uchar>(*(const float*)Hptr*scale);
   1861                 else
   1862                 {
   1863                     bproj[x] = 0;
   1864                     for( ; i < dims; i++ )
   1865                         ptrs[i] += deltas[i*2];
   1866                 }
   1867             }
   1868             for( i = 0; i < dims; i++ )
   1869                 ptrs[i] += deltas[i*2 + 1];
   1870         }
   1871     }
   1872 }
   1873 
   1874 }
   1875 
   1876 void cv::calcBackProject( const Mat* images, int nimages, const int* channels,
   1877                           InputArray _hist, OutputArray _backProject,
   1878                           const float** ranges, double scale, bool uniform )
   1879 {
   1880     Mat hist = _hist.getMat();
   1881     std::vector<uchar*> ptrs;
   1882     std::vector<int> deltas;
   1883     std::vector<double> uniranges;
   1884     Size imsize;
   1885     int dims = hist.dims == 2 && hist.size[1] == 1 ? 1 : hist.dims;
   1886 
   1887     CV_Assert( dims > 0 && !hist.empty() );
   1888     _backProject.create( images[0].size(), images[0].depth() );
   1889     Mat backProject = _backProject.getMat();
   1890     histPrepareImages( images, nimages, channels, backProject, dims, hist.size, ranges,
   1891                        uniform, ptrs, deltas, imsize, uniranges );
   1892     const double* _uniranges = uniform ? &uniranges[0] : 0;
   1893 
   1894     int depth = images[0].depth();
   1895     if( depth == CV_8U )
   1896         calcBackProj_8u(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform);
   1897     else if( depth == CV_16U )
   1898         calcBackProj_<ushort, ushort>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform );
   1899     else if( depth == CV_32F )
   1900         calcBackProj_<float, float>(ptrs, deltas, imsize, hist, dims, ranges, _uniranges, (float)scale, uniform );
   1901     else
   1902         CV_Error(CV_StsUnsupportedFormat, "");
   1903 }
   1904 
   1905 
   1906 namespace cv
   1907 {
   1908 
   1909 template<typename T, typename BT> static void
   1910 calcSparseBackProj_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1911                      Size imsize, const SparseMat& hist, int dims, const float** _ranges,
   1912                      const double* _uniranges, float scale, bool uniform )
   1913 {
   1914     T** ptrs = (T**)&_ptrs[0];
   1915     const int* deltas = &_deltas[0];
   1916     int i, x;
   1917     BT* bproj = (BT*)_ptrs[dims];
   1918     int bpstep = _deltas[dims*2 + 1];
   1919     const int* size = hist.hdr->size;
   1920     int idx[CV_MAX_DIM];
   1921     const SparseMat_<float>& hist_ = (const SparseMat_<float>&)hist;
   1922 
   1923     if( uniform )
   1924     {
   1925         const double* uniranges = &_uniranges[0];
   1926         for( ; imsize.height--; bproj += bpstep )
   1927         {
   1928             for( x = 0; x < imsize.width; x++ )
   1929             {
   1930                 for( i = 0; i < dims; i++ )
   1931                 {
   1932                     idx[i] = cvFloor(*ptrs[i]*uniranges[i*2] + uniranges[i*2+1]);
   1933                     if( (unsigned)idx[i] >= (unsigned)size[i] )
   1934                         break;
   1935                     ptrs[i] += deltas[i*2];
   1936                 }
   1937 
   1938                 if( i == dims )
   1939                     bproj[x] = saturate_cast<BT>(hist_(idx)*scale);
   1940                 else
   1941                 {
   1942                     bproj[x] = 0;
   1943                     for( ; i < dims; i++ )
   1944                         ptrs[i] += deltas[i*2];
   1945                 }
   1946             }
   1947             for( i = 0; i < dims; i++ )
   1948                 ptrs[i] += deltas[i*2 + 1];
   1949         }
   1950     }
   1951     else
   1952     {
   1953         // non-uniform histogram
   1954         const float* ranges[CV_MAX_DIM];
   1955         for( i = 0; i < dims; i++ )
   1956             ranges[i] = &_ranges[i][0];
   1957 
   1958         for( ; imsize.height--; bproj += bpstep )
   1959         {
   1960             for( x = 0; x < imsize.width; x++ )
   1961             {
   1962                 for( i = 0; i < dims; i++ )
   1963                 {
   1964                     float v = (float)*ptrs[i];
   1965                     const float* R = ranges[i];
   1966                     int j = -1, sz = size[i];
   1967 
   1968                     while( v >= R[j+1] && ++j < sz )
   1969                         ; // nop
   1970 
   1971                     if( (unsigned)j >= (unsigned)sz )
   1972                         break;
   1973                     idx[i] = j;
   1974                     ptrs[i] += deltas[i*2];
   1975                 }
   1976 
   1977                 if( i == dims )
   1978                     bproj[x] = saturate_cast<BT>(hist_(idx)*scale);
   1979                 else
   1980                 {
   1981                     bproj[x] = 0;
   1982                     for( ; i < dims; i++ )
   1983                         ptrs[i] += deltas[i*2];
   1984                 }
   1985             }
   1986 
   1987             for( i = 0; i < dims; i++ )
   1988                 ptrs[i] += deltas[i*2 + 1];
   1989         }
   1990     }
   1991 }
   1992 
   1993 
   1994 static void
   1995 calcSparseBackProj_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
   1996                        Size imsize, const SparseMat& hist, int dims, const float** _ranges,
   1997                        const double* _uniranges, float scale, bool uniform )
   1998 {
   1999     uchar** ptrs = &_ptrs[0];
   2000     const int* deltas = &_deltas[0];
   2001     int i, x;
   2002     uchar* bproj = _ptrs[dims];
   2003     int bpstep = _deltas[dims*2 + 1];
   2004     std::vector<size_t> _tab;
   2005     int idx[CV_MAX_DIM];
   2006 
   2007     calcHistLookupTables_8u( Mat(), hist, dims, _ranges, _uniranges, uniform, true, _tab );
   2008     const size_t* tab = &_tab[0];
   2009 
   2010     for( ; imsize.height--; bproj += bpstep )
   2011     {
   2012         for( x = 0; x < imsize.width; x++ )
   2013         {
   2014             for( i = 0; i < dims; i++ )
   2015             {
   2016                 size_t hidx = tab[*ptrs[i] + i*256];
   2017                 if( hidx >= OUT_OF_RANGE )
   2018                     break;
   2019                 idx[i] = (int)hidx;
   2020                 ptrs[i] += deltas[i*2];
   2021             }
   2022 
   2023             if( i == dims )
   2024                 bproj[x] = saturate_cast<uchar>(hist.value<float>(idx)*scale);
   2025             else
   2026             {
   2027                 bproj[x] = 0;
   2028                 for( ; i < dims; i++ )
   2029                     ptrs[i] += deltas[i*2];
   2030             }
   2031         }
   2032         for( i = 0; i < dims; i++ )
   2033             ptrs[i] += deltas[i*2 + 1];
   2034     }
   2035 }
   2036 
   2037 }
   2038 
   2039 void cv::calcBackProject( const Mat* images, int nimages, const int* channels,
   2040                           const SparseMat& hist, OutputArray _backProject,
   2041                           const float** ranges, double scale, bool uniform )
   2042 {
   2043     std::vector<uchar*> ptrs;
   2044     std::vector<int> deltas;
   2045     std::vector<double> uniranges;
   2046     Size imsize;
   2047     int dims = hist.dims();
   2048 
   2049     CV_Assert( dims > 0 );
   2050     _backProject.create( images[0].size(), images[0].depth() );
   2051     Mat backProject = _backProject.getMat();
   2052     histPrepareImages( images, nimages, channels, backProject,
   2053                        dims, hist.hdr->size, ranges,
   2054                        uniform, ptrs, deltas, imsize, uniranges );
   2055     const double* _uniranges = uniform ? &uniranges[0] : 0;
   2056 
   2057     int depth = images[0].depth();
   2058     if( depth == CV_8U )
   2059         calcSparseBackProj_8u(ptrs, deltas, imsize, hist, dims, ranges,
   2060                               _uniranges, (float)scale, uniform);
   2061     else if( depth == CV_16U )
   2062         calcSparseBackProj_<ushort, ushort>(ptrs, deltas, imsize, hist, dims, ranges,
   2063                                           _uniranges, (float)scale, uniform );
   2064     else if( depth == CV_32F )
   2065         calcSparseBackProj_<float, float>(ptrs, deltas, imsize, hist, dims, ranges,
   2066                                           _uniranges, (float)scale, uniform );
   2067     else
   2068         CV_Error(CV_StsUnsupportedFormat, "");
   2069 }
   2070 
   2071 #ifdef HAVE_OPENCL
   2072 
   2073 namespace cv {
   2074 
   2075 static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
   2076 {
   2077     int totalChannels = 0;
   2078     for (size_t i = 0, size = um.size(); i < size; ++i)
   2079     {
   2080         int ccn = um[i].channels();
   2081         totalChannels += ccn;
   2082 
   2083         if (totalChannels == cn)
   2084         {
   2085             idx = (int)(i + 1);
   2086             cnidx = 0;
   2087             return;
   2088         }
   2089         else if (totalChannels > cn)
   2090         {
   2091             idx = (int)i;
   2092             cnidx = i == 0 ? cn : (cn - totalChannels + ccn);
   2093             return;
   2094         }
   2095     }
   2096 
   2097     idx = cnidx = -1;
   2098 }
   2099 
   2100 static bool ocl_calcBackProject( InputArrayOfArrays _images, std::vector<int> channels,
   2101                                  InputArray _hist, OutputArray _dst,
   2102                                  const std::vector<float>& ranges,
   2103                                  float scale, size_t histdims )
   2104 {
   2105     std::vector<UMat> images;
   2106     _images.getUMatVector(images);
   2107 
   2108     size_t nimages = images.size(), totalcn = images[0].channels();
   2109 
   2110     CV_Assert(nimages > 0);
   2111     Size size = images[0].size();
   2112     int depth = images[0].depth();
   2113 
   2114     //kernels are valid for this type only
   2115     if (depth != CV_8U)
   2116         return false;
   2117 
   2118     for (size_t i = 1; i < nimages; ++i)
   2119     {
   2120         const UMat & m = images[i];
   2121         totalcn += m.channels();
   2122         CV_Assert(size == m.size() && depth == m.depth());
   2123     }
   2124 
   2125     std::sort(channels.begin(), channels.end());
   2126     for (size_t i = 0; i < histdims; ++i)
   2127         CV_Assert(channels[i] < (int)totalcn);
   2128 
   2129     if (histdims == 1)
   2130     {
   2131         int idx, cnidx;
   2132         getUMatIndex(images, channels[0], idx, cnidx);
   2133         CV_Assert(idx >= 0);
   2134         UMat im = images[idx];
   2135 
   2136         String opts = format("-D histdims=1 -D scn=%d", im.channels());
   2137         ocl::Kernel lutk("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
   2138         if (lutk.empty())
   2139             return false;
   2140 
   2141         size_t lsize = 256;
   2142         UMat lut(1, (int)lsize, CV_32SC1), hist = _hist.getUMat(), uranges(ranges, true);
   2143 
   2144         lutk.args(ocl::KernelArg::ReadOnlyNoSize(hist), hist.rows,
   2145                   ocl::KernelArg::PtrWriteOnly(lut), scale, ocl::KernelArg::PtrReadOnly(uranges));
   2146         if (!lutk.run(1, &lsize, NULL, false))
   2147             return false;
   2148 
   2149         ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
   2150         if (mapk.empty())
   2151             return false;
   2152 
   2153         _dst.create(size, depth);
   2154         UMat dst = _dst.getUMat();
   2155 
   2156         im.offset += cnidx;
   2157         mapk.args(ocl::KernelArg::ReadOnlyNoSize(im), ocl::KernelArg::PtrReadOnly(lut),
   2158                   ocl::KernelArg::WriteOnly(dst));
   2159 
   2160         size_t globalsize[2] = { size.width, size.height };
   2161         return mapk.run(2, globalsize, NULL, false);
   2162     }
   2163     else if (histdims == 2)
   2164     {
   2165         int idx0, idx1, cnidx0, cnidx1;
   2166         getUMatIndex(images, channels[0], idx0, cnidx0);
   2167         getUMatIndex(images, channels[1], idx1, cnidx1);
   2168         CV_Assert(idx0 >= 0 && idx1 >= 0);
   2169         UMat im0 = images[idx0], im1 = images[idx1];
   2170 
   2171         // Lut for the first dimension
   2172         String opts = format("-D histdims=2 -D scn1=%d -D scn2=%d", im0.channels(), im1.channels());
   2173         ocl::Kernel lutk1("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
   2174         if (lutk1.empty())
   2175             return false;
   2176 
   2177         size_t lsize = 256;
   2178         UMat lut(1, (int)lsize<<1, CV_32SC1), uranges(ranges, true), hist = _hist.getUMat();
   2179 
   2180         lutk1.args(hist.rows, ocl::KernelArg::PtrWriteOnly(lut), (int)0, ocl::KernelArg::PtrReadOnly(uranges), (int)0);
   2181         if (!lutk1.run(1, &lsize, NULL, false))
   2182             return false;
   2183 
   2184         // lut for the second dimension
   2185         ocl::Kernel lutk2("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
   2186         if (lutk2.empty())
   2187             return false;
   2188 
   2189         lut.offset += lsize * sizeof(int);
   2190         lutk2.args(hist.cols, ocl::KernelArg::PtrWriteOnly(lut), (int)256, ocl::KernelArg::PtrReadOnly(uranges), (int)2);
   2191         if (!lutk2.run(1, &lsize, NULL, false))
   2192             return false;
   2193 
   2194         // perform lut
   2195         ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
   2196         if (mapk.empty())
   2197             return false;
   2198 
   2199         _dst.create(size, depth);
   2200         UMat dst = _dst.getUMat();
   2201 
   2202         im0.offset += cnidx0;
   2203         im1.offset += cnidx1;
   2204         mapk.args(ocl::KernelArg::ReadOnlyNoSize(im0), ocl::KernelArg::ReadOnlyNoSize(im1),
   2205                ocl::KernelArg::ReadOnlyNoSize(hist), ocl::KernelArg::PtrReadOnly(lut), scale, ocl::KernelArg::WriteOnly(dst));
   2206 
   2207         size_t globalsize[2] = { size.width, size.height };
   2208         return mapk.run(2, globalsize, NULL, false);
   2209     }
   2210     return false;
   2211 }
   2212 
   2213 }
   2214 
   2215 #endif
   2216 
   2217 void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels,
   2218                           InputArray hist, OutputArray dst,
   2219                           const std::vector<float>& ranges,
   2220                           double scale )
   2221 {
   2222 #ifdef HAVE_OPENCL
   2223     Size histSize = hist.size();
   2224     bool _1D = histSize.height == 1 || histSize.width == 1;
   2225     size_t histdims = _1D ? 1 : hist.dims();
   2226 #endif
   2227 
   2228     CV_OCL_RUN(dst.isUMat() && hist.type() == CV_32FC1 &&
   2229                histdims <= 2 && ranges.size() == histdims * 2 && histdims == channels.size(),
   2230                ocl_calcBackProject(images, channels, hist, dst, ranges, (float)scale, histdims))
   2231 
   2232     Mat H0 = hist.getMat(), H;
   2233     int hcn = H0.channels();
   2234 
   2235     if( hcn > 1 )
   2236     {
   2237         CV_Assert( H0.isContinuous() );
   2238         int hsz[CV_CN_MAX+1];
   2239         memcpy(hsz, &H0.size[0], H0.dims*sizeof(hsz[0]));
   2240         hsz[H0.dims] = hcn;
   2241         H = Mat(H0.dims+1, hsz, H0.depth(), H0.ptr());
   2242     }
   2243     else
   2244         H = H0;
   2245 
   2246     bool _1d = H.rows == 1 || H.cols == 1;
   2247     int i, dims = H.dims, rsz = (int)ranges.size(), csz = (int)channels.size();
   2248     int nimages = (int)images.total();
   2249 
   2250     CV_Assert(nimages > 0);
   2251     CV_Assert(rsz == dims*2 || (rsz == 2 && _1d) || (rsz == 0 && images.depth(0) == CV_8U));
   2252     CV_Assert(csz == 0 || csz == dims || (csz == 1 && _1d));
   2253 
   2254     float* _ranges[CV_MAX_DIM];
   2255     if( rsz > 0 )
   2256     {
   2257         for( i = 0; i < rsz/2; i++ )
   2258             _ranges[i] = (float*)&ranges[i*2];
   2259     }
   2260 
   2261     AutoBuffer<Mat> buf(nimages);
   2262     for( i = 0; i < nimages; i++ )
   2263         buf[i] = images.getMat(i);
   2264 
   2265     calcBackProject(&buf[0], nimages, csz ? &channels[0] : 0,
   2266         hist, dst, rsz ? (const float**)_ranges : 0, scale, true);
   2267 }
   2268 
   2269 
   2270 ////////////////// C O M P A R E   H I S T O G R A M S ////////////////////////
   2271 
   2272 double cv::compareHist( InputArray _H1, InputArray _H2, int method )
   2273 {
   2274     Mat H1 = _H1.getMat(), H2 = _H2.getMat();
   2275     const Mat* arrays[] = {&H1, &H2, 0};
   2276     Mat planes[2];
   2277     NAryMatIterator it(arrays, planes);
   2278     double result = 0;
   2279     int j, len = (int)it.size;
   2280 
   2281     CV_Assert( H1.type() == H2.type() && H1.depth() == CV_32F );
   2282 
   2283     double s1 = 0, s2 = 0, s11 = 0, s12 = 0, s22 = 0;
   2284 
   2285     CV_Assert( it.planes[0].isContinuous() && it.planes[1].isContinuous() );
   2286 
   2287 #if CV_SSE2
   2288     bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
   2289 #endif
   2290 
   2291     for( size_t i = 0; i < it.nplanes; i++, ++it )
   2292     {
   2293         const float* h1 = it.planes[0].ptr<float>();
   2294         const float* h2 = it.planes[1].ptr<float>();
   2295         len = it.planes[0].rows*it.planes[0].cols*H1.channels();
   2296         j = 0;
   2297 
   2298         if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT))
   2299         {
   2300             for( ; j < len; j++ )
   2301             {
   2302                 double a = h1[j] - h2[j];
   2303                 double b = (method == CV_COMP_CHISQR) ? h1[j] : h1[j] + h2[j];
   2304                 if( fabs(b) > DBL_EPSILON )
   2305                     result += a*a/b;
   2306             }
   2307         }
   2308         else if( method == CV_COMP_CORREL )
   2309         {
   2310             #if CV_SSE2
   2311             if (haveSIMD)
   2312             {
   2313                 __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1;
   2314                 __m128d v_s11 = v_s1, v_s22 = v_s1, v_s12 = v_s1;
   2315 
   2316                 for ( ; j <= len - 4; j += 4)
   2317                 {
   2318                     __m128 v_a = _mm_loadu_ps(h1 + j);
   2319                     __m128 v_b = _mm_loadu_ps(h2 + j);
   2320 
   2321                     // 0-1
   2322                     __m128d v_ad = _mm_cvtps_pd(v_a);
   2323                     __m128d v_bd = _mm_cvtps_pd(v_b);
   2324                     v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
   2325                     v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
   2326                     v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
   2327                     v_s1 = _mm_add_pd(v_s1, v_ad);
   2328                     v_s2 = _mm_add_pd(v_s2, v_bd);
   2329 
   2330                     // 2-3
   2331                     v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
   2332                     v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
   2333                     v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
   2334                     v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
   2335                     v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
   2336                     v_s1 = _mm_add_pd(v_s1, v_ad);
   2337                     v_s2 = _mm_add_pd(v_s2, v_bd);
   2338                 }
   2339 
   2340                 double CV_DECL_ALIGNED(16) ar[10];
   2341                 _mm_store_pd(ar, v_s12);
   2342                 _mm_store_pd(ar + 2, v_s11);
   2343                 _mm_store_pd(ar + 4, v_s22);
   2344                 _mm_store_pd(ar + 6, v_s1);
   2345                 _mm_store_pd(ar + 8, v_s2);
   2346 
   2347                 s12 += ar[0] + ar[1];
   2348                 s11 += ar[2] + ar[3];
   2349                 s22 += ar[4] + ar[5];
   2350                 s1 += ar[6] + ar[7];
   2351                 s2 += ar[8] + ar[9];
   2352             }
   2353             #endif
   2354             for( ; j < len; j++ )
   2355             {
   2356                 double a = h1[j];
   2357                 double b = h2[j];
   2358 
   2359                 s12 += a*b;
   2360                 s1 += a;
   2361                 s11 += a*a;
   2362                 s2 += b;
   2363                 s22 += b*b;
   2364             }
   2365         }
   2366         else if( method == CV_COMP_INTERSECT )
   2367         {
   2368             #if CV_NEON
   2369             float32x4_t v_result = vdupq_n_f32(0.0f);
   2370             for( ; j <= len - 4; j += 4 )
   2371                 v_result = vaddq_f32(v_result, vminq_f32(vld1q_f32(h1 + j), vld1q_f32(h2 + j)));
   2372             float CV_DECL_ALIGNED(16) ar[4];
   2373             vst1q_f32(ar, v_result);
   2374             result += ar[0] + ar[1] + ar[2] + ar[3];
   2375             #elif CV_SSE2
   2376             if (haveSIMD)
   2377             {
   2378                 __m128d v_result = _mm_setzero_pd();
   2379                 for ( ; j <= len - 4; j += 4)
   2380                 {
   2381                     __m128 v_src = _mm_min_ps(_mm_loadu_ps(h1 + j),
   2382                                               _mm_loadu_ps(h2 + j));
   2383                     v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
   2384                     v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8));
   2385                     v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
   2386                 }
   2387 
   2388                 double CV_DECL_ALIGNED(16) ar[2];
   2389                 _mm_store_pd(ar, v_result);
   2390                 result += ar[0] + ar[1];
   2391             }
   2392             #endif
   2393             for( ; j < len; j++ )
   2394                 result += std::min(h1[j], h2[j]);
   2395         }
   2396         else if( method == CV_COMP_BHATTACHARYYA )
   2397         {
   2398             #if CV_SSE2
   2399             if (haveSIMD)
   2400             {
   2401                 __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1, v_result = v_s1;
   2402                 for ( ; j <= len - 4; j += 4)
   2403                 {
   2404                     __m128 v_a = _mm_loadu_ps(h1 + j);
   2405                     __m128 v_b = _mm_loadu_ps(h2 + j);
   2406 
   2407                     __m128d v_ad = _mm_cvtps_pd(v_a);
   2408                     __m128d v_bd = _mm_cvtps_pd(v_b);
   2409                     v_s1 = _mm_add_pd(v_s1, v_ad);
   2410                     v_s2 = _mm_add_pd(v_s2, v_bd);
   2411                     v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
   2412 
   2413                     v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
   2414                     v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
   2415                     v_s1 = _mm_add_pd(v_s1, v_ad);
   2416                     v_s2 = _mm_add_pd(v_s2, v_bd);
   2417                     v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
   2418                 }
   2419 
   2420                 double CV_DECL_ALIGNED(16) ar[6];
   2421                 _mm_store_pd(ar, v_s1);
   2422                 _mm_store_pd(ar + 2, v_s2);
   2423                 _mm_store_pd(ar + 4, v_result);
   2424                 s1 += ar[0] + ar[1];
   2425                 s2 += ar[2] + ar[3];
   2426                 result += ar[4] + ar[5];
   2427             }
   2428             #endif
   2429             for( ; j < len; j++ )
   2430             {
   2431                 double a = h1[j];
   2432                 double b = h2[j];
   2433                 result += std::sqrt(a*b);
   2434                 s1 += a;
   2435                 s2 += b;
   2436             }
   2437         }
   2438         else if( method == CV_COMP_KL_DIV )
   2439         {
   2440             for( ; j < len; j++ )
   2441             {
   2442                 double p = h1[j];
   2443                 double q = h2[j];
   2444                 if( fabs(p) <= DBL_EPSILON ) {
   2445                     continue;
   2446                 }
   2447                 if(  fabs(q) <= DBL_EPSILON ) {
   2448                     q = 1e-10;
   2449                 }
   2450                 result += p * std::log( p / q );
   2451             }
   2452         }
   2453         else
   2454             CV_Error( CV_StsBadArg, "Unknown comparison method" );
   2455     }
   2456 
   2457     if( method == CV_COMP_CHISQR_ALT )
   2458         result *= 2;
   2459     else if( method == CV_COMP_CORREL )
   2460     {
   2461         size_t total = H1.total();
   2462         double scale = 1./total;
   2463         double num = s12 - s1*s2*scale;
   2464         double denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale);
   2465         result = std::abs(denom2) > DBL_EPSILON ? num/std::sqrt(denom2) : 1.;
   2466     }
   2467     else if( method == CV_COMP_BHATTACHARYYA )
   2468     {
   2469         s1 *= s2;
   2470         s1 = fabs(s1) > FLT_EPSILON ? 1./std::sqrt(s1) : 1.;
   2471         result = std::sqrt(std::max(1. - result*s1, 0.));
   2472     }
   2473 
   2474     return result;
   2475 }
   2476 
   2477 
   2478 double cv::compareHist( const SparseMat& H1, const SparseMat& H2, int method )
   2479 {
   2480     double result = 0;
   2481     int i, dims = H1.dims();
   2482 
   2483     CV_Assert( dims > 0 && dims == H2.dims() && H1.type() == H2.type() && H1.type() == CV_32F );
   2484     for( i = 0; i < dims; i++ )
   2485         CV_Assert( H1.size(i) == H2.size(i) );
   2486 
   2487     const SparseMat *PH1 = &H1, *PH2 = &H2;
   2488     if( PH1->nzcount() > PH2->nzcount() && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT && method != CV_COMP_KL_DIV )
   2489         std::swap(PH1, PH2);
   2490 
   2491     SparseMatConstIterator it = PH1->begin();
   2492     int N1 = (int)PH1->nzcount(), N2 = (int)PH2->nzcount();
   2493 
   2494     if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) )
   2495     {
   2496         for( i = 0; i < N1; i++, ++it )
   2497         {
   2498             float v1 = it.value<float>();
   2499             const SparseMat::Node* node = it.node();
   2500             float v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval);
   2501             double a = v1 - v2;
   2502             double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2;
   2503             if( fabs(b) > DBL_EPSILON )
   2504                 result += a*a/b;
   2505         }
   2506     }
   2507     else if( method == CV_COMP_CORREL )
   2508     {
   2509         double s1 = 0, s2 = 0, s11 = 0, s12 = 0, s22 = 0;
   2510 
   2511         for( i = 0; i < N1; i++, ++it )
   2512         {
   2513             double v1 = it.value<float>();
   2514             const SparseMat::Node* node = it.node();
   2515             s12 += v1*PH2->value<float>(node->idx, (size_t*)&node->hashval);
   2516             s1 += v1;
   2517             s11 += v1*v1;
   2518         }
   2519 
   2520         it = PH2->begin();
   2521         for( i = 0; i < N2; i++, ++it )
   2522         {
   2523             double v2 = it.value<float>();
   2524             s2 += v2;
   2525             s22 += v2*v2;
   2526         }
   2527 
   2528         size_t total = 1;
   2529         for( i = 0; i < H1.dims(); i++ )
   2530             total *= H1.size(i);
   2531         double scale = 1./total;
   2532         double num = s12 - s1*s2*scale;
   2533         double denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale);
   2534         result = std::abs(denom2) > DBL_EPSILON ? num/std::sqrt(denom2) : 1.;
   2535     }
   2536     else if( method == CV_COMP_INTERSECT )
   2537     {
   2538         for( i = 0; i < N1; i++, ++it )
   2539         {
   2540             float v1 = it.value<float>();
   2541             const SparseMat::Node* node = it.node();
   2542             float v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval);
   2543             if( v2 )
   2544                 result += std::min(v1, v2);
   2545         }
   2546     }
   2547     else if( method == CV_COMP_BHATTACHARYYA )
   2548     {
   2549         double s1 = 0, s2 = 0;
   2550 
   2551         for( i = 0; i < N1; i++, ++it )
   2552         {
   2553             double v1 = it.value<float>();
   2554             const SparseMat::Node* node = it.node();
   2555             double v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval);
   2556             result += std::sqrt(v1*v2);
   2557             s1 += v1;
   2558         }
   2559 
   2560         it = PH2->begin();
   2561         for( i = 0; i < N2; i++, ++it )
   2562             s2 += it.value<float>();
   2563 
   2564         s1 *= s2;
   2565         s1 = fabs(s1) > FLT_EPSILON ? 1./std::sqrt(s1) : 1.;
   2566         result = std::sqrt(std::max(1. - result*s1, 0.));
   2567     }
   2568     else if( method == CV_COMP_KL_DIV )
   2569     {
   2570         for( i = 0; i < N1; i++, ++it )
   2571         {
   2572             double v1 = it.value<float>();
   2573             const SparseMat::Node* node = it.node();
   2574             double v2 = PH2->value<float>(node->idx, (size_t*)&node->hashval);
   2575             if( !v2 )
   2576                 v2 = 1e-10;
   2577             result += v1 * std::log( v1 / v2 );
   2578         }
   2579     }
   2580     else
   2581         CV_Error( CV_StsBadArg, "Unknown comparison method" );
   2582 
   2583     if( method == CV_COMP_CHISQR_ALT )
   2584         result *= 2;
   2585 
   2586     return result;
   2587 }
   2588 
   2589 
   2590 const int CV_HIST_DEFAULT_TYPE = CV_32F;
   2591 
   2592 /* Creates new histogram */
   2593 CvHistogram *
   2594 cvCreateHist( int dims, int *sizes, CvHistType type, float** ranges, int uniform )
   2595 {
   2596     CvHistogram *hist = 0;
   2597 
   2598     if( (unsigned)dims > CV_MAX_DIM )
   2599         CV_Error( CV_BadOrder, "Number of dimensions is out of range" );
   2600 
   2601     if( !sizes )
   2602         CV_Error( CV_HeaderIsNull, "Null <sizes> pointer" );
   2603 
   2604     hist = (CvHistogram *)cvAlloc( sizeof( CvHistogram ));
   2605     hist->type = CV_HIST_MAGIC_VAL + ((int)type & 1);
   2606     if (uniform) hist->type|= CV_HIST_UNIFORM_FLAG;
   2607     hist->thresh2 = 0;
   2608     hist->bins = 0;
   2609     if( type == CV_HIST_ARRAY )
   2610     {
   2611         hist->bins = cvInitMatNDHeader( &hist->mat, dims, sizes,
   2612                                         CV_HIST_DEFAULT_TYPE );
   2613         cvCreateData( hist->bins );
   2614     }
   2615     else if( type == CV_HIST_SPARSE )
   2616         hist->bins = cvCreateSparseMat( dims, sizes, CV_HIST_DEFAULT_TYPE );
   2617     else
   2618         CV_Error( CV_StsBadArg, "Invalid histogram type" );
   2619 
   2620     if( ranges )
   2621         cvSetHistBinRanges( hist, ranges, uniform );
   2622 
   2623     return hist;
   2624 }
   2625 
   2626 
   2627 /* Creates histogram wrapping header for given array */
   2628 CV_IMPL CvHistogram*
   2629 cvMakeHistHeaderForArray( int dims, int *sizes, CvHistogram *hist,
   2630                           float *data, float **ranges, int uniform )
   2631 {
   2632     if( !hist )
   2633         CV_Error( CV_StsNullPtr, "Null histogram header pointer" );
   2634 
   2635     if( !data )
   2636         CV_Error( CV_StsNullPtr, "Null data pointer" );
   2637 
   2638     hist->thresh2 = 0;
   2639     hist->type = CV_HIST_MAGIC_VAL;
   2640     hist->bins = cvInitMatNDHeader( &hist->mat, dims, sizes, CV_HIST_DEFAULT_TYPE, data );
   2641 
   2642     if( ranges )
   2643     {
   2644         if( !uniform )
   2645             CV_Error( CV_StsBadArg, "Only uniform bin ranges can be used here "
   2646                                     "(to avoid memory allocation)" );
   2647         cvSetHistBinRanges( hist, ranges, uniform );
   2648     }
   2649 
   2650     return hist;
   2651 }
   2652 
   2653 
   2654 CV_IMPL void
   2655 cvReleaseHist( CvHistogram **hist )
   2656 {
   2657     if( !hist )
   2658         CV_Error( CV_StsNullPtr, "" );
   2659 
   2660     if( *hist )
   2661     {
   2662         CvHistogram* temp = *hist;
   2663 
   2664         if( !CV_IS_HIST(temp))
   2665             CV_Error( CV_StsBadArg, "Invalid histogram header" );
   2666         *hist = 0;
   2667 
   2668         if( CV_IS_SPARSE_HIST( temp ))
   2669             cvReleaseSparseMat( (CvSparseMat**)&temp->bins );
   2670         else
   2671         {
   2672             cvReleaseData( temp->bins );
   2673             temp->bins = 0;
   2674         }
   2675 
   2676         if( temp->thresh2 )
   2677             cvFree( &temp->thresh2 );
   2678         cvFree( &temp );
   2679     }
   2680 }
   2681 
   2682 CV_IMPL void
   2683 cvClearHist( CvHistogram *hist )
   2684 {
   2685     if( !CV_IS_HIST(hist) )
   2686         CV_Error( CV_StsBadArg, "Invalid histogram header" );
   2687     cvZero( hist->bins );
   2688 }
   2689 
   2690 
   2691 // Clears histogram bins that are below than threshold
   2692 CV_IMPL void
   2693 cvThreshHist( CvHistogram* hist, double thresh )
   2694 {
   2695     if( !CV_IS_HIST(hist) )
   2696         CV_Error( CV_StsBadArg, "Invalid histogram header" );
   2697 
   2698     if( !CV_IS_SPARSE_MAT(hist->bins) )
   2699     {
   2700         CvMat mat;
   2701         cvGetMat( hist->bins, &mat, 0, 1 );
   2702         cvThreshold( &mat, &mat, thresh, 0, CV_THRESH_TOZERO );
   2703     }
   2704     else
   2705     {
   2706         CvSparseMat* mat = (CvSparseMat*)hist->bins;
   2707         CvSparseMatIterator iterator;
   2708         CvSparseNode *node;
   2709 
   2710         for( node = cvInitSparseMatIterator( mat, &iterator );
   2711              node != 0; node = cvGetNextSparseNode( &iterator ))
   2712         {
   2713             float* val = (float*)CV_NODE_VAL( mat, node );
   2714             if( *val <= thresh )
   2715                 *val = 0;
   2716         }
   2717     }
   2718 }
   2719 
   2720 
   2721 // Normalizes histogram (make sum of the histogram bins == factor)
   2722 CV_IMPL void
   2723 cvNormalizeHist( CvHistogram* hist, double factor )
   2724 {
   2725     double sum = 0;
   2726 
   2727     if( !CV_IS_HIST(hist) )
   2728         CV_Error( CV_StsBadArg, "Invalid histogram header" );
   2729 
   2730     if( !CV_IS_SPARSE_HIST(hist) )
   2731     {
   2732         CvMat mat;
   2733         cvGetMat( hist->bins, &mat, 0, 1 );
   2734         sum = cvSum( &mat ).val[0];
   2735         if( fabs(sum) < DBL_EPSILON )
   2736             sum = 1;
   2737         cvScale( &mat, &mat, factor/sum, 0 );
   2738     }
   2739     else
   2740     {
   2741         CvSparseMat* mat = (CvSparseMat*)hist->bins;
   2742         CvSparseMatIterator iterator;
   2743         CvSparseNode *node;
   2744         float scale;
   2745 
   2746         for( node = cvInitSparseMatIterator( mat, &iterator );
   2747              node != 0; node = cvGetNextSparseNode( &iterator ))
   2748         {
   2749             sum += *(float*)CV_NODE_VAL(mat,node);
   2750         }
   2751 
   2752         if( fabs(sum) < DBL_EPSILON )
   2753             sum = 1;
   2754         scale = (float)(factor/sum);
   2755 
   2756         for( node = cvInitSparseMatIterator( mat, &iterator );
   2757              node != 0; node = cvGetNextSparseNode( &iterator ))
   2758         {
   2759             *(float*)CV_NODE_VAL(mat,node) *= scale;
   2760         }
   2761     }
   2762 }
   2763 
   2764 
   2765 // Retrieves histogram global min, max and their positions
   2766 CV_IMPL void
   2767 cvGetMinMaxHistValue( const CvHistogram* hist,
   2768                       float *value_min, float* value_max,
   2769                       int* idx_min, int* idx_max )
   2770 {
   2771     double minVal, maxVal;
   2772     int dims, size[CV_MAX_DIM];
   2773 
   2774     if( !CV_IS_HIST(hist) )
   2775         CV_Error( CV_StsBadArg, "Invalid histogram header" );
   2776 
   2777     dims = cvGetDims( hist->bins, size );
   2778 
   2779     if( !CV_IS_SPARSE_HIST(hist) )
   2780     {
   2781         CvMat mat;
   2782         CvPoint minPt, maxPt;
   2783 
   2784         cvGetMat( hist->bins, &mat, 0, 1 );
   2785         cvMinMaxLoc( &mat, &minVal, &maxVal, &minPt, &maxPt );
   2786 
   2787         if( dims == 1 )
   2788         {
   2789             if( idx_min )
   2790                 *idx_min = minPt.y + minPt.x;
   2791             if( idx_max )
   2792                 *idx_max = maxPt.y + maxPt.x;
   2793         }
   2794         else if( dims == 2 )
   2795         {
   2796             if( idx_min )
   2797                 idx_min[0] = minPt.y, idx_min[1] = minPt.x;
   2798             if( idx_max )
   2799                 idx_max[0] = maxPt.y, idx_max[1] = maxPt.x;
   2800         }
   2801         else if( idx_min || idx_max )
   2802         {
   2803             int imin = minPt.y*mat.cols + minPt.x;
   2804             int imax = maxPt.y*mat.cols + maxPt.x;
   2805 
   2806             for(int i = dims - 1; i >= 0; i-- )
   2807             {
   2808                 if( idx_min )
   2809                 {
   2810                     int t = imin / size[i];
   2811                     idx_min[i] = imin - t*size[i];
   2812                     imin = t;
   2813                 }
   2814 
   2815                 if( idx_max )
   2816                 {
   2817                     int t = imax / size[i];
   2818                     idx_max[i] = imax - t*size[i];
   2819                     imax = t;
   2820                 }
   2821             }
   2822         }
   2823     }
   2824     else
   2825     {
   2826         CvSparseMat* mat = (CvSparseMat*)hist->bins;
   2827         CvSparseMatIterator iterator;
   2828         CvSparseNode *node;
   2829         int minv = INT_MAX;
   2830         int maxv = INT_MIN;
   2831         CvSparseNode* minNode = 0;
   2832         CvSparseNode* maxNode = 0;
   2833         const int *_idx_min = 0, *_idx_max = 0;
   2834         Cv32suf m;
   2835 
   2836         for( node = cvInitSparseMatIterator( mat, &iterator );
   2837              node != 0; node = cvGetNextSparseNode( &iterator ))
   2838         {
   2839             int value = *(int*)CV_NODE_VAL(mat,node);
   2840             value = CV_TOGGLE_FLT(value);
   2841             if( value < minv )
   2842             {
   2843                 minv = value;
   2844                 minNode = node;
   2845             }
   2846 
   2847             if( value > maxv )
   2848             {
   2849                 maxv = value;
   2850                 maxNode = node;
   2851             }
   2852         }
   2853 
   2854         if( minNode )
   2855         {
   2856             _idx_min = CV_NODE_IDX(mat,minNode);
   2857             _idx_max = CV_NODE_IDX(mat,maxNode);
   2858             m.i = CV_TOGGLE_FLT(minv); minVal = m.f;
   2859             m.i = CV_TOGGLE_FLT(maxv); maxVal = m.f;
   2860         }
   2861         else
   2862         {
   2863             minVal = maxVal = 0;
   2864         }
   2865 
   2866         for(int i = 0; i < dims; i++ )
   2867         {
   2868             if( idx_min )
   2869                 idx_min[i] = _idx_min ? _idx_min[i] : -1;
   2870             if( idx_max )
   2871                 idx_max[i] = _idx_max ? _idx_max[i] : -1;
   2872         }
   2873     }
   2874 
   2875     if( value_min )
   2876         *value_min = (float)minVal;
   2877 
   2878     if( value_max )
   2879         *value_max = (float)maxVal;
   2880 }
   2881 
   2882 
   2883 // Compares two histograms using one of a few methods
   2884 CV_IMPL double
   2885 cvCompareHist( const CvHistogram* hist1,
   2886                const CvHistogram* hist2,
   2887                int method )
   2888 {
   2889     int i;
   2890     int size1[CV_MAX_DIM], size2[CV_MAX_DIM], total = 1;
   2891 
   2892     if( !CV_IS_HIST(hist1) || !CV_IS_HIST(hist2) )
   2893         CV_Error( CV_StsBadArg, "Invalid histogram header[s]" );
   2894 
   2895     if( CV_IS_SPARSE_MAT(hist1->bins) != CV_IS_SPARSE_MAT(hist2->bins))
   2896         CV_Error(CV_StsUnmatchedFormats, "One of histograms is sparse and other is not");
   2897 
   2898     if( !CV_IS_SPARSE_MAT(hist1->bins) )
   2899     {
   2900         cv::Mat H1 = cv::cvarrToMat(hist1->bins);
   2901         cv::Mat H2 = cv::cvarrToMat(hist2->bins);
   2902         return cv::compareHist(H1, H2, method);
   2903     }
   2904 
   2905     int dims1 = cvGetDims( hist1->bins, size1 );
   2906     int dims2 = cvGetDims( hist2->bins, size2 );
   2907 
   2908     if( dims1 != dims2 )
   2909         CV_Error( CV_StsUnmatchedSizes,
   2910                  "The histograms have different numbers of dimensions" );
   2911 
   2912     for( i = 0; i < dims1; i++ )
   2913     {
   2914         if( size1[i] != size2[i] )
   2915             CV_Error( CV_StsUnmatchedSizes, "The histograms have different sizes" );
   2916         total *= size1[i];
   2917     }
   2918 
   2919     double result = 0;
   2920     CvSparseMat* mat1 = (CvSparseMat*)(hist1->bins);
   2921     CvSparseMat* mat2 = (CvSparseMat*)(hist2->bins);
   2922     CvSparseMatIterator iterator;
   2923     CvSparseNode *node1, *node2;
   2924 
   2925     if( mat1->heap->active_count > mat2->heap->active_count && method != CV_COMP_CHISQR && method != CV_COMP_CHISQR_ALT && method != CV_COMP_KL_DIV )
   2926     {
   2927         CvSparseMat* t;
   2928         CV_SWAP( mat1, mat2, t );
   2929     }
   2930 
   2931     if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT) )
   2932     {
   2933         for( node1 = cvInitSparseMatIterator( mat1, &iterator );
   2934              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
   2935         {
   2936             double v1 = *(float*)CV_NODE_VAL(mat1,node1);
   2937             uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1), 0, 0, &node1->hashval );
   2938             double v2 = node2_data ? *(float*)node2_data : 0.f;
   2939             double a = v1 - v2;
   2940             double b = (method == CV_COMP_CHISQR) ? v1 : v1 + v2;
   2941             if( fabs(b) > DBL_EPSILON )
   2942                 result += a*a/b;
   2943         }
   2944     }
   2945     else if( method == CV_COMP_CORREL )
   2946     {
   2947         double s1 = 0, s11 = 0;
   2948         double s2 = 0, s22 = 0;
   2949         double s12 = 0;
   2950         double num, denom2, scale = 1./total;
   2951 
   2952         for( node1 = cvInitSparseMatIterator( mat1, &iterator );
   2953              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
   2954         {
   2955             double v1 = *(float*)CV_NODE_VAL(mat1,node1);
   2956             uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1),
   2957                                         0, 0, &node1->hashval );
   2958             if( node2_data )
   2959             {
   2960                 double v2 = *(float*)node2_data;
   2961                 s12 += v1*v2;
   2962             }
   2963             s1 += v1;
   2964             s11 += v1*v1;
   2965         }
   2966 
   2967         for( node2 = cvInitSparseMatIterator( mat2, &iterator );
   2968              node2 != 0; node2 = cvGetNextSparseNode( &iterator ))
   2969         {
   2970             double v2 = *(float*)CV_NODE_VAL(mat2,node2);
   2971             s2 += v2;
   2972             s22 += v2*v2;
   2973         }
   2974 
   2975         num = s12 - s1*s2*scale;
   2976         denom2 = (s11 - s1*s1*scale)*(s22 - s2*s2*scale);
   2977         result = fabs(denom2) > DBL_EPSILON ? num/sqrt(denom2) : 1;
   2978     }
   2979     else if( method == CV_COMP_INTERSECT )
   2980     {
   2981         for( node1 = cvInitSparseMatIterator( mat1, &iterator );
   2982              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
   2983         {
   2984             float v1 = *(float*)CV_NODE_VAL(mat1,node1);
   2985             uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1),
   2986                                          0, 0, &node1->hashval );
   2987             if( node2_data )
   2988             {
   2989                 float v2 = *(float*)node2_data;
   2990                 if( v1 <= v2 )
   2991                     result += v1;
   2992                 else
   2993                     result += v2;
   2994             }
   2995         }
   2996     }
   2997     else if( method == CV_COMP_BHATTACHARYYA )
   2998     {
   2999         double s1 = 0, s2 = 0;
   3000 
   3001         for( node1 = cvInitSparseMatIterator( mat1, &iterator );
   3002              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
   3003         {
   3004             double v1 = *(float*)CV_NODE_VAL(mat1,node1);
   3005             uchar* node2_data = cvPtrND( mat2, CV_NODE_IDX(mat1,node1),
   3006                                          0, 0, &node1->hashval );
   3007             s1 += v1;
   3008             if( node2_data )
   3009             {
   3010                 double v2 = *(float*)node2_data;
   3011                 result += sqrt(v1 * v2);
   3012             }
   3013         }
   3014 
   3015         for( node1 = cvInitSparseMatIterator( mat2, &iterator );
   3016              node1 != 0; node1 = cvGetNextSparseNode( &iterator ))
   3017         {
   3018             double v2 = *(float*)CV_NODE_VAL(mat2,node1);
   3019             s2 += v2;
   3020         }
   3021 
   3022         s1 *= s2;
   3023         s1 = fabs(s1) > FLT_EPSILON ? 1./sqrt(s1) : 1.;
   3024         result = 1. - result*s1;
   3025         result = sqrt(MAX(result,0.));
   3026     }
   3027     else if( method == CV_COMP_KL_DIV )
   3028     {
   3029         cv::SparseMat sH1, sH2;
   3030         ((const CvSparseMat*)hist1->bins)->copyToSparseMat(sH1);
   3031         ((const CvSparseMat*)hist2->bins)->copyToSparseMat(sH2);
   3032         result = cv::compareHist( sH1, sH2, CV_COMP_KL_DIV );
   3033     }
   3034     else
   3035         CV_Error( CV_StsBadArg, "Unknown comparison method" );
   3036 
   3037     if( method == CV_COMP_CHISQR_ALT )
   3038         result *= 2;
   3039 
   3040     return result;
   3041 }
   3042 
   3043 // copies one histogram to another
   3044 CV_IMPL void
   3045 cvCopyHist( const CvHistogram* src, CvHistogram** _dst )
   3046 {
   3047     if( !_dst )
   3048         CV_Error( CV_StsNullPtr, "Destination double pointer is NULL" );
   3049 
   3050     CvHistogram* dst = *_dst;
   3051 
   3052     if( !CV_IS_HIST(src) || (dst && !CV_IS_HIST(dst)) )
   3053         CV_Error( CV_StsBadArg, "Invalid histogram header[s]" );
   3054 
   3055     bool eq = false;
   3056     int size1[CV_MAX_DIM];
   3057     bool is_sparse = CV_IS_SPARSE_MAT(src->bins);
   3058     int dims1 = cvGetDims( src->bins, size1 );
   3059 
   3060     if( dst && (is_sparse == CV_IS_SPARSE_MAT(dst->bins)))
   3061     {
   3062         int size2[CV_MAX_DIM];
   3063         int dims2 = cvGetDims( dst->bins, size2 );
   3064 
   3065         if( dims1 == dims2 )
   3066         {
   3067             int i;
   3068 
   3069             for( i = 0; i < dims1; i++ )
   3070             {
   3071                 if( size1[i] != size2[i] )
   3072                     break;
   3073             }
   3074 
   3075             eq = (i == dims1);
   3076         }
   3077     }
   3078 
   3079     if( !eq )
   3080     {
   3081         cvReleaseHist( _dst );
   3082         dst = cvCreateHist( dims1, size1, !is_sparse ? CV_HIST_ARRAY : CV_HIST_SPARSE, 0, 0 );
   3083         *_dst = dst;
   3084     }
   3085 
   3086     if( CV_HIST_HAS_RANGES( src ))
   3087     {
   3088         float* ranges[CV_MAX_DIM];
   3089         float** thresh = 0;
   3090 
   3091         if( CV_IS_UNIFORM_HIST( src ))
   3092         {
   3093             for( int i = 0; i < dims1; i++ )
   3094                 ranges[i] = (float*)src->thresh[i];
   3095 
   3096             thresh = ranges;
   3097         }
   3098         else
   3099         {
   3100             thresh = src->thresh2;
   3101         }
   3102 
   3103         cvSetHistBinRanges( dst, thresh, CV_IS_UNIFORM_HIST(src));
   3104     }
   3105 
   3106     cvCopy( src->bins, dst->bins );
   3107 }
   3108 
   3109 
   3110 // Sets a value range for every histogram bin
   3111 CV_IMPL void
   3112 cvSetHistBinRanges( CvHistogram* hist, float** ranges, int uniform )
   3113 {
   3114     int dims, size[CV_MAX_DIM], total = 0;
   3115     int i, j;
   3116 
   3117     if( !ranges )
   3118         CV_Error( CV_StsNullPtr, "NULL ranges pointer" );
   3119 
   3120     if( !CV_IS_HIST(hist) )
   3121         CV_Error( CV_StsBadArg, "Invalid histogram header" );
   3122 
   3123     dims = cvGetDims( hist->bins, size );
   3124     for( i = 0; i < dims; i++ )
   3125         total += size[i]+1;
   3126 
   3127     if( uniform )
   3128     {
   3129         for( i = 0; i < dims; i++ )
   3130         {
   3131             if( !ranges[i] )
   3132                 CV_Error( CV_StsNullPtr, "One of <ranges> elements is NULL" );
   3133             hist->thresh[i][0] = ranges[i][0];
   3134             hist->thresh[i][1] = ranges[i][1];
   3135         }
   3136 
   3137         hist->type |= CV_HIST_UNIFORM_FLAG + CV_HIST_RANGES_FLAG;
   3138     }
   3139     else
   3140     {
   3141         float* dim_ranges;
   3142 
   3143         if( !hist->thresh2 )
   3144         {
   3145             hist->thresh2 = (float**)cvAlloc(
   3146                         dims*sizeof(hist->thresh2[0])+
   3147                         total*sizeof(hist->thresh2[0][0]));
   3148         }
   3149         dim_ranges = (float*)(hist->thresh2 + dims);
   3150 
   3151         for( i = 0; i < dims; i++ )
   3152         {
   3153             float val0 = -FLT_MAX;
   3154 
   3155             if( !ranges[i] )
   3156                 CV_Error( CV_StsNullPtr, "One of <ranges> elements is NULL" );
   3157 
   3158             for( j = 0; j <= size[i]; j++ )
   3159             {
   3160                 float val = ranges[i][j];
   3161                 if( val <= val0 )
   3162                     CV_Error(CV_StsOutOfRange, "Bin ranges should go in ascenting order");
   3163                 val0 = dim_ranges[j] = val;
   3164             }
   3165 
   3166             hist->thresh2[i] = dim_ranges;
   3167             dim_ranges += size[i] + 1;
   3168         }
   3169 
   3170         hist->type |= CV_HIST_RANGES_FLAG;
   3171         hist->type &= ~CV_HIST_UNIFORM_FLAG;
   3172     }
   3173 }
   3174 
   3175 
   3176 CV_IMPL void
   3177 cvCalcArrHist( CvArr** img, CvHistogram* hist, int accumulate, const CvArr* mask )
   3178 {
   3179     if( !CV_IS_HIST(hist))
   3180         CV_Error( CV_StsBadArg, "Bad histogram pointer" );
   3181 
   3182     if( !img )
   3183         CV_Error( CV_StsNullPtr, "Null double array pointer" );
   3184 
   3185     int size[CV_MAX_DIM];
   3186     int i, dims = cvGetDims( hist->bins, size);
   3187     bool uniform = CV_IS_UNIFORM_HIST(hist);
   3188 
   3189     std::vector<cv::Mat> images(dims);
   3190     for( i = 0; i < dims; i++ )
   3191         images[i] = cv::cvarrToMat(img[i]);
   3192 
   3193     cv::Mat _mask;
   3194     if( mask )
   3195         _mask = cv::cvarrToMat(mask);
   3196 
   3197     const float* uranges[CV_MAX_DIM] = {0};
   3198     const float** ranges = 0;
   3199 
   3200     if( hist->type & CV_HIST_RANGES_FLAG )
   3201     {
   3202         ranges = (const float**)hist->thresh2;
   3203         if( uniform )
   3204         {
   3205             for( i = 0; i < dims; i++ )
   3206                 uranges[i] = &hist->thresh[i][0];
   3207             ranges = uranges;
   3208         }
   3209     }
   3210 
   3211     if( !CV_IS_SPARSE_HIST(hist) )
   3212     {
   3213         cv::Mat H = cv::cvarrToMat(hist->bins);
   3214         cv::calcHist( &images[0], (int)images.size(), 0, _mask,
   3215                       H, cvGetDims(hist->bins), H.size, ranges, uniform, accumulate != 0 );
   3216     }
   3217     else
   3218     {
   3219         CvSparseMat* sparsemat = (CvSparseMat*)hist->bins;
   3220 
   3221         if( !accumulate )
   3222             cvZero( hist->bins );
   3223         cv::SparseMat sH;
   3224         sparsemat->copyToSparseMat(sH);
   3225         cv::calcHist( &images[0], (int)images.size(), 0, _mask, sH, sH.dims(),
   3226                       sH.dims() > 0 ? sH.hdr->size : 0, ranges, uniform, accumulate != 0, true );
   3227 
   3228         if( accumulate )
   3229             cvZero( sparsemat );
   3230 
   3231         cv::SparseMatConstIterator it = sH.begin();
   3232         int nz = (int)sH.nzcount();
   3233         for( i = 0; i < nz; i++, ++it )
   3234             *(float*)cvPtrND(sparsemat, it.node()->idx, 0, -2) = (float)*(const int*)it.ptr;
   3235     }
   3236 }
   3237 
   3238 
   3239 CV_IMPL void
   3240 cvCalcArrBackProject( CvArr** img, CvArr* dst, const CvHistogram* hist )
   3241 {
   3242     if( !CV_IS_HIST(hist))
   3243         CV_Error( CV_StsBadArg, "Bad histogram pointer" );
   3244 
   3245     if( !img )
   3246         CV_Error( CV_StsNullPtr, "Null double array pointer" );
   3247 
   3248     int size[CV_MAX_DIM];
   3249     int i, dims = cvGetDims( hist->bins, size );
   3250 
   3251     bool uniform = CV_IS_UNIFORM_HIST(hist);
   3252     const float* uranges[CV_MAX_DIM] = {0};
   3253     const float** ranges = 0;
   3254 
   3255     if( hist->type & CV_HIST_RANGES_FLAG )
   3256     {
   3257         ranges = (const float**)hist->thresh2;
   3258         if( uniform )
   3259         {
   3260             for( i = 0; i < dims; i++ )
   3261                 uranges[i] = &hist->thresh[i][0];
   3262             ranges = uranges;
   3263         }
   3264     }
   3265 
   3266     std::vector<cv::Mat> images(dims);
   3267     for( i = 0; i < dims; i++ )
   3268         images[i] = cv::cvarrToMat(img[i]);
   3269 
   3270     cv::Mat _dst = cv::cvarrToMat(dst);
   3271 
   3272     CV_Assert( _dst.size() == images[0].size() && _dst.depth() == images[0].depth() );
   3273 
   3274     if( !CV_IS_SPARSE_HIST(hist) )
   3275     {
   3276         cv::Mat H = cv::cvarrToMat(hist->bins);
   3277         cv::calcBackProject( &images[0], (int)images.size(),
   3278                             0, H, _dst, ranges, 1, uniform );
   3279     }
   3280     else
   3281     {
   3282         cv::SparseMat sH;
   3283         ((const CvSparseMat*)hist->bins)->copyToSparseMat(sH);
   3284         cv::calcBackProject( &images[0], (int)images.size(),
   3285                              0, sH, _dst, ranges, 1, uniform );
   3286     }
   3287 }
   3288 
   3289 
   3290 ////////////////////// B A C K   P R O J E C T   P A T C H /////////////////////////
   3291 
   3292 CV_IMPL void
   3293 cvCalcArrBackProjectPatch( CvArr** arr, CvArr* dst, CvSize patch_size, CvHistogram* hist,
   3294                            int method, double norm_factor )
   3295 {
   3296     CvHistogram* model = 0;
   3297 
   3298     IplImage imgstub[CV_MAX_DIM], *img[CV_MAX_DIM];
   3299     IplROI roi;
   3300     CvMat dststub, *dstmat;
   3301     int i, dims;
   3302     int x, y;
   3303     CvSize size;
   3304 
   3305     if( !CV_IS_HIST(hist))
   3306         CV_Error( CV_StsBadArg, "Bad histogram pointer" );
   3307 
   3308     if( !arr )
   3309         CV_Error( CV_StsNullPtr, "Null double array pointer" );
   3310 
   3311     if( norm_factor <= 0 )
   3312         CV_Error( CV_StsOutOfRange,
   3313                   "Bad normalization factor (set it to 1.0 if unsure)" );
   3314 
   3315     if( patch_size.width <= 0 || patch_size.height <= 0 )
   3316         CV_Error( CV_StsBadSize, "The patch width and height must be positive" );
   3317 
   3318     dims = cvGetDims( hist->bins );
   3319     cvNormalizeHist( hist, norm_factor );
   3320 
   3321     for( i = 0; i < dims; i++ )
   3322     {
   3323         CvMat stub, *mat;
   3324         mat = cvGetMat( arr[i], &stub, 0, 0 );
   3325         img[i] = cvGetImage( mat, &imgstub[i] );
   3326         img[i]->roi = &roi;
   3327     }
   3328 
   3329     dstmat = cvGetMat( dst, &dststub, 0, 0 );
   3330     if( CV_MAT_TYPE( dstmat->type ) != CV_32FC1 )
   3331         CV_Error( CV_StsUnsupportedFormat, "Resultant image must have 32fC1 type" );
   3332 
   3333     if( dstmat->cols != img[0]->width - patch_size.width + 1 ||
   3334         dstmat->rows != img[0]->height - patch_size.height + 1 )
   3335         CV_Error( CV_StsUnmatchedSizes,
   3336             "The output map must be (W-w+1 x H-h+1), "
   3337             "where the input images are (W x H) each and the patch is (w x h)" );
   3338 
   3339     cvCopyHist( hist, &model );
   3340 
   3341     size = cvGetMatSize(dstmat);
   3342     roi.coi = 0;
   3343     roi.width = patch_size.width;
   3344     roi.height = patch_size.height;
   3345 
   3346     for( y = 0; y < size.height; y++ )
   3347     {
   3348         for( x = 0; x < size.width; x++ )
   3349         {
   3350             double result;
   3351             roi.xOffset = x;
   3352             roi.yOffset = y;
   3353 
   3354             cvCalcHist( img, model );
   3355             cvNormalizeHist( model, norm_factor );
   3356             result = cvCompareHist( model, hist, method );
   3357             CV_MAT_ELEM( *dstmat, float, y, x ) = (float)result;
   3358         }
   3359     }
   3360 
   3361     cvReleaseHist( &model );
   3362 }
   3363 
   3364 
   3365 // Calculates Bayes probabilistic histograms
   3366 CV_IMPL void
   3367 cvCalcBayesianProb( CvHistogram** src, int count, CvHistogram** dst )
   3368 {
   3369     int i;
   3370 
   3371     if( !src || !dst )
   3372         CV_Error( CV_StsNullPtr, "NULL histogram array pointer" );
   3373 
   3374     if( count < 2 )
   3375         CV_Error( CV_StsOutOfRange, "Too small number of histograms" );
   3376 
   3377     for( i = 0; i < count; i++ )
   3378     {
   3379         if( !CV_IS_HIST(src[i]) || !CV_IS_HIST(dst[i]) )
   3380             CV_Error( CV_StsBadArg, "Invalid histogram header" );
   3381 
   3382         if( !CV_IS_MATND(src[i]->bins) || !CV_IS_MATND(dst[i]->bins) )
   3383             CV_Error( CV_StsBadArg, "The function supports dense histograms only" );
   3384     }
   3385 
   3386     cvZero( dst[0]->bins );
   3387     // dst[0] = src[0] + ... + src[count-1]
   3388     for( i = 0; i < count; i++ )
   3389         cvAdd( src[i]->bins, dst[0]->bins, dst[0]->bins );
   3390 
   3391     cvDiv( 0, dst[0]->bins, dst[0]->bins );
   3392 
   3393     // dst[i] = src[i]*(1/dst[0])
   3394     for( i = count - 1; i >= 0; i-- )
   3395         cvMul( src[i]->bins, dst[0]->bins, dst[i]->bins );
   3396 }
   3397 
   3398 
   3399 CV_IMPL void
   3400 cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask,
   3401                    CvHistogram* hist_dens, double scale )
   3402 {
   3403     if( scale <= 0 )
   3404         CV_Error( CV_StsOutOfRange, "scale must be positive" );
   3405 
   3406     if( !CV_IS_HIST(hist) || !CV_IS_HIST(hist_mask) || !CV_IS_HIST(hist_dens) )
   3407         CV_Error( CV_StsBadArg, "Invalid histogram pointer[s]" );
   3408 
   3409     {
   3410         CvArr* arrs[] = { hist->bins, hist_mask->bins, hist_dens->bins };
   3411         CvMatND stubs[3];
   3412         CvNArrayIterator iterator;
   3413 
   3414         cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator );
   3415 
   3416         if( CV_MAT_TYPE(iterator.hdr[0]->type) != CV_32FC1 )
   3417             CV_Error( CV_StsUnsupportedFormat, "All histograms must have 32fC1 type" );
   3418 
   3419         do
   3420         {
   3421             const float* srcdata = (const float*)(iterator.ptr[0]);
   3422             const float* maskdata = (const float*)(iterator.ptr[1]);
   3423             float* dstdata = (float*)(iterator.ptr[2]);
   3424             int i;
   3425 
   3426             for( i = 0; i < iterator.size.width; i++ )
   3427             {
   3428                 float s = srcdata[i];
   3429                 float m = maskdata[i];
   3430                 if( s > FLT_EPSILON )
   3431                     if( m <= s )
   3432                         dstdata[i] = (float)(m*scale/s);
   3433                     else
   3434                         dstdata[i] = (float)scale;
   3435                 else
   3436                     dstdata[i] = (float)0;
   3437             }
   3438         }
   3439         while( cvNextNArraySlice( &iterator ));
   3440     }
   3441 }
   3442 
   3443 class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
   3444 {
   3445 public:
   3446     enum {HIST_SZ = 256};
   3447 
   3448     EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
   3449         : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
   3450     { }
   3451 
   3452     void operator()( const cv::Range& rowRange ) const
   3453     {
   3454         int localHistogram[HIST_SZ] = {0, };
   3455 
   3456         const size_t sstep = src_.step;
   3457 
   3458         int width = src_.cols;
   3459         int height = rowRange.end - rowRange.start;
   3460 
   3461         if (src_.isContinuous())
   3462         {
   3463             width *= height;
   3464             height = 1;
   3465         }
   3466 
   3467         for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
   3468         {
   3469             int x = 0;
   3470             for (; x <= width - 4; x += 4)
   3471             {
   3472                 int t0 = ptr[x], t1 = ptr[x+1];
   3473                 localHistogram[t0]++; localHistogram[t1]++;
   3474                 t0 = ptr[x+2]; t1 = ptr[x+3];
   3475                 localHistogram[t0]++; localHistogram[t1]++;
   3476             }
   3477 
   3478             for (; x < width; ++x)
   3479                 localHistogram[ptr[x]]++;
   3480         }
   3481 
   3482         cv::AutoLock lock(*histogramLock_);
   3483 
   3484         for( int i = 0; i < HIST_SZ; i++ )
   3485             globalHistogram_[i] += localHistogram[i];
   3486     }
   3487 
   3488     static bool isWorthParallel( const cv::Mat& src )
   3489     {
   3490         return ( src.total() >= 640*480 );
   3491     }
   3492 
   3493 private:
   3494     EqualizeHistCalcHist_Invoker& operator=(const EqualizeHistCalcHist_Invoker&);
   3495 
   3496     cv::Mat& src_;
   3497     int* globalHistogram_;
   3498     cv::Mutex* histogramLock_;
   3499 };
   3500 
   3501 class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
   3502 {
   3503 public:
   3504     EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
   3505         : src_(src),
   3506           dst_(dst),
   3507           lut_(lut)
   3508     { }
   3509 
   3510     void operator()( const cv::Range& rowRange ) const
   3511     {
   3512         const size_t sstep = src_.step;
   3513         const size_t dstep = dst_.step;
   3514 
   3515         int width = src_.cols;
   3516         int height = rowRange.end - rowRange.start;
   3517         int* lut = lut_;
   3518 
   3519         if (src_.isContinuous() && dst_.isContinuous())
   3520         {
   3521             width *= height;
   3522             height = 1;
   3523         }
   3524 
   3525         const uchar* sptr = src_.ptr<uchar>(rowRange.start);
   3526         uchar* dptr = dst_.ptr<uchar>(rowRange.start);
   3527 
   3528         for (; height--; sptr += sstep, dptr += dstep)
   3529         {
   3530             int x = 0;
   3531             for (; x <= width - 4; x += 4)
   3532             {
   3533                 int v0 = sptr[x];
   3534                 int v1 = sptr[x+1];
   3535                 int x0 = lut[v0];
   3536                 int x1 = lut[v1];
   3537                 dptr[x] = (uchar)x0;
   3538                 dptr[x+1] = (uchar)x1;
   3539 
   3540                 v0 = sptr[x+2];
   3541                 v1 = sptr[x+3];
   3542                 x0 = lut[v0];
   3543                 x1 = lut[v1];
   3544                 dptr[x+2] = (uchar)x0;
   3545                 dptr[x+3] = (uchar)x1;
   3546             }
   3547 
   3548             for (; x < width; ++x)
   3549                 dptr[x] = (uchar)lut[sptr[x]];
   3550         }
   3551     }
   3552 
   3553     static bool isWorthParallel( const cv::Mat& src )
   3554     {
   3555         return ( src.total() >= 640*480 );
   3556     }
   3557 
   3558 private:
   3559     EqualizeHistLut_Invoker& operator=(const EqualizeHistLut_Invoker&);
   3560 
   3561     cv::Mat& src_;
   3562     cv::Mat& dst_;
   3563     int* lut_;
   3564 };
   3565 
   3566 CV_IMPL void cvEqualizeHist( const CvArr* srcarr, CvArr* dstarr )
   3567 {
   3568     cv::equalizeHist(cv::cvarrToMat(srcarr), cv::cvarrToMat(dstarr));
   3569 }
   3570 
   3571 #ifdef HAVE_OPENCL
   3572 
   3573 namespace cv {
   3574 
   3575 static bool ocl_equalizeHist(InputArray _src, OutputArray _dst)
   3576 {
   3577     const ocl::Device & dev = ocl::Device::getDefault();
   3578     int compunits = dev.maxComputeUnits();
   3579     size_t wgs = dev.maxWorkGroupSize();
   3580     Size size = _src.size();
   3581     bool use16 = size.width % 16 == 0 && _src.offset() % 16 == 0 && _src.step() % 16 == 0;
   3582     int kercn = dev.isAMD() && use16 ? 16 : std::min(4, ocl::predictOptimalVectorWidth(_src));
   3583 
   3584     ocl::Kernel k1("calculate_histogram", ocl::imgproc::histogram_oclsrc,
   3585                    format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D kercn=%d -D T=%s%s",
   3586                           BINS, compunits, wgs, kercn,
   3587                           kercn == 4 ? "int" : ocl::typeToStr(CV_8UC(kercn)),
   3588                           _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
   3589     if (k1.empty())
   3590         return false;
   3591 
   3592     UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1);
   3593 
   3594     k1.args(ocl::KernelArg::ReadOnly(src),
   3595             ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total());
   3596 
   3597     size_t globalsize = compunits * wgs;
   3598     if (!k1.run(1, &globalsize, &wgs, false))
   3599         return false;
   3600 
   3601     wgs = std::min<size_t>(ocl::Device::getDefault().maxWorkGroupSize(), BINS);
   3602     UMat lut(1, 256, CV_8UC1);
   3603     ocl::Kernel k2("calcLUT", ocl::imgproc::histogram_oclsrc,
   3604                   format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d",
   3605                          BINS, compunits, (int)wgs));
   3606     k2.args(ocl::KernelArg::PtrWriteOnly(lut),
   3607            ocl::KernelArg::PtrReadOnly(ghist), (int)_src.total());
   3608 
   3609     // calculation of LUT
   3610     if (!k2.run(1, &wgs, &wgs, false))
   3611         return false;
   3612 
   3613     // execute LUT transparently
   3614     LUT(_src, lut, _dst);
   3615     return true;
   3616 }
   3617 
   3618 }
   3619 
   3620 #endif
   3621 
   3622 void cv::equalizeHist( InputArray _src, OutputArray _dst )
   3623 {
   3624     CV_Assert( _src.type() == CV_8UC1 );
   3625 
   3626     if (_src.empty())
   3627         return;
   3628 
   3629     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
   3630                ocl_equalizeHist(_src, _dst))
   3631 
   3632     Mat src = _src.getMat();
   3633     _dst.create( src.size(), src.type() );
   3634     Mat dst = _dst.getMat();
   3635 
   3636     Mutex histogramLockInstance;
   3637 
   3638     const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
   3639     int hist[hist_sz] = {0,};
   3640     int lut[hist_sz];
   3641 
   3642     EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
   3643     EqualizeHistLut_Invoker      lutBody(src, dst, lut);
   3644     cv::Range heightRange(0, src.rows);
   3645 
   3646     if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
   3647         parallel_for_(heightRange, calcBody);
   3648     else
   3649         calcBody(heightRange);
   3650 
   3651     int i = 0;
   3652     while (!hist[i]) ++i;
   3653 
   3654     int total = (int)src.total();
   3655     if (hist[i] == total)
   3656     {
   3657         dst.setTo(i);
   3658         return;
   3659     }
   3660 
   3661     float scale = (hist_sz - 1.f)/(total - hist[i]);
   3662     int sum = 0;
   3663 
   3664     for (lut[i++] = 0; i < hist_sz; ++i)
   3665     {
   3666         sum += hist[i];
   3667         lut[i] = saturate_cast<uchar>(sum * scale);
   3668     }
   3669 
   3670     if(EqualizeHistLut_Invoker::isWorthParallel(src))
   3671         parallel_for_(heightRange, lutBody);
   3672     else
   3673         lutBody(heightRange);
   3674 }
   3675 
   3676 // ----------------------------------------------------------------------
   3677 
   3678 /* Implementation of RTTI and Generic Functions for CvHistogram */
   3679 #define CV_TYPE_NAME_HIST "opencv-hist"
   3680 
   3681 static int icvIsHist( const void * ptr )
   3682 {
   3683     return CV_IS_HIST( ((CvHistogram*)ptr) );
   3684 }
   3685 
   3686 static CvHistogram * icvCloneHist( const CvHistogram * src )
   3687 {
   3688     CvHistogram * dst=NULL;
   3689     cvCopyHist(src, &dst);
   3690     return dst;
   3691 }
   3692 
   3693 static void *icvReadHist( CvFileStorage * fs, CvFileNode * node )
   3694 {
   3695     CvHistogram * h = 0;
   3696     int type = 0;
   3697     int is_uniform = 0;
   3698     int have_ranges = 0;
   3699 
   3700     h = (CvHistogram *)cvAlloc( sizeof(CvHistogram) );
   3701 
   3702     type = cvReadIntByName( fs, node, "type", 0 );
   3703     is_uniform = cvReadIntByName( fs, node, "is_uniform", 0 );
   3704     have_ranges = cvReadIntByName( fs, node, "have_ranges", 0 );
   3705     h->type = CV_HIST_MAGIC_VAL | type |
   3706         (is_uniform ? CV_HIST_UNIFORM_FLAG : 0) |
   3707         (have_ranges ? CV_HIST_RANGES_FLAG : 0);
   3708 
   3709     if(type == CV_HIST_ARRAY)
   3710     {
   3711         // read histogram bins
   3712         CvMatND* mat = (CvMatND*)cvReadByName( fs, node, "mat" );
   3713         int i, sizes[CV_MAX_DIM];
   3714 
   3715         if(!CV_IS_MATND(mat))
   3716             CV_Error( CV_StsError, "Expected CvMatND");
   3717 
   3718         for(i=0; i<mat->dims; i++)
   3719             sizes[i] = mat->dim[i].size;
   3720 
   3721         cvInitMatNDHeader( &(h->mat), mat->dims, sizes, mat->type, mat->data.ptr );
   3722         h->bins = &(h->mat);
   3723 
   3724         // take ownership of refcount pointer as well
   3725         h->mat.refcount = mat->refcount;
   3726 
   3727         // increase refcount so freeing temp header doesn't free data
   3728         cvIncRefData( mat );
   3729 
   3730         // free temporary header
   3731         cvReleaseMatND( &mat );
   3732     }
   3733     else
   3734     {
   3735         h->bins = cvReadByName( fs, node, "bins" );
   3736         if(!CV_IS_SPARSE_MAT(h->bins)){
   3737             CV_Error( CV_StsError, "Unknown Histogram type");
   3738         }
   3739     }
   3740 
   3741     // read thresholds
   3742     if(have_ranges)
   3743     {
   3744         int i, dims, size[CV_MAX_DIM], total = 0;
   3745         CvSeqReader reader;
   3746         CvFileNode * thresh_node;
   3747 
   3748         dims = cvGetDims( h->bins, size );
   3749         for( i = 0; i < dims; i++ )
   3750             total += size[i]+1;
   3751 
   3752         thresh_node = cvGetFileNodeByName( fs, node, "thresh" );
   3753         if(!thresh_node)
   3754             CV_Error( CV_StsError, "'thresh' node is missing");
   3755         cvStartReadRawData( fs, thresh_node, &reader );
   3756 
   3757         if(is_uniform)
   3758         {
   3759             for(i=0; i<dims; i++)
   3760                 cvReadRawDataSlice( fs, &reader, 2, h->thresh[i], "f" );
   3761             h->thresh2 = NULL;
   3762         }
   3763         else
   3764         {
   3765             float* dim_ranges;
   3766             h->thresh2 = (float**)cvAlloc(
   3767                 dims*sizeof(h->thresh2[0])+
   3768                 total*sizeof(h->thresh2[0][0]));
   3769             dim_ranges = (float*)(h->thresh2 + dims);
   3770             for(i=0; i < dims; i++)
   3771             {
   3772                 h->thresh2[i] = dim_ranges;
   3773                 cvReadRawDataSlice( fs, &reader, size[i]+1, dim_ranges, "f" );
   3774                 dim_ranges += size[i] + 1;
   3775             }
   3776         }
   3777     }
   3778 
   3779     return h;
   3780 }
   3781 
   3782 static void icvWriteHist( CvFileStorage* fs, const char* name,
   3783                           const void* struct_ptr, CvAttrList /*attributes*/ )
   3784 {
   3785     const CvHistogram * hist = (const CvHistogram *) struct_ptr;
   3786     int sizes[CV_MAX_DIM];
   3787     int dims;
   3788     int i;
   3789     int is_uniform, have_ranges;
   3790 
   3791     cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HIST );
   3792 
   3793     is_uniform = (CV_IS_UNIFORM_HIST(hist) ? 1 : 0);
   3794     have_ranges = (hist->type & CV_HIST_RANGES_FLAG ? 1 : 0);
   3795 
   3796     cvWriteInt( fs, "type", (hist->type & 1) );
   3797     cvWriteInt( fs, "is_uniform", is_uniform );
   3798     cvWriteInt( fs, "have_ranges", have_ranges );
   3799     if(!CV_IS_SPARSE_HIST(hist))
   3800         cvWrite( fs, "mat", &(hist->mat) );
   3801     else
   3802         cvWrite( fs, "bins", hist->bins );
   3803 
   3804     // write thresholds
   3805     if(have_ranges){
   3806         dims = cvGetDims( hist->bins, sizes );
   3807         cvStartWriteStruct( fs, "thresh", CV_NODE_SEQ + CV_NODE_FLOW );
   3808         if(is_uniform){
   3809             for(i=0; i<dims; i++){
   3810                 cvWriteRawData( fs, hist->thresh[i], 2, "f" );
   3811             }
   3812         }
   3813         else{
   3814             for(i=0; i<dims; i++){
   3815                 cvWriteRawData( fs, hist->thresh2[i], sizes[i]+1, "f" );
   3816             }
   3817         }
   3818         cvEndWriteStruct( fs );
   3819     }
   3820 
   3821     cvEndWriteStruct( fs );
   3822 }
   3823 
   3824 
   3825 CvType hist_type( CV_TYPE_NAME_HIST, icvIsHist, (CvReleaseFunc)cvReleaseHist,
   3826                   icvReadHist, icvWriteHist, (CvCloneFunc)icvCloneHist );
   3827 
   3828 /* End of file. */
   3829