Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2014, Itseez Inc., all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of the copyright holders may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "precomp.hpp"
     43 #include "opencl_kernels_core.hpp"
     44 
     45 ///////////////////////////////// UMat implementation ///////////////////////////////
     46 
     47 namespace cv {
     48 
// Fixed pool of mutexes shared by all UMatData objects: UMatData::lock() and
// UMatData::unlock() pick an entry by taking the object's address modulo
// UMAT_NLOCKS. The pool size should be a prime number for the best hash function.
enum { UMAT_NLOCKS = 31 };
static Mutex umatLocks[UMAT_NLOCKS];
     52 
     53 UMatData::UMatData(const MatAllocator* allocator)
     54 {
     55     prevAllocator = currAllocator = allocator;
     56     urefcount = refcount = 0;
     57     data = origdata = 0;
     58     size = 0;
     59     flags = 0;
     60     handle = 0;
     61     userdata = 0;
     62     allocatorFlags_ = 0;
     63 }
     64 
     65 UMatData::~UMatData()
     66 {
     67     prevAllocator = currAllocator = 0;
     68     urefcount = refcount = 0;
     69     data = origdata = 0;
     70     size = 0;
     71     flags = 0;
     72     handle = 0;
     73     userdata = 0;
     74     allocatorFlags_ = 0;
     75 }
     76 
     77 void UMatData::lock()
     78 {
     79     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
     80 }
     81 
     82 void UMatData::unlock()
     83 {
     84     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
     85 }
     86 
     87 
     88 MatAllocator* UMat::getStdAllocator()
     89 {
     90 #ifdef HAVE_OPENCL
     91     if( ocl::haveOpenCL() && ocl::useOpenCL() )
     92         return ocl::getOpenCLAllocator();
     93 #endif
     94     return Mat::getStdAllocator();
     95 }
     96 
     97 void swap( UMat& a, UMat& b )
     98 {
     99     std::swap(a.flags, b.flags);
    100     std::swap(a.dims, b.dims);
    101     std::swap(a.rows, b.rows);
    102     std::swap(a.cols, b.cols);
    103     std::swap(a.allocator, b.allocator);
    104     std::swap(a.u, b.u);
    105     std::swap(a.offset, b.offset);
    106 
    107     std::swap(a.size.p, b.size.p);
    108     std::swap(a.step.p, b.step.p);
    109     std::swap(a.step.buf[0], b.step.buf[0]);
    110     std::swap(a.step.buf[1], b.step.buf[1]);
    111 
    112     if( a.step.p == b.step.buf )
    113     {
    114         a.step.p = a.step.buf;
    115         a.size.p = &a.rows;
    116     }
    117 
    118     if( b.step.p == a.step.buf )
    119     {
    120         b.step.p = b.step.buf;
    121         b.size.p = &b.rows;
    122     }
    123 }
    124 
    125 
    126 static inline void setSize( UMat& m, int _dims, const int* _sz,
    127                             const size_t* _steps, bool autoSteps=false )
    128 {
    129     CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
    130     if( m.dims != _dims )
    131     {
    132         if( m.step.p != m.step.buf )
    133         {
    134             fastFree(m.step.p);
    135             m.step.p = m.step.buf;
    136             m.size.p = &m.rows;
    137         }
    138         if( _dims > 2 )
    139         {
    140             m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
    141             m.size.p = (int*)(m.step.p + _dims) + 1;
    142             m.size.p[-1] = _dims;
    143             m.rows = m.cols = -1;
    144         }
    145     }
    146 
    147     m.dims = _dims;
    148     if( !_sz )
    149         return;
    150 
    151     size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
    152     int i;
    153     for( i = _dims-1; i >= 0; i-- )
    154     {
    155         int s = _sz[i];
    156         CV_Assert( s >= 0 );
    157         m.size.p[i] = s;
    158 
    159         if( _steps )
    160             m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
    161         else if( autoSteps )
    162         {
    163             m.step.p[i] = total;
    164             int64 total1 = (int64)total*s;
    165             if( (uint64)total1 != (size_t)total1 )
    166                 CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
    167             total = (size_t)total1;
    168         }
    169     }
    170 
    171     if( _dims == 1 )
    172     {
    173         m.dims = 2;
    174         m.cols = 1;
    175         m.step[1] = esz;
    176     }
    177 }
    178 
    179 static void updateContinuityFlag(UMat& m)
    180 {
    181     int i, j;
    182     for( i = 0; i < m.dims; i++ )
    183     {
    184         if( m.size[i] > 1 )
    185             break;
    186     }
    187 
    188     for( j = m.dims-1; j > i; j-- )
    189     {
    190         if( m.step[j]*m.size[j] < m.step[j-1] )
    191             break;
    192     }
    193 
    194     uint64 total = (uint64)m.step[0]*m.size[0];
    195     if( j <= i && total == (size_t)total )
    196         m.flags |= UMat::CONTINUOUS_FLAG;
    197     else
    198         m.flags &= ~UMat::CONTINUOUS_FLAG;
    199 }
    200 
    201 
    202 static void finalizeHdr(UMat& m)
    203 {
    204     updateContinuityFlag(m);
    205     int d = m.dims;
    206     if( d > 2 )
    207         m.rows = m.cols = -1;
    208 }
    209 
    210 UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
    211 {
    212     UMat hdr;
    213     if(!data)
    214         return hdr;
    215     UMatData* temp_u = u;
    216     if(!temp_u)
    217     {
    218         MatAllocator *a = allocator, *a0 = getStdAllocator();
    219         if(!a)
    220             a = a0;
    221         temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
    222         temp_u->refcount = 1;
    223     }
    224     UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
    225     hdr.flags = flags;
    226     setSize(hdr, dims, size.p, step.p);
    227     finalizeHdr(hdr);
    228     hdr.u = temp_u;
    229     hdr.offset = data - datastart;
    230     hdr.addref();
    231     return hdr;
    232 }
    233 
    234 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
    235 {
    236     this->usageFlags = _usageFlags;
    237 
    238     int i;
    239     CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
    240     _type = CV_MAT_TYPE(_type);
    241 
    242     if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
    243     {
    244         if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
    245             return;
    246         for( i = 0; i < d; i++ )
    247             if( size[i] != _sizes[i] )
    248                 break;
    249         if( i == d && (d > 1 || size[1] == 1))
    250             return;
    251     }
    252 
    253     release();
    254     if( d == 0 )
    255         return;
    256     flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
    257     setSize(*this, d, _sizes, 0, true);
    258     offset = 0;
    259 
    260     if( total() > 0 )
    261     {
    262         MatAllocator *a = allocator, *a0 = getStdAllocator();
    263         if(!a)
    264             a = a0;
    265         try
    266         {
    267             u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
    268             CV_Assert(u != 0);
    269         }
    270         catch(...)
    271         {
    272             if(a != a0)
    273                 u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
    274             CV_Assert(u != 0);
    275         }
    276         CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
    277     }
    278 
    279     finalizeHdr(*this);
    280     addref();
    281 }
    282 
    283 void UMat::copySize(const UMat& m)
    284 {
    285     setSize(*this, m.dims, 0, 0);
    286     for( int i = 0; i < dims; i++ )
    287     {
    288         size[i] = m.size[i];
    289         step[i] = m.step[i];
    290     }
    291 }
    292 
    293 
    294 UMat::~UMat()
    295 {
    296     release();
    297     if( step.p != step.buf )
    298         fastFree(step.p);
    299 }
    300 
    301 void UMat::deallocate()
    302 {
    303     u->currAllocator->deallocate(u);
    304     u = NULL;
    305 }
    306 
    307 
    308 UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
    309     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
    310 {
    311     CV_Assert( m.dims >= 2 );
    312     if( m.dims > 2 )
    313     {
    314         AutoBuffer<Range> rs(m.dims);
    315         rs[0] = _rowRange;
    316         rs[1] = _colRange;
    317         for( int i = 2; i < m.dims; i++ )
    318             rs[i] = Range::all();
    319         *this = m(rs);
    320         return;
    321     }
    322 
    323     *this = m;
    324     if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
    325     {
    326         CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
    327         rows = _rowRange.size();
    328         offset += step*_rowRange.start;
    329         flags |= SUBMATRIX_FLAG;
    330     }
    331 
    332     if( _colRange != Range::all() && _colRange != Range(0,cols) )
    333     {
    334         CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
    335         cols = _colRange.size();
    336         offset += _colRange.start*elemSize();
    337         flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
    338         flags |= SUBMATRIX_FLAG;
    339     }
    340 
    341     if( rows == 1 )
    342         flags |= CONTINUOUS_FLAG;
    343 
    344     if( rows <= 0 || cols <= 0 )
    345     {
    346         release();
    347         rows = cols = 0;
    348     }
    349 }
    350 
    351 
    352 UMat::UMat(const UMat& m, const Rect& roi)
    353     : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
    354     allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
    355 {
    356     CV_Assert( m.dims <= 2 );
    357     flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
    358     flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;
    359 
    360     size_t esz = CV_ELEM_SIZE(flags);
    361     offset += roi.x*esz;
    362     CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
    363               0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
    364     if( u )
    365         CV_XADD(&(u->urefcount), 1);
    366     if( roi.width < m.cols || roi.height < m.rows )
    367         flags |= SUBMATRIX_FLAG;
    368 
    369     step[0] = m.step[0]; step[1] = esz;
    370 
    371     if( rows <= 0 || cols <= 0 )
    372     {
    373         release();
    374         rows = cols = 0;
    375     }
    376 }
    377 
    378 
    379 UMat::UMat(const UMat& m, const Range* ranges)
    380     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
    381 {
    382     int i, d = m.dims;
    383 
    384     CV_Assert(ranges);
    385     for( i = 0; i < d; i++ )
    386     {
    387         Range r = ranges[i];
    388         CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
    389     }
    390     *this = m;
    391     for( i = 0; i < d; i++ )
    392     {
    393         Range r = ranges[i];
    394         if( r != Range::all() && r != Range(0, size.p[i]))
    395         {
    396             size.p[i] = r.end - r.start;
    397             offset += r.start*step.p[i];
    398             flags |= SUBMATRIX_FLAG;
    399         }
    400     }
    401     updateContinuityFlag(*this);
    402 }
    403 
    404 UMat UMat::diag(int d) const
    405 {
    406     CV_Assert( dims <= 2 );
    407     UMat m = *this;
    408     size_t esz = elemSize();
    409     int len;
    410 
    411     if( d >= 0 )
    412     {
    413         len = std::min(cols - d, rows);
    414         m.offset += esz*d;
    415     }
    416     else
    417     {
    418         len = std::min(rows + d, cols);
    419         m.offset -= step[0]*d;
    420     }
    421     CV_DbgAssert( len > 0 );
    422 
    423     m.size[0] = m.rows = len;
    424     m.size[1] = m.cols = 1;
    425     m.step[0] += (len > 1 ? esz : 0);
    426 
    427     if( m.rows > 1 )
    428         m.flags &= ~CONTINUOUS_FLAG;
    429     else
    430         m.flags |= CONTINUOUS_FLAG;
    431 
    432     if( size() != Size(1,1) )
    433         m.flags |= SUBMATRIX_FLAG;
    434 
    435     return m;
    436 }
    437 
    438 void UMat::locateROI( Size& wholeSize, Point& ofs ) const
    439 {
    440     CV_Assert( dims <= 2 && step[0] > 0 );
    441     size_t esz = elemSize(), minstep;
    442     ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
    443 
    444     if( delta1 == 0 )
    445         ofs.x = ofs.y = 0;
    446     else
    447     {
    448         ofs.y = (int)(delta1/step[0]);
    449         ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
    450         CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
    451     }
    452     minstep = (ofs.x + cols)*esz;
    453     wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
    454     wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
    455     wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
    456     wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
    457 }
    458 
    459 
    460 UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
    461 {
    462     CV_Assert( dims <= 2 && step[0] > 0 );
    463     Size wholeSize; Point ofs;
    464     size_t esz = elemSize();
    465     locateROI( wholeSize, ofs );
    466     int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
    467     int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
    468     offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
    469     rows = row2 - row1; cols = col2 - col1;
    470     size.p[0] = rows; size.p[1] = cols;
    471     if( esz*cols == step[0] || rows == 1 )
    472         flags |= CONTINUOUS_FLAG;
    473     else
    474         flags &= ~CONTINUOUS_FLAG;
    475     return *this;
    476 }
    477 
    478 
    479 UMat UMat::reshape(int new_cn, int new_rows) const
    480 {
    481     int cn = channels();
    482     UMat hdr = *this;
    483 
    484     if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
    485     {
    486         hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
    487         hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
    488         hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
    489         return hdr;
    490     }
    491 
    492     CV_Assert( dims <= 2 );
    493 
    494     if( new_cn == 0 )
    495         new_cn = cn;
    496 
    497     int total_width = cols * cn;
    498 
    499     if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
    500         new_rows = rows * total_width / new_cn;
    501 
    502     if( new_rows != 0 && new_rows != rows )
    503     {
    504         int total_size = total_width * rows;
    505         if( !isContinuous() )
    506             CV_Error( CV_BadStep,
    507             "The matrix is not continuous, thus its number of rows can not be changed" );
    508 
    509         if( (unsigned)new_rows > (unsigned)total_size )
    510             CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
    511 
    512         total_width = total_size / new_rows;
    513 
    514         if( total_width * new_rows != total_size )
    515             CV_Error( CV_StsBadArg, "The total number of matrix elements "
    516                                     "is not divisible by the new number of rows" );
    517 
    518         hdr.rows = new_rows;
    519         hdr.step[0] = total_width * elemSize1();
    520     }
    521 
    522     int new_width = total_width / new_cn;
    523 
    524     if( new_width * new_cn != total_width )
    525         CV_Error( CV_BadNumChannels,
    526         "The total width is not divisible by the new number of channels" );
    527 
    528     hdr.cols = new_width;
    529     hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
    530     hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
    531     return hdr;
    532 }
    533 
    534 UMat UMat::diag(const UMat& d)
    535 {
    536     CV_Assert( d.cols == 1 || d.rows == 1 );
    537     int len = d.rows + d.cols - 1;
    538     UMat m(len, len, d.type(), Scalar(0));
    539     UMat md = m.diag();
    540     if( d.cols == 1 )
    541         d.copyTo(md);
    542     else
    543         transpose(d, md);
    544     return m;
    545 }
    546 
    547 int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
    548 {
    549     return (depth() == _depth || _depth <= 0) &&
    550         (isContinuous() || !_requireContinuous) &&
    551         ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
    552                         (cols == _elemChannels && channels() == 1))) ||
    553         (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
    554          (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
    555     ? (int)(total()*channels()/_elemChannels) : -1;
    556 }
    557 
    558 UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
    559 {
    560     if(_newndims == dims)
    561     {
    562         if(_newsz == 0)
    563             return reshape(_cn);
    564         if(_newndims == 2)
    565             return reshape(_cn, _newsz[0]);
    566     }
    567 
    568     CV_Error(CV_StsNotImplemented, "");
    569     // TBD
    570     return UMat();
    571 }
    572 
    573 
    574 Mat UMat::getMat(int accessFlags) const
    575 {
    576     if(!u)
    577         return Mat();
    578     u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
    579     CV_Assert(u->data != 0);
    580     Mat hdr(dims, size.p, type(), u->data + offset, step.p);
    581     hdr.flags = flags;
    582     hdr.u = u;
    583     hdr.datastart = u->data;
    584     hdr.data = u->data + offset;
    585     hdr.datalimit = hdr.dataend = u->data + u->size;
    586     CV_XADD(&hdr.u->refcount, 1);
    587     return hdr;
    588 }
    589 
    590 void* UMat::handle(int accessFlags) const
    591 {
    592     if( !u )
    593         return 0;
    594 
    595     // check flags: if CPU copy is newer, copy it back to GPU.
    596     if( u->deviceCopyObsolete() )
    597     {
    598         CV_Assert(u->refcount == 0);
    599         u->currAllocator->unmap(u);
    600     }
    601 
    602     if ((accessFlags & ACCESS_WRITE) != 0)
    603         u->markHostCopyObsolete(true);
    604 
    605     return u->handle;
    606 }
    607 
    608 void UMat::ndoffset(size_t* ofs) const
    609 {
    610     // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
    611     size_t val = offset;
    612     for( int i = 0; i < dims; i++ )
    613     {
    614         size_t s = step.p[i];
    615         ofs[i] = val / s;
    616         val -= ofs[i]*s;
    617     }
    618 }
    619 
    620 void UMat::copyTo(OutputArray _dst) const
    621 {
    622     int dtype = _dst.type();
    623     if( _dst.fixedType() && dtype != type() )
    624     {
    625         CV_Assert( channels() == CV_MAT_CN(dtype) );
    626         convertTo( _dst, dtype );
    627         return;
    628     }
    629 
    630     if( empty() )
    631     {
    632         _dst.release();
    633         return;
    634     }
    635 
    636     size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
    637     for( i = 0; i < (size_t)dims; i++ )
    638         sz[i] = size.p[i];
    639     sz[dims-1] *= esz;
    640     ndoffset(srcofs);
    641     srcofs[dims-1] *= esz;
    642 
    643     _dst.create( dims, size.p, type() );
    644     if( _dst.isUMat() )
    645     {
    646         UMat dst = _dst.getUMat();
    647         if( u == dst.u && dst.offset == offset )
    648             return;
    649 
    650         if (u->currAllocator == dst.u->currAllocator)
    651         {
    652             dst.ndoffset(dstofs);
    653             dstofs[dims-1] *= esz;
    654             u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
    655             return;
    656         }
    657     }
    658 
    659     Mat dst = _dst.getMat();
    660     u->currAllocator->download(u, dst.ptr(), dims, sz, srcofs, step.p, dst.step.p);
    661 }
    662 
    663 void UMat::copyTo(OutputArray _dst, InputArray _mask) const
    664 {
    665     if( _mask.empty() )
    666     {
    667         copyTo(_dst);
    668         return;
    669     }
    670 #ifdef HAVE_OPENCL
    671     int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
    672     CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
    673 
    674     if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
    675     {
    676         UMatData * prevu = _dst.getUMat().u;
    677         _dst.create( dims, size, type() );
    678 
    679         UMat dst = _dst.getUMat();
    680 
    681         bool haveDstUninit = false;
    682         if( prevu != dst.u ) // do not leave dst uninitialized
    683             haveDstUninit = true;
    684 
    685         String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
    686                              ocl::memopTypeToStr(depth()), cn, mcn,
    687                              haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
    688 
    689         ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
    690         if (!k.empty())
    691         {
    692             k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
    693                    ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
    694                    haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
    695                                    ocl::KernelArg::ReadWrite(dst));
    696 
    697             size_t globalsize[2] = { cols, rows };
    698             if (k.run(2, globalsize, NULL, false))
    699             {
    700                 CV_IMPL_ADD(CV_IMPL_OCL);
    701                 return;
    702             }
    703         }
    704     }
    705 #endif
    706     Mat src = getMat(ACCESS_READ);
    707     src.copyTo(_dst, _mask);
    708 }
    709 
    710 void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
    711 {
    712     bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
    713     int stype = type(), cn = CV_MAT_CN(stype);
    714 
    715     if( _type < 0 )
    716         _type = _dst.fixedType() ? _dst.type() : stype;
    717     else
    718         _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);
    719 
    720     int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
    721     if( sdepth == ddepth && noScale )
    722     {
    723         copyTo(_dst);
    724         return;
    725     }
    726 #ifdef HAVE_OPENCL
    727     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    728     bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
    729     if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
    730             ((needDouble && doubleSupport) || !needDouble) )
    731     {
    732         int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;
    733 
    734         char cvt[2][40];
    735         ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
    736                       format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
    737                              ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
    738                              ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
    739                              ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
    740                              doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    741         if (!k.empty())
    742         {
    743             UMat src = *this;
    744             _dst.create( size(), _type );
    745             UMat dst = _dst.getUMat();
    746 
    747             float alphaf = (float)alpha, betaf = (float)beta;
    748             ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
    749                     dstarg = ocl::KernelArg::WriteOnly(dst, cn);
    750 
    751             if (wdepth == CV_32F)
    752                 k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
    753             else
    754                 k.args(srcarg, dstarg, alpha, beta, rowsPerWI);
    755 
    756             size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
    757             if (k.run(2, globalsize, NULL, false))
    758             {
    759                 CV_IMPL_ADD(CV_IMPL_OCL);
    760                 return;
    761             }
    762         }
    763     }
    764 #endif
    765     Mat m = getMat(ACCESS_READ);
    766     m.convertTo(_dst, _type, alpha, beta);
    767 }
    768 
    769 UMat& UMat::setTo(InputArray _value, InputArray _mask)
    770 {
    771     bool haveMask = !_mask.empty();
    772 #ifdef HAVE_OPENCL
    773     int tp = type(), cn = CV_MAT_CN(tp), d = CV_MAT_DEPTH(tp);
    774 
    775     if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
    776     {
    777         Mat value = _value.getMat();
    778         CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
    779         int kercn = haveMask || cn == 3 ? cn : std::max(cn, ocl::predictOptimalVectorWidth(*this)),
    780                 kertp = CV_MAKE_TYPE(d, kercn);
    781 
    782         double buf[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
    783                            0, 0, 0, 0, 0, 0, 0, 0 };
    784         convertAndUnrollScalar(value, tp, (uchar *)buf, kercn / cn);
    785 
    786         int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    787         String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
    788                              ocl::memopTypeToStr(kertp), rowsPerWI,
    789                              ocl::memopTypeToStr(CV_MAKETYPE(d, scalarcn)),
    790                              ocl::memopTypeToStr(d), kercn);
    791 
    792         ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
    793         if( !setK.empty() )
    794         {
    795             ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE(d) * scalarcn);
    796             UMat mask;
    797 
    798             if( haveMask )
    799             {
    800                 mask = _mask.getUMat();
    801                 CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
    802                 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
    803                         dstarg = ocl::KernelArg::ReadWrite(*this);
    804                 setK.args(maskarg, dstarg, scalararg);
    805             }
    806             else
    807             {
    808                 ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this, cn, kercn);
    809                 setK.args(dstarg, scalararg);
    810             }
    811 
    812             size_t globalsize[] = { cols * cn / kercn, (rows + rowsPerWI - 1) / rowsPerWI };
    813             if( setK.run(2, globalsize, NULL, false) )
    814             {
    815                 CV_IMPL_ADD(CV_IMPL_OCL);
    816                 return *this;
    817             }
    818         }
    819     }
    820 #endif
    821     Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    822     m.setTo(_value, _mask);
    823     return *this;
    824 }
    825 
    826 UMat& UMat::operator = (const Scalar& s)
    827 {
    828     setTo(s);
    829     return *this;
    830 }
    831 
    832 UMat UMat::t() const
    833 {
    834     UMat m;
    835     transpose(*this, m);
    836     return m;
    837 }
    838 
    839 UMat UMat::inv(int method) const
    840 {
    841     UMat m;
    842     invert(*this, m, method);
    843     return m;
    844 }
    845 
    846 UMat UMat::mul(InputArray m, double scale) const
    847 {
    848     UMat dst;
    849     multiply(*this, m, dst, scale);
    850     return dst;
    851 }
    852 
    853 #ifdef HAVE_OPENCL
    854 
// OpenCL path for the dot product of _src1 and _src2.
// Returns true and stores the result in res when the "reduce" kernel (built
// with OP_DOT) could be created and ran successfully. Returns false otherwise,
// including the case of 64F data on a device for which doubleFPConfig()
// reports no support; res is not written in that case.
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    // treat both inputs as single-channel matrices
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    int type = src1.type(), depth = CV_MAT_DEPTH(type),
            kercn = ocl::predictOptimalVectorWidth(src1, src2);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !doubleSupport && depth == CV_64F )
        return false;

    // db holds dbsize partial results (one per compute unit of the device)
    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    // accumulate in at least 32F
    int ddepth = std::max(CV_32F, depth);

    // half of the smallest power of two that is >= wgs
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    // dbsize work-groups of wgs work-items each
    // NOTE(review): presumably each work-group writes one partial sum into db - confirm against the kernel source
    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, false))
    {
        // final reduction of the partial results on the host
        res = sum(db.getMat(ACCESS_READ))[0];
        return true;
    }
    return false;
}
    904 
    905 #endif
    906 
    907 double UMat::dot(InputArray m) const
    908 {
    909     CV_Assert(m.sameSize(*this) && m.type() == type());
    910 
    911 #ifdef HAVE_OPENCL
    912     double r = 0;
    913     CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
    914 #endif
    915 
    916     return getMat(ACCESS_READ).dot(m);
    917 }
    918 
    919 UMat UMat::zeros(int rows, int cols, int type)
    920 {
    921     return UMat(rows, cols, type, Scalar::all(0));
    922 }
    923 
    924 UMat UMat::zeros(Size size, int type)
    925 {
    926     return UMat(size, type, Scalar::all(0));
    927 }
    928 
    929 UMat UMat::zeros(int ndims, const int* sz, int type)
    930 {
    931     return UMat(ndims, sz, type, Scalar::all(0));
    932 }
    933 
    934 UMat UMat::ones(int rows, int cols, int type)
    935 {
    936     return UMat::ones(Size(cols, rows), type);
    937 }
    938 
    939 UMat UMat::ones(Size size, int type)
    940 {
    941     return UMat(size, type, Scalar(1));
    942 }
    943 
    944 UMat UMat::ones(int ndims, const int* sz, int type)
    945 {
    946     return UMat(ndims, sz, type, Scalar(1));
    947 }
    948 
    949 UMat UMat::eye(int rows, int cols, int type)
    950 {
    951     return UMat::eye(Size(cols, rows), type);
    952 }
    953 
    954 UMat UMat::eye(Size size, int type)
    955 {
    956     UMat m(size, type);
    957     setIdentity(m);
    958     return m;
    959 }
    960 
    961 }
    962 
    963 /* End of file. */
    964