// OpenCV CUDA module: brute-force descriptor matcher (src)
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 using namespace cv;
     46 using namespace cv::cuda;
     47 
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

// CUDA-less build: keep the factory linkable but make it always fail at runtime.
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */
     53 
namespace cv { namespace cuda { namespace device
{
    // Forward declarations of the CUDA kernel launchers implemented in the
    // corresponding .cu files. Template parameter T is the descriptor element
    // type; every launcher enqueues work on the supplied cudaStream_t.

    // Best (1-nearest-neighbour) match kernels.
    namespace bf_match
    {
        // Single train image: one trainIdx / distance written per query row.
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);

        // Packed collection of train images (see makeGpuCollection below):
        // additionally records the winning image index per query.
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
    }

    // k-nearest-neighbour match kernels.
    namespace bf_knnmatch
    {
        // Single train image, arbitrary k; allDist is a scratch buffer
        // (the caller allocates it only when k != 2 — see knnMatchAsync).
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);

        // Packed train collection, dedicated k == 2 variants ("match2").
        template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
    }

    // Radius (distance-threshold) match kernels.
    namespace bf_radius_match
    {
        // Single train image: per-query result counts are returned in nMatches.
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        // Array of n train images / masks: additionally records imgIdx.
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
    }
}}}
    127 
    128 namespace
    129 {
    130     static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
    131                                   const std::vector<GpuMat>& masks,
    132                                   GpuMat& trainCollection,
    133                                   GpuMat& maskCollection)
    134     {
    135         if (trainDescCollection.empty())
    136             return;
    137 
    138         if (masks.empty())
    139         {
    140             Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
    141 
    142             PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
    143 
    144             for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
    145                 *trainCollectionCPU_ptr = trainDescCollection[i];
    146 
    147             trainCollection.upload(trainCollectionCPU);
    148             maskCollection.release();
    149         }
    150         else
    151         {
    152             CV_Assert( masks.size() == trainDescCollection.size() );
    153 
    154             Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
    155             Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
    156 
    157             PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
    158             PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
    159 
    160             for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
    161             {
    162                 const GpuMat& train = trainDescCollection[i];
    163                 const GpuMat& mask = masks[i];
    164 
    165                 CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );
    166 
    167                 *trainCollectionCPU_ptr = train;
    168                 *maskCollectionCPU_ptr = mask;
    169             }
    170 
    171             trainCollection.upload(trainCollectionCPU);
    172             maskCollection.upload(maskCollectionCPU);
    173         }
    174     }
    175 
    // GPU brute-force descriptor matcher. Implements the cv::cuda::DescriptorMatcher
    // interface by dispatching to the bf_match / bf_knnmatch / bf_radius_match
    // kernel launchers declared above. For each operation there are:
    //   - blocking overloads producing std::vector<DMatch> results,
    //   - *Async overloads producing a packed GpuMat result, and
    //   - *Convert helpers that decode the packed result on the CPU.
    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
    {
    public:
        // norm selects the distance used by all subsequent match calls;
        // only NORM_L1, NORM_L2 and NORM_HAMMING are supported.
        explicit BFMatcher_Impl(int norm) : norm_(norm)
        {
            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
        }

        // Brute-force matching supports per-pair masks.
        virtual bool isMaskSupported() const { return true; }

        // Append descriptor sets (one GpuMat per train image) to the collection.
        virtual void add(const std::vector<GpuMat>& descriptors)
        {
            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
        }

        virtual const std::vector<GpuMat>& getTrainDescriptors() const
        {
            return trainDescCollection_;
        }

        virtual void clear()
        {
            trainDescCollection_.clear();
        }

        virtual bool empty() const
        {
            return trainDescCollection_.empty();
        }

        // Brute force needs no training phase; kept for interface compatibility.
        virtual void train()
        {
        }

        // --- 1-to-1 matching -------------------------------------------------

        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                           std::vector<DMatch>& matches,
                           InputArray mask = noArray());

        // Matches against the stored train collection (added via add()).
        virtual void match(InputArray queryDescriptors,
                           std::vector<DMatch>& matches,
                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                OutputArray matches,
                                InputArray mask = noArray(),
                                Stream& stream = Stream::Null());

        virtual void matchAsync(InputArray queryDescriptors,
                                OutputArray matches,
                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                Stream& stream = Stream::Null());

        // Decodes the packed result of matchAsync into DMatch objects.
        virtual void matchConvert(InputArray gpu_matches,
                                  std::vector<DMatch>& matches);

        // --- k-nearest-neighbour matching ------------------------------------

        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              InputArray mask = noArray(),
                              bool compactResult = false);

        virtual void knnMatch(InputArray queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                              bool compactResult = false);

        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                   OutputArray matches,
                                   int k,
                                   InputArray mask = noArray(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchAsync(InputArray queryDescriptors,
                                   OutputArray matches,
                                   int k,
                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchConvert(InputArray gpu_matches,
                                     std::vector< std::vector<DMatch> >& matches,
                                     bool compactResult = false);

        // --- radius (distance-threshold) matching -----------------------------

        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 InputArray mask = noArray(),
                                 bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                 bool compactResult = false);

        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      InputArray mask = noArray(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchAsync(InputArray queryDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchConvert(InputArray gpu_matches,
                                        std::vector< std::vector<DMatch> >& matches,
                                        bool compactResult = false);

    private:
        int norm_;                               // NORM_L1, NORM_L2 or NORM_HAMMING
        std::vector<GpuMat> trainDescCollection_; // one descriptor matrix per train image
    };
    291 
    292     //
    293     // 1 to 1 match
    294     //
    295 
    296     void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
    297                                std::vector<DMatch>& matches,
    298                                InputArray _mask)
    299     {
    300         GpuMat d_matches;
    301         matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
    302         matchConvert(d_matches, matches);
    303     }
    304 
    305     void BFMatcher_Impl::match(InputArray _queryDescriptors,
    306                                std::vector<DMatch>& matches,
    307                                const std::vector<GpuMat>& masks)
    308     {
    309         GpuMat d_matches;
    310         matchAsync(_queryDescriptors, d_matches, masks);
    311         matchConvert(d_matches, matches);
    312     }
    313 
    // Asynchronous 1-to-1 match against a single train image.
    // Output: a 2 x nQuery CV_32SC1 matrix; row 0 holds trainIdx (int),
    // row 1 holds distance (float bits aliased into the int matrix).
    // Decoded by matchConvert.
    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                    OutputArray _matches,
                                    InputArray _mask,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        // Nothing to match: produce an empty result.
        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        // Descriptors must be single-channel with depth below CV_64F, and the
        // train matrix must share the query's descriptor length and type.
        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        // Optional mask: 8U, one row per query descriptor, one column per train row.
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        // Dispatch tables indexed by query.depth(); a 0 entry means that
        // depth/norm combination has no kernel instantiation.
        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        // Allocate the packed result and create row views aliasing its memory
        // (row 1 is reinterpreted as float by the kernel and by matchConvert).
        _matches.create(2, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
    }
    376 
    // Asynchronous 1-to-1 match against the stored train collection.
    // Output: a 3 x nQuery CV_32SC1 matrix; row 0 = trainIdx, row 1 = imgIdx,
    // row 2 = distance (float bits aliased into the int matrix).
    // Decoded by matchConvert.
    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                    OutputArray _matches,
                                    const std::vector<GpuMat>& masks,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        // Nothing to match: produce an empty result.
        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        // Pack the train images (and optional masks) into device-pointer
        // collections for the multi-image kernels.
        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        // Dispatch tables indexed by query.depth(); a 0 entry means that
        // depth/norm combination has no kernel instantiation.
        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        // Allocate the packed result and create row views aliasing its memory.
        _matches.create(3, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }
    439 
    440     void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
    441                                       std::vector<DMatch>& matches)
    442     {
    443         Mat gpu_matches;
    444         if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    445         {
    446             _gpu_matches.getGpuMat().download(gpu_matches);
    447         }
    448         else
    449         {
    450             gpu_matches = _gpu_matches.getMat();
    451         }
    452 
    453         if (gpu_matches.empty())
    454         {
    455             matches.clear();
    456             return;
    457         }
    458 
    459         CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );
    460 
    461         const int nQuery = gpu_matches.cols;
    462 
    463         matches.clear();
    464         matches.reserve(nQuery);
    465 
    466         const int* trainIdxPtr = NULL;
    467         const int* imgIdxPtr = NULL;
    468         const float* distancePtr = NULL;
    469 
    470         if (gpu_matches.rows == 2)
    471         {
    472             trainIdxPtr = gpu_matches.ptr<int>(0);
    473             distancePtr =  gpu_matches.ptr<float>(1);
    474         }
    475         else
    476         {
    477             trainIdxPtr = gpu_matches.ptr<int>(0);
    478             imgIdxPtr =  gpu_matches.ptr<int>(1);
    479             distancePtr =  gpu_matches.ptr<float>(2);
    480         }
    481 
    482         for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    483         {
    484             const int trainIdx = trainIdxPtr[queryIdx];
    485             if (trainIdx == -1)
    486                 continue;
    487 
    488             const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
    489             const float distance = distancePtr[queryIdx];
    490 
    491             DMatch m(queryIdx, trainIdx, imgIdx, distance);
    492 
    493             matches.push_back(m);
    494         }
    495     }
    496 
    497     //
    498     // knn match
    499     //
    500 
    501     void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
    502                                   std::vector<std::vector<DMatch> >& matches,
    503                                   int k,
    504                                   InputArray _mask,
    505                                   bool compactResult)
    506     {
    507         GpuMat d_matches;
    508         knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
    509         knnMatchConvert(d_matches, matches, compactResult);
    510     }
    511 
    // k-NN match against the stored train collection.
    // k == 2 uses the dedicated multi-image GPU kernel; any other k falls back
    // to matching each train image separately and merging the per-image lists
    // on the CPU, keeping the k best matches per query overall.
    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  const std::vector<GpuMat>& masks,
                                  bool compactResult)
    {
        if (k == 2)
        {
            GpuMat d_matches;
            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
            knnMatchConvert(d_matches, matches, compactResult);
        }
        else
        {
            const GpuMat query = _queryDescriptors.getGpuMat();

            if (query.empty() || trainDescCollection_.empty())
            {
                matches.clear();
                return;
            }

            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

            std::vector< std::vector<DMatch> > curMatches;
            std::vector<DMatch> temp;
            temp.reserve(2 * k);  // merge buffer: at most k global + k local matches

            matches.resize(query.rows);
            for (size_t i = 0; i < matches.size(); ++i)
                matches[i].reserve(k);

            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
            {
                // Single-image k-NN for this train image (per-image mask if given).
                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

                for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
                {
                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
                    std::vector<DMatch>& globalMatch = matches[queryIdx];

                    // Stamp the source image index onto the per-image results.
                    for (size_t i = 0; i < localMatch.size(); ++i)
                        localMatch[i].imgIdx = imgIdx;

                    // Merge the two lists (std::merge assumes both are already
                    // ordered by the DMatch comparison) and keep the k best.
                    temp.clear();
                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                    globalMatch.clear();
                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
                }
            }

            // Optionally drop queries that produced no matches at all.
            if (compactResult)
            {
                std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
                matches.erase(new_end, matches.end());
            }
        }
    }
    572 
    // Asynchronous k-NN match against a single train image.
    // Output layout (decoded by knnMatchConvert):
    //   k == 2 : 2 x nQuery CV_32SC2; row 0 = trainIdx pairs, row 1 = distance
    //            pairs (float bits aliased into the int matrix).
    //   k != 2 : (2*nQuery) x k CV_32SC1; top nQuery rows = trainIdx block,
    //            bottom nQuery rows = distance block (float bits).
    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       InputArray _mask,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        // Nothing to match: produce an empty result.
        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        // Same input constraints as matchAsync: single-channel, depth below
        // CV_64F, matching train layout, optional nQuery x nTrain 8U mask.
        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                 cudaStream_t stream);

        // Dispatch tables indexed by query.depth(); a 0 entry means that
        // depth/norm combination has no kernel instantiation.
        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        GpuMat trainIdx, distance, allDist;
        if (k == 2)
        {
            // k == 2: interleaved 2-channel rows aliasing the packed result.
            _matches.create(2, nQuery, CV_32SC2);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
        }
        else
        {
            // General k: stacked nQuery x k blocks sharing the result's step;
            // allDist is stream-pooled scratch for all query/train distances.
            _matches.create(2 * nQuery, k, CV_32SC1);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

            BufferPool pool(stream);
            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
        }

        // Pre-fill indices with -1 ("no match") before the kernel writes results.
        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
    }
    654 
    655     void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
    656                                        OutputArray _matches,
    657                                        int k,
    658                                        const std::vector<GpuMat>& masks,
    659                                        Stream& stream)
    660     {
    661         using namespace cv::cuda::device::bf_knnmatch;
    662 
    663         if (k != 2)
    664         {
    665             CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
    666         }
    667 
    668         const GpuMat query = _queryDescriptors.getGpuMat();
    669 
    670         if (query.empty() || trainDescCollection_.empty())
    671         {
    672             _matches.release();
    673             return;
    674         }
    675 
    676         CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
    677 
    678         GpuMat trainCollection, maskCollection;
    679         makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
    680 
    681         typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
    682                                  const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
    683                                  cudaStream_t stream);
    684 
    685         static const caller_t callersL1[] =
    686         {
    687             match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
    688             match2L1_gpu<unsigned short>, match2L1_gpu<short>,
    689             match2L1_gpu<int>, match2L1_gpu<float>
    690         };
    691         static const caller_t callersL2[] =
    692         {
    693             0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
    694             0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
    695             0/*match2L2_gpu<int>*/, match2L2_gpu<float>
    696         };
    697         static const caller_t callersHamming[] =
    698         {
    699             match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
    700             match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
    701             match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
    702         };
    703 
    704         const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
    705 
    706         const caller_t func = callers[query.depth()];
    707         if (func == 0)
    708         {
    709             CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    710         }
    711 
    712         const int nQuery = query.rows;
    713 
    714         _matches.create(3, nQuery, CV_32SC2);
    715         GpuMat matches = _matches.getGpuMat();
    716 
    717         GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
    718         GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
    719         GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));
    720 
    721         trainIdx.setTo(Scalar::all(-1), stream);
    722 
    723         func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    724     }
    725 
    726     void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
    727                                          std::vector< std::vector<DMatch> >& matches,
    728                                          bool compactResult)
    729     {
    730         Mat gpu_matches;
    731         if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    732         {
    733             _gpu_matches.getGpuMat().download(gpu_matches);
    734         }
    735         else
    736         {
    737             gpu_matches = _gpu_matches.getMat();
    738         }
    739 
    740         if (gpu_matches.empty())
    741         {
    742             matches.clear();
    743             return;
    744         }
    745 
    746         CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
    747                    (gpu_matches.type() == CV_32SC1) );
    748 
    749         int nQuery = -1, k = -1;
    750 
    751         const int* trainIdxPtr = NULL;
    752         const int* imgIdxPtr = NULL;
    753         const float* distancePtr = NULL;
    754 
    755         if (gpu_matches.type() == CV_32SC2)
    756         {
    757             nQuery = gpu_matches.cols;
    758             k = 2;
    759 
    760             if (gpu_matches.rows == 2)
    761             {
    762                 trainIdxPtr = gpu_matches.ptr<int>(0);
    763                 distancePtr =  gpu_matches.ptr<float>(1);
    764             }
    765             else
    766             {
    767                 trainIdxPtr = gpu_matches.ptr<int>(0);
    768                 imgIdxPtr =  gpu_matches.ptr<int>(1);
    769                 distancePtr =  gpu_matches.ptr<float>(2);
    770             }
    771         }
    772         else
    773         {
    774             nQuery = gpu_matches.rows / 2;
    775             k = gpu_matches.cols;
    776 
    777             trainIdxPtr = gpu_matches.ptr<int>(0);
    778             distancePtr =  gpu_matches.ptr<float>(nQuery);
    779         }
    780 
    781         matches.clear();
    782         matches.reserve(nQuery);
    783 
    784         for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    785         {
    786             matches.push_back(std::vector<DMatch>());
    787             std::vector<DMatch>& curMatches = matches.back();
    788             curMatches.reserve(k);
    789 
    790             for (int i = 0; i < k; ++i)
    791             {
    792                 const int trainIdx = *trainIdxPtr;
    793                 if (trainIdx == -1)
    794                     continue;
    795 
    796                 const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
    797                 const float distance = *distancePtr;
    798 
    799                 DMatch m(queryIdx, trainIdx, imgIdx, distance);
    800 
    801                 curMatches.push_back(m);
    802 
    803                 ++trainIdxPtr;
    804                 ++distancePtr;
    805                 if (imgIdxPtr)
    806                     ++imgIdxPtr;
    807             }
    808 
    809             if (compactResult && curMatches.empty())
    810             {
    811                 matches.pop_back();
    812             }
    813         }
    814     }
    815 
    816     //
    817     // radius match
    818     //
    819 
    820     void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
    821                                      std::vector<std::vector<DMatch> >& matches,
    822                                      float maxDistance,
    823                                      InputArray _mask,
    824                                      bool compactResult)
    825     {
    826         GpuMat d_matches;
    827         radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
    828         radiusMatchConvert(d_matches, matches, compactResult);
    829     }
    830 
    831     void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
    832                                      std::vector<std::vector<DMatch> >& matches,
    833                                      float maxDistance,
    834                                      const std::vector<GpuMat>& masks,
    835                                      bool compactResult)
    836     {
    837         GpuMat d_matches;
    838         radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
    839         radiusMatchConvert(d_matches, matches, compactResult);
    840     }
    841 
    842     void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
    843                                           OutputArray _matches,
    844                                           float maxDistance,
    845                                           InputArray _mask,
    846                                           Stream& stream)
    847     {
    848         using namespace cv::cuda::device::bf_radius_match;
    849 
    850         const GpuMat query = _queryDescriptors.getGpuMat();
    851         const GpuMat train = _trainDescriptors.getGpuMat();
    852         const GpuMat mask = _mask.getGpuMat();
    853 
    854         if (query.empty() || train.empty())
    855         {
    856             _matches.release();
    857             return;
    858         }
    859 
    860         CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
    861         CV_Assert( train.cols == query.cols && train.type() == query.type() );
    862         CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
    863 
    864         typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
    865                                  const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
    866                                  cudaStream_t stream);
    867 
    868         static const caller_t callersL1[] =
    869         {
    870             matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
    871             matchL1_gpu<unsigned short>, matchL1_gpu<short>,
    872             matchL1_gpu<int>, matchL1_gpu<float>
    873         };
    874         static const caller_t callersL2[] =
    875         {
    876             0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
    877             0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
    878             0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    879         };
    880         static const caller_t callersHamming[] =
    881         {
    882             matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
    883             matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
    884             matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    885         };
    886 
    887         const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
    888 
    889         const caller_t func = callers[query.depth()];
    890         if (func == 0)
    891         {
    892             CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    893         }
    894 
    895         const int nQuery = query.rows;
    896         const int nTrain = train.rows;
    897 
    898         const int cols = std::max((nTrain / 100), nQuery);
    899 
    900         _matches.create(2 * nQuery + 1, cols, CV_32SC1);
    901         GpuMat matches = _matches.getGpuMat();
    902 
    903         GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
    904         GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
    905         GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));
    906 
    907         nMatches.setTo(Scalar::all(0), stream);
    908 
    909         func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
    910     }
    911 
    // Radius match of query descriptors against the stored train-image
    // collection; results are written asynchronously on `stream` into a
    // packed single-allocation matrix (see layout comment below).
    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          const std::vector<GpuMat>& masks,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        // Empty input -> empty output.
        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        // NOTE(review): trainCollection/maskCollection are never used below —
        // the kernel call at the end is fed from the raw trains_/masks_
        // vectors instead. This call appears to do redundant work (though it
        // may perform validation inside makeGpuCollection) — confirm before
        // removing.
        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        // Kernel dispatch tables indexed by descriptor depth; a null slot is
        // a depth/norm combination with no instantiated kernel.
        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        // Packed layout (declared CV_32FC1, but the first 2*nQuery rows hold
        // int data reinterpreted through the aliased headers below):
        //   rows [0, nQuery)          : trainIdx (int)
        //   rows [nQuery, 2*nQuery)   : imgIdx (int)
        //   rows [2*nQuery, 3*nQuery) : distance (float)
        //   row  3*nQuery             : per-query match counters
        // NOTE(review): each result row is only nQuery wide — presumably a
        // heuristic capacity per query; matches beyond it are dropped by the
        // convert step.
        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
        GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

        // The kernel only increments the counters, so start them at zero.
        nMatches.setTo(Scalar::all(0), stream);

        // Flatten the train images (and masks, if any) into plain device
        // pointer arrays for the multi-image kernel.
        std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
        std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

        func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
            trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }
    982 
    983     void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
    984                                             std::vector< std::vector<DMatch> >& matches,
    985                                             bool compactResult)
    986     {
    987         Mat gpu_matches;
    988         if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    989         {
    990             _gpu_matches.getGpuMat().download(gpu_matches);
    991         }
    992         else
    993         {
    994             gpu_matches = _gpu_matches.getMat();
    995         }
    996 
    997         if (gpu_matches.empty())
    998         {
    999             matches.clear();
   1000             return;
   1001         }
   1002 
   1003         CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );
   1004 
   1005         int nQuery = -1;
   1006 
   1007         const int* trainIdxPtr = NULL;
   1008         const int* imgIdxPtr = NULL;
   1009         const float* distancePtr = NULL;
   1010         const int* nMatchesPtr = NULL;
   1011 
   1012         if (gpu_matches.type() == CV_32SC1)
   1013         {
   1014             nQuery = (gpu_matches.rows - 1) / 2;
   1015 
   1016             trainIdxPtr = gpu_matches.ptr<int>(0);
   1017             distancePtr =  gpu_matches.ptr<float>(nQuery);
   1018             nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
   1019         }
   1020         else
   1021         {
   1022             nQuery = (gpu_matches.rows - 1) / 3;
   1023 
   1024             trainIdxPtr = gpu_matches.ptr<int>(0);
   1025             imgIdxPtr = gpu_matches.ptr<int>(nQuery);
   1026             distancePtr =  gpu_matches.ptr<float>(2 * nQuery);
   1027             nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
   1028         }
   1029 
   1030         matches.clear();
   1031         matches.reserve(nQuery);
   1032 
   1033         for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
   1034         {
   1035             const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);
   1036 
   1037             if (nMatched == 0)
   1038             {
   1039                 if (!compactResult)
   1040                 {
   1041                     matches.push_back(std::vector<DMatch>());
   1042                 }
   1043             }
   1044             else
   1045             {
   1046                 matches.push_back(std::vector<DMatch>(nMatched));
   1047                 std::vector<DMatch>& curMatches = matches.back();
   1048 
   1049                 for (int i = 0; i < nMatched; ++i)
   1050                 {
   1051                     const int trainIdx = trainIdxPtr[i];
   1052 
   1053                     const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
   1054                     const float distance = distancePtr[i];
   1055 
   1056                     DMatch m(queryIdx, trainIdx, imgIdx, distance);
   1057 
   1058                     curMatches[i] = m;
   1059                 }
   1060 
   1061                 std::sort(curMatches.begin(), curMatches.end());
   1062             }
   1063 
   1064             trainIdxPtr += gpu_matches.cols;
   1065             distancePtr += gpu_matches.cols;
   1066             if (imgIdxPtr)
   1067                 imgIdxPtr += gpu_matches.cols;
   1068         }
   1069     }
   1070 }
   1071 
   1072 Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
   1073 {
   1074     return makePtr<BFMatcher_Impl>(norm);
   1075 }
   1076 
   1077 #endif /* !defined (HAVE_CUDA) */
   1078