/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 #include "precomp.hpp" 45 #include <map> 46 47 using namespace cv; 48 using namespace cv::cuda; 49 50 #ifdef HAVE_CUDA 51 52 namespace { 53 54 class HostMemAllocator : public MatAllocator 55 { 56 public: 57 explicit HostMemAllocator(unsigned int flags) : flags_(flags) 58 { 59 } 60 61 UMatData* allocate(int dims, const int* sizes, int type, 62 void* data0, size_t* step, 63 int /*flags*/, UMatUsageFlags /*usageFlags*/) const 64 { 65 size_t total = CV_ELEM_SIZE(type); 66 for (int i = dims-1; i >= 0; i--) 67 { 68 if (step) 69 { 70 if (data0 && step[i] != CV_AUTOSTEP) 71 { 72 CV_Assert(total <= step[i]); 73 total = step[i]; 74 } 75 else 76 { 77 step[i] = total; 78 } 79 } 80 81 total *= sizes[i]; 82 } 83 84 UMatData* u = new UMatData(this); 85 u->size = total; 86 87 if (data0) 88 { 89 u->data = u->origdata = static_cast<uchar*>(data0); 90 u->flags |= UMatData::USER_ALLOCATED; 91 } 92 else 93 { 94 void* ptr = 0; 95 cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) ); 96 97 u->data = u->origdata = static_cast<uchar*>(ptr); 98 } 99 100 return u; 101 } 102 103 bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const 104 { 105 return (u != NULL); 106 } 107 108 void deallocate(UMatData* u) const 109 { 110 if (!u) 111 return; 112 113 CV_Assert(u->urefcount >= 0); 114 CV_Assert(u->refcount >= 0); 115 116 if (u->refcount == 0) 117 { 118 if ( !(u->flags & UMatData::USER_ALLOCATED) ) 119 
{ 120 cudaFreeHost(u->origdata); 121 u->origdata = 0; 122 } 123 124 delete u; 125 } 126 } 127 128 private: 129 unsigned int flags_; 130 }; 131 132 } // namespace 133 134 #endif 135 136 MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type) 137 { 138 #ifndef HAVE_CUDA 139 (void) alloc_type; 140 throw_no_cuda(); 141 return NULL; 142 #else 143 static std::map<unsigned int, Ptr<MatAllocator> > allocators; 144 145 unsigned int flag = cudaHostAllocDefault; 146 147 switch (alloc_type) 148 { 149 case PAGE_LOCKED: flag = cudaHostAllocDefault; break; 150 case SHARED: flag = cudaHostAllocMapped; break; 151 case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break; 152 default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); 153 } 154 155 Ptr<MatAllocator>& a = allocators[flag]; 156 157 if (a.empty()) 158 { 159 a = makePtr<HostMemAllocator>(flag); 160 } 161 162 return a.get(); 163 #endif 164 } 165 166 #ifdef HAVE_CUDA 167 namespace 168 { 169 size_t alignUpStep(size_t what, size_t alignment) 170 { 171 size_t alignMask = alignment - 1; 172 size_t inverseAlignMask = ~alignMask; 173 size_t res = (what + alignMask) & inverseAlignMask; 174 return res; 175 } 176 } 177 #endif 178 179 void cv::cuda::HostMem::create(int rows_, int cols_, int type_) 180 { 181 #ifndef HAVE_CUDA 182 (void) rows_; 183 (void) cols_; 184 (void) type_; 185 throw_no_cuda(); 186 #else 187 if (alloc_type == SHARED) 188 { 189 DeviceInfo devInfo; 190 CV_Assert( devInfo.canMapHostMemory() ); 191 } 192 193 type_ &= Mat::TYPE_MASK; 194 195 if (rows == rows_ && cols == cols_ && type() == type_ && data) 196 return; 197 198 if (data) 199 release(); 200 201 CV_DbgAssert( rows_ >= 0 && cols_ >= 0 ); 202 203 if (rows_ > 0 && cols_ > 0) 204 { 205 flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_; 206 rows = rows_; 207 cols = cols_; 208 step = elemSize() * cols; 209 210 if (alloc_type == SHARED) 211 { 212 DeviceInfo devInfo; 213 step = alignUpStep(step, devInfo.textureAlignment()); 214 } 215 216 
int64 _nettosize = (int64)step*rows; 217 size_t nettosize = (size_t)_nettosize; 218 219 if (_nettosize != (int64)nettosize) 220 CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated"); 221 222 size_t datasize = alignSize(nettosize, (int)sizeof(*refcount)); 223 224 void* ptr = 0; 225 226 switch (alloc_type) 227 { 228 case PAGE_LOCKED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break; 229 case SHARED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break; 230 case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break; 231 default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type"); 232 } 233 234 datastart = data = (uchar*)ptr; 235 dataend = data + nettosize; 236 237 refcount = (int*)cv::fastMalloc(sizeof(*refcount)); 238 *refcount = 1; 239 } 240 #endif 241 } 242 243 HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const 244 { 245 HostMem hdr = *this; 246 247 int cn = channels(); 248 if (new_cn == 0) 249 new_cn = cn; 250 251 int total_width = cols * cn; 252 253 if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) 254 new_rows = rows * total_width / new_cn; 255 256 if (new_rows != 0 && new_rows != rows) 257 { 258 int total_size = total_width * rows; 259 260 if (!isContinuous()) 261 CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); 262 263 if ((unsigned)new_rows > (unsigned)total_size) 264 CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows"); 265 266 total_width = total_size / new_rows; 267 268 if (total_width * new_rows != total_size) 269 CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); 270 271 hdr.rows = new_rows; 272 hdr.step = total_width * elemSize1(); 273 } 274 275 int new_width = total_width / new_cn; 276 277 if (new_width * new_cn != total_width) 278 CV_Error(cv::Error::BadNumChannels, "The total width is 
not divisible by the new number of channels"); 279 280 hdr.cols = new_width; 281 hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); 282 283 return hdr; 284 } 285 286 void cv::cuda::HostMem::release() 287 { 288 #ifdef HAVE_CUDA 289 if (refcount && CV_XADD(refcount, -1) == 1) 290 { 291 cudaFreeHost(datastart); 292 fastFree(refcount); 293 } 294 295 dataend = data = datastart = 0; 296 step = rows = cols = 0; 297 refcount = 0; 298 #endif 299 } 300 301 GpuMat cv::cuda::HostMem::createGpuMatHeader() const 302 { 303 #ifndef HAVE_CUDA 304 throw_no_cuda(); 305 return GpuMat(); 306 #else 307 CV_Assert( alloc_type == SHARED ); 308 309 void *pdev; 310 cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) ); 311 312 return GpuMat(rows, cols, type(), pdev, step); 313 #endif 314 } 315 316 void cv::cuda::registerPageLocked(Mat& m) 317 { 318 #ifndef HAVE_CUDA 319 (void) m; 320 throw_no_cuda(); 321 #else 322 CV_Assert( m.isContinuous() ); 323 cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) ); 324 #endif 325 } 326 327 void cv::cuda::unregisterPageLocked(Mat& m) 328 { 329 #ifndef HAVE_CUDA 330 (void) m; 331 #else 332 cudaSafeCall( cudaHostUnregister(m.data) ); 333 #endif 334 } 335