1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 45 using namespace cv; 46 using namespace cv::cuda; 47 48 int cv::cuda::getCudaEnabledDeviceCount() 49 { 50 #ifndef HAVE_CUDA 51 return 0; 52 #else 53 int count; 54 cudaError_t error = cudaGetDeviceCount(&count); 55 56 if (error == cudaErrorInsufficientDriver) 57 return -1; 58 59 if (error == cudaErrorNoDevice) 60 return 0; 61 62 cudaSafeCall( error ); 63 return count; 64 #endif 65 } 66 67 void cv::cuda::setDevice(int device) 68 { 69 #ifndef HAVE_CUDA 70 (void) device; 71 throw_no_cuda(); 72 #else 73 cudaSafeCall( cudaSetDevice(device) ); 74 #endif 75 } 76 77 int cv::cuda::getDevice() 78 { 79 #ifndef HAVE_CUDA 80 throw_no_cuda(); 81 return 0; 82 #else 83 int device; 84 cudaSafeCall( cudaGetDevice(&device) ); 85 return device; 86 #endif 87 } 88 89 void cv::cuda::resetDevice() 90 { 91 #ifndef HAVE_CUDA 92 throw_no_cuda(); 93 #else 94 cudaSafeCall( cudaDeviceReset() ); 95 #endif 96 } 97 98 bool cv::cuda::deviceSupports(FeatureSet feature_set) 99 { 100 #ifndef HAVE_CUDA 101 (void) feature_set; 102 throw_no_cuda(); 103 return false; 104 #else 105 static int versions[] = 106 { 107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 108 }; 109 static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0])); 110 111 const int devId = getDevice(); 112 113 int version; 114 115 if (devId < cache_size && versions[devId] >= 0) 116 { 117 version = versions[devId]; 118 } 119 else 120 { 121 DeviceInfo dev(devId); 122 version = dev.majorVersion() * 10 + dev.minorVersion(); 123 if (devId < cache_size) 124 versions[devId] = version; 125 } 126 127 return TargetArchs::builtWith(feature_set) && (version >= feature_set); 128 #endif 129 } 130 131 //////////////////////////////////////////////////////////////////////// 132 // TargetArchs 133 134 #ifdef HAVE_CUDA 135 136 namespace 137 { 138 class CudaArch 139 { 140 public: 141 CudaArch(); 142 143 bool builtWith(FeatureSet feature_set) const; 144 bool hasPtx(int major, int minor) const; 145 bool hasBin(int major, int minor) const; 146 bool hasEqualOrLessPtx(int major, int minor) const; 147 bool hasEqualOrGreaterPtx(int major, int minor) const; 148 bool hasEqualOrGreaterBin(int major, int minor) const; 149 150 private: 151 static void fromStr(const char* set_as_str, std::vector<int>& arr); 152 153 std::vector<int> bin; 154 std::vector<int> ptx; 155 std::vector<int> features; 156 }; 157 158 const CudaArch cudaArch; 159 160 CudaArch::CudaArch() 161 { 162 fromStr(CUDA_ARCH_BIN, bin); 163 fromStr(CUDA_ARCH_PTX, ptx); 164 fromStr(CUDA_ARCH_FEATURES, features); 165 } 166 167 bool CudaArch::builtWith(FeatureSet feature_set) const 168 { 169 return !features.empty() && (features.back() >= feature_set); 170 } 171 172 bool CudaArch::hasPtx(int major, int minor) const 173 { 174 return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end(); 175 } 176 177 bool CudaArch::hasBin(int major, int minor) const 178 { 179 return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end(); 180 } 181 182 bool CudaArch::hasEqualOrLessPtx(int major, int minor) const 183 { 184 return !ptx.empty() && (ptx.front() <= major * 10 + minor); 185 } 186 187 bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const 188 { 189 return !ptx.empty() && (ptx.back() >= major * 10 + minor); 190 } 191 192 bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const 193 { 194 return !bin.empty() && (bin.back() >= major * 10 + minor); 195 } 196 197 void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr) 198 { 199 arr.clear(); 200 201 const size_t len = strlen(set_as_str); 202 203 size_t pos = 0; 204 while (pos < len) 205 { 206 if (isspace(set_as_str[pos])) 207 { 208 ++pos; 209 } 210 else 211 { 212 int cur_value; 213 int chars_read; 214 int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read); 215 CV_Assert( args_read == 1 ); 216 217 arr.push_back(cur_value); 218 pos += chars_read; 219 } 220 } 221 222 std::sort(arr.begin(), arr.end()); 223 } 224 } 225 226 #endif 227 228 bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set) 229 { 230 #ifndef HAVE_CUDA 231 (void) feature_set; 232 throw_no_cuda(); 233 return false; 234 #else 235 return cudaArch.builtWith(feature_set); 236 #endif 237 } 238 239 bool cv::cuda::TargetArchs::hasPtx(int major, int minor) 240 { 241 #ifndef HAVE_CUDA 242 (void) major; 243 (void) minor; 244 throw_no_cuda(); 245 return false; 246 #else 247 return cudaArch.hasPtx(major, minor); 248 #endif 249 } 250 251 bool cv::cuda::TargetArchs::hasBin(int major, int minor) 252 { 253 #ifndef HAVE_CUDA 254 (void) major; 255 (void) minor; 256 throw_no_cuda(); 257 return false; 258 #else 259 return cudaArch.hasBin(major, minor); 260 #endif 261 } 262 263 bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor) 264 { 265 #ifndef HAVE_CUDA 266 (void) major; 267 (void) minor; 268 throw_no_cuda(); 269 return false; 270 #else 271 return cudaArch.hasEqualOrLessPtx(major, minor); 272 #endif 273 } 274 275 bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) 276 { 277 #ifndef HAVE_CUDA 278 (void) major; 279 (void) minor; 280 throw_no_cuda(); 281 return false; 282 #else 283 return cudaArch.hasEqualOrGreaterPtx(major, minor); 284 #endif 285 } 286 287 bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor) 288 { 289 #ifndef HAVE_CUDA 290 (void) major; 291 (void) minor; 292 throw_no_cuda(); 293 return false; 294 #else 295 return cudaArch.hasEqualOrGreaterBin(major, minor); 296 #endif 297 } 298 299 //////////////////////////////////////////////////////////////////////// 300 // DeviceInfo 301 302 #ifdef HAVE_CUDA 303 304 namespace 305 { 306 class DeviceProps 307 { 308 public: 309 DeviceProps(); 310 311 const cudaDeviceProp* get(int devID) const; 312 313 private: 314 std::vector<cudaDeviceProp> props_; 315 }; 316 317 DeviceProps::DeviceProps() 318 { 319 int count = getCudaEnabledDeviceCount(); 320 321 if (count > 0) 322 { 323 props_.resize(count); 324 325 for (int devID = 0; devID < count; ++devID) 326 { 327 cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) ); 328 } 329 } 330 } 331 332 const cudaDeviceProp* DeviceProps::get(int devID) const 333 { 334 CV_Assert( static_cast<size_t>(devID) < props_.size() ); 335 336 return &props_[devID]; 337 } 338 339 DeviceProps& deviceProps() 340 { 341 static DeviceProps props; 342 return props; 343 } 344 } 345 346 #endif 347 348 const char* cv::cuda::DeviceInfo::name() const 349 { 350 #ifndef HAVE_CUDA 351 throw_no_cuda(); 352 return ""; 353 #else 354 return deviceProps().get(device_id_)->name; 355 #endif 356 } 357 358 size_t cv::cuda::DeviceInfo::totalGlobalMem() const 359 { 360 #ifndef HAVE_CUDA 361 throw_no_cuda(); 362 return 0; 363 #else 364 return deviceProps().get(device_id_)->totalGlobalMem; 365 #endif 366 } 367 368 size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const 369 { 370 #ifndef HAVE_CUDA 371 throw_no_cuda(); 372 return 0; 373 #else 374 return deviceProps().get(device_id_)->sharedMemPerBlock; 375 #endif 376 } 377 378 int cv::cuda::DeviceInfo::regsPerBlock() const 379 { 380 #ifndef HAVE_CUDA 381 throw_no_cuda(); 382 return 0; 383 #else 384 return deviceProps().get(device_id_)->regsPerBlock; 385 #endif 386 } 387 388 int cv::cuda::DeviceInfo::warpSize() const 389 { 390 #ifndef HAVE_CUDA 391 throw_no_cuda(); 392 return 0; 393 #else 394 return deviceProps().get(device_id_)->warpSize; 395 #endif 396 } 397 398 size_t cv::cuda::DeviceInfo::memPitch() const 399 { 400 #ifndef HAVE_CUDA 401 throw_no_cuda(); 402 return 0; 403 #else 404 return deviceProps().get(device_id_)->memPitch; 405 #endif 406 } 407 408 int cv::cuda::DeviceInfo::maxThreadsPerBlock() const 409 { 410 #ifndef HAVE_CUDA 411 throw_no_cuda(); 412 return 0; 413 #else 414 return deviceProps().get(device_id_)->maxThreadsPerBlock; 415 #endif 416 } 417 418 Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const 419 { 420 #ifndef HAVE_CUDA 421 throw_no_cuda(); 422 return Vec3i(); 423 #else 424 return Vec3i(deviceProps().get(device_id_)->maxThreadsDim); 425 #endif 426 } 427 428 Vec3i cv::cuda::DeviceInfo::maxGridSize() const 429 { 430 #ifndef HAVE_CUDA 431 throw_no_cuda(); 432 return Vec3i(); 433 #else 434 return Vec3i(deviceProps().get(device_id_)->maxGridSize); 435 #endif 436 } 437 438 int cv::cuda::DeviceInfo::clockRate() const 439 { 440 #ifndef HAVE_CUDA 441 throw_no_cuda(); 442 return 0; 443 #else 444 return deviceProps().get(device_id_)->clockRate; 445 #endif 446 } 447 448 size_t cv::cuda::DeviceInfo::totalConstMem() const 449 { 450 #ifndef HAVE_CUDA 451 throw_no_cuda(); 452 return 0; 453 #else 454 return deviceProps().get(device_id_)->totalConstMem; 455 #endif 456 } 457 458 int cv::cuda::DeviceInfo::majorVersion() const 459 { 460 #ifndef HAVE_CUDA 461 throw_no_cuda(); 462 return 0; 463 #else 464 return deviceProps().get(device_id_)->major; 465 #endif 466 } 467 468 int cv::cuda::DeviceInfo::minorVersion() const 469 { 470 #ifndef HAVE_CUDA 471 throw_no_cuda(); 472 return 0; 473 #else 474 return deviceProps().get(device_id_)->minor; 475 #endif 476 } 477 478 size_t cv::cuda::DeviceInfo::textureAlignment() const 479 { 480 #ifndef HAVE_CUDA 481 throw_no_cuda(); 482 return 0; 483 #else 484 return deviceProps().get(device_id_)->textureAlignment; 485 #endif 486 } 487 488 size_t cv::cuda::DeviceInfo::texturePitchAlignment() const 489 { 490 #ifndef HAVE_CUDA 491 throw_no_cuda(); 492 return 0; 493 #else 494 return deviceProps().get(device_id_)->texturePitchAlignment; 495 #endif 496 } 497 498 int cv::cuda::DeviceInfo::multiProcessorCount() const 499 { 500 #ifndef HAVE_CUDA 501 throw_no_cuda(); 502 return 0; 503 #else 504 return deviceProps().get(device_id_)->multiProcessorCount; 505 #endif 506 } 507 508 bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const 509 { 510 #ifndef HAVE_CUDA 511 throw_no_cuda(); 512 return false; 513 #else 514 return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0; 515 #endif 516 } 517 518 bool cv::cuda::DeviceInfo::integrated() const 519 { 520 #ifndef HAVE_CUDA 521 throw_no_cuda(); 522 return false; 523 #else 524 return deviceProps().get(device_id_)->integrated != 0; 525 #endif 526 } 527 528 bool cv::cuda::DeviceInfo::canMapHostMemory() const 529 { 530 #ifndef HAVE_CUDA 531 throw_no_cuda(); 532 return false; 533 #else 534 return deviceProps().get(device_id_)->canMapHostMemory != 0; 535 #endif 536 } 537 538 DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const 539 { 540 #ifndef HAVE_CUDA 541 throw_no_cuda(); 542 return ComputeModeDefault; 543 #else 544 static const ComputeMode tbl[] = 545 { 546 ComputeModeDefault, 547 ComputeModeExclusive, 548 ComputeModeProhibited, 549 ComputeModeExclusiveProcess 550 }; 551 552 return tbl[deviceProps().get(device_id_)->computeMode]; 553 #endif 554 } 555 556 int cv::cuda::DeviceInfo::maxTexture1D() const 557 { 558 #ifndef HAVE_CUDA 559 throw_no_cuda(); 560 return 0; 561 #else 562 return deviceProps().get(device_id_)->maxTexture1D; 563 #endif 564 } 565 566 int cv::cuda::DeviceInfo::maxTexture1DMipmap() const 567 { 568 #ifndef HAVE_CUDA 569 throw_no_cuda(); 570 return 0; 571 #else 572 #if CUDA_VERSION >= 5000 573 return deviceProps().get(device_id_)->maxTexture1DMipmap; 574 #else 575 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0"); 576 return 0; 577 #endif 578 #endif 579 } 580 581 int cv::cuda::DeviceInfo::maxTexture1DLinear() const 582 { 583 #ifndef HAVE_CUDA 584 throw_no_cuda(); 585 return 0; 586 #else 587 return deviceProps().get(device_id_)->maxTexture1DLinear; 588 #endif 589 } 590 591 Vec2i cv::cuda::DeviceInfo::maxTexture2D() const 592 { 593 #ifndef HAVE_CUDA 594 throw_no_cuda(); 595 return Vec2i(); 596 #else 597 return Vec2i(deviceProps().get(device_id_)->maxTexture2D); 598 #endif 599 } 600 601 Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const 602 { 603 #ifndef HAVE_CUDA 604 throw_no_cuda(); 605 return Vec2i(); 606 #else 607 #if CUDA_VERSION >= 5000 608 return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap); 609 #else 610 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0"); 611 return Vec2i(); 612 #endif 613 #endif 614 } 615 616 Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const 617 { 618 #ifndef HAVE_CUDA 619 throw_no_cuda(); 620 return Vec3i(); 621 #else 622 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear); 623 #endif 624 } 625 626 Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const 627 { 628 #ifndef HAVE_CUDA 629 throw_no_cuda(); 630 return Vec2i(); 631 #else 632 return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather); 633 #endif 634 } 635 636 Vec3i cv::cuda::DeviceInfo::maxTexture3D() const 637 { 638 #ifndef HAVE_CUDA 639 throw_no_cuda(); 640 return Vec3i(); 641 #else 642 return Vec3i(deviceProps().get(device_id_)->maxTexture3D); 643 #endif 644 } 645 646 int cv::cuda::DeviceInfo::maxTextureCubemap() const 647 { 648 #ifndef HAVE_CUDA 649 throw_no_cuda(); 650 return 0; 651 #else 652 return deviceProps().get(device_id_)->maxTextureCubemap; 653 #endif 654 } 655 656 Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const 657 { 658 #ifndef HAVE_CUDA 659 throw_no_cuda(); 660 return Vec2i(); 661 #else 662 return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered); 663 #endif 664 } 665 666 Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const 667 { 668 #ifndef HAVE_CUDA 669 throw_no_cuda(); 670 return Vec3i(); 671 #else 672 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered); 673 #endif 674 } 675 676 Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const 677 { 678 #ifndef HAVE_CUDA 679 throw_no_cuda(); 680 return Vec2i(); 681 #else 682 return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered); 683 #endif 684 } 685 686 int cv::cuda::DeviceInfo::maxSurface1D() const 687 { 688 #ifndef HAVE_CUDA 689 throw_no_cuda(); 690 return 0; 691 #else 692 return deviceProps().get(device_id_)->maxSurface1D; 693 #endif 694 } 695 696 Vec2i cv::cuda::DeviceInfo::maxSurface2D() const 697 { 698 #ifndef HAVE_CUDA 699 throw_no_cuda(); 700 return Vec2i(); 701 #else 702 return Vec2i(deviceProps().get(device_id_)->maxSurface2D); 703 #endif 704 } 705 706 Vec3i cv::cuda::DeviceInfo::maxSurface3D() const 707 { 708 #ifndef HAVE_CUDA 709 throw_no_cuda(); 710 return Vec3i(); 711 #else 712 return Vec3i(deviceProps().get(device_id_)->maxSurface3D); 713 #endif 714 } 715 716 Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const 717 { 718 #ifndef HAVE_CUDA 719 throw_no_cuda(); 720 return Vec2i(); 721 #else 722 return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered); 723 #endif 724 } 725 726 Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const 727 { 728 #ifndef HAVE_CUDA 729 throw_no_cuda(); 730 return Vec3i(); 731 #else 732 return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered); 733 #endif 734 } 735 736 int cv::cuda::DeviceInfo::maxSurfaceCubemap() const 737 { 738 #ifndef HAVE_CUDA 739 throw_no_cuda(); 740 return 0; 741 #else 742 return deviceProps().get(device_id_)->maxSurfaceCubemap; 743 #endif 744 } 745 746 Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const 747 { 748 #ifndef HAVE_CUDA 749 throw_no_cuda(); 750 return Vec2i(); 751 #else 752 return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered); 753 #endif 754 } 755 756 size_t cv::cuda::DeviceInfo::surfaceAlignment() const 757 { 758 #ifndef HAVE_CUDA 759 throw_no_cuda(); 760 return 0; 761 #else 762 return deviceProps().get(device_id_)->surfaceAlignment; 763 #endif 764 } 765 766 bool cv::cuda::DeviceInfo::concurrentKernels() const 767 { 768 #ifndef HAVE_CUDA 769 throw_no_cuda(); 770 return false; 771 #else 772 return deviceProps().get(device_id_)->concurrentKernels != 0; 773 #endif 774 } 775 776 bool cv::cuda::DeviceInfo::ECCEnabled() const 777 { 778 #ifndef HAVE_CUDA 779 throw_no_cuda(); 780 return false; 781 #else 782 return deviceProps().get(device_id_)->ECCEnabled != 0; 783 #endif 784 } 785 786 int cv::cuda::DeviceInfo::pciBusID() const 787 { 788 #ifndef HAVE_CUDA 789 throw_no_cuda(); 790 return 0; 791 #else 792 return deviceProps().get(device_id_)->pciBusID; 793 #endif 794 } 795 796 int cv::cuda::DeviceInfo::pciDeviceID() const 797 { 798 #ifndef HAVE_CUDA 799 throw_no_cuda(); 800 return 0; 801 #else 802 return deviceProps().get(device_id_)->pciDeviceID; 803 #endif 804 } 805 806 int cv::cuda::DeviceInfo::pciDomainID() const 807 { 808 #ifndef HAVE_CUDA 809 throw_no_cuda(); 810 return 0; 811 #else 812 return deviceProps().get(device_id_)->pciDomainID; 813 #endif 814 } 815 816 bool cv::cuda::DeviceInfo::tccDriver() const 817 { 818 #ifndef HAVE_CUDA 819 throw_no_cuda(); 820 return false; 821 #else 822 return deviceProps().get(device_id_)->tccDriver != 0; 823 #endif 824 } 825 826 int cv::cuda::DeviceInfo::asyncEngineCount() const 827 { 828 #ifndef HAVE_CUDA 829 throw_no_cuda(); 830 return 0; 831 #else 832 return deviceProps().get(device_id_)->asyncEngineCount; 833 #endif 834 } 835 836 bool cv::cuda::DeviceInfo::unifiedAddressing() const 837 { 838 #ifndef HAVE_CUDA 839 throw_no_cuda(); 840 return false; 841 #else 842 return deviceProps().get(device_id_)->unifiedAddressing != 0; 843 #endif 844 } 845 846 int cv::cuda::DeviceInfo::memoryClockRate() const 847 { 848 #ifndef HAVE_CUDA 849 throw_no_cuda(); 850 return 0; 851 #else 852 return deviceProps().get(device_id_)->memoryClockRate; 853 #endif 854 } 855 856 int cv::cuda::DeviceInfo::memoryBusWidth() const 857 { 858 #ifndef HAVE_CUDA 859 throw_no_cuda(); 860 return 0; 861 #else 862 return deviceProps().get(device_id_)->memoryBusWidth; 863 #endif 864 } 865 866 int cv::cuda::DeviceInfo::l2CacheSize() const 867 { 868 #ifndef HAVE_CUDA 869 throw_no_cuda(); 870 return 0; 871 #else 872 return deviceProps().get(device_id_)->l2CacheSize; 873 #endif 874 } 875 876 int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const 877 { 878 #ifndef HAVE_CUDA 879 throw_no_cuda(); 880 return 0; 881 #else 882 return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor; 883 #endif 884 } 885 886 void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const 887 { 888 #ifndef HAVE_CUDA 889 (void) _totalMemory; 890 (void) _freeMemory; 891 throw_no_cuda(); 892 #else 893 int prevDeviceID = getDevice(); 894 if (prevDeviceID != device_id_) 895 setDevice(device_id_); 896 897 cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) ); 898 899 if (prevDeviceID != device_id_) 900 setDevice(prevDeviceID); 901 #endif 902 } 903 904 bool cv::cuda::DeviceInfo::isCompatible() const 905 { 906 #ifndef HAVE_CUDA 907 throw_no_cuda(); 908 return false; 909 #else 910 // Check PTX compatibility 911 if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion())) 912 return true; 913 914 // Check BIN compatibility 915 for (int i = minorVersion(); i >= 0; --i) 916 if (TargetArchs::hasBin(majorVersion(), i)) 917 return true; 918 919 return false; 920 #endif 921 } 922 923 //////////////////////////////////////////////////////////////////////// 924 // print info 925 926 #ifdef HAVE_CUDA 927 928 namespace 929 { 930 int convertSMVer2Cores(int major, int minor) 931 { 932 // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM 933 typedef struct { 934 int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version 935 int Cores; 936 } SMtoCores; 937 938 SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } }; 939 940 int index = 0; 941 while (gpuArchCoresPerSM[index].SM != -1) 942 { 943 if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) 944 return gpuArchCoresPerSM[index].Cores; 945 index++; 946 } 947 948 return -1; 949 } 950 } 951 952 #endif 953 954 void cv::cuda::printCudaDeviceInfo(int device) 955 { 956 #ifndef HAVE_CUDA 957 (void) device; 958 throw_no_cuda(); 959 #else 960 int count = getCudaEnabledDeviceCount(); 961 bool valid = (device >= 0) && (device < count); 962 963 int beg = valid ? device : 0; 964 int end = valid ? device+1 : count; 965 966 printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n"); 967 printf("Device count: %d\n", count); 968 969 int driverVersion = 0, runtimeVersion = 0; 970 cudaSafeCall( cudaDriverGetVersion(&driverVersion) ); 971 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) ); 972 973 const char *computeMode[] = { 974 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", 975 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", 976 "Prohibited (no host thread can use ::cudaSetDevice() with this device)", 977 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", 978 "Unknown", 979 NULL 980 }; 981 982 for(int dev = beg; dev < end; ++dev) 983 { 984 cudaDeviceProp prop; 985 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) ); 986 987 printf("\nDevice %d: \"%s\"\n", dev, prop.name); 988 printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); 989 printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor); 990 printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem); 991 992 int cores = convertSMVer2Cores(prop.major, prop.minor); 993 if (cores > 0) 994 printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount); 995 996 printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); 997 998 printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", 999 prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], 1000 prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); 1001 printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n", 1002 prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1], 1003 prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]); 1004 1005 printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem); 1006 printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock); 1007 printf(" Total number of registers available per block: %d\n", prop.regsPerBlock); 1008 printf(" Warp size: %d\n", prop.warpSize); 1009 printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock); 1010 printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]); 1011 printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]); 1012 printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch); 1013 printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment); 1014 1015 printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount); 1016 printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No"); 1017 printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No"); 1018 printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No"); 1019 1020 printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No"); 1021 printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No"); 1022 printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No"); 1023 printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No"); 1024 printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No"); 1025 printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID ); 1026 printf(" Compute Mode:\n"); 1027 printf(" %s \n", computeMode[prop.computeMode]); 1028 } 1029 1030 printf("\n"); 1031 printf("deviceQuery, CUDA Driver = CUDART"); 1032 printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100); 1033 printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100); 1034 printf(", NumDevs = %d\n\n", count); 1035 1036 fflush(stdout); 1037 #endif 1038 } 1039 1040 void cv::cuda::printShortCudaDeviceInfo(int device) 1041 { 1042 #ifndef HAVE_CUDA 1043 (void) device; 1044 throw_no_cuda(); 1045 #else 1046 int count = getCudaEnabledDeviceCount(); 1047 bool valid = (device >= 0) && (device < count); 1048 1049 int beg = valid ? device : 0; 1050 int end = valid ? device+1 : count; 1051 1052 int driverVersion = 0, runtimeVersion = 0; 1053 cudaSafeCall( cudaDriverGetVersion(&driverVersion) ); 1054 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) ); 1055 1056 for(int dev = beg; dev < end; ++dev) 1057 { 1058 cudaDeviceProp prop; 1059 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) ); 1060 1061 const char *arch_str = prop.major < 2 ? " (not Fermi)" : ""; 1062 printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f); 1063 printf(", sm_%d%d%s", prop.major, prop.minor, arch_str); 1064 1065 int cores = convertSMVer2Cores(prop.major, prop.minor); 1066 if (cores > 0) 1067 printf(", %d cores", cores * prop.multiProcessorCount); 1068 1069 printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); 1070 } 1071 1072 fflush(stdout); 1073 #endif 1074 } 1075 1076 //////////////////////////////////////////////////////////////////////// 1077 // Error handling 1078 1079 #ifdef HAVE_CUDA 1080 1081 namespace 1082 { 1083 #define error_entry(entry) { entry, #entry } 1084 1085 struct ErrorEntry 1086 { 1087 int code; 1088 const char* str; 1089 }; 1090 1091 struct ErrorEntryComparer 1092 { 1093 int code; 1094 ErrorEntryComparer(int code_) : code(code_) {} 1095 bool operator()(const ErrorEntry& e) const { return e.code == code; } 1096 }; 1097 1098 const ErrorEntry npp_errors [] = 1099 { 1100 #if defined (_MSC_VER) 1101 error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ), 1102 #endif 1103 1104 #if NPP_VERSION < 5500 1105 error_entry( NPP_BAD_ARG_ERROR ), 1106 error_entry( NPP_COEFF_ERROR ), 1107 error_entry( NPP_RECT_ERROR ), 1108 error_entry( NPP_QUAD_ERROR ), 1109 error_entry( NPP_MEMFREE_ERR ), 1110 error_entry( NPP_MEMSET_ERR ), 1111 error_entry( NPP_MEM_ALLOC_ERR ), 1112 error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), 1113 error_entry( NPP_MIRROR_FLIP_ERR ), 1114 error_entry( NPP_INVALID_INPUT ), 1115 error_entry( NPP_POINTER_ERROR ), 1116 error_entry( NPP_WARNING ), 1117 error_entry( NPP_ODD_ROI_WARNING ), 1118 #else 1119 error_entry( NPP_INVALID_HOST_POINTER_ERROR ), 1120 error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ), 1121 error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ), 1122 error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ), 1123 error_entry( NPP_MEMFREE_ERROR ), 1124 error_entry( NPP_MEMSET_ERROR ), 1125 error_entry( NPP_QUALITY_INDEX_ERROR ), 1126 error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ), 1127 error_entry( NPP_CHANNEL_ORDER_ERROR ), 1128 error_entry( NPP_ZERO_MASK_VALUE_ERROR ), 1129 error_entry( NPP_QUADRANGLE_ERROR ), 1130 error_entry( NPP_RECTANGLE_ERROR ), 1131 error_entry( NPP_COEFFICIENT_ERROR ), 1132 error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ), 1133 error_entry( NPP_COI_ERROR ), 1134 error_entry( NPP_DIVISOR_ERROR ), 1135 error_entry( NPP_CHANNEL_ERROR ), 1136 error_entry( NPP_STRIDE_ERROR ), 1137 error_entry( NPP_ANCHOR_ERROR ), 1138 error_entry( NPP_MASK_SIZE_ERROR ), 1139 error_entry( NPP_MIRROR_FLIP_ERROR ), 1140 error_entry( NPP_MOMENT_00_ZERO_ERROR ), 1141 error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ), 1142 error_entry( NPP_THRESHOLD_ERROR ), 1143 error_entry( NPP_CONTEXT_MATCH_ERROR ), 1144 error_entry( NPP_FFT_FLAG_ERROR ), 1145 error_entry( NPP_FFT_ORDER_ERROR ), 1146 error_entry( NPP_SCALE_RANGE_ERROR ), 1147 error_entry( NPP_DATA_TYPE_ERROR ), 1148 error_entry( NPP_OUT_OFF_RANGE_ERROR ), 1149 error_entry( NPP_DIVIDE_BY_ZERO_ERROR ), 1150 error_entry( NPP_MEMORY_ALLOCATION_ERR ), 1151 error_entry( NPP_RANGE_ERROR ), 1152 error_entry( NPP_BAD_ARGUMENT_ERROR ), 1153 error_entry( NPP_NO_MEMORY_ERROR ), 1154 error_entry( NPP_ERROR_RESERVED ), 1155 error_entry( NPP_NO_OPERATION_WARNING ), 1156 error_entry( NPP_DIVIDE_BY_ZERO_WARNING ), 1157 error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ), 1158 #endif 1159 1160 error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), 1161 error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), 1162 error_entry( NPP_RESIZE_NO_OPERATION_ERROR ), 1163 error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), 1164 error_entry( NPP_TEXTURE_BIND_ERROR ), 1165 error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ), 1166 error_entry( NPP_NOT_EVEN_STEP_ERROR ), 1167 error_entry( NPP_INTERPOLATION_ERROR ), 1168 error_entry( NPP_RESIZE_FACTOR_ERROR ), 1169 error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ), 1170 error_entry( NPP_MEMCPY_ERROR ), 1171 error_entry( NPP_ALIGNMENT_ERROR ), 1172 error_entry( NPP_STEP_ERROR ), 1173 error_entry( NPP_SIZE_ERROR ), 1174 error_entry( NPP_NULL_POINTER_ERROR ), 1175 error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ), 1176 error_entry( NPP_NOT_IMPLEMENTED_ERROR ), 1177 error_entry( NPP_ERROR ), 1178 error_entry( NPP_NO_ERROR ), 1179 error_entry( NPP_SUCCESS ), 1180 error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ), 1181 error_entry( NPP_MISALIGNED_DST_ROI_WARNING ), 1182 error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ), 1183 error_entry( NPP_DOUBLE_SIZE_WARNING ) 1184 }; 1185 1186 const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]); 1187 1188 const ErrorEntry cu_errors [] = 1189 { 1190 error_entry( CUDA_SUCCESS ), 1191 error_entry( CUDA_ERROR_INVALID_VALUE ), 1192 error_entry( CUDA_ERROR_OUT_OF_MEMORY ), 1193 error_entry( CUDA_ERROR_NOT_INITIALIZED ), 1194 error_entry( CUDA_ERROR_DEINITIALIZED ), 1195 error_entry( CUDA_ERROR_PROFILER_DISABLED ), 1196 error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ), 1197 error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ), 1198 error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ), 1199 error_entry( CUDA_ERROR_NO_DEVICE ), 1200 error_entry( CUDA_ERROR_INVALID_DEVICE ), 1201 error_entry( CUDA_ERROR_INVALID_IMAGE ), 1202 error_entry( CUDA_ERROR_INVALID_CONTEXT ), 1203 error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ), 1204 error_entry( CUDA_ERROR_MAP_FAILED ), 1205 error_entry( CUDA_ERROR_UNMAP_FAILED ), 1206 error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ), 1207 error_entry( CUDA_ERROR_ALREADY_MAPPED ), 1208 error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ), 1209 error_entry( CUDA_ERROR_ALREADY_ACQUIRED ), 1210 error_entry( CUDA_ERROR_NOT_MAPPED ), 1211 error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ), 1212 error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ), 1213 error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ), 1214 error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ), 1215 error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ), 1216 error_entry( CUDA_ERROR_INVALID_SOURCE ), 1217 error_entry( CUDA_ERROR_FILE_NOT_FOUND ), 1218 error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ), 1219 error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ), 1220 error_entry( CUDA_ERROR_OPERATING_SYSTEM ), 1221 error_entry( CUDA_ERROR_INVALID_HANDLE ), 1222 error_entry( CUDA_ERROR_NOT_FOUND ), 1223 error_entry( CUDA_ERROR_NOT_READY ), 1224 error_entry( CUDA_ERROR_LAUNCH_FAILED ), 1225 error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ), 1226 error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ), 1227 error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ), 1228 error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ), 1229 error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ), 1230 error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ), 1231 error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ), 1232 error_entry( CUDA_ERROR_ASSERT ), 1233 error_entry( CUDA_ERROR_TOO_MANY_PEERS ), 1234 error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ), 1235 error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ), 1236 error_entry( CUDA_ERROR_UNKNOWN ) 1237 }; 1238 1239 const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]); 1240 1241 cv::String getErrorString(int code, const ErrorEntry* errors, size_t n) 1242 { 1243 size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors; 1244 1245 const char* msg = (idx != n) ? errors[idx].str : "Unknown error code"; 1246 cv::String str = cv::format("%s [Code = %d]", msg, code); 1247 1248 return str; 1249 } 1250 } 1251 1252 #endif 1253 1254 String cv::cuda::getNppErrorMessage(int code) 1255 { 1256 #ifndef HAVE_CUDA 1257 (void) code; 1258 return String(); 1259 #else 1260 return getErrorString(code, npp_errors, npp_error_num); 1261 #endif 1262 } 1263 1264 String cv::cuda::getCudaDriverApiErrorMessage(int code) 1265 { 1266 #ifndef HAVE_CUDA 1267 (void) code; 1268 return String(); 1269 #else 1270 return getErrorString(code, cu_errors, cu_errors_num); 1271 #endif 1272 } 1273