1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 45 #if defined WIN32 || defined WINCE 46 #include <windows.h> 47 #undef small 48 #undef min 49 #undef max 50 #undef abs 51 #endif 52 53 #if defined __linux__ || defined __APPLE__ 54 #include <unistd.h> 55 #include <stdio.h> 56 #include <sys/types.h> 57 #if defined ANDROID 58 #include <sys/sysconf.h> 59 #elif defined __APPLE__ 60 #include <sys/sysctl.h> 61 #endif 62 #endif 63 64 #ifdef _OPENMP 65 #define HAVE_OPENMP 66 #endif 67 68 #ifdef __APPLE__ 69 #define HAVE_GCD 70 #endif 71 72 #if defined _MSC_VER && _MSC_VER >= 1600 73 #define HAVE_CONCURRENCY 74 #endif 75 76 /* IMPORTANT: always use the same order of defines 77 1. HAVE_TBB - 3rdparty library, should be explicitly enabled 78 2. HAVE_CSTRIPES - 3rdparty library, should be explicitly enabled 79 3. HAVE_OPENMP - integrated to compiler, should be explicitly enabled 80 4. HAVE_GCD - system wide, used automatically (APPLE only) 81 5. WINRT - system wide, used automatically (Windows RT only) 82 6. HAVE_CONCURRENCY - part of runtime, used automatically (Windows only - MSVS 10, MSVS 11) 83 */ 84 85 #if defined HAVE_TBB 86 #include "tbb/tbb_stddef.h" 87 #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 88 #include "tbb/tbb.h" 89 #include "tbb/task.h" 90 #if TBB_INTERFACE_VERSION >= 6100 91 #include "tbb/task_arena.h" 92 #endif 93 #undef min 94 #undef max 95 #else 96 #undef HAVE_TBB 97 #endif // end TBB version 98 #endif 99 100 #ifndef HAVE_TBB 101 #if defined HAVE_CSTRIPES 102 #include "C=.h" 103 #undef shared 104 #elif defined HAVE_OPENMP 105 #include <omp.h> 106 #elif defined HAVE_GCD 107 #include <dispatch/dispatch.h> 108 #include <pthread.h> 109 #elif defined WINRT 110 #include <ppltasks.h> 111 #elif defined HAVE_CONCURRENCY 112 #include <ppl.h> 113 #endif 114 #endif 115 116 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 117 # define CV_PARALLEL_FRAMEWORK "tbb" 118 #elif defined HAVE_CSTRIPES 119 # define CV_PARALLEL_FRAMEWORK "cstripes" 120 #elif defined HAVE_OPENMP 121 # define CV_PARALLEL_FRAMEWORK "openmp" 122 #elif defined HAVE_GCD 123 # define CV_PARALLEL_FRAMEWORK "gcd" 124 #elif defined WINRT 125 # define CV_PARALLEL_FRAMEWORK "winrt-concurrency" 126 #elif defined HAVE_CONCURRENCY 127 # define CV_PARALLEL_FRAMEWORK "ms-concurrency" 128 #elif defined HAVE_PTHREADS 129 # define CV_PARALLEL_FRAMEWORK "pthreads" 130 #endif 131 132 namespace cv 133 { 134 ParallelLoopBody::~ParallelLoopBody() {} 135 } 136 137 namespace 138 { 139 #ifdef CV_PARALLEL_FRAMEWORK 140 class ParallelLoopBodyWrapper 141 { 142 public: 143 ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) 144 { 145 body = &_body; 146 wholeRange = _r; 147 double len = wholeRange.end - wholeRange.start; 148 nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len)); 149 } 150 void operator()(const cv::Range& sr) const 151 { 152 cv::Range r; 153 r.start = (int)(wholeRange.start + 154 ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes); 155 r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start + 156 ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes); 157 (*body)(r); 158 } 159 cv::Range stripeRange() const { return cv::Range(0, nstripes); } 160 161 protected: 162 const cv::ParallelLoopBody* body; 163 cv::Range wholeRange; 164 int nstripes; 165 }; 166 167 #if defined HAVE_TBB 168 class ProxyLoopBody : public ParallelLoopBodyWrapper 169 { 170 public: 171 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) 172 : ParallelLoopBodyWrapper(_body, _r, _nstripes) 173 {} 174 175 void operator ()(const tbb::blocked_range<int>& range) const 176 { 177 this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end())); 178 } 179 }; 180 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP 181 typedef ParallelLoopBodyWrapper ProxyLoopBody; 182 #elif defined HAVE_GCD 183 typedef ParallelLoopBodyWrapper ProxyLoopBody; 184 static void block_function(void* context, size_t index) 185 { 186 ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context); 187 (*ptr_body)(cv::Range((int)index, (int)index + 1)); 188 } 189 #elif defined WINRT || defined HAVE_CONCURRENCY 190 class ProxyLoopBody : public ParallelLoopBodyWrapper 191 { 192 public: 193 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) 194 : ParallelLoopBodyWrapper(_body, _r, _nstripes) 195 {} 196 197 void operator ()(int i) const 198 { 199 this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1)); 200 } 201 }; 202 #else 203 typedef ParallelLoopBodyWrapper ProxyLoopBody; 204 #endif 205 206 static int numThreads = -1; 207 208 #if defined HAVE_TBB 209 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred); 210 #elif defined HAVE_CSTRIPES 211 // nothing for C= 212 #elif defined HAVE_OPENMP 213 static int numThreadsMax = omp_get_max_threads(); 214 #elif defined HAVE_GCD 215 // nothing for GCD 216 #elif defined WINRT 217 // nothing for WINRT 218 #elif defined HAVE_CONCURRENCY 219 220 class SchedPtr 221 { 222 Concurrency::Scheduler* sched_; 223 public: 224 Concurrency::Scheduler* operator->() { return sched_; } 225 operator Concurrency::Scheduler*() { return sched_; } 226 227 void operator=(Concurrency::Scheduler* sched) 228 { 229 if (sched_) sched_->Release(); 230 sched_ = sched; 231 } 232 233 SchedPtr() : sched_(0) {} 234 ~SchedPtr() { *this = 0; } 235 }; 236 static SchedPtr pplScheduler; 237 238 #endif 239 240 #endif // CV_PARALLEL_FRAMEWORK 241 242 } //namespace 243 244 /* ================================ parallel_for_ ================================ */ 245 246 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) 247 { 248 #ifdef CV_PARALLEL_FRAMEWORK 249 250 if(numThreads != 0) 251 { 252 ProxyLoopBody pbody(body, range, nstripes); 253 cv::Range stripeRange = pbody.stripeRange(); 254 if( stripeRange.end - stripeRange.start == 1 ) 255 { 256 body(range); 257 return; 258 } 259 260 #if defined HAVE_TBB 261 262 tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody); 263 264 #elif defined HAVE_CSTRIPES 265 266 parallel(MAX(0, numThreads)) 267 { 268 int offset = stripeRange.start; 269 int len = stripeRange.end - offset; 270 Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len)); 271 pbody(r); 272 barrier(); 273 } 274 275 #elif defined HAVE_OPENMP 276 277 #pragma omp parallel for schedule(dynamic) 278 for (int i = stripeRange.start; i < stripeRange.end; ++i) 279 pbody(Range(i, i + 1)); 280 281 #elif defined HAVE_GCD 282 283 dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); 284 dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function); 285 286 #elif defined WINRT 287 288 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody); 289 290 #elif defined HAVE_CONCURRENCY 291 292 if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id()) 293 { 294 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody); 295 } 296 else 297 { 298 pplScheduler->Attach(); 299 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody); 300 Concurrency::CurrentScheduler::Detach(); 301 } 302 303 #elif defined HAVE_PTHREADS 304 void parallel_for_pthreads(const Range& range, const ParallelLoopBody& body, double nstripes); 305 parallel_for_pthreads(range, body, nstripes); 306 307 #else 308 309 #error You have hacked and compiling with unsupported parallel framework 310 311 #endif 312 313 } 314 else 315 316 #endif // CV_PARALLEL_FRAMEWORK 317 { 318 (void)nstripes; 319 body(range); 320 } 321 } 322 323 int cv::getNumThreads(void) 324 { 325 #ifdef CV_PARALLEL_FRAMEWORK 326 327 if(numThreads == 0) 328 return 1; 329 330 #endif 331 332 #if defined HAVE_TBB 333 334 return tbbScheduler.is_active() 335 ? numThreads 336 : tbb::task_scheduler_init::default_num_threads(); 337 338 #elif defined HAVE_CSTRIPES 339 340 return numThreads > 0 341 ? numThreads 342 : cv::getNumberOfCPUs(); 343 344 #elif defined HAVE_OPENMP 345 346 return omp_get_max_threads(); 347 348 #elif defined HAVE_GCD 349 350 return 512; // the GCD thread pool limit 351 352 #elif defined WINRT 353 354 return 0; 355 356 #elif defined HAVE_CONCURRENCY 357 358 return 1 + (pplScheduler == 0 359 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors() 360 : pplScheduler->GetNumberOfVirtualProcessors()); 361 362 #elif defined HAVE_PTHREADS 363 364 size_t parallel_pthreads_get_threads_num(); 365 366 return parallel_pthreads_get_threads_num(); 367 368 #else 369 370 return 1; 371 372 #endif 373 } 374 375 void cv::setNumThreads( int threads ) 376 { 377 (void)threads; 378 #ifdef CV_PARALLEL_FRAMEWORK 379 numThreads = threads; 380 #endif 381 382 #ifdef HAVE_TBB 383 384 if(tbbScheduler.is_active()) tbbScheduler.terminate(); 385 if(threads > 0) tbbScheduler.initialize(threads); 386 387 #elif defined HAVE_CSTRIPES 388 389 return; // nothing needed 390 391 #elif defined HAVE_OPENMP 392 393 if(omp_in_parallel()) 394 return; // can't change number of openmp threads inside a parallel region 395 396 omp_set_num_threads(threads > 0 ? threads : numThreadsMax); 397 398 #elif defined HAVE_GCD 399 400 // unsupported 401 // there is only private dispatch_queue_set_width() and only for desktop 402 403 #elif defined WINRT 404 405 return; 406 407 #elif defined HAVE_CONCURRENCY 408 409 if (threads <= 0) 410 { 411 pplScheduler = 0; 412 } 413 else if (threads == 1) 414 { 415 // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested 416 numThreads = 0; 417 } 418 else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads) 419 { 420 pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2, 421 Concurrency::MinConcurrency, threads-1, 422 Concurrency::MaxConcurrency, threads-1)); 423 } 424 425 #elif defined HAVE_PTHREADS 426 427 void parallel_pthreads_set_threads_num(int num); 428 429 parallel_pthreads_set_threads_num(threads); 430 431 #endif 432 } 433 434 435 int cv::getThreadNum(void) 436 { 437 #if defined HAVE_TBB 438 #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA 439 return tbb::task_arena::current_slot(); 440 #else 441 return 0; 442 #endif 443 #elif defined HAVE_CSTRIPES 444 return pix(); 445 #elif defined HAVE_OPENMP 446 return omp_get_thread_num(); 447 #elif defined HAVE_GCD 448 return (int)(size_t)(void*)pthread_self(); // no zero-based indexing 449 #elif defined WINRT 450 return 0; 451 #elif defined HAVE_CONCURRENCY 452 return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,... 453 #else 454 return 0; 455 #endif 456 } 457 458 #ifdef ANDROID 459 static inline int getNumberOfCPUsImpl() 460 { 461 FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r"); 462 if(!cpuPossible) 463 return 1; 464 465 char buf[2000]; //big enough for 1000 CPUs in worst possible configuration 466 char* pbuf = fgets(buf, sizeof(buf), cpuPossible); 467 fclose(cpuPossible); 468 if(!pbuf) 469 return 1; 470 471 //parse string of form "0-1,3,5-7,10,13-15" 472 int cpusAvailable = 0; 473 474 while(*pbuf) 475 { 476 const char* pos = pbuf; 477 bool range = false; 478 while(*pbuf && *pbuf != ',') 479 { 480 if(*pbuf == '-') range = true; 481 ++pbuf; 482 } 483 if(*pbuf) *pbuf++ = 0; 484 if(!range) 485 ++cpusAvailable; 486 else 487 { 488 int rstart = 0, rend = 0; 489 sscanf(pos, "%d-%d", &rstart, &rend); 490 cpusAvailable += rend - rstart + 1; 491 } 492 493 } 494 return cpusAvailable ? cpusAvailable : 1; 495 } 496 #endif 497 498 int cv::getNumberOfCPUs(void) 499 { 500 #if defined WIN32 || defined _WIN32 501 SYSTEM_INFO sysinfo; 502 #if defined(_M_ARM) || defined(_M_X64) || defined(WINRT) 503 GetNativeSystemInfo( &sysinfo ); 504 #else 505 GetSystemInfo( &sysinfo ); 506 #endif 507 508 return (int)sysinfo.dwNumberOfProcessors; 509 #elif defined ANDROID 510 static int ncpus = getNumberOfCPUsImpl(); 511 return ncpus; 512 #elif defined __linux__ 513 return (int)sysconf( _SC_NPROCESSORS_ONLN ); 514 #elif defined __APPLE__ 515 int numCPU=0; 516 int mib[4]; 517 size_t len = sizeof(numCPU); 518 519 /* set the mib for hw.ncpu */ 520 mib[0] = CTL_HW; 521 mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU; 522 523 /* get the number of CPUs from the system */ 524 sysctl(mib, 2, &numCPU, &len, NULL, 0); 525 526 if( numCPU < 1 ) 527 { 528 mib[1] = HW_NCPU; 529 sysctl( mib, 2, &numCPU, &len, NULL, 0 ); 530 531 if( numCPU < 1 ) 532 numCPU = 1; 533 } 534 535 return (int)numCPU; 536 #else 537 return 1; 538 #endif 539 } 540 541 const char* cv::currentParallelFramework() { 542 #ifdef CV_PARALLEL_FRAMEWORK 543 return CV_PARALLEL_FRAMEWORK; 544 #else 545 return NULL; 546 #endif 547 } 548 549 CV_IMPL void cvSetNumThreads(int nt) 550 { 551 cv::setNumThreads(nt); 552 } 553 554 CV_IMPL int cvGetNumThreads() 555 { 556 return cv::getNumThreads(); 557 } 558 559 CV_IMPL int cvGetThreadNum() 560 { 561 return cv::getThreadNum(); 562 } 563