Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 #if defined WIN32 || defined WINCE
     46     #include <windows.h>
     47     #undef small
     48     #undef min
     49     #undef max
     50     #undef abs
     51 #endif
     52 
     53 #if defined __linux__ || defined __APPLE__
     54     #include <unistd.h>
     55     #include <stdio.h>
     56     #include <sys/types.h>
     57     #if defined ANDROID
     58         #include <sys/sysconf.h>
     59     #elif defined __APPLE__
     60         #include <sys/sysctl.h>
     61     #endif
     62 #endif
     63 
     64 #ifdef _OPENMP
     65     #define HAVE_OPENMP
     66 #endif
     67 
     68 #ifdef __APPLE__
     69     #define HAVE_GCD
     70 #endif
     71 
     72 #if defined _MSC_VER && _MSC_VER >= 1600
     73     #define HAVE_CONCURRENCY
     74 #endif
     75 
     76 /* IMPORTANT: always use the same order of defines
     77    1. HAVE_TBB         - 3rdparty library, should be explicitly enabled
     78    2. HAVE_CSTRIPES    - 3rdparty library, should be explicitly enabled
     79    3. HAVE_OPENMP      - integrated to compiler, should be explicitly enabled
     80    4. HAVE_GCD         - system wide, used automatically        (APPLE only)
     81    5. WINRT            - system wide, used automatically        (Windows RT only)
     82    6. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
     83 */
     84 
     85 #if defined HAVE_TBB
     86     #include "tbb/tbb_stddef.h"
     87     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
     88         #include "tbb/tbb.h"
     89         #include "tbb/task.h"
     90         #if TBB_INTERFACE_VERSION >= 6100
     91             #include "tbb/task_arena.h"
     92         #endif
     93         #undef min
     94         #undef max
     95     #else
     96         #undef HAVE_TBB
     97     #endif // end TBB version
     98 #endif
     99 
    100 #ifndef HAVE_TBB
    101     #if defined HAVE_CSTRIPES
    102         #include "C=.h"
    103         #undef shared
    104     #elif defined HAVE_OPENMP
    105         #include <omp.h>
    106     #elif defined HAVE_GCD
    107         #include <dispatch/dispatch.h>
    108         #include <pthread.h>
    109     #elif defined WINRT
    110         #include <ppltasks.h>
    111     #elif defined HAVE_CONCURRENCY
    112         #include <ppl.h>
    113     #endif
    114 #endif
    115 
    116 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
    117 #  define CV_PARALLEL_FRAMEWORK "tbb"
    118 #elif defined HAVE_CSTRIPES
    119 #  define CV_PARALLEL_FRAMEWORK "cstripes"
    120 #elif defined HAVE_OPENMP
    121 #  define CV_PARALLEL_FRAMEWORK "openmp"
    122 #elif defined HAVE_GCD
    123 #  define CV_PARALLEL_FRAMEWORK "gcd"
    124 #elif defined WINRT
    125 #  define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
    126 #elif defined HAVE_CONCURRENCY
    127 #  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
    128 #elif defined HAVE_PTHREADS
    129 #  define CV_PARALLEL_FRAMEWORK "pthreads"
    130 #endif
    131 
    132 namespace cv
    133 {
    134     ParallelLoopBody::~ParallelLoopBody() {}
    135 }
    136 
    137 namespace
    138 {
    139 #ifdef CV_PARALLEL_FRAMEWORK
    140     class ParallelLoopBodyWrapper
    141     {
    142     public:
    143         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
    144         {
    145             body = &_body;
    146             wholeRange = _r;
    147             double len = wholeRange.end - wholeRange.start;
    148             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
    149         }
    150         void operator()(const cv::Range& sr) const
    151         {
    152             cv::Range r;
    153             r.start = (int)(wholeRange.start +
    154                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
    155             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
    156                             ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
    157             (*body)(r);
    158         }
    159         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
    160 
    161     protected:
    162         const cv::ParallelLoopBody* body;
    163         cv::Range wholeRange;
    164         int nstripes;
    165     };
    166 
    167 #if defined HAVE_TBB
    168     class ProxyLoopBody : public ParallelLoopBodyWrapper
    169     {
    170     public:
    171         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
    172         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
    173         {}
    174 
    175         void operator ()(const tbb::blocked_range<int>& range) const
    176         {
    177             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
    178         }
    179     };
    180 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
    181     typedef ParallelLoopBodyWrapper ProxyLoopBody;
    182 #elif defined HAVE_GCD
    183     typedef ParallelLoopBodyWrapper ProxyLoopBody;
    184     static void block_function(void* context, size_t index)
    185     {
    186         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
    187         (*ptr_body)(cv::Range((int)index, (int)index + 1));
    188     }
    189 #elif defined WINRT || defined HAVE_CONCURRENCY
    190     class ProxyLoopBody : public ParallelLoopBodyWrapper
    191     {
    192     public:
    193         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
    194         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
    195         {}
    196 
    197         void operator ()(int i) const
    198         {
    199             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
    200         }
    201     };
    202 #else
    203     typedef ParallelLoopBodyWrapper ProxyLoopBody;
    204 #endif
    205 
    206 static int numThreads = -1;
    207 
    208 #if defined HAVE_TBB
    209 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
    210 #elif defined HAVE_CSTRIPES
    211 // nothing for C=
    212 #elif defined HAVE_OPENMP
    213 static int numThreadsMax = omp_get_max_threads();
    214 #elif defined HAVE_GCD
    215 // nothing for GCD
    216 #elif defined WINRT
    217 // nothing for WINRT
    218 #elif defined HAVE_CONCURRENCY
    219 
    220 class SchedPtr
    221 {
    222     Concurrency::Scheduler* sched_;
    223 public:
    224     Concurrency::Scheduler* operator->() { return sched_; }
    225     operator Concurrency::Scheduler*() { return sched_; }
    226 
    227     void operator=(Concurrency::Scheduler* sched)
    228     {
    229         if (sched_) sched_->Release();
    230         sched_ = sched;
    231     }
    232 
    233     SchedPtr() : sched_(0) {}
    234     ~SchedPtr() { *this = 0; }
    235 };
    236 static SchedPtr pplScheduler;
    237 
    238 #endif
    239 
    240 #endif // CV_PARALLEL_FRAMEWORK
    241 
    242 } //namespace
    243 
    244 /* ================================   parallel_for_  ================================ */
    245 
    246 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
    247 {
    248 #ifdef CV_PARALLEL_FRAMEWORK
    249 
    250     if(numThreads != 0)
    251     {
    252         ProxyLoopBody pbody(body, range, nstripes);
    253         cv::Range stripeRange = pbody.stripeRange();
    254         if( stripeRange.end - stripeRange.start == 1 )
    255         {
    256             body(range);
    257             return;
    258         }
    259 
    260 #if defined HAVE_TBB
    261 
    262         tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
    263 
    264 #elif defined HAVE_CSTRIPES
    265 
    266         parallel(MAX(0, numThreads))
    267         {
    268             int offset = stripeRange.start;
    269             int len = stripeRange.end - offset;
    270             Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
    271             pbody(r);
    272             barrier();
    273         }
    274 
    275 #elif defined HAVE_OPENMP
    276 
    277         #pragma omp parallel for schedule(dynamic)
    278         for (int i = stripeRange.start; i < stripeRange.end; ++i)
    279             pbody(Range(i, i + 1));
    280 
    281 #elif defined HAVE_GCD
    282 
    283         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
    284         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
    285 
    286 #elif defined WINRT
    287 
    288         Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
    289 
    290 #elif defined HAVE_CONCURRENCY
    291 
    292         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
    293         {
    294             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
    295         }
    296         else
    297         {
    298             pplScheduler->Attach();
    299             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
    300             Concurrency::CurrentScheduler::Detach();
    301         }
    302 
    303 #elif defined HAVE_PTHREADS
    304         void parallel_for_pthreads(const Range& range, const ParallelLoopBody& body, double nstripes);
    305         parallel_for_pthreads(range, body, nstripes);
    306 
    307 #else
    308 
    309 #error You have hacked and compiling with unsupported parallel framework
    310 
    311 #endif
    312 
    313     }
    314     else
    315 
    316 #endif // CV_PARALLEL_FRAMEWORK
    317     {
    318         (void)nstripes;
    319         body(range);
    320     }
    321 }
    322 
    323 int cv::getNumThreads(void)
    324 {
    325 #ifdef CV_PARALLEL_FRAMEWORK
    326 
    327     if(numThreads == 0)
    328         return 1;
    329 
    330 #endif
    331 
    332 #if defined HAVE_TBB
    333 
    334     return tbbScheduler.is_active()
    335            ? numThreads
    336            : tbb::task_scheduler_init::default_num_threads();
    337 
    338 #elif defined HAVE_CSTRIPES
    339 
    340     return numThreads > 0
    341             ? numThreads
    342             : cv::getNumberOfCPUs();
    343 
    344 #elif defined HAVE_OPENMP
    345 
    346     return omp_get_max_threads();
    347 
    348 #elif defined HAVE_GCD
    349 
    350     return 512; // the GCD thread pool limit
    351 
    352 #elif defined WINRT
    353 
    354     return 0;
    355 
    356 #elif defined HAVE_CONCURRENCY
    357 
    358     return 1 + (pplScheduler == 0
    359         ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
    360         : pplScheduler->GetNumberOfVirtualProcessors());
    361 
    362 #elif defined HAVE_PTHREADS
    363 
    364         size_t parallel_pthreads_get_threads_num();
    365 
    366         return parallel_pthreads_get_threads_num();
    367 
    368 #else
    369 
    370     return 1;
    371 
    372 #endif
    373 }
    374 
    375 void cv::setNumThreads( int threads )
    376 {
    377     (void)threads;
    378 #ifdef CV_PARALLEL_FRAMEWORK
    379     numThreads = threads;
    380 #endif
    381 
    382 #ifdef HAVE_TBB
    383 
    384     if(tbbScheduler.is_active()) tbbScheduler.terminate();
    385     if(threads > 0) tbbScheduler.initialize(threads);
    386 
    387 #elif defined HAVE_CSTRIPES
    388 
    389     return; // nothing needed
    390 
    391 #elif defined HAVE_OPENMP
    392 
    393     if(omp_in_parallel())
    394         return; // can't change number of openmp threads inside a parallel region
    395 
    396     omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
    397 
    398 #elif defined HAVE_GCD
    399 
    400     // unsupported
    401     // there is only private dispatch_queue_set_width() and only for desktop
    402 
    403 #elif defined WINRT
    404 
    405     return;
    406 
    407 #elif defined HAVE_CONCURRENCY
    408 
    409     if (threads <= 0)
    410     {
    411         pplScheduler = 0;
    412     }
    413     else if (threads == 1)
    414     {
    415         // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
    416         numThreads = 0;
    417     }
    418     else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
    419     {
    420         pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
    421                        Concurrency::MinConcurrency, threads-1,
    422                        Concurrency::MaxConcurrency, threads-1));
    423     }
    424 
    425 #elif defined HAVE_PTHREADS
    426 
    427     void parallel_pthreads_set_threads_num(int num);
    428 
    429     parallel_pthreads_set_threads_num(threads);
    430 
    431 #endif
    432 }
    433 
    434 
    435 int cv::getThreadNum(void)
    436 {
    437 #if defined HAVE_TBB
    438     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
    439         return tbb::task_arena::current_slot();
    440     #else
    441         return 0;
    442     #endif
    443 #elif defined HAVE_CSTRIPES
    444     return pix();
    445 #elif defined HAVE_OPENMP
    446     return omp_get_thread_num();
    447 #elif defined HAVE_GCD
    448     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
    449 #elif defined WINRT
    450     return 0;
    451 #elif defined HAVE_CONCURRENCY
    452     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
    453 #else
    454     return 0;
    455 #endif
    456 }
    457 
    458 #ifdef ANDROID
    459 static inline int getNumberOfCPUsImpl()
    460 {
    461    FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
    462    if(!cpuPossible)
    463        return 1;
    464 
    465    char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
    466    char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
    467    fclose(cpuPossible);
    468    if(!pbuf)
    469       return 1;
    470 
    471    //parse string of form "0-1,3,5-7,10,13-15"
    472    int cpusAvailable = 0;
    473 
    474    while(*pbuf)
    475    {
    476       const char* pos = pbuf;
    477       bool range = false;
    478       while(*pbuf && *pbuf != ',')
    479       {
    480           if(*pbuf == '-') range = true;
    481           ++pbuf;
    482       }
    483       if(*pbuf) *pbuf++ = 0;
    484       if(!range)
    485         ++cpusAvailable;
    486       else
    487       {
    488           int rstart = 0, rend = 0;
    489           sscanf(pos, "%d-%d", &rstart, &rend);
    490           cpusAvailable += rend - rstart + 1;
    491       }
    492 
    493    }
    494    return cpusAvailable ? cpusAvailable : 1;
    495 }
    496 #endif
    497 
    498 int cv::getNumberOfCPUs(void)
    499 {
    500 #if defined WIN32 || defined _WIN32
    501     SYSTEM_INFO sysinfo;
    502 #if defined(_M_ARM) || defined(_M_X64) || defined(WINRT)
    503     GetNativeSystemInfo( &sysinfo );
    504 #else
    505     GetSystemInfo( &sysinfo );
    506 #endif
    507 
    508     return (int)sysinfo.dwNumberOfProcessors;
    509 #elif defined ANDROID
    510     static int ncpus = getNumberOfCPUsImpl();
    511     return ncpus;
    512 #elif defined __linux__
    513     return (int)sysconf( _SC_NPROCESSORS_ONLN );
    514 #elif defined __APPLE__
    515     int numCPU=0;
    516     int mib[4];
    517     size_t len = sizeof(numCPU);
    518 
    519     /* set the mib for hw.ncpu */
    520     mib[0] = CTL_HW;
    521     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
    522 
    523     /* get the number of CPUs from the system */
    524     sysctl(mib, 2, &numCPU, &len, NULL, 0);
    525 
    526     if( numCPU < 1 )
    527     {
    528         mib[1] = HW_NCPU;
    529         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
    530 
    531         if( numCPU < 1 )
    532             numCPU = 1;
    533     }
    534 
    535     return (int)numCPU;
    536 #else
    537     return 1;
    538 #endif
    539 }
    540 
    541 const char* cv::currentParallelFramework() {
    542 #ifdef CV_PARALLEL_FRAMEWORK
    543     return CV_PARALLEL_FRAMEWORK;
    544 #else
    545     return NULL;
    546 #endif
    547 }
    548 
    549 CV_IMPL void cvSetNumThreads(int nt)
    550 {
    551     cv::setNumThreads(nt);
    552 }
    553 
    554 CV_IMPL int cvGetNumThreads()
    555 {
    556     return cv::getNumThreads();
    557 }
    558 
    559 CV_IMPL int cvGetThreadNum()
    560 {
    561     return cv::getThreadNum();
    562 }
    563