Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2017 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkThreadedBMPDevice.h"
      9 
     10 #include "SkPath.h"
     11 #include "SkTaskGroup.h"
     12 #include "SkVertices.h"
     13 
     14 #include <mutex>
     15 #include <vector>
     16 
     17 constexpr int MAX_CACHE_LINE = 64;
     18 
     19 // Some basic logics and data structures that are shared across the current experimental schedulers.
     20 class TiledDrawSchedulerBase : public TiledDrawScheduler {
     21 public:
     22     TiledDrawSchedulerBase(int tiles, WorkFunc work)
     23             : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(std::move(work)) {}
     24 
     25     void signal() override {
     26         fDrawCnt++;
     27     }
     28     void finish() override {
     29         fIsFinishing.store(true, std::memory_order_relaxed);
     30     }
     31 
     32 protected:
     33     const int                   fTileCnt;
     34     std::atomic<bool>           fIsFinishing;
     35     std::atomic<int>            fDrawCnt;
     36     WorkFunc                    fWork;
     37 };
     38 
     39 class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase {
     40 public:
     41     TiledDrawSchedulerBySpinning(int tiles, WorkFunc work)
     42             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
     43 
     44     void signal() final { this->TiledDrawSchedulerBase::signal(); }
     45     void finish() final { this->TiledDrawSchedulerBase::finish(); }
     46 
     47     bool next(int& tileIndex) final {
     48         int& drawIndex = fScheduleData[tileIndex].fDrawIndex;
     49         SkASSERT(drawIndex <= fDrawCnt);
     50         while (true) {
     51             bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
     52             if (isFinishing && drawIndex >= fDrawCnt) {
     53                 return false;
     54             } else if (drawIndex < fDrawCnt) {
     55                 fWork(tileIndex, drawIndex++);
     56                 return true;
     57             }
     58         }
     59     }
     60 
     61 private:
     62     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
     63     struct alignas(MAX_CACHE_LINE) TileScheduleData {
     64         TileScheduleData() : fDrawIndex(0) {}
     65 
     66         int fDrawIndex; // next draw index for this tile
     67     };
     68 
     69     std::vector<TileScheduleData>  fScheduleData;
     70 };
     71 
     72 class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase {
     73 public:
     74     TiledDrawSchedulerFlexible(int tiles, WorkFunc work)
     75             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
     76 
     77     void signal() final { this->TiledDrawSchedulerBase::signal(); }
     78     void finish() final { this->TiledDrawSchedulerBase::finish(); }
     79 
     80     bool next(int& tileIndex) final {
     81         int failCnt = 0;
     82         while (true) {
     83             TileScheduleData& scheduleData = fScheduleData[tileIndex];
     84             bool locked = scheduleData.fMutex.try_lock();
     85             bool processed = false;
     86 
     87             if (locked) {
     88                 if (scheduleData.fDrawIndex < fDrawCnt) {
     89                     fWork(tileIndex, scheduleData.fDrawIndex++);
     90                     processed = true;
     91                 } else {
     92                     failCnt += fIsFinishing.load(std::memory_order_relaxed);
     93                 }
     94                 scheduleData.fMutex.unlock();
     95             }
     96 
     97             if (processed) {
     98                 return true;
     99             } else {
    100                 if (failCnt >= fTileCnt) {
    101                     return false;
    102                 }
    103                 tileIndex = (tileIndex + 1) % fTileCnt;
    104             }
    105         }
    106     }
    107 
    108 private:
    109     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
    110     struct alignas(MAX_CACHE_LINE) TileScheduleData {
    111         TileScheduleData() : fDrawIndex(0) {}
    112 
    113         int         fDrawIndex; // next draw index for this tile
    114         std::mutex  fMutex;     // the mutex for the thread to acquire
    115     };
    116 
    117     std::vector<TileScheduleData>  fScheduleData;
    118 };
    119 
    120 class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase {
    121 public:
    122     TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work)
    123             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
    124 
    125 
    126     void signal() final {
    127         this->TiledDrawSchedulerBase::signal();
    128         signalRoot();
    129     }
    130 
    131     void finish() final {
    132         this->TiledDrawSchedulerBase::finish();
    133         signalRoot();
    134     }
    135 
    136     bool next(int& tileIndex) final {
    137         SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt);
    138         TileScheduleData& scheduleData = fScheduleData[tileIndex];
    139         while (true) {
    140             scheduleData.fSemaphore.wait();
    141             int leftChild = (tileIndex + 1) * 2 - 1;
    142             int rightChild = leftChild + 1;
    143             if (leftChild < fTileCnt) {
    144                 fScheduleData[leftChild].fSemaphore.signal();
    145             }
    146             if (rightChild < fTileCnt) {
    147                 fScheduleData[rightChild].fSemaphore.signal();
    148             }
    149 
    150             bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
    151             if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) {
    152                 return false;
    153             } else {
    154                 SkASSERT(scheduleData.fDrawIndex < fDrawCnt);
    155                 fWork(tileIndex, scheduleData.fDrawIndex++);
    156                 return true;
    157             }
    158         }
    159     }
    160 
    161 private:
    162     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
    163     struct alignas(MAX_CACHE_LINE) TileScheduleData {
    164         TileScheduleData() : fDrawIndex(0) {}
    165 
    166         int         fDrawIndex;
    167         SkSemaphore fSemaphore;
    168     };
    169 
    170     void signalRoot() {
    171         SkASSERT(fTileCnt > 0);
    172         fScheduleData[0].fSemaphore.signal();
    173     }
    174 
    175     std::vector<TileScheduleData> fScheduleData;
    176 };
    177 
    178 void SkThreadedBMPDevice::startThreads() {
    179     SkASSERT(fThreadFutures.count() == 0);
    180     SkASSERT(fQueueSize == 0);
    181 
    182     TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){
    183         auto& element = fQueue[drawIndex];
    184         if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) {
    185             element.fDrawFn(fTileBounds[tileIndex]);
    186         }
    187     };
    188 
    189     // using Scheduler = TiledDrawSchedulerBySemaphores;
    190     // using Scheduler = TiledDrawSchedulerBySpinning;
    191     using Scheduler = TiledDrawSchedulerFlexible;
    192     fScheduler.reset(new Scheduler(fTileCnt, work));
    193     for(int i = 0; i < fThreadCnt; ++i) {
    194         fThreadFutures.push_back(std::async(std::launch::async, [this, i]() {
    195             int tileIndex = i;
    196             while (fScheduler->next(tileIndex)) {}
    197         }));
    198     }
    199 }
    200 
    201 void SkThreadedBMPDevice::finishThreads() {
    202     fScheduler->finish();
    203     for(auto& future : fThreadFutures) {
    204         future.wait();
    205     }
    206     fThreadFutures.reset();
    207     fQueueSize = 0;
    208     fScheduler.reset(nullptr);
    209 }
    210 
    211 SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads)
    212         : INHERITED(bitmap)
    213         , fTileCnt(tiles)
    214         , fThreadCnt(threads <= 0 ? tiles : threads)
    215 {
    216     // Tiling using stripes for now; we'll explore better tiling in the future.
    217     int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1);
    218     int w = bitmap.width();
    219     int top = 0;
    220     for(int tid = 0; tid < fTileCnt; ++tid, top += h) {
    221         fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
    222     }
    223     fQueueSize = 0;
    224     startThreads();
    225 }
    226 
    227 void SkThreadedBMPDevice::flush() {
    228     finishThreads();
    229     startThreads();
    230 }
    231 
    232 // Having this captured in lambda seems to be faster than saving this in DrawElement
    233 struct SkThreadedBMPDevice::DrawState {
    234     SkPixmap fDst;
    235     SkMatrix fMatrix;
    236     SkRasterClip fRC;
    237 
    238     explicit DrawState(SkThreadedBMPDevice* dev) {
    239         // we need fDst to be set, and if we're actually drawing, to dirty the genID
    240         if (!dev->accessPixels(&fDst)) {
    241             // NoDrawDevice uses us (why?) so we have to catch this case w/ no pixels
    242             fDst.reset(dev->imageInfo(), nullptr, 0);
    243         }
    244         fMatrix = dev->ctm();
    245         fRC = dev->fRCStack.rc();
    246     }
    247 
    248     SkDraw getThreadDraw(SkRasterClip& threadRC, const SkIRect& threadBounds) const {
    249         SkDraw draw;
    250         draw.fDst = fDst;
    251         draw.fMatrix = &fMatrix;
    252         threadRC = fRC;
    253         threadRC.op(threadBounds, SkRegion::kIntersect_Op);
    254         draw.fRC = &threadRC;
    255         return draw;
    256     }
    257 };
    258 
    259 SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const {
    260     if (drawBounds.isLargest()) {
    261         return SkIRect::MakeLargest();
    262     }
    263     SkRect transformedBounds;
    264     this->ctm().mapRect(&transformedBounds, drawBounds);
    265     return transformedBounds.roundOut();
    266 }
    267 
    268 // The do {...} while (false) is to enforce trailing semicolon as suggested by mtklein@
    269 #define THREADED_DRAW(drawBounds, actualDrawCall)                                                  \
    270     do {                                                                                           \
    271         DrawState ds(this);                                                                        \
    272         SkASSERT(fQueueSize < MAX_QUEUE_SIZE);                                                     \
    273         fQueue[fQueueSize++] = {                                                                   \
    274             this->transformDrawBounds(drawBounds),                                                 \
    275             [=](const SkIRect& tileBounds) {                                                       \
    276                 SkRasterClip tileRC;                                                               \
    277                 SkDraw draw = ds.getThreadDraw(tileRC, tileBounds);                                \
    278                 draw.actualDrawCall;                                                               \
    279             },                                                                                     \
    280         };                                                                                         \
    281         fScheduler->signal();                                                                      \
    282     } while (false)
    283 
    284 static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) {
    285     SkRect result;
    286     if (p.canComputeFastBounds()) {
    287         result = p.computeFastBounds(r, &result);
    288     } else {
    289         result = SkRect::MakeLargest();
    290     }
    291     return result;
    292 }
    293 
    294 void SkThreadedBMPDevice::drawPaint(const SkPaint& paint) {
    295     THREADED_DRAW(SkRect::MakeLargest(), drawPaint(paint));
    296 }
    297 
    298 void SkThreadedBMPDevice::drawPoints(SkCanvas::PointMode mode, size_t count,
    299         const SkPoint pts[], const SkPaint& paint) {
    300     // TODO tighter drawBounds
    301     SkRect drawBounds = SkRect::MakeLargest();
    302     THREADED_DRAW(drawBounds, drawPoints(mode, count, pts, paint, nullptr));
    303 }
    304 
    305 void SkThreadedBMPDevice::drawRect(const SkRect& r, const SkPaint& paint) {
    306     SkRect drawBounds = get_fast_bounds(r, paint);
    307     THREADED_DRAW(drawBounds, drawRect(r, paint));
    308 }
    309 
    310 void SkThreadedBMPDevice::drawRRect(const SkRRect& rrect, const SkPaint& paint) {
    311 #ifdef SK_IGNORE_BLURRED_RRECT_OPT
    312     SkPath  path;
    313 
    314     path.addRRect(rrect);
    315     // call the VIRTUAL version, so any subclasses who do handle drawPath aren't
    316     // required to override drawRRect.
    317     this->drawPath(path, paint, nullptr, false);
    318 #else
    319     SkRect drawBounds = get_fast_bounds(rrect.getBounds(), paint);
    320     THREADED_DRAW(drawBounds, drawRRect(rrect, paint));
    321 #endif
    322 }
    323 
    324 void SkThreadedBMPDevice::drawPath(const SkPath& path, const SkPaint& paint,
    325         const SkMatrix* prePathMatrix, bool pathIsMutable) {
    326     SkRect drawBounds = path.isInverseFillType() ? SkRect::MakeLargest()
    327                                                  : get_fast_bounds(path.getBounds(), paint);
    328     // For thread safety, make path imutable
    329     THREADED_DRAW(drawBounds, drawPath(path, paint, prePathMatrix, false));
    330 }
    331 
    332 void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y,
    333         const SkPaint& paint) {
    334     SkMatrix matrix = SkMatrix::MakeTrans(x, y);
    335     LogDrawScaleFactor(SkMatrix::Concat(this->ctm(), matrix), paint.getFilterQuality());
    336     SkRect drawBounds = SkRect::MakeWH(bitmap.width(), bitmap.height());
    337     matrix.mapRect(&drawBounds);
    338     THREADED_DRAW(drawBounds, drawBitmap(bitmap, matrix, nullptr, paint));
    339 }
    340 
    341 void SkThreadedBMPDevice::drawSprite(const SkBitmap& bitmap, int x, int y, const SkPaint& paint) {
    342     SkRect drawBounds = SkRect::MakeXYWH(x, y, bitmap.width(), bitmap.height());
    343     THREADED_DRAW(drawBounds, drawSprite(bitmap, x, y, paint));
    344 }
    345 
    346 void SkThreadedBMPDevice::drawText(const void* text, size_t len, SkScalar x, SkScalar y,
    347         const SkPaint& paint) {
    348     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
    349     THREADED_DRAW(drawBounds, drawText((const char*)text, len, x, y, paint, &this->surfaceProps()));
    350 }
    351 
    352 void SkThreadedBMPDevice::drawPosText(const void* text, size_t len, const SkScalar xpos[],
    353         int scalarsPerPos, const SkPoint& offset, const SkPaint& paint) {
    354     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
    355     THREADED_DRAW(drawBounds, drawPosText((const char*)text, len, xpos, scalarsPerPos, offset,
    356                                           paint, &surfaceProps()));
    357 }
    358 
    359 void SkThreadedBMPDevice::drawVertices(const SkVertices* vertices, SkBlendMode bmode,
    360         const SkPaint& paint) {
    361     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
    362     THREADED_DRAW(drawBounds, drawVertices(vertices->mode(), vertices->vertexCount(),
    363                                            vertices->positions(), vertices->texCoords(),
    364                                            vertices->colors(), bmode, vertices->indices(),
    365                                            vertices->indexCount(), paint));
    366 }
    367 
    368 void SkThreadedBMPDevice::drawDevice(SkBaseDevice* device, int x, int y, const SkPaint& paint) {
    369     SkASSERT(!paint.getImageFilter());
    370     SkRect drawBounds = SkRect::MakeXYWH(x, y, device->width(), device->height());
    371     THREADED_DRAW(drawBounds,
    372                   drawSprite(static_cast<SkBitmapDevice*>(device)->fBitmap, x, y, paint));
    373 }
    374