1 /* 2 * Copyright 2017 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkThreadedBMPDevice.h" 9 10 #include "SkPath.h" 11 #include "SkTaskGroup.h" 12 #include "SkVertices.h" 13 14 #include <mutex> 15 #include <vector> 16 17 constexpr int MAX_CACHE_LINE = 64; 18 19 // Some basic logics and data structures that are shared across the current experimental schedulers. 20 class TiledDrawSchedulerBase : public TiledDrawScheduler { 21 public: 22 TiledDrawSchedulerBase(int tiles, WorkFunc work) 23 : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(std::move(work)) {} 24 25 void signal() override { 26 fDrawCnt++; 27 } 28 void finish() override { 29 fIsFinishing.store(true, std::memory_order_relaxed); 30 } 31 32 protected: 33 const int fTileCnt; 34 std::atomic<bool> fIsFinishing; 35 std::atomic<int> fDrawCnt; 36 WorkFunc fWork; 37 }; 38 39 class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase { 40 public: 41 TiledDrawSchedulerBySpinning(int tiles, WorkFunc work) 42 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {} 43 44 void signal() final { this->TiledDrawSchedulerBase::signal(); } 45 void finish() final { this->TiledDrawSchedulerBase::finish(); } 46 47 bool next(int& tileIndex) final { 48 int& drawIndex = fScheduleData[tileIndex].fDrawIndex; 49 SkASSERT(drawIndex <= fDrawCnt); 50 while (true) { 51 bool isFinishing = fIsFinishing.load(std::memory_order_relaxed); 52 if (isFinishing && drawIndex >= fDrawCnt) { 53 return false; 54 } else if (drawIndex < fDrawCnt) { 55 fWork(tileIndex, drawIndex++); 56 return true; 57 } 58 } 59 } 60 61 private: 62 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines 63 struct alignas(MAX_CACHE_LINE) TileScheduleData { 64 TileScheduleData() : fDrawIndex(0) {} 65 66 int fDrawIndex; // next draw index for this tile 67 }; 68 69 std::vector<TileScheduleData> fScheduleData; 70 }; 71 72 class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase { 73 public: 74 TiledDrawSchedulerFlexible(int tiles, WorkFunc work) 75 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {} 76 77 void signal() final { this->TiledDrawSchedulerBase::signal(); } 78 void finish() final { this->TiledDrawSchedulerBase::finish(); } 79 80 bool next(int& tileIndex) final { 81 int failCnt = 0; 82 while (true) { 83 TileScheduleData& scheduleData = fScheduleData[tileIndex]; 84 bool locked = scheduleData.fMutex.try_lock(); 85 bool processed = false; 86 87 if (locked) { 88 if (scheduleData.fDrawIndex < fDrawCnt) { 89 fWork(tileIndex, scheduleData.fDrawIndex++); 90 processed = true; 91 } else { 92 failCnt += fIsFinishing.load(std::memory_order_relaxed); 93 } 94 scheduleData.fMutex.unlock(); 95 } 96 97 if (processed) { 98 return true; 99 } else { 100 if (failCnt >= fTileCnt) { 101 return false; 102 } 103 tileIndex = (tileIndex + 1) % fTileCnt; 104 } 105 } 106 } 107 108 private: 109 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines 110 struct alignas(MAX_CACHE_LINE) TileScheduleData { 111 TileScheduleData() : fDrawIndex(0) {} 112 113 int fDrawIndex; // next draw index for this tile 114 std::mutex fMutex; // the mutex for the thread to acquire 115 }; 116 117 std::vector<TileScheduleData> fScheduleData; 118 }; 119 120 class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase { 121 public: 122 TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work) 123 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {} 124 125 126 void signal() final { 127 this->TiledDrawSchedulerBase::signal(); 128 signalRoot(); 129 } 130 131 void finish() final { 132 this->TiledDrawSchedulerBase::finish(); 133 signalRoot(); 134 } 135 136 bool next(int& tileIndex) final { 137 SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt); 138 TileScheduleData& scheduleData = fScheduleData[tileIndex]; 139 while (true) { 140 scheduleData.fSemaphore.wait(); 141 int leftChild = (tileIndex + 1) * 2 - 1; 142 int rightChild = leftChild + 1; 143 if (leftChild < fTileCnt) { 144 fScheduleData[leftChild].fSemaphore.signal(); 145 } 146 if (rightChild < fTileCnt) { 147 fScheduleData[rightChild].fSemaphore.signal(); 148 } 149 150 bool isFinishing = fIsFinishing.load(std::memory_order_relaxed); 151 if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) { 152 return false; 153 } else { 154 SkASSERT(scheduleData.fDrawIndex < fDrawCnt); 155 fWork(tileIndex, scheduleData.fDrawIndex++); 156 return true; 157 } 158 } 159 } 160 161 private: 162 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines 163 struct alignas(MAX_CACHE_LINE) TileScheduleData { 164 TileScheduleData() : fDrawIndex(0) {} 165 166 int fDrawIndex; 167 SkSemaphore fSemaphore; 168 }; 169 170 void signalRoot() { 171 SkASSERT(fTileCnt > 0); 172 fScheduleData[0].fSemaphore.signal(); 173 } 174 175 std::vector<TileScheduleData> fScheduleData; 176 }; 177 178 void SkThreadedBMPDevice::startThreads() { 179 SkASSERT(fThreadFutures.count() == 0); 180 SkASSERT(fQueueSize == 0); 181 182 TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){ 183 auto& element = fQueue[drawIndex]; 184 if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) { 185 element.fDrawFn(fTileBounds[tileIndex]); 186 } 187 }; 188 189 // using Scheduler = TiledDrawSchedulerBySemaphores; 190 // using Scheduler = TiledDrawSchedulerBySpinning; 191 using Scheduler = TiledDrawSchedulerFlexible; 192 fScheduler.reset(new Scheduler(fTileCnt, work)); 193 for(int i = 0; i < fThreadCnt; ++i) { 194 fThreadFutures.push_back(std::async(std::launch::async, [this, i]() { 195 int tileIndex = i; 196 while (fScheduler->next(tileIndex)) {} 197 })); 198 } 199 } 200 201 void SkThreadedBMPDevice::finishThreads() { 202 fScheduler->finish(); 203 for(auto& future : fThreadFutures) { 204 future.wait(); 205 } 206 fThreadFutures.reset(); 207 fQueueSize = 0; 208 fScheduler.reset(nullptr); 209 } 210 211 SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads) 212 : INHERITED(bitmap) 213 , fTileCnt(tiles) 214 , fThreadCnt(threads <= 0 ? tiles : threads) 215 { 216 // Tiling using stripes for now; we'll explore better tiling in the future. 217 int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1); 218 int w = bitmap.width(); 219 int top = 0; 220 for(int tid = 0; tid < fTileCnt; ++tid, top += h) { 221 fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h)); 222 } 223 fQueueSize = 0; 224 startThreads(); 225 } 226 227 void SkThreadedBMPDevice::flush() { 228 finishThreads(); 229 startThreads(); 230 } 231 232 // Having this captured in lambda seems to be faster than saving this in DrawElement 233 struct SkThreadedBMPDevice::DrawState { 234 SkPixmap fDst; 235 SkMatrix fMatrix; 236 SkRasterClip fRC; 237 238 explicit DrawState(SkThreadedBMPDevice* dev) { 239 // we need fDst to be set, and if we're actually drawing, to dirty the genID 240 if (!dev->accessPixels(&fDst)) { 241 // NoDrawDevice uses us (why?) so we have to catch this case w/ no pixels 242 fDst.reset(dev->imageInfo(), nullptr, 0); 243 } 244 fMatrix = dev->ctm(); 245 fRC = dev->fRCStack.rc(); 246 } 247 248 SkDraw getThreadDraw(SkRasterClip& threadRC, const SkIRect& threadBounds) const { 249 SkDraw draw; 250 draw.fDst = fDst; 251 draw.fMatrix = &fMatrix; 252 threadRC = fRC; 253 threadRC.op(threadBounds, SkRegion::kIntersect_Op); 254 draw.fRC = &threadRC; 255 return draw; 256 } 257 }; 258 259 SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const { 260 if (drawBounds.isLargest()) { 261 return SkIRect::MakeLargest(); 262 } 263 SkRect transformedBounds; 264 this->ctm().mapRect(&transformedBounds, drawBounds); 265 return transformedBounds.roundOut(); 266 } 267 268 // The do {...} while (false) is to enforce trailing semicolon as suggested by mtklein@ 269 #define THREADED_DRAW(drawBounds, actualDrawCall) \ 270 do { \ 271 DrawState ds(this); \ 272 SkASSERT(fQueueSize < MAX_QUEUE_SIZE); \ 273 fQueue[fQueueSize++] = { \ 274 this->transformDrawBounds(drawBounds), \ 275 [=](const SkIRect& tileBounds) { \ 276 SkRasterClip tileRC; \ 277 SkDraw draw = ds.getThreadDraw(tileRC, tileBounds); \ 278 draw.actualDrawCall; \ 279 }, \ 280 }; \ 281 fScheduler->signal(); \ 282 } while (false) 283 284 static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) { 285 SkRect result; 286 if (p.canComputeFastBounds()) { 287 result = p.computeFastBounds(r, &result); 288 } else { 289 result = SkRect::MakeLargest(); 290 } 291 return result; 292 } 293 294 void SkThreadedBMPDevice::drawPaint(const SkPaint& paint) { 295 THREADED_DRAW(SkRect::MakeLargest(), drawPaint(paint)); 296 } 297 298 void SkThreadedBMPDevice::drawPoints(SkCanvas::PointMode mode, size_t count, 299 const SkPoint pts[], const SkPaint& paint) { 300 // TODO tighter drawBounds 301 SkRect drawBounds = SkRect::MakeLargest(); 302 THREADED_DRAW(drawBounds, drawPoints(mode, count, pts, paint, nullptr)); 303 } 304 305 void SkThreadedBMPDevice::drawRect(const SkRect& r, const SkPaint& paint) { 306 SkRect drawBounds = get_fast_bounds(r, paint); 307 THREADED_DRAW(drawBounds, drawRect(r, paint)); 308 } 309 310 void SkThreadedBMPDevice::drawRRect(const SkRRect& rrect, const SkPaint& paint) { 311 #ifdef SK_IGNORE_BLURRED_RRECT_OPT 312 SkPath path; 313 314 path.addRRect(rrect); 315 // call the VIRTUAL version, so any subclasses who do handle drawPath aren't 316 // required to override drawRRect. 317 this->drawPath(path, paint, nullptr, false); 318 #else 319 SkRect drawBounds = get_fast_bounds(rrect.getBounds(), paint); 320 THREADED_DRAW(drawBounds, drawRRect(rrect, paint)); 321 #endif 322 } 323 324 void SkThreadedBMPDevice::drawPath(const SkPath& path, const SkPaint& paint, 325 const SkMatrix* prePathMatrix, bool pathIsMutable) { 326 SkRect drawBounds = path.isInverseFillType() ? SkRect::MakeLargest() 327 : get_fast_bounds(path.getBounds(), paint); 328 // For thread safety, make path imutable 329 THREADED_DRAW(drawBounds, drawPath(path, paint, prePathMatrix, false)); 330 } 331 332 void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y, 333 const SkPaint& paint) { 334 SkMatrix matrix = SkMatrix::MakeTrans(x, y); 335 LogDrawScaleFactor(SkMatrix::Concat(this->ctm(), matrix), paint.getFilterQuality()); 336 SkRect drawBounds = SkRect::MakeWH(bitmap.width(), bitmap.height()); 337 matrix.mapRect(&drawBounds); 338 THREADED_DRAW(drawBounds, drawBitmap(bitmap, matrix, nullptr, paint)); 339 } 340 341 void SkThreadedBMPDevice::drawSprite(const SkBitmap& bitmap, int x, int y, const SkPaint& paint) { 342 SkRect drawBounds = SkRect::MakeXYWH(x, y, bitmap.width(), bitmap.height()); 343 THREADED_DRAW(drawBounds, drawSprite(bitmap, x, y, paint)); 344 } 345 346 void SkThreadedBMPDevice::drawText(const void* text, size_t len, SkScalar x, SkScalar y, 347 const SkPaint& paint) { 348 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds 349 THREADED_DRAW(drawBounds, drawText((const char*)text, len, x, y, paint, &this->surfaceProps())); 350 } 351 352 void SkThreadedBMPDevice::drawPosText(const void* text, size_t len, const SkScalar xpos[], 353 int scalarsPerPos, const SkPoint& offset, const SkPaint& paint) { 354 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds 355 THREADED_DRAW(drawBounds, drawPosText((const char*)text, len, xpos, scalarsPerPos, offset, 356 paint, &surfaceProps())); 357 } 358 359 void SkThreadedBMPDevice::drawVertices(const SkVertices* vertices, SkBlendMode bmode, 360 const SkPaint& paint) { 361 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds 362 THREADED_DRAW(drawBounds, drawVertices(vertices->mode(), vertices->vertexCount(), 363 vertices->positions(), vertices->texCoords(), 364 vertices->colors(), bmode, vertices->indices(), 365 vertices->indexCount(), paint)); 366 } 367 368 void SkThreadedBMPDevice::drawDevice(SkBaseDevice* device, int x, int y, const SkPaint& paint) { 369 SkASSERT(!paint.getImageFilter()); 370 SkRect drawBounds = SkRect::MakeXYWH(x, y, device->width(), device->height()); 371 THREADED_DRAW(drawBounds, 372 drawSprite(static_cast<SkBitmapDevice*>(device)->fBitmap, x, y, paint)); 373 } 374