1 /* 2 * Copyright 2016 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED 9 #define SkLinearBitmapPipeline_sampler_DEFINED 10 11 #include <tuple> 12 13 #include "SkAutoMalloc.h" 14 #include "SkColor.h" 15 #include "SkColorPriv.h" 16 #include "SkFixed.h" // for SkFixed1 only. Don't use SkFixed in this file. 17 #include "SkHalf.h" 18 #include "SkLinearBitmapPipeline_core.h" 19 #include "SkNx.h" 20 #include "SkPM4fPriv.h" 21 22 namespace { 23 // Explaination of the math: 24 // 1 - x x 25 // +--------+--------+ 26 // | | | 27 // 1 - y | px00 | px10 | 28 // | | | 29 // +--------+--------+ 30 // | | | 31 // y | px01 | px11 | 32 // | | | 33 // +--------+--------+ 34 // 35 // 36 // Given a pixelxy each is multiplied by a different factor derived from the fractional part of x 37 // and y: 38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy 39 // * px10 -> x(1 - y) = x - xy 40 // * px01 -> (1 - x)y = y - xy 41 // * px11 -> xy 42 // So x * y is calculated first and then used to calculate all the other factors. 43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, 44 Sk4f px01, Sk4f px11) { 45 // Calculate fractional xs and ys. 46 Sk4s fxs = xs - xs.floor(); 47 Sk4s fys = ys - ys.floor(); 48 Sk4s fxys{fxs * fys}; 49 Sk4f sum = px11 * fxys; 50 sum = sum + px01 * (fys - fxys); 51 sum = sum + px10 * (fxs - fxys); 52 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); 53 return sum; 54 } 55 56 //////////////////////////////////////////////////////////////////////////////////////////////////// 57 // PixelGetter is the lowest level interface to the source data. There is a PixelConverter for each 58 // of the different SkColorTypes. 59 template <SkColorType, SkGammaType> class PixelConverter; 60 61 // Alpha handling: 62 // The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate 63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can 64 // modulate this color later. 65 template <> 66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> { 67 public: 68 using Element = uint8_t; 69 PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) { 70 fTintColor = SkColor4f::FromColor(tintColor); 71 fTintColor.fA = 1.0f; 72 } 73 74 Sk4f toSk4f(const Element pixel) const { 75 return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f)); 76 } 77 78 private: 79 SkColor4f fTintColor; 80 }; 81 82 template <SkGammaType gammaType> 83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) { 84 return swizzle_rb_if_bgra( 85 (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel) 86 : Sk4f_fromL32(pixel)); 87 } 88 89 template <SkGammaType gammaType> 90 class PixelConverter<kRGB_565_SkColorType, gammaType> { 91 public: 92 using Element = uint16_t; 93 PixelConverter(const SkPixmap& srcPixmap) { } 94 95 Sk4f toSk4f(Element pixel) const { 96 return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel)); 97 } 98 }; 99 100 template <SkGammaType gammaType> 101 class PixelConverter<kARGB_4444_SkColorType, gammaType> { 102 public: 103 using Element = uint16_t; 104 PixelConverter(const SkPixmap& srcPixmap) { } 105 106 Sk4f toSk4f(Element pixel) const { 107 return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel)); 108 } 109 }; 110 111 template <SkGammaType gammaType> 112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> { 113 public: 114 using Element = uint32_t; 115 PixelConverter(const SkPixmap& srcPixmap) { } 116 117 Sk4f toSk4f(Element pixel) const { 118 return gammaType == kSRGB_SkGammaType 119 ? Sk4f_fromS32(pixel) 120 : Sk4f_fromL32(pixel); 121 } 122 }; 123 124 template <SkGammaType gammaType> 125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> { 126 public: 127 using Element = uint32_t; 128 PixelConverter(const SkPixmap& srcPixmap) { } 129 130 Sk4f toSk4f(Element pixel) const { 131 return swizzle_rb( 132 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel)); 133 } 134 }; 135 136 template <SkGammaType gammaType> 137 class PixelConverter<kIndex_8_SkColorType, gammaType> { 138 public: 139 using Element = uint8_t; 140 PixelConverter(const SkPixmap& srcPixmap) 141 : fColorTableSize(srcPixmap.ctable()->count()){ 142 SkColorTable* skColorTable = srcPixmap.ctable(); 143 SkASSERT(skColorTable != nullptr); 144 145 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); 146 for (int i = 0; i < fColorTableSize; i++) { 147 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); 148 } 149 } 150 151 PixelConverter(const PixelConverter& strategy) 152 : fColorTableSize{strategy.fColorTableSize}{ 153 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); 154 for (int i = 0; i < fColorTableSize; i++) { 155 fColorTable[i] = strategy.fColorTable[i]; 156 } 157 } 158 159 Sk4f toSk4f(Element index) const { 160 return fColorTable[index]; 161 } 162 163 private: 164 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; 165 const int fColorTableSize; 166 SkAutoMalloc fColorTableStorage{kColorTableSize}; 167 Sk4f* fColorTable; 168 }; 169 170 template <SkGammaType gammaType> 171 class PixelConverter<kGray_8_SkColorType, gammaType> { 172 public: 173 using Element = uint8_t; 174 PixelConverter(const SkPixmap& srcPixmap) { } 175 176 Sk4f toSk4f(Element pixel) const { 177 float gray = (gammaType == kSRGB_SkGammaType) 178 ? sk_linear_from_srgb[pixel] 179 : pixel * (1/255.0f); 180 return {gray, gray, gray, 1.0f}; 181 } 182 }; 183 184 template <> 185 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> { 186 public: 187 using Element = uint64_t; 188 PixelConverter(const SkPixmap& srcPixmap) { } 189 190 Sk4f toSk4f(const Element pixel) const { 191 return SkHalfToFloat_finite_ftz(pixel); 192 } 193 }; 194 195 class PixelAccessorShim { 196 public: 197 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor) 198 : fPixelAccessor(accessor) { } 199 200 void SK_VECTORCALL getFewPixels( 201 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { 202 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); 203 } 204 205 void SK_VECTORCALL get4Pixels( 206 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 207 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); 208 } 209 210 void get4Pixels( 211 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 212 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); 213 } 214 215 Sk4f getPixelFromRow(const void* row, int index) const { 216 return fPixelAccessor->getPixelFromRow(row, index); 217 } 218 219 Sk4f getPixelAt(int index) const { 220 return fPixelAccessor->getPixelAt(index); 221 } 222 223 const void* row(int y) const { 224 return fPixelAccessor->row(y); 225 } 226 227 private: 228 SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor; 229 }; 230 231 //////////////////////////////////////////////////////////////////////////////////////////////////// 232 // PixelAccessor handles all the same plumbing for all the PixelGetters. 233 template <SkColorType colorType, SkGammaType gammaType> 234 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface { 235 using Element = typename PixelConverter<colorType, gammaType>::Element; 236 public: 237 template <typename... Args> 238 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args) 239 : fSrc{static_cast<const Element*>(srcPixmap.addr())} 240 , fWidth{srcPixmap.rowBytesAsPixels()} 241 , fConverter{srcPixmap, std::move<Args>(args)...} { } 242 243 void SK_VECTORCALL getFewPixels ( 244 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { 245 Sk4i bufferLoc = ys * fWidth + xs; 246 switch (n) { 247 case 3: 248 *px2 = this->getPixelAt(bufferLoc[2]); 249 case 2: 250 *px1 = this->getPixelAt(bufferLoc[1]); 251 case 1: 252 *px0 = this->getPixelAt(bufferLoc[0]); 253 default: 254 break; 255 } 256 } 257 258 void SK_VECTORCALL get4Pixels( 259 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { 260 Sk4i bufferLoc = ys * fWidth + xs; 261 *px0 = this->getPixelAt(bufferLoc[0]); 262 *px1 = this->getPixelAt(bufferLoc[1]); 263 *px2 = this->getPixelAt(bufferLoc[2]); 264 *px3 = this->getPixelAt(bufferLoc[3]); 265 } 266 267 void get4Pixels( 268 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { 269 *px0 = this->getPixelFromRow(src, index + 0); 270 *px1 = this->getPixelFromRow(src, index + 1); 271 *px2 = this->getPixelFromRow(src, index + 2); 272 *px3 = this->getPixelFromRow(src, index + 3); 273 } 274 275 Sk4f getPixelFromRow(const void* row, int index) const override { 276 const Element* src = static_cast<const Element*>(row); 277 return fConverter.toSk4f(src[index]); 278 } 279 280 Sk4f getPixelAt(int index) const override { 281 return this->getPixelFromRow(fSrc, index); 282 } 283 284 const void* row(int y) const override { return fSrc + y * fWidth; } 285 286 private: 287 const Element* const fSrc; 288 const int fWidth; 289 PixelConverter<colorType, gammaType> fConverter; 290 }; 291 292 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. 293 // We'll never re-use pixels, but we can at least load contiguous pixels. 294 template <typename Next, typename Strategy> 295 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) { 296 SkPoint start; 297 SkScalar length; 298 int count; 299 std::tie(start, length, count) = span; 300 int ix = SkScalarFloorToInt(X(start)); 301 const void* row = strategy->row((int)std::floor(Y(start))); 302 if (length > 0) { 303 while (count >= 4) { 304 Sk4f px0, px1, px2, px3; 305 strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3); 306 next->blend4Pixels(px0, px1, px2, px3); 307 ix += 4; 308 count -= 4; 309 } 310 311 while (count > 0) { 312 next->blendPixel(strategy->getPixelFromRow(row, ix)); 313 ix += 1; 314 count -= 1; 315 } 316 } else { 317 while (count >= 4) { 318 Sk4f px0, px1, px2, px3; 319 strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0); 320 next->blend4Pixels(px0, px1, px2, px3); 321 ix -= 4; 322 count -= 4; 323 } 324 325 while (count > 0) { 326 next->blendPixel(strategy->getPixelFromRow(row, ix)); 327 ix -= 1; 328 count -= 1; 329 } 330 } 331 } 332 333 // -- NearestNeighborSampler ----------------------------------------------------------------------- 334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels. 335 template<typename Accessor, typename Next> 336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { 337 public: 338 template<typename... Args> 339 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) 340 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } 341 342 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, 343 const NearestNeighborSampler& sampler) 344 : fNext{next}, fAccessor{sampler.fAccessor} { } 345 346 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 347 SkASSERT(0 < n && n < 4); 348 Sk4f px0, px1, px2; 349 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); 350 if (n >= 1) fNext->blendPixel(px0); 351 if (n >= 2) fNext->blendPixel(px1); 352 if (n >= 3) fNext->blendPixel(px2); 353 } 354 355 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 356 Sk4f px0, px1, px2, px3; 357 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); 358 fNext->blend4Pixels(px0, px1, px2, px3); 359 } 360 361 void pointSpan(Span span) override { 362 SkASSERT(!span.isEmpty()); 363 SkPoint start; 364 SkScalar length; 365 int count; 366 std::tie(start, length, count) = span; 367 SkScalar absLength = SkScalarAbs(length); 368 if (absLength < (count - 1)) { 369 this->spanSlowRate(span); 370 } else if (absLength == (count - 1)) { 371 src_strategy_blend(span, fNext, &fAccessor); 372 } else { 373 this->spanFastRate(span); 374 } 375 } 376 377 void repeatSpan(Span span, int32_t repeatCount) override { 378 while (repeatCount > 0) { 379 this->pointSpan(span); 380 repeatCount--; 381 } 382 } 383 384 private: 385 // When moving through source space more slowly than dst space (zoomed in), 386 // we'll be sampling from the same source pixel more than once. 387 void spanSlowRate(Span span) { 388 SkPoint start; SkScalar length; int count; 389 std::tie(start, length, count) = span; 390 SkScalar x = X(start); 391 // fx is a fixed 48.16 number. 392 int64_t fx = static_cast<int64_t>(x * SK_Fixed1); 393 SkScalar dx = length / (count - 1); 394 // fdx is a fixed 48.16 number. 395 int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1); 396 397 const void* row = fAccessor.row((int)std::floor(Y(start))); 398 Next* next = fNext; 399 400 int64_t ix = fx >> 16; 401 int64_t prevIX = ix; 402 Sk4f fpixel = fAccessor.getPixelFromRow(row, ix); 403 404 // When dx is less than one, each pixel is used more than once. Using the fixed point fx 405 // allows the code to quickly check that the same pixel is being used. The code uses this 406 // same pixel check to do the sRGB and normalization only once. 407 auto getNextPixel = [&]() { 408 if (ix != prevIX) { 409 fpixel = fAccessor.getPixelFromRow(row, ix); 410 prevIX = ix; 411 } 412 fx += fdx; 413 ix = fx >> 16; 414 return fpixel; 415 }; 416 417 while (count >= 4) { 418 Sk4f px0 = getNextPixel(); 419 Sk4f px1 = getNextPixel(); 420 Sk4f px2 = getNextPixel(); 421 Sk4f px3 = getNextPixel(); 422 next->blend4Pixels(px0, px1, px2, px3); 423 count -= 4; 424 } 425 while (count > 0) { 426 next->blendPixel(getNextPixel()); 427 count -= 1; 428 } 429 } 430 431 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. 432 // We'll never re-use pixels, but we can at least load contiguous pixels. 433 void spanUnitRate(Span span) { 434 src_strategy_blend(span, fNext, &fAccessor); 435 } 436 437 // We're moving through source space faster than dst (zoomed out), 438 // so we'll never reuse a source pixel or be able to do contiguous loads. 439 void spanFastRate(Span span) { 440 span_fallback(span, this); 441 } 442 443 Next* const fNext; 444 Accessor fAccessor; 445 }; 446 447 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge 448 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to 449 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value 450 // on the interval [0, vMax]. 451 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. 452 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { 453 SkASSERT(-1 <= vs && vs <= vMax + 1); 454 switch (edgeType) { 455 case SkShader::kClamp_TileMode: 456 case SkShader::kMirror_TileMode: 457 vs = std::max(vs, 0); 458 vs = std::min(vs, vMax); 459 break; 460 case SkShader::kRepeat_TileMode: 461 vs = (vs <= vMax) ? vs : 0; 462 vs = (vs >= 0) ? vs : vMax; 463 break; 464 } 465 SkASSERT(0 <= vs && vs <= vMax); 466 return vs; 467 } 468 469 // From a sample point on the tile, return the top or left filter value. 470 // The result r should be in the range (0, 1]. Since this represents the weight given to the top 471 // left element, then if x == 0.5 the filter value should be 1.0. 472 // The input sample point must be on the tile, therefore it must be >= 0. 473 static SkScalar sample_to_filter(SkScalar x) { 474 SkASSERT(x >= 0.0f); 475 // The usual form of the top or left edge is x - .5, but since we are working on the unit 476 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use 477 // of trunc. 478 SkScalar v = x + 0.5f; 479 // Produce the top or left offset a value on the range [0, 1). 480 SkScalar f = v - SkScalarTruncToScalar(v); 481 // Produce the filter value which is on the range (0, 1]. 482 SkScalar r = 1.0f - f; 483 SkASSERT(0.0f < r && r <= 1.0f); 484 return r; 485 } 486 487 // -- BilerpSampler -------------------------------------------------------------------------------- 488 // BilerpSampler - use a bilerp filter to create runs of destination pixels. 489 // Note: in the code below, there are two types of points 490 // * sample points - these are the points passed in by pointList* and Spans. 491 // * filter points - are created from a sample point to form the coordinates of the points 492 // to use in the filter and to generate the filter values. 493 template<typename Accessor, typename Next> 494 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { 495 public: 496 template<typename... Args> 497 BilerpSampler( 498 SkLinearBitmapPipeline::BlendProcessorInterface* next, 499 SkISize dimensions, 500 SkShader::TileMode xTile, SkShader::TileMode yTile, 501 Args&& ... args 502 ) 503 : fNext{next} 504 , fXEdgeType{xTile} 505 , fXMax{dimensions.width() - 1} 506 , fYEdgeType{yTile} 507 , fYMax{dimensions.height() - 1} 508 , fAccessor{std::forward<Args>(args)...} { } 509 510 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, 511 const BilerpSampler& sampler) 512 : fNext{next} 513 , fXEdgeType{sampler.fXEdgeType} 514 , fXMax{sampler.fXMax} 515 , fYEdgeType{sampler.fYEdgeType} 516 , fYMax{sampler.fYMax} 517 , fAccessor{sampler.fAccessor} { } 518 519 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 520 SkASSERT(0 < n && n < 4); 521 auto bilerpPixel = [&](int index) { 522 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); 523 }; 524 525 if (n >= 1) fNext->blendPixel(bilerpPixel(0)); 526 if (n >= 2) fNext->blendPixel(bilerpPixel(1)); 527 if (n >= 3) fNext->blendPixel(bilerpPixel(2)); 528 } 529 530 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 531 auto bilerpPixel = [&](int index) { 532 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); 533 }; 534 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); 535 } 536 537 void pointSpan(Span span) override { 538 SkASSERT(!span.isEmpty()); 539 SkPoint start; 540 SkScalar length; 541 int count; 542 std::tie(start, length, count) = span; 543 544 // Nothing to do. 545 if (count == 0) { 546 return; 547 } 548 549 // Trivial case. No sample points are generated other than start. 550 if (count == 1) { 551 fNext->blendPixel(this->bilerpSamplePoint(start)); 552 return; 553 } 554 555 // Note: the following code could be done in terms of dx = length / (count -1), but that 556 // would introduce a divide that is not needed for the most common dx == 1 cases. 557 SkScalar absLength = SkScalarAbs(length); 558 if (absLength == 0.0f) { 559 // |dx| == 0 560 // length is zero, so clamp an edge pixel. 561 this->spanZeroRate(span); 562 } else if (absLength < (count - 1)) { 563 // 0 < |dx| < 1. 564 this->spanSlowRate(span); 565 } else if (absLength == (count - 1)) { 566 // |dx| == 1. 567 if (sample_to_filter(span.startX()) == 1.0f 568 && sample_to_filter(span.startY()) == 1.0f) { 569 // All the pixels are aligned with the dest; go fast. 570 src_strategy_blend(span, fNext, &fAccessor); 571 } else { 572 // There is some sub-pixel offsets, so bilerp. 573 this->spanUnitRate(span); 574 } 575 } else if (absLength < 2.0f * (count - 1)) { 576 // 1 < |dx| < 2. 577 this->spanMediumRate(span); 578 } else { 579 // |dx| >= 2. 580 this->spanFastRate(span); 581 } 582 } 583 584 void repeatSpan(Span span, int32_t repeatCount) override { 585 while (repeatCount > 0) { 586 this->pointSpan(span); 587 repeatCount--; 588 } 589 } 590 591 private: 592 593 // Convert a sample point to the points used by the filter. 594 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { 595 // May be less than zero. Be careful to use Floor. 596 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax); 597 // Always greater than zero. Use the faster Trunc. 598 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax); 599 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax); 600 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax); 601 602 *filterXs = Sk4i{x0, x1, x0, x1}; 603 *filterYs = Sk4i{y0, y0, y1, y1}; 604 } 605 606 // Given a sample point, generate a color by bilerping the four filter points. 607 Sk4f bilerpSamplePoint(SkPoint sample) { 608 Sk4i iXs, iYs; 609 filterPoints(sample, &iXs, &iYs); 610 Sk4f px00, px10, px01, px11; 611 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); 612 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11); 613 } 614 615 // Get two pixels at x from row0 and row1. 616 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) { 617 *px0 = fAccessor.getPixelFromRow(row0, x); 618 *px1 = fAccessor.getPixelFromRow(row1, x); 619 } 620 621 // |dx| == 0. This code assumes that length is zero. 622 void spanZeroRate(Span span) { 623 SkPoint start; SkScalar length; int count; 624 std::tie(start, length, count) = span; 625 SkASSERT(length == 0.0f); 626 627 // Filter for the blending of the top and bottom pixels. 628 SkScalar filterY = sample_to_filter(Y(start)); 629 630 // Generate the four filter points from the sample point start. Generate the row* values. 631 Sk4i iXs, iYs; 632 this->filterPoints(start, &iXs, &iYs); 633 const void* const row0 = fAccessor.row(iYs[0]); 634 const void* const row1 = fAccessor.row(iYs[2]); 635 636 // Get the two pixels that make up the clamping pixel. 637 Sk4f pxTop, pxBottom; 638 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); 639 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; 640 641 while (count >= 4) { 642 fNext->blend4Pixels(pixel, pixel, pixel, pixel); 643 count -= 4; 644 } 645 while (count > 0) { 646 fNext->blendPixel(pixel); 647 count -= 1; 648 } 649 } 650 651 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce 652 // computation. In particular, several destination pixels maybe generated from the same four 653 // source pixels. 654 // In the following code a "part" is a combination of two pixels from the same column of the 655 // filter. 656 void spanSlowRate(Span span) { 657 SkPoint start; SkScalar length; int count; 658 std::tie(start, length, count) = span; 659 660 // Calculate the distance between each sample point. 661 const SkScalar dx = length / (count - 1); 662 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); 663 664 // Generate the filter values for the top-left corner. 665 // Note: these values are in filter space; this has implications about how to adjust 666 // these values at each step. For example, as the sample point increases, the filter 667 // value decreases, this is because the filter and position are related by 668 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite 669 // direction of the sample point which is increasing by dx. 670 SkScalar filterX = sample_to_filter(X(start)); 671 SkScalar filterY = sample_to_filter(Y(start)); 672 673 // Generate the four filter points from the sample point start. Generate the row* values. 674 Sk4i iXs, iYs; 675 this->filterPoints(start, &iXs, &iYs); 676 const void* const row0 = fAccessor.row(iYs[0]); 677 const void* const row1 = fAccessor.row(iYs[2]); 678 679 // Generate part of the filter value at xColumn. 680 auto partAtColumn = [&](int xColumn) { 681 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 682 Sk4f pxTop, pxBottom; 683 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 684 return pxTop * filterY + (1.0f - filterY) * pxBottom; 685 }; 686 687 // The leftPart is made up of two pixels from the left column of the filter, right part 688 // is similar. The top and bottom pixels in the *Part are created as a linear blend of 689 // the top and bottom pixels using filterY. See the partAtColumn function above. 690 Sk4f leftPart = partAtColumn(iXs[0]); 691 Sk4f rightPart = partAtColumn(iXs[1]); 692 693 // Create a destination color by blending together a left and right part using filterX. 694 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) { 695 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); 696 return check_pixel(pixel); 697 }; 698 699 // Send the first pixel to the destination. This simplifies the loop structure so that no 700 // extra pixels are fetched for the last iteration of the loop. 701 fNext->blendPixel(bilerp(leftPart, rightPart)); 702 count -= 1; 703 704 if (dx > 0.0f) { 705 // * positive direction - generate destination pixels by sliding the filter from left 706 // to right. 707 int rightPartCursor = iXs[1]; 708 709 // Advance the filter from left to right. Remember that moving the top-left corner of 710 // the filter to the right actually makes the filter value smaller. 711 auto advanceFilter = [&]() { 712 filterX -= dx; 713 if (filterX <= 0.0f) { 714 filterX += 1.0f; 715 leftPart = rightPart; 716 rightPartCursor += 1; 717 rightPart = partAtColumn(rightPartCursor); 718 } 719 SkASSERT(0.0f < filterX && filterX <= 1.0f); 720 721 return bilerp(leftPart, rightPart); 722 }; 723 724 while (count >= 4) { 725 Sk4f px0 = advanceFilter(), 726 px1 = advanceFilter(), 727 px2 = advanceFilter(), 728 px3 = advanceFilter(); 729 fNext->blend4Pixels(px0, px1, px2, px3); 730 count -= 4; 731 } 732 733 while (count > 0) { 734 fNext->blendPixel(advanceFilter()); 735 count -= 1; 736 } 737 } else { 738 // * negative direction - generate destination pixels by sliding the filter from 739 // right to left. 740 int leftPartCursor = iXs[0]; 741 742 // Advance the filter from right to left. Remember that moving the top-left corner of 743 // the filter to the left actually makes the filter value larger. 744 auto advanceFilter = [&]() { 745 // Remember, dx < 0 therefore this adds |dx| to filterX. 746 filterX -= dx; 747 // At this point filterX may be > 1, and needs to be wrapped back on to the filter 748 // interval, and the next column in the filter is calculated. 749 if (filterX > 1.0f) { 750 filterX -= 1.0f; 751 rightPart = leftPart; 752 leftPartCursor -= 1; 753 leftPart = partAtColumn(leftPartCursor); 754 } 755 SkASSERT(0.0f < filterX && filterX <= 1.0f); 756 757 return bilerp(leftPart, rightPart); 758 }; 759 760 while (count >= 4) { 761 Sk4f px0 = advanceFilter(), 762 px1 = advanceFilter(), 763 px2 = advanceFilter(), 764 px3 = advanceFilter(); 765 fNext->blend4Pixels(px0, px1, px2, px3); 766 count -= 4; 767 } 768 769 while (count > 0) { 770 fNext->blendPixel(advanceFilter()); 771 count -= 1; 772 } 773 } 774 } 775 776 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. 777 // Every filter part is used for two destination pixels, and the code can bulk load four 778 // pixels at a time. 779 void spanUnitRate(Span span) { 780 SkPoint start; SkScalar length; int count; 781 std::tie(start, length, count) = span; 782 SkASSERT(SkScalarAbs(length) == (count - 1)); 783 784 // Calculate the four filter points of start, and use the two different Y values to 785 // generate the row pointers. 786 Sk4i iXs, iYs; 787 filterPoints(start, &iXs, &iYs); 788 const void* row0 = fAccessor.row(iYs[0]); 789 const void* row1 = fAccessor.row(iYs[2]); 790 791 // Calculate the filter values for the top-left filter element. 792 const SkScalar filterX = sample_to_filter(X(start)); 793 const SkScalar filterY = sample_to_filter(Y(start)); 794 795 // Generate part of the filter value at xColumn. 796 auto partAtColumn = [&](int xColumn) { 797 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 798 Sk4f pxTop, pxBottom; 799 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 800 return pxTop * filterY + (1.0f - filterY) * pxBottom; 801 }; 802 803 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) { 804 // Check if the pixels needed are near the edges. If not go fast using bulk pixels, 805 // otherwise be careful. 806 if (0 <= ix && ix <= fXMax - 3) { 807 Sk4f px00, px10, px20, px30, 808 px01, px11, px21, px31; 809 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30); 810 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31); 811 *part0 = filterY * px00 + (1.0f - filterY) * px01; 812 *part1 = filterY * px10 + (1.0f - filterY) * px11; 813 *part2 = filterY * px20 + (1.0f - filterY) * px21; 814 *part3 = filterY * px30 + (1.0f - filterY) * px31; 815 } else { 816 *part0 = partAtColumn(ix + 0); 817 *part1 = partAtColumn(ix + 1); 818 *part2 = partAtColumn(ix + 2); 819 *part3 = partAtColumn(ix + 3); 820 } 821 }; 822 823 auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) { 824 return part0 * filterX + part1 * (1.0f - filterX); 825 }; 826 827 if (length > 0) { 828 // * positive direction - generate destination pixels by sliding the filter from left 829 // to right. 830 831 // overlapPart is the filter part from the end of the previous four pixels used at 832 // the start of the next four pixels. 833 Sk4f overlapPart = partAtColumn(iXs[0]); 834 int rightColumnCursor = iXs[1]; 835 while (count >= 4) { 836 Sk4f part0, part1, part2, part3; 837 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3); 838 Sk4f px0 = bilerp(overlapPart, part0); 839 Sk4f px1 = bilerp(part0, part1); 840 Sk4f px2 = bilerp(part1, part2); 841 Sk4f px3 = bilerp(part2, part3); 842 overlapPart = part3; 843 fNext->blend4Pixels(px0, px1, px2, px3); 844 rightColumnCursor += 4; 845 count -= 4; 846 } 847 848 while (count > 0) { 849 Sk4f rightPart = partAtColumn(rightColumnCursor); 850 851 fNext->blendPixel(bilerp(overlapPart, rightPart)); 852 overlapPart = rightPart; 853 rightColumnCursor += 1; 854 count -= 1; 855 } 856 } else { 857 // * negative direction - generate destination pixels by sliding the filter from 858 // right to left. 859 Sk4f overlapPart = partAtColumn(iXs[1]); 860 int leftColumnCursor = iXs[0]; 861 862 while (count >= 4) { 863 Sk4f part0, part1, part2, part3; 864 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0); 865 Sk4f px0 = bilerp(part0, overlapPart); 866 Sk4f px1 = bilerp(part1, part0); 867 Sk4f px2 = bilerp(part2, part1); 868 Sk4f px3 = bilerp(part3, part2); 869 overlapPart = part3; 870 fNext->blend4Pixels(px0, px1, px2, px3); 871 leftColumnCursor -= 4; 872 count -= 4; 873 } 874 875 while (count > 0) { 876 Sk4f leftPart = partAtColumn(leftColumnCursor); 877 878 fNext->blendPixel(bilerp(leftPart, overlapPart)); 879 overlapPart = leftPart; 880 leftColumnCursor -= 1; 881 count -= 1; 882 } 883 } 884 } 885 886 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but 887 // still slow enough to take advantage of previous calculations. 888 void spanMediumRate(Span span) { 889 SkPoint start; SkScalar length; int count; 890 std::tie(start, length, count) = span; 891 892 // Calculate the distance between each sample point. 893 const SkScalar dx = length / (count - 1); 894 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f)); 895 896 // Generate the filter values for the top-left corner. 897 // Note: these values are in filter space; this has implications about how to adjust 898 // these values at each step. For example, as the sample point increases, the filter 899 // value decreases, this is because the filter and position are related by 900 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite 901 // direction of the sample point which is increasing by dx. 902 SkScalar filterX = sample_to_filter(X(start)); 903 SkScalar filterY = sample_to_filter(Y(start)); 904 905 // Generate the four filter points from the sample point start. Generate the row* values. 906 Sk4i iXs, iYs; 907 this->filterPoints(start, &iXs, &iYs); 908 const void* const row0 = fAccessor.row(iYs[0]); 909 const void* const row1 = fAccessor.row(iYs[2]); 910 911 // Generate part of the filter value at xColumn. 912 auto partAtColumn = [&](int xColumn) { 913 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 914 Sk4f pxTop, pxBottom; 915 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 916 return pxTop * filterY + (1.0f - filterY) * pxBottom; 917 }; 918 919 // The leftPart is made up of two pixels from the left column of the filter, right part 920 // is similar. The top and bottom pixels in the *Part are created as a linear blend of 921 // the top and bottom pixels using filterY. See the nextPart function below. 922 Sk4f leftPart = partAtColumn(iXs[0]); 923 Sk4f rightPart = partAtColumn(iXs[1]); 924 925 // Create a destination color by blending together a left and right part using filterX. 926 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) { 927 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); 928 return check_pixel(pixel); 929 }; 930 931 // Send the first pixel to the destination. This simplifies the loop structure so that no 932 // extra pixels are fetched for the last iteration of the loop. 933 fNext->blendPixel(bilerp(leftPart, rightPart)); 934 count -= 1; 935 936 if (dx > 0.0f) { 937 // * positive direction - generate destination pixels by sliding the filter from left 938 // to right. 939 int rightPartCursor = iXs[1]; 940 941 // Advance the filter from left to right. Remember that moving the top-left corner of 942 // the filter to the right actually makes the filter value smaller. 943 auto advanceFilter = [&]() { 944 filterX -= dx; 945 // At this point filterX is less than zero, but might actually be less than -1. 946 if (filterX > -1.0f) { 947 filterX += 1.0f; 948 leftPart = rightPart; 949 rightPartCursor += 1; 950 rightPart = partAtColumn(rightPartCursor); 951 } else { 952 filterX += 2.0f; 953 rightPartCursor += 2; 954 leftPart = partAtColumn(rightPartCursor - 1); 955 rightPart = partAtColumn(rightPartCursor); 956 } 957 SkASSERT(0.0f < filterX && filterX <= 1.0f); 958 959 return bilerp(leftPart, rightPart); 960 }; 961 962 while (count >= 4) { 963 Sk4f px0 = advanceFilter(), 964 px1 = advanceFilter(), 965 px2 = advanceFilter(), 966 px3 = advanceFilter(); 967 fNext->blend4Pixels(px0, px1, px2, px3); 968 count -= 4; 969 } 970 971 while (count > 0) { 972 fNext->blendPixel(advanceFilter()); 973 count -= 1; 974 } 975 } else { 976 // * negative direction - generate destination pixels by sliding the filter from 977 // right to left. 978 int leftPartCursor = iXs[0]; 979 980 auto advanceFilter = [&]() { 981 // Remember, dx < 0 therefore this adds |dx| to filterX. 982 filterX -= dx; 983 // At this point, filterX is greater than one, but may actually be greater than two. 984 if (filterX < 2.0f) { 985 filterX -= 1.0f; 986 rightPart = leftPart; 987 leftPartCursor -= 1; 988 leftPart = partAtColumn(leftPartCursor); 989 } else { 990 filterX -= 2.0f; 991 leftPartCursor -= 2; 992 rightPart = partAtColumn(leftPartCursor - 1); 993 leftPart = partAtColumn(leftPartCursor); 994 } 995 SkASSERT(0.0f < filterX && filterX <= 1.0f); 996 return bilerp(leftPart, rightPart); 997 }; 998 999 while (count >= 4) { 1000 Sk4f px0 = advanceFilter(), 1001 px1 = advanceFilter(), 1002 px2 = advanceFilter(), 1003 px3 = advanceFilter(); 1004 fNext->blend4Pixels(px0, px1, px2, px3); 1005 count -= 4; 1006 } 1007 1008 while (count > 0) { 1009 fNext->blendPixel(advanceFilter()); 1010 count -= 1; 1011 } 1012 } 1013 } 1014 1015 // We're moving through source space faster than dst (zoomed out), 1016 // so we'll never reuse a source pixel or be able to do contiguous loads. 1017 void spanFastRate(Span span) { 1018 SkPoint start; SkScalar length; int count; 1019 std::tie(start, length, count) = span; 1020 SkScalar x = X(start); 1021 SkScalar y = Y(start); 1022 1023 SkScalar dx = length / (count - 1); 1024 while (count > 0) { 1025 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y})); 1026 x += dx; 1027 count -= 1; 1028 } 1029 } 1030 1031 Next* const fNext; 1032 const SkShader::TileMode fXEdgeType; 1033 const int fXMax; 1034 const SkShader::TileMode fYEdgeType; 1035 const int fYMax; 1036 Accessor fAccessor; 1037 }; 1038 1039 } // namespace 1040 1041 #endif // SkLinearBitmapPipeline_sampler_DEFINED 1042