1 /* 2 * Copyright 2016 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED 9 #define SkLinearBitmapPipeline_sampler_DEFINED 10 11 #include <tuple> 12 13 #include "SkAutoMalloc.h" 14 #include "SkColor.h" 15 #include "SkColorPriv.h" 16 #include "SkFixed.h" // for SkFixed1 only. Don't use SkFixed in this file. 17 #include "SkHalf.h" 18 #include "SkLinearBitmapPipeline_core.h" 19 #include "SkNx.h" 20 #include "SkPM4fPriv.h" 21 22 namespace { 23 // Explaination of the math: 24 // 1 - x x 25 // +--------+--------+ 26 // | | | 27 // 1 - y | px00 | px10 | 28 // | | | 29 // +--------+--------+ 30 // | | | 31 // y | px01 | px11 | 32 // | | | 33 // +--------+--------+ 34 // 35 // 36 // Given a pixelxy each is multiplied by a different factor derived from the fractional part of x 37 // and y: 38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy 39 // * px10 -> x(1 - y) = x - xy 40 // * px01 -> (1 - x)y = y - xy 41 // * px11 -> xy 42 // So x * y is calculated first and then used to calculate all the other factors. 43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, 44 Sk4f px01, Sk4f px11) { 45 // Calculate fractional xs and ys. 46 Sk4s fxs = xs - xs.floor(); 47 Sk4s fys = ys - ys.floor(); 48 Sk4s fxys{fxs * fys}; 49 Sk4f sum = px11 * fxys; 50 sum = sum + px01 * (fys - fxys); 51 sum = sum + px10 * (fxs - fxys); 52 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); 53 return sum; 54 } 55 56 //////////////////////////////////////////////////////////////////////////////////////////////////// 57 // PixelGetter is the lowest level interface to the source data. There is a PixelConverter for each 58 // of the different SkColorTypes. 59 template <SkColorType, SkGammaType> class PixelConverter; 60 61 // Alpha handling: 62 // The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate 63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can 64 // modulate this color later. 65 template <> 66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> { 67 public: 68 using Element = uint8_t; 69 PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) { 70 fTintColor = SkColor4f::FromColor(tintColor); 71 fTintColor.fA = 1.0f; 72 } 73 74 Sk4f toSk4f(const Element pixel) const { 75 return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f)); 76 } 77 78 private: 79 SkColor4f fTintColor; 80 }; 81 82 template <SkGammaType gammaType> 83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) { 84 return swizzle_rb_if_bgra( 85 (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel) 86 : Sk4f_fromL32(pixel)); 87 } 88 89 template <SkGammaType gammaType> 90 class PixelConverter<kRGB_565_SkColorType, gammaType> { 91 public: 92 using Element = uint16_t; 93 PixelConverter(const SkPixmap& srcPixmap) { } 94 95 Sk4f toSk4f(Element pixel) const { 96 return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel)); 97 } 98 }; 99 100 template <SkGammaType gammaType> 101 class PixelConverter<kARGB_4444_SkColorType, gammaType> { 102 public: 103 using Element = uint16_t; 104 PixelConverter(const SkPixmap& srcPixmap) { } 105 106 Sk4f toSk4f(Element pixel) const { 107 return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel)); 108 } 109 }; 110 111 template <SkGammaType gammaType> 112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> { 113 public: 114 using Element = uint32_t; 115 PixelConverter(const SkPixmap& srcPixmap) { } 116 117 Sk4f toSk4f(Element pixel) const { 118 return gammaType == kSRGB_SkGammaType 119 ? Sk4f_fromS32(pixel) 120 : Sk4f_fromL32(pixel); 121 } 122 }; 123 124 template <SkGammaType gammaType> 125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> { 126 public: 127 using Element = uint32_t; 128 PixelConverter(const SkPixmap& srcPixmap) { } 129 130 Sk4f toSk4f(Element pixel) const { 131 return swizzle_rb( 132 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel)); 133 } 134 }; 135 136 template <SkGammaType gammaType> 137 class PixelConverter<kGray_8_SkColorType, gammaType> { 138 public: 139 using Element = uint8_t; 140 PixelConverter(const SkPixmap& srcPixmap) { } 141 142 Sk4f toSk4f(Element pixel) const { 143 float gray = (gammaType == kSRGB_SkGammaType) 144 ? sk_linear_from_srgb[pixel] 145 : pixel * (1/255.0f); 146 return {gray, gray, gray, 1.0f}; 147 } 148 }; 149 150 template <> 151 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> { 152 public: 153 using Element = uint64_t; 154 PixelConverter(const SkPixmap& srcPixmap) { } 155 156 Sk4f toSk4f(const Element pixel) const { 157 return SkHalfToFloat_finite_ftz(pixel); 158 } 159 }; 160 161 class PixelAccessorShim { 162 public: 163 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor) 164 : fPixelAccessor(accessor) { } 165 166 void SK_VECTORCALL getFewPixels( 167 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { 168 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); 169 } 170 171 void SK_VECTORCALL get4Pixels( 172 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 173 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); 174 } 175 176 void get4Pixels( 177 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 178 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); 179 } 180 181 Sk4f getPixelFromRow(const void* row, int index) const { 182 return fPixelAccessor->getPixelFromRow(row, index); 183 } 184 185 Sk4f getPixelAt(int index) const { 186 return fPixelAccessor->getPixelAt(index); 187 } 188 189 const void* row(int y) const { 190 return fPixelAccessor->row(y); 191 } 192 193 private: 194 SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor; 195 }; 196 197 //////////////////////////////////////////////////////////////////////////////////////////////////// 198 // PixelAccessor handles all the same plumbing for all the PixelGetters. 199 template <SkColorType colorType, SkGammaType gammaType> 200 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface { 201 using Element = typename PixelConverter<colorType, gammaType>::Element; 202 public: 203 template <typename... Args> 204 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args) 205 : fSrc{static_cast<const Element*>(srcPixmap.addr())} 206 , fWidth{srcPixmap.rowBytesAsPixels()} 207 , fConverter{srcPixmap, std::move<Args>(args)...} { } 208 209 void SK_VECTORCALL getFewPixels ( 210 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { 211 Sk4i bufferLoc = ys * fWidth + xs; 212 switch (n) { 213 case 3: 214 *px2 = this->getPixelAt(bufferLoc[2]); 215 case 2: 216 *px1 = this->getPixelAt(bufferLoc[1]); 217 case 1: 218 *px0 = this->getPixelAt(bufferLoc[0]); 219 default: 220 break; 221 } 222 } 223 224 void SK_VECTORCALL get4Pixels( 225 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { 226 Sk4i bufferLoc = ys * fWidth + xs; 227 *px0 = this->getPixelAt(bufferLoc[0]); 228 *px1 = this->getPixelAt(bufferLoc[1]); 229 *px2 = this->getPixelAt(bufferLoc[2]); 230 *px3 = this->getPixelAt(bufferLoc[3]); 231 } 232 233 void get4Pixels( 234 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { 235 *px0 = this->getPixelFromRow(src, index + 0); 236 *px1 = this->getPixelFromRow(src, index + 1); 237 *px2 = this->getPixelFromRow(src, index + 2); 238 *px3 = this->getPixelFromRow(src, index + 3); 239 } 240 241 Sk4f getPixelFromRow(const void* row, int index) const override { 242 const Element* src = static_cast<const Element*>(row); 243 return fConverter.toSk4f(src[index]); 244 } 245 246 Sk4f getPixelAt(int index) const override { 247 return this->getPixelFromRow(fSrc, index); 248 } 249 250 const void* row(int y) const override { return fSrc + y * fWidth; } 251 252 private: 253 const Element* const fSrc; 254 const int fWidth; 255 PixelConverter<colorType, gammaType> fConverter; 256 }; 257 258 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. 259 // We'll never re-use pixels, but we can at least load contiguous pixels. 260 template <typename Next, typename Strategy> 261 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) { 262 SkPoint start; 263 SkScalar length; 264 int count; 265 std::tie(start, length, count) = span; 266 int ix = SkScalarFloorToInt(X(start)); 267 const void* row = strategy->row((int)std::floor(Y(start))); 268 if (length > 0) { 269 while (count >= 4) { 270 Sk4f px0, px1, px2, px3; 271 strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3); 272 next->blend4Pixels(px0, px1, px2, px3); 273 ix += 4; 274 count -= 4; 275 } 276 277 while (count > 0) { 278 next->blendPixel(strategy->getPixelFromRow(row, ix)); 279 ix += 1; 280 count -= 1; 281 } 282 } else { 283 while (count >= 4) { 284 Sk4f px0, px1, px2, px3; 285 strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0); 286 next->blend4Pixels(px0, px1, px2, px3); 287 ix -= 4; 288 count -= 4; 289 } 290 291 while (count > 0) { 292 next->blendPixel(strategy->getPixelFromRow(row, ix)); 293 ix -= 1; 294 count -= 1; 295 } 296 } 297 } 298 299 // -- NearestNeighborSampler ----------------------------------------------------------------------- 300 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels. 301 template<typename Accessor, typename Next> 302 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { 303 public: 304 template<typename... Args> 305 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) 306 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } 307 308 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, 309 const NearestNeighborSampler& sampler) 310 : fNext{next}, fAccessor{sampler.fAccessor} { } 311 312 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 313 SkASSERT(0 < n && n < 4); 314 Sk4f px0, px1, px2; 315 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); 316 if (n >= 1) fNext->blendPixel(px0); 317 if (n >= 2) fNext->blendPixel(px1); 318 if (n >= 3) fNext->blendPixel(px2); 319 } 320 321 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 322 Sk4f px0, px1, px2, px3; 323 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); 324 fNext->blend4Pixels(px0, px1, px2, px3); 325 } 326 327 void pointSpan(Span span) override { 328 SkASSERT(!span.isEmpty()); 329 SkPoint start; 330 SkScalar length; 331 int count; 332 std::tie(start, length, count) = span; 333 SkScalar absLength = SkScalarAbs(length); 334 if (absLength < (count - 1)) { 335 this->spanSlowRate(span); 336 } else if (absLength == (count - 1)) { 337 src_strategy_blend(span, fNext, &fAccessor); 338 } else { 339 this->spanFastRate(span); 340 } 341 } 342 343 void repeatSpan(Span span, int32_t repeatCount) override { 344 while (repeatCount > 0) { 345 this->pointSpan(span); 346 repeatCount--; 347 } 348 } 349 350 private: 351 // When moving through source space more slowly than dst space (zoomed in), 352 // we'll be sampling from the same source pixel more than once. 353 void spanSlowRate(Span span) { 354 SkPoint start; SkScalar length; int count; 355 std::tie(start, length, count) = span; 356 SkScalar x = X(start); 357 // fx is a fixed 48.16 number. 358 int64_t fx = static_cast<int64_t>(x * SK_Fixed1); 359 SkScalar dx = length / (count - 1); 360 // fdx is a fixed 48.16 number. 361 int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1); 362 363 const void* row = fAccessor.row((int)std::floor(Y(start))); 364 Next* next = fNext; 365 366 int64_t ix = fx >> 16; 367 int64_t prevIX = ix; 368 Sk4f fpixel = fAccessor.getPixelFromRow(row, ix); 369 370 // When dx is less than one, each pixel is used more than once. Using the fixed point fx 371 // allows the code to quickly check that the same pixel is being used. The code uses this 372 // same pixel check to do the sRGB and normalization only once. 373 auto getNextPixel = [&]() { 374 if (ix != prevIX) { 375 fpixel = fAccessor.getPixelFromRow(row, ix); 376 prevIX = ix; 377 } 378 fx += fdx; 379 ix = fx >> 16; 380 return fpixel; 381 }; 382 383 while (count >= 4) { 384 Sk4f px0 = getNextPixel(); 385 Sk4f px1 = getNextPixel(); 386 Sk4f px2 = getNextPixel(); 387 Sk4f px3 = getNextPixel(); 388 next->blend4Pixels(px0, px1, px2, px3); 389 count -= 4; 390 } 391 while (count > 0) { 392 next->blendPixel(getNextPixel()); 393 count -= 1; 394 } 395 } 396 397 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. 398 // We'll never re-use pixels, but we can at least load contiguous pixels. 399 void spanUnitRate(Span span) { 400 src_strategy_blend(span, fNext, &fAccessor); 401 } 402 403 // We're moving through source space faster than dst (zoomed out), 404 // so we'll never reuse a source pixel or be able to do contiguous loads. 405 void spanFastRate(Span span) { 406 span_fallback(span, this); 407 } 408 409 Next* const fNext; 410 Accessor fAccessor; 411 }; 412 413 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge 414 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to 415 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value 416 // on the interval [0, vMax]. 417 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. 418 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { 419 SkASSERT(-1 <= vs && vs <= vMax + 1); 420 switch (edgeType) { 421 case SkShader::kClamp_TileMode: 422 case SkShader::kMirror_TileMode: 423 vs = std::max(vs, 0); 424 vs = std::min(vs, vMax); 425 break; 426 case SkShader::kRepeat_TileMode: 427 vs = (vs <= vMax) ? vs : 0; 428 vs = (vs >= 0) ? vs : vMax; 429 break; 430 } 431 SkASSERT(0 <= vs && vs <= vMax); 432 return vs; 433 } 434 435 // From a sample point on the tile, return the top or left filter value. 436 // The result r should be in the range (0, 1]. Since this represents the weight given to the top 437 // left element, then if x == 0.5 the filter value should be 1.0. 438 // The input sample point must be on the tile, therefore it must be >= 0. 439 static SkScalar sample_to_filter(SkScalar x) { 440 SkASSERT(x >= 0.0f); 441 // The usual form of the top or left edge is x - .5, but since we are working on the unit 442 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use 443 // of trunc. 444 SkScalar v = x + 0.5f; 445 // Produce the top or left offset a value on the range [0, 1). 446 SkScalar f = v - SkScalarTruncToScalar(v); 447 // Produce the filter value which is on the range (0, 1]. 448 SkScalar r = 1.0f - f; 449 SkASSERT(0.0f < r && r <= 1.0f); 450 return r; 451 } 452 453 // -- BilerpSampler -------------------------------------------------------------------------------- 454 // BilerpSampler - use a bilerp filter to create runs of destination pixels. 455 // Note: in the code below, there are two types of points 456 // * sample points - these are the points passed in by pointList* and Spans. 457 // * filter points - are created from a sample point to form the coordinates of the points 458 // to use in the filter and to generate the filter values. 459 template<typename Accessor, typename Next> 460 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { 461 public: 462 template<typename... Args> 463 BilerpSampler( 464 SkLinearBitmapPipeline::BlendProcessorInterface* next, 465 SkISize dimensions, 466 SkShader::TileMode xTile, SkShader::TileMode yTile, 467 Args&& ... args 468 ) 469 : fNext{next} 470 , fXEdgeType{xTile} 471 , fXMax{dimensions.width() - 1} 472 , fYEdgeType{yTile} 473 , fYMax{dimensions.height() - 1} 474 , fAccessor{std::forward<Args>(args)...} { } 475 476 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, 477 const BilerpSampler& sampler) 478 : fNext{next} 479 , fXEdgeType{sampler.fXEdgeType} 480 , fXMax{sampler.fXMax} 481 , fYEdgeType{sampler.fYEdgeType} 482 , fYMax{sampler.fYMax} 483 , fAccessor{sampler.fAccessor} { } 484 485 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 486 SkASSERT(0 < n && n < 4); 487 auto bilerpPixel = [&](int index) { 488 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); 489 }; 490 491 if (n >= 1) fNext->blendPixel(bilerpPixel(0)); 492 if (n >= 2) fNext->blendPixel(bilerpPixel(1)); 493 if (n >= 3) fNext->blendPixel(bilerpPixel(2)); 494 } 495 496 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 497 auto bilerpPixel = [&](int index) { 498 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); 499 }; 500 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); 501 } 502 503 void pointSpan(Span span) override { 504 SkASSERT(!span.isEmpty()); 505 SkPoint start; 506 SkScalar length; 507 int count; 508 std::tie(start, length, count) = span; 509 510 // Nothing to do. 511 if (count == 0) { 512 return; 513 } 514 515 // Trivial case. No sample points are generated other than start. 516 if (count == 1) { 517 fNext->blendPixel(this->bilerpSamplePoint(start)); 518 return; 519 } 520 521 // Note: the following code could be done in terms of dx = length / (count -1), but that 522 // would introduce a divide that is not needed for the most common dx == 1 cases. 523 SkScalar absLength = SkScalarAbs(length); 524 if (absLength == 0.0f) { 525 // |dx| == 0 526 // length is zero, so clamp an edge pixel. 527 this->spanZeroRate(span); 528 } else if (absLength < (count - 1)) { 529 // 0 < |dx| < 1. 530 this->spanSlowRate(span); 531 } else if (absLength == (count - 1)) { 532 // |dx| == 1. 533 if (sample_to_filter(span.startX()) == 1.0f 534 && sample_to_filter(span.startY()) == 1.0f) { 535 // All the pixels are aligned with the dest; go fast. 536 src_strategy_blend(span, fNext, &fAccessor); 537 } else { 538 // There is some sub-pixel offsets, so bilerp. 539 this->spanUnitRate(span); 540 } 541 } else if (absLength < 2.0f * (count - 1)) { 542 // 1 < |dx| < 2. 543 this->spanMediumRate(span); 544 } else { 545 // |dx| >= 2. 546 this->spanFastRate(span); 547 } 548 } 549 550 void repeatSpan(Span span, int32_t repeatCount) override { 551 while (repeatCount > 0) { 552 this->pointSpan(span); 553 repeatCount--; 554 } 555 } 556 557 private: 558 559 // Convert a sample point to the points used by the filter. 560 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { 561 // May be less than zero. Be careful to use Floor. 562 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax); 563 // Always greater than zero. Use the faster Trunc. 564 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax); 565 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax); 566 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax); 567 568 *filterXs = Sk4i{x0, x1, x0, x1}; 569 *filterYs = Sk4i{y0, y0, y1, y1}; 570 } 571 572 // Given a sample point, generate a color by bilerping the four filter points. 573 Sk4f bilerpSamplePoint(SkPoint sample) { 574 Sk4i iXs, iYs; 575 filterPoints(sample, &iXs, &iYs); 576 Sk4f px00, px10, px01, px11; 577 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); 578 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11); 579 } 580 581 // Get two pixels at x from row0 and row1. 582 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) { 583 *px0 = fAccessor.getPixelFromRow(row0, x); 584 *px1 = fAccessor.getPixelFromRow(row1, x); 585 } 586 587 // |dx| == 0. This code assumes that length is zero. 588 void spanZeroRate(Span span) { 589 SkPoint start; SkScalar length; int count; 590 std::tie(start, length, count) = span; 591 SkASSERT(length == 0.0f); 592 593 // Filter for the blending of the top and bottom pixels. 594 SkScalar filterY = sample_to_filter(Y(start)); 595 596 // Generate the four filter points from the sample point start. Generate the row* values. 597 Sk4i iXs, iYs; 598 this->filterPoints(start, &iXs, &iYs); 599 const void* const row0 = fAccessor.row(iYs[0]); 600 const void* const row1 = fAccessor.row(iYs[2]); 601 602 // Get the two pixels that make up the clamping pixel. 603 Sk4f pxTop, pxBottom; 604 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); 605 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; 606 607 while (count >= 4) { 608 fNext->blend4Pixels(pixel, pixel, pixel, pixel); 609 count -= 4; 610 } 611 while (count > 0) { 612 fNext->blendPixel(pixel); 613 count -= 1; 614 } 615 } 616 617 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce 618 // computation. In particular, several destination pixels maybe generated from the same four 619 // source pixels. 620 // In the following code a "part" is a combination of two pixels from the same column of the 621 // filter. 622 void spanSlowRate(Span span) { 623 SkPoint start; SkScalar length; int count; 624 std::tie(start, length, count) = span; 625 626 // Calculate the distance between each sample point. 627 const SkScalar dx = length / (count - 1); 628 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); 629 630 // Generate the filter values for the top-left corner. 631 // Note: these values are in filter space; this has implications about how to adjust 632 // these values at each step. For example, as the sample point increases, the filter 633 // value decreases, this is because the filter and position are related by 634 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite 635 // direction of the sample point which is increasing by dx. 636 SkScalar filterX = sample_to_filter(X(start)); 637 SkScalar filterY = sample_to_filter(Y(start)); 638 639 // Generate the four filter points from the sample point start. Generate the row* values. 640 Sk4i iXs, iYs; 641 this->filterPoints(start, &iXs, &iYs); 642 const void* const row0 = fAccessor.row(iYs[0]); 643 const void* const row1 = fAccessor.row(iYs[2]); 644 645 // Generate part of the filter value at xColumn. 646 auto partAtColumn = [&](int xColumn) { 647 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 648 Sk4f pxTop, pxBottom; 649 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 650 return pxTop * filterY + (1.0f - filterY) * pxBottom; 651 }; 652 653 // The leftPart is made up of two pixels from the left column of the filter, right part 654 // is similar. The top and bottom pixels in the *Part are created as a linear blend of 655 // the top and bottom pixels using filterY. See the partAtColumn function above. 656 Sk4f leftPart = partAtColumn(iXs[0]); 657 Sk4f rightPart = partAtColumn(iXs[1]); 658 659 // Create a destination color by blending together a left and right part using filterX. 660 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) { 661 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); 662 return check_pixel(pixel); 663 }; 664 665 // Send the first pixel to the destination. This simplifies the loop structure so that no 666 // extra pixels are fetched for the last iteration of the loop. 667 fNext->blendPixel(bilerp(leftPart, rightPart)); 668 count -= 1; 669 670 if (dx > 0.0f) { 671 // * positive direction - generate destination pixels by sliding the filter from left 672 // to right. 673 int rightPartCursor = iXs[1]; 674 675 // Advance the filter from left to right. Remember that moving the top-left corner of 676 // the filter to the right actually makes the filter value smaller. 677 auto advanceFilter = [&]() { 678 filterX -= dx; 679 if (filterX <= 0.0f) { 680 filterX += 1.0f; 681 leftPart = rightPart; 682 rightPartCursor += 1; 683 rightPart = partAtColumn(rightPartCursor); 684 } 685 SkASSERT(0.0f < filterX && filterX <= 1.0f); 686 687 return bilerp(leftPart, rightPart); 688 }; 689 690 while (count >= 4) { 691 Sk4f px0 = advanceFilter(), 692 px1 = advanceFilter(), 693 px2 = advanceFilter(), 694 px3 = advanceFilter(); 695 fNext->blend4Pixels(px0, px1, px2, px3); 696 count -= 4; 697 } 698 699 while (count > 0) { 700 fNext->blendPixel(advanceFilter()); 701 count -= 1; 702 } 703 } else { 704 // * negative direction - generate destination pixels by sliding the filter from 705 // right to left. 706 int leftPartCursor = iXs[0]; 707 708 // Advance the filter from right to left. Remember that moving the top-left corner of 709 // the filter to the left actually makes the filter value larger. 710 auto advanceFilter = [&]() { 711 // Remember, dx < 0 therefore this adds |dx| to filterX. 712 filterX -= dx; 713 // At this point filterX may be > 1, and needs to be wrapped back on to the filter 714 // interval, and the next column in the filter is calculated. 715 if (filterX > 1.0f) { 716 filterX -= 1.0f; 717 rightPart = leftPart; 718 leftPartCursor -= 1; 719 leftPart = partAtColumn(leftPartCursor); 720 } 721 SkASSERT(0.0f < filterX && filterX <= 1.0f); 722 723 return bilerp(leftPart, rightPart); 724 }; 725 726 while (count >= 4) { 727 Sk4f px0 = advanceFilter(), 728 px1 = advanceFilter(), 729 px2 = advanceFilter(), 730 px3 = advanceFilter(); 731 fNext->blend4Pixels(px0, px1, px2, px3); 732 count -= 4; 733 } 734 735 while (count > 0) { 736 fNext->blendPixel(advanceFilter()); 737 count -= 1; 738 } 739 } 740 } 741 742 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. 743 // Every filter part is used for two destination pixels, and the code can bulk load four 744 // pixels at a time. 745 void spanUnitRate(Span span) { 746 SkPoint start; SkScalar length; int count; 747 std::tie(start, length, count) = span; 748 SkASSERT(SkScalarAbs(length) == (count - 1)); 749 750 // Calculate the four filter points of start, and use the two different Y values to 751 // generate the row pointers. 752 Sk4i iXs, iYs; 753 filterPoints(start, &iXs, &iYs); 754 const void* row0 = fAccessor.row(iYs[0]); 755 const void* row1 = fAccessor.row(iYs[2]); 756 757 // Calculate the filter values for the top-left filter element. 758 const SkScalar filterX = sample_to_filter(X(start)); 759 const SkScalar filterY = sample_to_filter(Y(start)); 760 761 // Generate part of the filter value at xColumn. 762 auto partAtColumn = [&](int xColumn) { 763 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 764 Sk4f pxTop, pxBottom; 765 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 766 return pxTop * filterY + (1.0f - filterY) * pxBottom; 767 }; 768 769 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) { 770 // Check if the pixels needed are near the edges. If not go fast using bulk pixels, 771 // otherwise be careful. 772 if (0 <= ix && ix <= fXMax - 3) { 773 Sk4f px00, px10, px20, px30, 774 px01, px11, px21, px31; 775 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30); 776 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31); 777 *part0 = filterY * px00 + (1.0f - filterY) * px01; 778 *part1 = filterY * px10 + (1.0f - filterY) * px11; 779 *part2 = filterY * px20 + (1.0f - filterY) * px21; 780 *part3 = filterY * px30 + (1.0f - filterY) * px31; 781 } else { 782 *part0 = partAtColumn(ix + 0); 783 *part1 = partAtColumn(ix + 1); 784 *part2 = partAtColumn(ix + 2); 785 *part3 = partAtColumn(ix + 3); 786 } 787 }; 788 789 auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) { 790 return part0 * filterX + part1 * (1.0f - filterX); 791 }; 792 793 if (length > 0) { 794 // * positive direction - generate destination pixels by sliding the filter from left 795 // to right. 796 797 // overlapPart is the filter part from the end of the previous four pixels used at 798 // the start of the next four pixels. 799 Sk4f overlapPart = partAtColumn(iXs[0]); 800 int rightColumnCursor = iXs[1]; 801 while (count >= 4) { 802 Sk4f part0, part1, part2, part3; 803 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3); 804 Sk4f px0 = bilerp(overlapPart, part0); 805 Sk4f px1 = bilerp(part0, part1); 806 Sk4f px2 = bilerp(part1, part2); 807 Sk4f px3 = bilerp(part2, part3); 808 overlapPart = part3; 809 fNext->blend4Pixels(px0, px1, px2, px3); 810 rightColumnCursor += 4; 811 count -= 4; 812 } 813 814 while (count > 0) { 815 Sk4f rightPart = partAtColumn(rightColumnCursor); 816 817 fNext->blendPixel(bilerp(overlapPart, rightPart)); 818 overlapPart = rightPart; 819 rightColumnCursor += 1; 820 count -= 1; 821 } 822 } else { 823 // * negative direction - generate destination pixels by sliding the filter from 824 // right to left. 825 Sk4f overlapPart = partAtColumn(iXs[1]); 826 int leftColumnCursor = iXs[0]; 827 828 while (count >= 4) { 829 Sk4f part0, part1, part2, part3; 830 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0); 831 Sk4f px0 = bilerp(part0, overlapPart); 832 Sk4f px1 = bilerp(part1, part0); 833 Sk4f px2 = bilerp(part2, part1); 834 Sk4f px3 = bilerp(part3, part2); 835 overlapPart = part3; 836 fNext->blend4Pixels(px0, px1, px2, px3); 837 leftColumnCursor -= 4; 838 count -= 4; 839 } 840 841 while (count > 0) { 842 Sk4f leftPart = partAtColumn(leftColumnCursor); 843 844 fNext->blendPixel(bilerp(leftPart, overlapPart)); 845 overlapPart = leftPart; 846 leftColumnCursor -= 1; 847 count -= 1; 848 } 849 } 850 } 851 852 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but 853 // still slow enough to take advantage of previous calculations. 854 void spanMediumRate(Span span) { 855 SkPoint start; SkScalar length; int count; 856 std::tie(start, length, count) = span; 857 858 // Calculate the distance between each sample point. 859 const SkScalar dx = length / (count - 1); 860 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f)); 861 862 // Generate the filter values for the top-left corner. 863 // Note: these values are in filter space; this has implications about how to adjust 864 // these values at each step. For example, as the sample point increases, the filter 865 // value decreases, this is because the filter and position are related by 866 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite 867 // direction of the sample point which is increasing by dx. 868 SkScalar filterX = sample_to_filter(X(start)); 869 SkScalar filterY = sample_to_filter(Y(start)); 870 871 // Generate the four filter points from the sample point start. Generate the row* values. 872 Sk4i iXs, iYs; 873 this->filterPoints(start, &iXs, &iYs); 874 const void* const row0 = fAccessor.row(iYs[0]); 875 const void* const row1 = fAccessor.row(iYs[2]); 876 877 // Generate part of the filter value at xColumn. 878 auto partAtColumn = [&](int xColumn) { 879 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); 880 Sk4f pxTop, pxBottom; 881 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); 882 return pxTop * filterY + (1.0f - filterY) * pxBottom; 883 }; 884 885 // The leftPart is made up of two pixels from the left column of the filter, right part 886 // is similar. The top and bottom pixels in the *Part are created as a linear blend of 887 // the top and bottom pixels using filterY. See the nextPart function below. 888 Sk4f leftPart = partAtColumn(iXs[0]); 889 Sk4f rightPart = partAtColumn(iXs[1]); 890 891 // Create a destination color by blending together a left and right part using filterX. 892 auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) { 893 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); 894 return check_pixel(pixel); 895 }; 896 897 // Send the first pixel to the destination. This simplifies the loop structure so that no 898 // extra pixels are fetched for the last iteration of the loop. 899 fNext->blendPixel(bilerp(leftPart, rightPart)); 900 count -= 1; 901 902 if (dx > 0.0f) { 903 // * positive direction - generate destination pixels by sliding the filter from left 904 // to right. 905 int rightPartCursor = iXs[1]; 906 907 // Advance the filter from left to right. Remember that moving the top-left corner of 908 // the filter to the right actually makes the filter value smaller. 909 auto advanceFilter = [&]() { 910 filterX -= dx; 911 // At this point filterX is less than zero, but might actually be less than -1. 912 if (filterX > -1.0f) { 913 filterX += 1.0f; 914 leftPart = rightPart; 915 rightPartCursor += 1; 916 rightPart = partAtColumn(rightPartCursor); 917 } else { 918 filterX += 2.0f; 919 rightPartCursor += 2; 920 leftPart = partAtColumn(rightPartCursor - 1); 921 rightPart = partAtColumn(rightPartCursor); 922 } 923 SkASSERT(0.0f < filterX && filterX <= 1.0f); 924 925 return bilerp(leftPart, rightPart); 926 }; 927 928 while (count >= 4) { 929 Sk4f px0 = advanceFilter(), 930 px1 = advanceFilter(), 931 px2 = advanceFilter(), 932 px3 = advanceFilter(); 933 fNext->blend4Pixels(px0, px1, px2, px3); 934 count -= 4; 935 } 936 937 while (count > 0) { 938 fNext->blendPixel(advanceFilter()); 939 count -= 1; 940 } 941 } else { 942 // * negative direction - generate destination pixels by sliding the filter from 943 // right to left. 944 int leftPartCursor = iXs[0]; 945 946 auto advanceFilter = [&]() { 947 // Remember, dx < 0 therefore this adds |dx| to filterX. 948 filterX -= dx; 949 // At this point, filterX is greater than one, but may actually be greater than two. 950 if (filterX < 2.0f) { 951 filterX -= 1.0f; 952 rightPart = leftPart; 953 leftPartCursor -= 1; 954 leftPart = partAtColumn(leftPartCursor); 955 } else { 956 filterX -= 2.0f; 957 leftPartCursor -= 2; 958 rightPart = partAtColumn(leftPartCursor - 1); 959 leftPart = partAtColumn(leftPartCursor); 960 } 961 SkASSERT(0.0f < filterX && filterX <= 1.0f); 962 return bilerp(leftPart, rightPart); 963 }; 964 965 while (count >= 4) { 966 Sk4f px0 = advanceFilter(), 967 px1 = advanceFilter(), 968 px2 = advanceFilter(), 969 px3 = advanceFilter(); 970 fNext->blend4Pixels(px0, px1, px2, px3); 971 count -= 4; 972 } 973 974 while (count > 0) { 975 fNext->blendPixel(advanceFilter()); 976 count -= 1; 977 } 978 } 979 } 980 981 // We're moving through source space faster than dst (zoomed out), 982 // so we'll never reuse a source pixel or be able to do contiguous loads. 983 void spanFastRate(Span span) { 984 SkPoint start; SkScalar length; int count; 985 std::tie(start, length, count) = span; 986 SkScalar x = X(start); 987 SkScalar y = Y(start); 988 989 SkScalar dx = length / (count - 1); 990 while (count > 0) { 991 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y})); 992 x += dx; 993 count -= 1; 994 } 995 } 996 997 Next* const fNext; 998 const SkShader::TileMode fXEdgeType; 999 const int fXMax; 1000 const SkShader::TileMode fYEdgeType; 1001 const int fYMax; 1002 Accessor fAccessor; 1003 }; 1004 1005 } // namespace 1006 1007 #endif // SkLinearBitmapPipeline_sampler_DEFINED 1008