Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2016 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
      9 #define SkLinearBitmapPipeline_sampler_DEFINED
     10 
     11 #include <tuple>
     12 
     13 #include "SkAutoMalloc.h"
     14 #include "SkColor.h"
     15 #include "SkColorPriv.h"
     16 #include "SkFixed.h"  // for SkFixed1 only. Don't use SkFixed in this file.
     17 #include "SkHalf.h"
     18 #include "SkLinearBitmapPipeline_core.h"
     19 #include "SkNx.h"
     20 #include "SkPM4fPriv.h"
     21 
     22 namespace {
     23 // Explanation of the math:
     24 //              1 - x      x
     25 //           +--------+--------+
     26 //           |        |        |
     27 //  1 - y    |  px00  |  px10  |
     28 //           |        |        |
     29 //           +--------+--------+
     30 //           |        |        |
     31 //    y      |  px01  |  px11  |
     32 //           |        |        |
     33 //           +--------+--------+
     34 //
     35 //
     36 // Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x
     37 // and y:
     38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
     39 // * px10 -> x(1 - y) = x - xy
     40 // * px01 -> (1 - x)y = y - xy
     41 // * px11 -> xy
     42 // So x * y is calculated first and then used to calculate all the other factors.
     43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
     44                                                     Sk4f px01, Sk4f px11) {
     45     // Calculate fractional xs and ys.
     46     Sk4s fxs = xs - xs.floor();
     47     Sk4s fys = ys - ys.floor();
     48     Sk4s fxys{fxs * fys};
     49     Sk4f sum = px11 * fxys;
     50     sum = sum + px01 * (fys - fxys);
     51     sum = sum + px10 * (fxs - fxys);
     52     sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
     53     return sum;
     54 }
     55 
     56 ////////////////////////////////////////////////////////////////////////////////////////////////////
     57 // PixelConverter is the lowest level interface to the source data. There is a PixelConverter
     58 // specialization for each of the different SkColorTypes.
     59 template <SkColorType, SkGammaType> class PixelConverter;
     60 
     61 // Alpha handling:
     62 //   The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
     63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
     64 // modulate this color later.
     65 template <>
     66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
     67 public:
     68     using Element = uint8_t;
     69     PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
     70         fTintColor = SkColor4f::FromColor(tintColor);
     71         fTintColor.fA = 1.0f;
     72     }
     73 
     74     Sk4f toSk4f(const Element pixel) const {
     75         return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
     76     }
     77 
     78 private:
     79     SkColor4f fTintColor;
     80 };
     81 
     82 template <SkGammaType gammaType>
     83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
     84     return swizzle_rb_if_bgra(
     85             (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
     86                                              : Sk4f_fromL32(pixel));
     87 }
     88 
     89 template <SkGammaType gammaType>
     90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
     91 public:
     92     using Element = uint16_t;
     93     PixelConverter(const SkPixmap& srcPixmap) { }
     94 
     95     Sk4f toSk4f(Element pixel) const {
     96         return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
     97     }
     98 };
     99 
    100 template <SkGammaType gammaType>
    101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
    102 public:
    103     using Element = uint16_t;
    104     PixelConverter(const SkPixmap& srcPixmap) { }
    105 
    106     Sk4f toSk4f(Element pixel) const {
    107         return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
    108     }
    109 };
    110 
    111 template <SkGammaType gammaType>
    112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
    113 public:
    114     using Element = uint32_t;
    115     PixelConverter(const SkPixmap& srcPixmap) { }
    116 
    117     Sk4f toSk4f(Element pixel) const {
    118         return gammaType == kSRGB_SkGammaType
    119                ? Sk4f_fromS32(pixel)
    120                : Sk4f_fromL32(pixel);
    121     }
    122 };
    123 
    124 template <SkGammaType gammaType>
    125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
    126 public:
    127     using Element = uint32_t;
    128     PixelConverter(const SkPixmap& srcPixmap) { }
    129 
    130     Sk4f toSk4f(Element pixel) const {
    131         return swizzle_rb(
    132                    gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
    133     }
    134 };
    135 
    136 template <SkGammaType gammaType>
    137 class PixelConverter<kIndex_8_SkColorType, gammaType> {
    138 public:
    139     using Element = uint8_t;
    140     PixelConverter(const SkPixmap& srcPixmap)
    141     : fColorTableSize(srcPixmap.ctable()->count()){
    142         SkColorTable* skColorTable = srcPixmap.ctable();
    143         SkASSERT(skColorTable != nullptr);
    144 
    145         fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
    146         for (int i = 0; i < fColorTableSize; i++) {
    147             fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
    148         }
    149     }
    150 
    151     PixelConverter(const PixelConverter& strategy)
    152     : fColorTableSize{strategy.fColorTableSize}{
    153         fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
    154         for (int i = 0; i < fColorTableSize; i++) {
    155             fColorTable[i] = strategy.fColorTable[i];
    156         }
    157     }
    158 
    159     Sk4f toSk4f(Element index) const {
    160         return fColorTable[index];
    161     }
    162 
    163 private:
    164     static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
    165     const int           fColorTableSize;
    166     SkAutoMalloc        fColorTableStorage{kColorTableSize};
    167     Sk4f*               fColorTable;
    168 };
    169 
    170 template <SkGammaType gammaType>
    171 class PixelConverter<kGray_8_SkColorType, gammaType> {
    172 public:
    173     using Element = uint8_t;
    174     PixelConverter(const SkPixmap& srcPixmap) { }
    175 
    176     Sk4f toSk4f(Element pixel) const {
    177         float gray = (gammaType == kSRGB_SkGammaType)
    178             ? sk_linear_from_srgb[pixel]
    179             : pixel * (1/255.0f);
    180         return {gray, gray, gray, 1.0f};
    181     }
    182 };
    183 
    184 template <>
    185 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
    186 public:
    187     using Element = uint64_t;
    188     PixelConverter(const SkPixmap& srcPixmap) { }
    189 
    190     Sk4f toSk4f(const Element pixel) const {
    191         return SkHalfToFloat_finite_ftz(pixel);
    192     }
    193 };
    194 
    195 class PixelAccessorShim {
    196 public:
    197     explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
    198         : fPixelAccessor(accessor) { }
    199 
    200     void SK_VECTORCALL getFewPixels(
    201         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
    202         fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
    203     }
    204 
    205     void SK_VECTORCALL get4Pixels(
    206         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
    207         fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
    208     }
    209 
    210     void get4Pixels(
    211         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
    212         fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
    213     }
    214 
    215     Sk4f getPixelFromRow(const void* row, int index) const {
    216         return fPixelAccessor->getPixelFromRow(row, index);
    217     }
    218 
    219     Sk4f getPixelAt(int index) const {
    220         return fPixelAccessor->getPixelAt(index);
    221     }
    222 
    223     const void* row(int y) const {
    224         return fPixelAccessor->row(y);
    225     }
    226 
    227 private:
    228     SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
    229 };
    230 
    231 ////////////////////////////////////////////////////////////////////////////////////////////////////
    232 // PixelAccessor handles all the same plumbing for all the PixelGetters.
    233 template <SkColorType colorType, SkGammaType gammaType>
    234 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
    235     using Element = typename PixelConverter<colorType, gammaType>::Element;
    236 public:
    237     template <typename... Args>
    238     PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
    239         : fSrc{static_cast<const Element*>(srcPixmap.addr())}
    240         , fWidth{srcPixmap.rowBytesAsPixels()}
    241         , fConverter{srcPixmap, std::move<Args>(args)...} { }
    242 
    243     void SK_VECTORCALL getFewPixels (
    244         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
    245         Sk4i bufferLoc = ys * fWidth + xs;
    246         switch (n) {
    247             case 3:
    248                 *px2 = this->getPixelAt(bufferLoc[2]);
    249             case 2:
    250                 *px1 = this->getPixelAt(bufferLoc[1]);
    251             case 1:
    252                 *px0 = this->getPixelAt(bufferLoc[0]);
    253             default:
    254                 break;
    255         }
    256     }
    257 
    258     void SK_VECTORCALL get4Pixels(
    259         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
    260         Sk4i bufferLoc = ys * fWidth + xs;
    261         *px0 = this->getPixelAt(bufferLoc[0]);
    262         *px1 = this->getPixelAt(bufferLoc[1]);
    263         *px2 = this->getPixelAt(bufferLoc[2]);
    264         *px3 = this->getPixelAt(bufferLoc[3]);
    265     }
    266 
    267     void get4Pixels(
    268         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
    269         *px0 = this->getPixelFromRow(src, index + 0);
    270         *px1 = this->getPixelFromRow(src, index + 1);
    271         *px2 = this->getPixelFromRow(src, index + 2);
    272         *px3 = this->getPixelFromRow(src, index + 3);
    273     }
    274 
    275     Sk4f getPixelFromRow(const void* row, int index) const override {
    276         const Element* src = static_cast<const Element*>(row);
    277         return fConverter.toSk4f(src[index]);
    278     }
    279 
    280     Sk4f getPixelAt(int index) const override {
    281         return this->getPixelFromRow(fSrc, index);
    282     }
    283 
    284     const void* row(int y) const override { return fSrc + y * fWidth; }
    285 
    286 private:
    287     const Element* const                 fSrc;
    288     const int                            fWidth;
    289     PixelConverter<colorType, gammaType> fConverter;
    290 };
    291 
    292 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
    293 // We'll never re-use pixels, but we can at least load contiguous pixels.
    294 template <typename Next, typename Strategy>
    295 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
    296     SkPoint start;
    297     SkScalar length;
    298     int count;
    299     std::tie(start, length, count) = span;
    300     int ix = SkScalarFloorToInt(X(start));
    301     const void* row = strategy->row((int)std::floor(Y(start)));
    302     if (length > 0) {
    303         while (count >= 4) {
    304             Sk4f px0, px1, px2, px3;
    305             strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
    306             next->blend4Pixels(px0, px1, px2, px3);
    307             ix += 4;
    308             count -= 4;
    309         }
    310 
    311         while (count > 0) {
    312             next->blendPixel(strategy->getPixelFromRow(row, ix));
    313             ix += 1;
    314             count -= 1;
    315         }
    316     } else {
    317         while (count >= 4) {
    318             Sk4f px0, px1, px2, px3;
    319             strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
    320             next->blend4Pixels(px0, px1, px2, px3);
    321             ix -= 4;
    322             count -= 4;
    323         }
    324 
    325         while (count > 0) {
    326             next->blendPixel(strategy->getPixelFromRow(row, ix));
    327             ix -= 1;
    328             count -= 1;
    329         }
    330     }
    331 }
    332 
    333 // -- NearestNeighborSampler -----------------------------------------------------------------------
    334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
    335 template<typename Accessor, typename Next>
    336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
    337 public:
    338     template<typename... Args>
    339     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
    340     : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
    341 
    342     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
    343     const NearestNeighborSampler& sampler)
    344     : fNext{next}, fAccessor{sampler.fAccessor} { }
    345 
    346     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
    347         SkASSERT(0 < n && n < 4);
    348         Sk4f px0, px1, px2;
    349         fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
    350         if (n >= 1) fNext->blendPixel(px0);
    351         if (n >= 2) fNext->blendPixel(px1);
    352         if (n >= 3) fNext->blendPixel(px2);
    353     }
    354 
    355     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
    356         Sk4f px0, px1, px2, px3;
    357         fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
    358         fNext->blend4Pixels(px0, px1, px2, px3);
    359     }
    360 
    361     void pointSpan(Span span) override {
    362         SkASSERT(!span.isEmpty());
    363         SkPoint start;
    364         SkScalar length;
    365         int count;
    366         std::tie(start, length, count) = span;
    367         SkScalar absLength = SkScalarAbs(length);
    368         if (absLength < (count - 1)) {
    369             this->spanSlowRate(span);
    370         } else if (absLength == (count - 1)) {
    371             src_strategy_blend(span, fNext, &fAccessor);
    372         } else {
    373             this->spanFastRate(span);
    374         }
    375     }
    376 
    377     void repeatSpan(Span span, int32_t repeatCount) override {
    378         while (repeatCount > 0) {
    379             this->pointSpan(span);
    380             repeatCount--;
    381         }
    382     }
    383 
    384 private:
    385     // When moving through source space more slowly than dst space (zoomed in),
    386     // we'll be sampling from the same source pixel more than once.
    387     void spanSlowRate(Span span) {
    388         SkPoint start; SkScalar length; int count;
    389         std::tie(start, length, count) = span;
    390         SkScalar x = X(start);
    391         // fx is a fixed 48.16 number.
    392         int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
    393         SkScalar dx = length / (count - 1);
    394         // fdx is a fixed 48.16 number.
    395         int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
    396 
    397         const void* row = fAccessor.row((int)std::floor(Y(start)));
    398         Next* next = fNext;
    399 
    400         int64_t ix = fx >> 16;
    401         int64_t prevIX = ix;
    402         Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
    403 
    404         // When dx is less than one, each pixel is used more than once. Using the fixed point fx
    405         // allows the code to quickly check that the same pixel is being used. The code uses this
    406         // same pixel check to do the sRGB and normalization only once.
    407         auto getNextPixel = [&]() {
    408             if (ix != prevIX) {
    409                 fpixel = fAccessor.getPixelFromRow(row, ix);
    410                 prevIX = ix;
    411             }
    412             fx += fdx;
    413             ix = fx >> 16;
    414             return fpixel;
    415         };
    416 
    417         while (count >= 4) {
    418             Sk4f px0 = getNextPixel();
    419             Sk4f px1 = getNextPixel();
    420             Sk4f px2 = getNextPixel();
    421             Sk4f px3 = getNextPixel();
    422             next->blend4Pixels(px0, px1, px2, px3);
    423             count -= 4;
    424         }
    425         while (count > 0) {
    426             next->blendPixel(getNextPixel());
    427             count -= 1;
    428         }
    429     }
    430 
    431     // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
    432     // We'll never re-use pixels, but we can at least load contiguous pixels.
    433     void spanUnitRate(Span span) {
    434         src_strategy_blend(span, fNext, &fAccessor);
    435     }
    436 
    437     // We're moving through source space faster than dst (zoomed out),
    438     // so we'll never reuse a source pixel or be able to do contiguous loads.
    439     void spanFastRate(Span span) {
    440         span_fallback(span, this);
    441     }
    442 
    443     Next* const fNext;
    444     Accessor    fAccessor;
    445 };
    446 
    447 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
    448 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
    449 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
    450 // on the interval [0, vMax].
    451 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
    452 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
    453     SkASSERT(-1 <= vs && vs <= vMax + 1);
    454     switch (edgeType) {
    455         case SkShader::kClamp_TileMode:
    456         case SkShader::kMirror_TileMode:
    457             vs = std::max(vs, 0);
    458             vs = std::min(vs, vMax);
    459             break;
    460         case SkShader::kRepeat_TileMode:
    461             vs = (vs <= vMax) ? vs : 0;
    462             vs =    (vs >= 0) ? vs : vMax;
    463             break;
    464     }
    465     SkASSERT(0 <= vs && vs <= vMax);
    466     return vs;
    467 }
    468 
    469 // From a sample point on the tile, return the top or left filter value.
    470 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
    471 // left element, then if x == 0.5 the filter value should be 1.0.
    472 // The input sample point must be on the tile, therefore it must be >= 0.
    473 static SkScalar sample_to_filter(SkScalar x) {
    474     SkASSERT(x >= 0.0f);
    475     // The usual form of the top or left edge is x - .5, but since we are working on the unit
    476     // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
    477     // of trunc.
    478     SkScalar v = x + 0.5f;
    479     // Produce the top or left offset a value on the range [0, 1).
    480     SkScalar f = v - SkScalarTruncToScalar(v);
    481     // Produce the filter value which is on the range (0, 1].
    482     SkScalar r =  1.0f - f;
    483     SkASSERT(0.0f < r && r <= 1.0f);
    484     return r;
    485 }
    486 
    487 // -- BilerpSampler --------------------------------------------------------------------------------
    488 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
    489 // Note: in the code below, there are two types of points
    490 //       * sample points - these are the points passed in by pointList* and Spans.
    491 //       * filter points - are created from a sample point to form the coordinates of the points
    492 //                         to use in the filter and to generate the filter values.
    493 template<typename Accessor, typename Next>
    494 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
    495 public:
    496     template<typename... Args>
    497     BilerpSampler(
    498         SkLinearBitmapPipeline::BlendProcessorInterface* next,
    499         SkISize dimensions,
    500         SkShader::TileMode xTile, SkShader::TileMode yTile,
    501         Args&& ... args
    502     )
    503         : fNext{next}
    504         , fXEdgeType{xTile}
    505         , fXMax{dimensions.width() - 1}
    506         , fYEdgeType{yTile}
    507         , fYMax{dimensions.height() - 1}
    508         , fAccessor{std::forward<Args>(args)...} { }
    509 
    510     BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
    511                    const BilerpSampler& sampler)
    512         : fNext{next}
    513         , fXEdgeType{sampler.fXEdgeType}
    514         , fXMax{sampler.fXMax}
    515         , fYEdgeType{sampler.fYEdgeType}
    516         , fYMax{sampler.fYMax}
    517         , fAccessor{sampler.fAccessor} { }
    518 
    519     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
    520         SkASSERT(0 < n && n < 4);
    521         auto bilerpPixel = [&](int index) {
    522             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
    523         };
    524 
    525         if (n >= 1) fNext->blendPixel(bilerpPixel(0));
    526         if (n >= 2) fNext->blendPixel(bilerpPixel(1));
    527         if (n >= 3) fNext->blendPixel(bilerpPixel(2));
    528     }
    529 
    530     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
    531         auto bilerpPixel = [&](int index) {
    532             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
    533         };
    534         fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
    535     }
    536 
    537     void pointSpan(Span span) override {
    538         SkASSERT(!span.isEmpty());
    539         SkPoint start;
    540         SkScalar length;
    541         int count;
    542         std::tie(start, length, count) = span;
    543 
    544         // Nothing to do.
    545         if (count == 0) {
    546             return;
    547         }
    548 
    549         // Trivial case. No sample points are generated other than start.
    550         if (count == 1) {
    551             fNext->blendPixel(this->bilerpSamplePoint(start));
    552             return;
    553         }
    554 
    555         // Note: the following code could be done in terms of dx = length / (count -1), but that
    556         // would introduce a divide that is not needed for the most common dx == 1 cases.
    557         SkScalar absLength = SkScalarAbs(length);
    558         if (absLength == 0.0f) {
    559             // |dx| == 0
    560             // length is zero, so clamp an edge pixel.
    561             this->spanZeroRate(span);
    562         } else if (absLength < (count - 1)) {
    563             // 0 < |dx| < 1.
    564             this->spanSlowRate(span);
    565         } else if (absLength == (count - 1)) {
    566             // |dx| == 1.
    567             if (sample_to_filter(span.startX()) == 1.0f
    568                 && sample_to_filter(span.startY()) == 1.0f) {
    569                 // All the pixels are aligned with the dest; go fast.
    570                 src_strategy_blend(span, fNext, &fAccessor);
    571             } else {
    572                 // There is some sub-pixel offsets, so bilerp.
    573                 this->spanUnitRate(span);
    574             }
    575         } else if (absLength < 2.0f * (count - 1)) {
    576             // 1 < |dx| < 2.
    577             this->spanMediumRate(span);
    578         } else {
    579             // |dx| >= 2.
    580             this->spanFastRate(span);
    581         }
    582     }
    583 
    584     void repeatSpan(Span span, int32_t repeatCount) override {
    585         while (repeatCount > 0) {
    586             this->pointSpan(span);
    587             repeatCount--;
    588         }
    589     }
    590 
    591 private:
    592 
    593     // Convert a sample point to the points used by the filter.
    594     void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
    595         // May be less than zero. Be careful to use Floor.
    596         int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
    597         // Always greater than zero. Use the faster Trunc.
    598         int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
    599         int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
    600         int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
    601 
    602         *filterXs = Sk4i{x0, x1, x0, x1};
    603         *filterYs = Sk4i{y0, y0, y1, y1};
    604     }
    605 
    606     // Given a sample point, generate a color by bilerping the four filter points.
    607     Sk4f bilerpSamplePoint(SkPoint sample) {
    608         Sk4i iXs, iYs;
    609         filterPoints(sample, &iXs, &iYs);
    610         Sk4f px00, px10, px01, px11;
    611         fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
    612         return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
    613     }
    614 
    615     // Get two pixels at x from row0 and row1.
    616     void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
    617         *px0 = fAccessor.getPixelFromRow(row0, x);
    618         *px1 = fAccessor.getPixelFromRow(row1, x);
    619     }
    620 
    621     // |dx| == 0. This code assumes that length is zero.
    622     void spanZeroRate(Span span) {
    623         SkPoint start; SkScalar length; int count;
    624         std::tie(start, length, count) = span;
    625         SkASSERT(length == 0.0f);
    626 
    627         // Filter for the blending of the top and bottom pixels.
    628         SkScalar filterY = sample_to_filter(Y(start));
    629 
    630         // Generate the four filter points from the sample point start. Generate the row* values.
    631         Sk4i iXs, iYs;
    632         this->filterPoints(start, &iXs, &iYs);
    633         const void* const row0 = fAccessor.row(iYs[0]);
    634         const void* const row1 = fAccessor.row(iYs[2]);
    635 
    636         // Get the two pixels that make up the clamping pixel.
    637         Sk4f pxTop, pxBottom;
    638         this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
    639         Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
    640 
    641         while (count >= 4) {
    642             fNext->blend4Pixels(pixel, pixel, pixel, pixel);
    643             count -= 4;
    644         }
    645         while (count > 0) {
    646             fNext->blendPixel(pixel);
    647             count -= 1;
    648         }
    649     }
    650 
    651     // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
    652     // computation. In particular, several destination pixels maybe generated from the same four
    653     // source pixels.
    654     // In the following code a "part" is a combination of two pixels from the same column of the
    655     // filter.
    656     void spanSlowRate(Span span) {
    657         SkPoint start; SkScalar length; int count;
    658         std::tie(start, length, count) = span;
    659 
    660         // Calculate the distance between each sample point.
    661         const SkScalar dx = length / (count - 1);
    662         SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
    663 
    664         // Generate the filter values for the top-left corner.
    665         // Note: these values are in filter space; this has implications about how to adjust
    666         // these values at each step. For example, as the sample point increases, the filter
    667         // value decreases, this is because the filter and position are related by
    668         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
    669         // direction of the sample point which is increasing by dx.
    670         SkScalar filterX = sample_to_filter(X(start));
    671         SkScalar filterY = sample_to_filter(Y(start));
    672 
    673         // Generate the four filter points from the sample point start. Generate the row* values.
    674         Sk4i iXs, iYs;
    675         this->filterPoints(start, &iXs, &iYs);
    676         const void* const row0 = fAccessor.row(iYs[0]);
    677         const void* const row1 = fAccessor.row(iYs[2]);
    678 
    679         // Generate part of the filter value at xColumn.
    680         auto partAtColumn = [&](int xColumn) {
    681             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
    682             Sk4f pxTop, pxBottom;
    683             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
    684             return pxTop * filterY + (1.0f - filterY) * pxBottom;
    685         };
    686 
    687         // The leftPart is made up of two pixels from the left column of the filter, right part
    688         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
    689         // the top and bottom pixels using filterY. See the partAtColumn function above.
    690         Sk4f leftPart  = partAtColumn(iXs[0]);
    691         Sk4f rightPart = partAtColumn(iXs[1]);
    692 
    693         // Create a destination color by blending together a left and right part using filterX.
    694         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
    695             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
    696             return check_pixel(pixel);
    697         };
    698 
    699         // Send the first pixel to the destination. This simplifies the loop structure so that no
    700         // extra pixels are fetched for the last iteration of the loop.
    701         fNext->blendPixel(bilerp(leftPart, rightPart));
    702         count -= 1;
    703 
    704         if (dx > 0.0f) {
    705             // * positive direction - generate destination pixels by sliding the filter from left
    706             //                        to right.
    707             int rightPartCursor = iXs[1];
    708 
    709             // Advance the filter from left to right. Remember that moving the top-left corner of
    710             // the filter to the right actually makes the filter value smaller.
    711             auto advanceFilter = [&]() {
    712                 filterX -= dx;
    713                 if (filterX <= 0.0f) {
    714                     filterX += 1.0f;
    715                     leftPart = rightPart;
    716                     rightPartCursor += 1;
    717                     rightPart = partAtColumn(rightPartCursor);
    718                 }
    719                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    720 
    721                 return bilerp(leftPart, rightPart);
    722             };
    723 
    724             while (count >= 4) {
    725                 Sk4f px0 = advanceFilter(),
    726                      px1 = advanceFilter(),
    727                      px2 = advanceFilter(),
    728                      px3 = advanceFilter();
    729                 fNext->blend4Pixels(px0, px1, px2, px3);
    730                 count -= 4;
    731             }
    732 
    733             while (count > 0) {
    734                 fNext->blendPixel(advanceFilter());
    735                 count -= 1;
    736             }
    737         } else {
    738             // * negative direction - generate destination pixels by sliding the filter from
    739             //                        right to left.
    740             int leftPartCursor = iXs[0];
    741 
    742             // Advance the filter from right to left. Remember that moving the top-left corner of
    743             // the filter to the left actually makes the filter value larger.
    744             auto advanceFilter = [&]() {
    745                 // Remember, dx < 0 therefore this adds |dx| to filterX.
    746                 filterX -= dx;
    747                 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
    748                 // interval, and the next column in the filter is calculated.
    749                 if (filterX > 1.0f) {
    750                     filterX -= 1.0f;
    751                     rightPart = leftPart;
    752                     leftPartCursor -= 1;
    753                     leftPart = partAtColumn(leftPartCursor);
    754                 }
    755                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    756 
    757                 return bilerp(leftPart, rightPart);
    758             };
    759 
    760             while (count >= 4) {
    761                 Sk4f px0 = advanceFilter(),
    762                      px1 = advanceFilter(),
    763                      px2 = advanceFilter(),
    764                      px3 = advanceFilter();
    765                 fNext->blend4Pixels(px0, px1, px2, px3);
    766                 count -= 4;
    767             }
    768 
    769             while (count > 0) {
    770                 fNext->blendPixel(advanceFilter());
    771                 count -= 1;
    772             }
    773         }
    774     }
    775 
    776     // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
    777     // Every filter part is used for two destination pixels, and the code can bulk load four
    778     // pixels at a time.
     779     void spanUnitRate(Span span) {
                 // Emits `count` bilinearly filtered pixels to fNext for a span whose samples are
                 // exactly one source pixel apart, moving in either X direction.
     780         SkPoint start; SkScalar length; int count;
     781         std::tie(start, length, count) = span;
                 // Unit-rate precondition: |length| == count - 1.
     782         SkASSERT(SkScalarAbs(length) == (count - 1));
     783 
     784         // Calculate the four filter points of start, and use the two different Y values to
     785         // generate the row pointers.
     786         Sk4i iXs, iYs;
     787         filterPoints(start, &iXs, &iYs);
     788         const void* row0 = fAccessor.row(iYs[0]);
     789         const void* row1 = fAccessor.row(iYs[2]);
     790 
     791         // Calculate the filter values for the top-left filter element.
                 // Both values are const: this method never re-derives them while walking the span.
     792         const SkScalar filterX = sample_to_filter(X(start));
     793         const SkScalar filterY = sample_to_filter(Y(start));
     794 
     795         // Generate part of the filter value at xColumn.
                 // The column index is first passed through adjust_edge with fXEdgeType and fXMax;
                 // the top (row0) and bottom (row1) pixels of that column are then blended using
                 // filterY for the top and (1 - filterY) for the bottom.
     796         auto partAtColumn = [&](int xColumn) {
     797             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
     798             Sk4f pxTop, pxBottom;
     799             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
     800             return pxTop * filterY + (1.0f - filterY) * pxBottom;
     801         };
     802 
                 // Produce the vertical blends for the four consecutive columns ix .. ix + 3, writing
                 // them to part0 .. part3 in increasing column order.
     803         auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
     804             // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
     805             // otherwise be careful.
     806             if (0 <= ix && ix <= fXMax - 3) {
     807                 Sk4f px00, px10, px20, px30,
     808                      px01, px11, px21, px31;
     809                 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
     810                 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
     811                 *part0 = filterY * px00 + (1.0f - filterY) * px01;
     812                 *part1 = filterY * px10 + (1.0f - filterY) * px11;
     813                 *part2 = filterY * px20 + (1.0f - filterY) * px21;
     814                 *part3 = filterY * px30 + (1.0f - filterY) * px31;
     815             } else {
                         // Near an edge: fall back to per-column fetches, which go through adjust_edge.
     816                 *part0 = partAtColumn(ix + 0);
     817                 *part1 = partAtColumn(ix + 1);
     818                 *part2 = partAtColumn(ix + 2);
     819                 *part3 = partAtColumn(ix + 3);
     820             }
     821         };
     822 
                 // Horizontal blend of two column parts: part0 weighted by filterX, part1 by
                 // (1 - filterX). Callers pass the left column as part0 and the right as part1.
                 // NOTE(review): unlike the bilerp helpers in the slow- and medium-rate spans, the
                 // result is not passed through check_pixel -- confirm this is intentional.
     823         auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
     824             return part0 * filterX + part1 * (1.0f - filterX);
     825         };
     826 
     827         if (length > 0) {
     828             // * positive direction - generate destination pixels by sliding the filter from left
     829             //                        to right.
     830 
     831             // overlapPart is the filter part from the end of the previous four pixels used at
     832             // the start of the next four pixels.
     833             Sk4f overlapPart = partAtColumn(iXs[0]);
     834             int rightColumnCursor = iXs[1];
     835             while (count >= 4) {
     836                 Sk4f part0, part1, part2, part3;
     837                 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
     838                 Sk4f px0 = bilerp(overlapPart, part0);
     839                 Sk4f px1 = bilerp(part0, part1);
     840                 Sk4f px2 = bilerp(part1, part2);
     841                 Sk4f px3 = bilerp(part2, part3);
     842                 overlapPart = part3;
     843                 fNext->blend4Pixels(px0, px1, px2, px3);
     844                 rightColumnCursor += 4;
     845                 count -= 4;
     846             }
     847 
                     // Fewer than four pixels remain: emit them one column at a time.
     848             while (count > 0) {
     849                 Sk4f rightPart = partAtColumn(rightColumnCursor);
     850 
     851                 fNext->blendPixel(bilerp(overlapPart, rightPart));
     852                 overlapPart = rightPart;
     853                 rightColumnCursor += 1;
     854                 count -= 1;
     855             }
     856         } else {
     857             // * negative direction - generate destination pixels by sliding the filter from
     858             //                        right to left.
                     // Here overlapPart is the right-hand column of the filter and the cursor tracks
                     // the left-hand column.
     859             Sk4f overlapPart = partAtColumn(iXs[1]);
     860             int leftColumnCursor = iXs[0];
     861 
     862             while (count >= 4) {
     863                 Sk4f part0, part1, part2, part3;
                         // Fetch columns leftColumnCursor - 3 .. leftColumnCursor. The output pointers
                         // are passed in reverse order, so part0 receives the column at
                         // leftColumnCursor and part3 the column at leftColumnCursor - 3.
     864                 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
     865                 Sk4f px0 = bilerp(part0, overlapPart);
     866                 Sk4f px1 = bilerp(part1, part0);
     867                 Sk4f px2 = bilerp(part2, part1);
     868                 Sk4f px3 = bilerp(part3, part2);
     869                 overlapPart = part3;
     870                 fNext->blend4Pixels(px0, px1, px2, px3);
     871                 leftColumnCursor -= 4;
     872                 count -= 4;
     873             }
     874 
                     // Fewer than four pixels remain: emit them one column at a time.
     875             while (count > 0) {
     876                 Sk4f leftPart = partAtColumn(leftColumnCursor);
     877 
     878                 fNext->blendPixel(bilerp(leftPart, overlapPart));
     879                 overlapPart = leftPart;
     880                 leftColumnCursor -= 1;
     881                 count -= 1;
     882             }
     883         }
     884     }
    885 
    886     // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
    887     // still slow enough to take advantage of previous calculations.
    888     void spanMediumRate(Span span) {
    889         SkPoint start; SkScalar length; int count;
    890         std::tie(start, length, count) = span;
    891 
    892         // Calculate the distance between each sample point.
    893         const SkScalar dx = length / (count - 1);
    894         SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
    895 
    896         // Generate the filter values for the top-left corner.
    897         // Note: these values are in filter space; this has implications about how to adjust
    898         // these values at each step. For example, as the sample point increases, the filter
    899         // value decreases, this is because the filter and position are related by
    900         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
    901         // direction of the sample point which is increasing by dx.
    902         SkScalar filterX = sample_to_filter(X(start));
    903         SkScalar filterY = sample_to_filter(Y(start));
    904 
    905         // Generate the four filter points from the sample point start. Generate the row* values.
    906         Sk4i iXs, iYs;
    907         this->filterPoints(start, &iXs, &iYs);
    908         const void* const row0 = fAccessor.row(iYs[0]);
    909         const void* const row1 = fAccessor.row(iYs[2]);
    910 
    911         // Generate part of the filter value at xColumn.
    912         auto partAtColumn = [&](int xColumn) {
    913             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
    914             Sk4f pxTop, pxBottom;
    915             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
    916             return pxTop * filterY + (1.0f - filterY) * pxBottom;
    917         };
    918 
    919         // The leftPart is made up of two pixels from the left column of the filter, right part
    920         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
    921         // the top and bottom pixels using filterY. See the nextPart function below.
    922         Sk4f leftPart  = partAtColumn(iXs[0]);
    923         Sk4f rightPart = partAtColumn(iXs[1]);
    924 
    925         // Create a destination color by blending together a left and right part using filterX.
    926         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
    927             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
    928             return check_pixel(pixel);
    929         };
    930 
    931         // Send the first pixel to the destination. This simplifies the loop structure so that no
    932         // extra pixels are fetched for the last iteration of the loop.
    933         fNext->blendPixel(bilerp(leftPart, rightPart));
    934         count -= 1;
    935 
    936         if (dx > 0.0f) {
    937             // * positive direction - generate destination pixels by sliding the filter from left
    938             //                        to right.
    939             int rightPartCursor = iXs[1];
    940 
    941             // Advance the filter from left to right. Remember that moving the top-left corner of
    942             // the filter to the right actually makes the filter value smaller.
    943             auto advanceFilter = [&]() {
    944                 filterX -= dx;
    945                 // At this point filterX is less than zero, but might actually be less than -1.
    946                 if (filterX > -1.0f) {
    947                     filterX += 1.0f;
    948                     leftPart = rightPart;
    949                     rightPartCursor += 1;
    950                     rightPart = partAtColumn(rightPartCursor);
    951                 } else {
    952                     filterX += 2.0f;
    953                     rightPartCursor += 2;
    954                     leftPart = partAtColumn(rightPartCursor - 1);
    955                     rightPart = partAtColumn(rightPartCursor);
    956                 }
    957                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    958 
    959                 return bilerp(leftPart, rightPart);
    960             };
    961 
    962             while (count >= 4) {
    963                 Sk4f px0 = advanceFilter(),
    964                      px1 = advanceFilter(),
    965                      px2 = advanceFilter(),
    966                      px3 = advanceFilter();
    967                 fNext->blend4Pixels(px0, px1, px2, px3);
    968                 count -= 4;
    969             }
    970 
    971             while (count > 0) {
    972                 fNext->blendPixel(advanceFilter());
    973                 count -= 1;
    974             }
    975         } else {
    976             // * negative direction - generate destination pixels by sliding the filter from
    977             //                        right to left.
    978             int leftPartCursor = iXs[0];
    979 
    980             auto advanceFilter = [&]() {
    981                 // Remember, dx < 0 therefore this adds |dx| to filterX.
    982                 filterX -= dx;
    983                 // At this point, filterX is greater than one, but may actually be greater than two.
    984                 if (filterX < 2.0f) {
    985                     filterX -= 1.0f;
    986                     rightPart = leftPart;
    987                     leftPartCursor -= 1;
    988                     leftPart = partAtColumn(leftPartCursor);
    989                 } else {
    990                     filterX -= 2.0f;
    991                     leftPartCursor -= 2;
    992                     rightPart = partAtColumn(leftPartCursor - 1);
    993                     leftPart = partAtColumn(leftPartCursor);
    994                 }
    995                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    996                 return bilerp(leftPart, rightPart);
    997             };
    998 
    999             while (count >= 4) {
   1000                 Sk4f px0 = advanceFilter(),
   1001                      px1 = advanceFilter(),
   1002                      px2 = advanceFilter(),
   1003                      px3 = advanceFilter();
   1004                 fNext->blend4Pixels(px0, px1, px2, px3);
   1005                 count -= 4;
   1006             }
   1007 
   1008             while (count > 0) {
   1009                 fNext->blendPixel(advanceFilter());
   1010                 count -= 1;
   1011             }
   1012         }
   1013     }
   1014 
   1015     // We're moving through source space faster than dst (zoomed out),
   1016     // so we'll never reuse a source pixel or be able to do contiguous loads.
   1017     void spanFastRate(Span span) {
   1018         SkPoint start; SkScalar length; int count;
   1019         std::tie(start, length, count) = span;
   1020         SkScalar x = X(start);
   1021         SkScalar y = Y(start);
   1022 
   1023         SkScalar dx = length / (count - 1);
   1024         while (count > 0) {
   1025             fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
   1026             x += dx;
   1027             count -= 1;
   1028         }
   1029     }
   1030 
    1031     Next* const              fNext;       // Receives results via blendPixel()/blend4Pixels().
    1032     const SkShader::TileMode fXEdgeType;  // Tile mode passed to adjust_edge() for columns.
    1033     const int                fXMax;       // Column limit passed to adjust_edge(); presumably the largest valid x index -- TODO confirm.
    1034     const SkShader::TileMode fYEdgeType;  // Presumably the row counterpart of fXEdgeType -- TODO confirm.
    1035     const int                fYMax;       // Presumably the row counterpart of fXMax -- TODO confirm.
    1036     Accessor                 fAccessor;   // Pixel source: row() yields row pointers, get4Pixels() loads four pixels.
   1037 };
   1038 
   1039 }  // namespace
   1040 
   1041 #endif  // SkLinearBitmapPipeline_sampler_DEFINED
   1042