Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2016 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
      9 #define SkLinearBitmapPipeline_sampler_DEFINED
     10 
     11 #include <tuple>
     12 
     13 #include "SkAutoMalloc.h"
     14 #include "SkColor.h"
     15 #include "SkColorPriv.h"
     16 #include "SkFixed.h"  // for SkFixed1 only. Don't use SkFixed in this file.
     17 #include "SkHalf.h"
     18 #include "SkLinearBitmapPipeline_core.h"
     19 #include "SkNx.h"
     20 #include "SkPM4fPriv.h"
     21 
     22 namespace {
// Explanation of the math:
     24 //              1 - x      x
     25 //           +--------+--------+
     26 //           |        |        |
     27 //  1 - y    |  px00  |  px10  |
     28 //           |        |        |
     29 //           +--------+--------+
     30 //           |        |        |
     31 //    y      |  px01  |  px11  |
     32 //           |        |        |
     33 //           +--------+--------+
     34 //
     35 //
// Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x
// and y:
     38 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
     39 // * px10 -> x(1 - y) = x - xy
     40 // * px01 -> (1 - x)y = y - xy
     41 // * px11 -> xy
     42 // So x * y is calculated first and then used to calculate all the other factors.
     43 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
     44                                                     Sk4f px01, Sk4f px11) {
     45     // Calculate fractional xs and ys.
     46     Sk4s fxs = xs - xs.floor();
     47     Sk4s fys = ys - ys.floor();
     48     Sk4s fxys{fxs * fys};
     49     Sk4f sum = px11 * fxys;
     50     sum = sum + px01 * (fys - fxys);
     51     sum = sum + px10 * (fxs - fxys);
     52     sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
     53     return sum;
     54 }
     55 
     56 ////////////////////////////////////////////////////////////////////////////////////////////////////
// PixelConverter is the lowest level interface to the source data. There is a PixelConverter for
// each of the different SkColorTypes.
     59 template <SkColorType, SkGammaType> class PixelConverter;
     60 
     61 // Alpha handling:
     62 //   The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
     63 // the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can
     64 // modulate this color later.
     65 template <>
     66 class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> {
     67 public:
     68     using Element = uint8_t;
     69     PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) {
     70         fTintColor = SkColor4f::FromColor(tintColor);
     71         fTintColor.fA = 1.0f;
     72     }
     73 
     74     Sk4f toSk4f(const Element pixel) const {
     75         return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f));
     76     }
     77 
     78 private:
     79     SkColor4f fTintColor;
     80 };
     81 
     82 template <SkGammaType gammaType>
     83 static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) {
     84     return swizzle_rb_if_bgra(
     85             (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel)
     86                                              : Sk4f_fromL32(pixel));
     87 }
     88 
     89 template <SkGammaType gammaType>
     90 class PixelConverter<kRGB_565_SkColorType, gammaType> {
     91 public:
     92     using Element = uint16_t;
     93     PixelConverter(const SkPixmap& srcPixmap) { }
     94 
     95     Sk4f toSk4f(Element pixel) const {
     96         return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel));
     97     }
     98 };
     99 
    100 template <SkGammaType gammaType>
    101 class PixelConverter<kARGB_4444_SkColorType, gammaType> {
    102 public:
    103     using Element = uint16_t;
    104     PixelConverter(const SkPixmap& srcPixmap) { }
    105 
    106     Sk4f toSk4f(Element pixel) const {
    107         return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel));
    108     }
    109 };
    110 
    111 template <SkGammaType gammaType>
    112 class PixelConverter<kRGBA_8888_SkColorType, gammaType> {
    113 public:
    114     using Element = uint32_t;
    115     PixelConverter(const SkPixmap& srcPixmap) { }
    116 
    117     Sk4f toSk4f(Element pixel) const {
    118         return gammaType == kSRGB_SkGammaType
    119                ? Sk4f_fromS32(pixel)
    120                : Sk4f_fromL32(pixel);
    121     }
    122 };
    123 
    124 template <SkGammaType gammaType>
    125 class PixelConverter<kBGRA_8888_SkColorType, gammaType> {
    126 public:
    127     using Element = uint32_t;
    128     PixelConverter(const SkPixmap& srcPixmap) { }
    129 
    130     Sk4f toSk4f(Element pixel) const {
    131         return swizzle_rb(
    132                    gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel));
    133     }
    134 };
    135 
    136 template <SkGammaType gammaType>
    137 class PixelConverter<kGray_8_SkColorType, gammaType> {
    138 public:
    139     using Element = uint8_t;
    140     PixelConverter(const SkPixmap& srcPixmap) { }
    141 
    142     Sk4f toSk4f(Element pixel) const {
    143         float gray = (gammaType == kSRGB_SkGammaType)
    144             ? sk_linear_from_srgb[pixel]
    145             : pixel * (1/255.0f);
    146         return {gray, gray, gray, 1.0f};
    147     }
    148 };
    149 
    150 template <>
    151 class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> {
    152 public:
    153     using Element = uint64_t;
    154     PixelConverter(const SkPixmap& srcPixmap) { }
    155 
    156     Sk4f toSk4f(const Element pixel) const {
    157         return SkHalfToFloat_finite_ftz(pixel);
    158     }
    159 };
    160 
    161 class PixelAccessorShim {
    162 public:
    163     explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor)
    164         : fPixelAccessor(accessor) { }
    165 
    166     void SK_VECTORCALL getFewPixels(
    167         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
    168         fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
    169     }
    170 
    171     void SK_VECTORCALL get4Pixels(
    172         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
    173         fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
    174     }
    175 
    176     void get4Pixels(
    177         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
    178         fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
    179     }
    180 
    181     Sk4f getPixelFromRow(const void* row, int index) const {
    182         return fPixelAccessor->getPixelFromRow(row, index);
    183     }
    184 
    185     Sk4f getPixelAt(int index) const {
    186         return fPixelAccessor->getPixelAt(index);
    187     }
    188 
    189     const void* row(int y) const {
    190         return fPixelAccessor->row(y);
    191     }
    192 
    193 private:
    194     SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor;
    195 };
    196 
    197 ////////////////////////////////////////////////////////////////////////////////////////////////////
// PixelAccessor handles all the same plumbing for all the PixelConverters.
    199 template <SkColorType colorType, SkGammaType gammaType>
    200 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface {
    201     using Element = typename PixelConverter<colorType, gammaType>::Element;
    202 public:
    203     template <typename... Args>
    204     PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
    205         : fSrc{static_cast<const Element*>(srcPixmap.addr())}
    206         , fWidth{srcPixmap.rowBytesAsPixels()}
    207         , fConverter{srcPixmap, std::move<Args>(args)...} { }
    208 
    209     void SK_VECTORCALL getFewPixels (
    210         int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
    211         Sk4i bufferLoc = ys * fWidth + xs;
    212         switch (n) {
    213             case 3:
    214                 *px2 = this->getPixelAt(bufferLoc[2]);
    215             case 2:
    216                 *px1 = this->getPixelAt(bufferLoc[1]);
    217             case 1:
    218                 *px0 = this->getPixelAt(bufferLoc[0]);
    219             default:
    220                 break;
    221         }
    222     }
    223 
    224     void SK_VECTORCALL get4Pixels(
    225         Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
    226         Sk4i bufferLoc = ys * fWidth + xs;
    227         *px0 = this->getPixelAt(bufferLoc[0]);
    228         *px1 = this->getPixelAt(bufferLoc[1]);
    229         *px2 = this->getPixelAt(bufferLoc[2]);
    230         *px3 = this->getPixelAt(bufferLoc[3]);
    231     }
    232 
    233     void get4Pixels(
    234         const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
    235         *px0 = this->getPixelFromRow(src, index + 0);
    236         *px1 = this->getPixelFromRow(src, index + 1);
    237         *px2 = this->getPixelFromRow(src, index + 2);
    238         *px3 = this->getPixelFromRow(src, index + 3);
    239     }
    240 
    241     Sk4f getPixelFromRow(const void* row, int index) const override {
    242         const Element* src = static_cast<const Element*>(row);
    243         return fConverter.toSk4f(src[index]);
    244     }
    245 
    246     Sk4f getPixelAt(int index) const override {
    247         return this->getPixelFromRow(fSrc, index);
    248     }
    249 
    250     const void* row(int y) const override { return fSrc + y * fWidth; }
    251 
    252 private:
    253     const Element* const                 fSrc;
    254     const int                            fWidth;
    255     PixelConverter<colorType, gammaType> fConverter;
    256 };
    257 
    258 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
    259 // We'll never re-use pixels, but we can at least load contiguous pixels.
    260 template <typename Next, typename Strategy>
    261 static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
    262     SkPoint start;
    263     SkScalar length;
    264     int count;
    265     std::tie(start, length, count) = span;
    266     int ix = SkScalarFloorToInt(X(start));
    267     const void* row = strategy->row((int)std::floor(Y(start)));
    268     if (length > 0) {
    269         while (count >= 4) {
    270             Sk4f px0, px1, px2, px3;
    271             strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3);
    272             next->blend4Pixels(px0, px1, px2, px3);
    273             ix += 4;
    274             count -= 4;
    275         }
    276 
    277         while (count > 0) {
    278             next->blendPixel(strategy->getPixelFromRow(row, ix));
    279             ix += 1;
    280             count -= 1;
    281         }
    282     } else {
    283         while (count >= 4) {
    284             Sk4f px0, px1, px2, px3;
    285             strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
    286             next->blend4Pixels(px0, px1, px2, px3);
    287             ix -= 4;
    288             count -= 4;
    289         }
    290 
    291         while (count > 0) {
    292             next->blendPixel(strategy->getPixelFromRow(row, ix));
    293             ix -= 1;
    294             count -= 1;
    295         }
    296     }
    297 }
    298 
    299 // -- NearestNeighborSampler -----------------------------------------------------------------------
    300 // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
    301 template<typename Accessor, typename Next>
    302 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
    303 public:
    304     template<typename... Args>
    305     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
    306     : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
    307 
    308     NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
    309     const NearestNeighborSampler& sampler)
    310     : fNext{next}, fAccessor{sampler.fAccessor} { }
    311 
    312     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
    313         SkASSERT(0 < n && n < 4);
    314         Sk4f px0, px1, px2;
    315         fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
    316         if (n >= 1) fNext->blendPixel(px0);
    317         if (n >= 2) fNext->blendPixel(px1);
    318         if (n >= 3) fNext->blendPixel(px2);
    319     }
    320 
    321     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
    322         Sk4f px0, px1, px2, px3;
    323         fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
    324         fNext->blend4Pixels(px0, px1, px2, px3);
    325     }
    326 
    327     void pointSpan(Span span) override {
    328         SkASSERT(!span.isEmpty());
    329         SkPoint start;
    330         SkScalar length;
    331         int count;
    332         std::tie(start, length, count) = span;
    333         SkScalar absLength = SkScalarAbs(length);
    334         if (absLength < (count - 1)) {
    335             this->spanSlowRate(span);
    336         } else if (absLength == (count - 1)) {
    337             src_strategy_blend(span, fNext, &fAccessor);
    338         } else {
    339             this->spanFastRate(span);
    340         }
    341     }
    342 
    343     void repeatSpan(Span span, int32_t repeatCount) override {
    344         while (repeatCount > 0) {
    345             this->pointSpan(span);
    346             repeatCount--;
    347         }
    348     }
    349 
    350 private:
    351     // When moving through source space more slowly than dst space (zoomed in),
    352     // we'll be sampling from the same source pixel more than once.
    353     void spanSlowRate(Span span) {
    354         SkPoint start; SkScalar length; int count;
    355         std::tie(start, length, count) = span;
    356         SkScalar x = X(start);
    357         // fx is a fixed 48.16 number.
    358         int64_t fx = static_cast<int64_t>(x * SK_Fixed1);
    359         SkScalar dx = length / (count - 1);
    360         // fdx is a fixed 48.16 number.
    361         int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1);
    362 
    363         const void* row = fAccessor.row((int)std::floor(Y(start)));
    364         Next* next = fNext;
    365 
    366         int64_t ix = fx >> 16;
    367         int64_t prevIX = ix;
    368         Sk4f fpixel = fAccessor.getPixelFromRow(row, ix);
    369 
    370         // When dx is less than one, each pixel is used more than once. Using the fixed point fx
    371         // allows the code to quickly check that the same pixel is being used. The code uses this
    372         // same pixel check to do the sRGB and normalization only once.
    373         auto getNextPixel = [&]() {
    374             if (ix != prevIX) {
    375                 fpixel = fAccessor.getPixelFromRow(row, ix);
    376                 prevIX = ix;
    377             }
    378             fx += fdx;
    379             ix = fx >> 16;
    380             return fpixel;
    381         };
    382 
    383         while (count >= 4) {
    384             Sk4f px0 = getNextPixel();
    385             Sk4f px1 = getNextPixel();
    386             Sk4f px2 = getNextPixel();
    387             Sk4f px3 = getNextPixel();
    388             next->blend4Pixels(px0, px1, px2, px3);
    389             count -= 4;
    390         }
    391         while (count > 0) {
    392             next->blendPixel(getNextPixel());
    393             count -= 1;
    394         }
    395     }
    396 
    397     // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
    398     // We'll never re-use pixels, but we can at least load contiguous pixels.
    399     void spanUnitRate(Span span) {
    400         src_strategy_blend(span, fNext, &fAccessor);
    401     }
    402 
    403     // We're moving through source space faster than dst (zoomed out),
    404     // so we'll never reuse a source pixel or be able to do contiguous loads.
    405     void spanFastRate(Span span) {
    406         span_fallback(span, this);
    407     }
    408 
    409     Next* const fNext;
    410     Accessor    fAccessor;
    411 };
    412 
    413 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
    414 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
    415 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
    416 // on the interval [0, vMax].
    417 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
    418 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
    419     SkASSERT(-1 <= vs && vs <= vMax + 1);
    420     switch (edgeType) {
    421         case SkShader::kClamp_TileMode:
    422         case SkShader::kMirror_TileMode:
    423             vs = std::max(vs, 0);
    424             vs = std::min(vs, vMax);
    425             break;
    426         case SkShader::kRepeat_TileMode:
    427             vs = (vs <= vMax) ? vs : 0;
    428             vs =    (vs >= 0) ? vs : vMax;
    429             break;
    430     }
    431     SkASSERT(0 <= vs && vs <= vMax);
    432     return vs;
    433 }
    434 
    435 // From a sample point on the tile, return the top or left filter value.
    436 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
    437 // left element, then if x == 0.5 the filter value should be 1.0.
    438 // The input sample point must be on the tile, therefore it must be >= 0.
    439 static SkScalar sample_to_filter(SkScalar x) {
    440     SkASSERT(x >= 0.0f);
    441     // The usual form of the top or left edge is x - .5, but since we are working on the unit
    442     // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
    443     // of trunc.
    444     SkScalar v = x + 0.5f;
    445     // Produce the top or left offset a value on the range [0, 1).
    446     SkScalar f = v - SkScalarTruncToScalar(v);
    447     // Produce the filter value which is on the range (0, 1].
    448     SkScalar r =  1.0f - f;
    449     SkASSERT(0.0f < r && r <= 1.0f);
    450     return r;
    451 }
    452 
    453 // -- BilerpSampler --------------------------------------------------------------------------------
    454 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
    455 // Note: in the code below, there are two types of points
    456 //       * sample points - these are the points passed in by pointList* and Spans.
    457 //       * filter points - are created from a sample point to form the coordinates of the points
    458 //                         to use in the filter and to generate the filter values.
    459 template<typename Accessor, typename Next>
    460 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
    461 public:
    462     template<typename... Args>
    463     BilerpSampler(
    464         SkLinearBitmapPipeline::BlendProcessorInterface* next,
    465         SkISize dimensions,
    466         SkShader::TileMode xTile, SkShader::TileMode yTile,
    467         Args&& ... args
    468     )
    469         : fNext{next}
    470         , fXEdgeType{xTile}
    471         , fXMax{dimensions.width() - 1}
    472         , fYEdgeType{yTile}
    473         , fYMax{dimensions.height() - 1}
    474         , fAccessor{std::forward<Args>(args)...} { }
    475 
    476     BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
    477                    const BilerpSampler& sampler)
    478         : fNext{next}
    479         , fXEdgeType{sampler.fXEdgeType}
    480         , fXMax{sampler.fXMax}
    481         , fYEdgeType{sampler.fYEdgeType}
    482         , fYMax{sampler.fYMax}
    483         , fAccessor{sampler.fAccessor} { }
    484 
    485     void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
    486         SkASSERT(0 < n && n < 4);
    487         auto bilerpPixel = [&](int index) {
    488             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
    489         };
    490 
    491         if (n >= 1) fNext->blendPixel(bilerpPixel(0));
    492         if (n >= 2) fNext->blendPixel(bilerpPixel(1));
    493         if (n >= 3) fNext->blendPixel(bilerpPixel(2));
    494     }
    495 
    496     void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
    497         auto bilerpPixel = [&](int index) {
    498             return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
    499         };
    500         fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
    501     }
    502 
    503     void pointSpan(Span span) override {
    504         SkASSERT(!span.isEmpty());
    505         SkPoint start;
    506         SkScalar length;
    507         int count;
    508         std::tie(start, length, count) = span;
    509 
    510         // Nothing to do.
    511         if (count == 0) {
    512             return;
    513         }
    514 
    515         // Trivial case. No sample points are generated other than start.
    516         if (count == 1) {
    517             fNext->blendPixel(this->bilerpSamplePoint(start));
    518             return;
    519         }
    520 
    521         // Note: the following code could be done in terms of dx = length / (count -1), but that
    522         // would introduce a divide that is not needed for the most common dx == 1 cases.
    523         SkScalar absLength = SkScalarAbs(length);
    524         if (absLength == 0.0f) {
    525             // |dx| == 0
    526             // length is zero, so clamp an edge pixel.
    527             this->spanZeroRate(span);
    528         } else if (absLength < (count - 1)) {
    529             // 0 < |dx| < 1.
    530             this->spanSlowRate(span);
    531         } else if (absLength == (count - 1)) {
    532             // |dx| == 1.
    533             if (sample_to_filter(span.startX()) == 1.0f
    534                 && sample_to_filter(span.startY()) == 1.0f) {
    535                 // All the pixels are aligned with the dest; go fast.
    536                 src_strategy_blend(span, fNext, &fAccessor);
    537             } else {
    538                 // There is some sub-pixel offsets, so bilerp.
    539                 this->spanUnitRate(span);
    540             }
    541         } else if (absLength < 2.0f * (count - 1)) {
    542             // 1 < |dx| < 2.
    543             this->spanMediumRate(span);
    544         } else {
    545             // |dx| >= 2.
    546             this->spanFastRate(span);
    547         }
    548     }
    549 
    550     void repeatSpan(Span span, int32_t repeatCount) override {
    551         while (repeatCount > 0) {
    552             this->pointSpan(span);
    553             repeatCount--;
    554         }
    555     }
    556 
    557 private:
    558 
    559     // Convert a sample point to the points used by the filter.
    560     void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
    561         // May be less than zero. Be careful to use Floor.
    562         int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
    563         // Always greater than zero. Use the faster Trunc.
    564         int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
    565         int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
    566         int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
    567 
    568         *filterXs = Sk4i{x0, x1, x0, x1};
    569         *filterYs = Sk4i{y0, y0, y1, y1};
    570     }
    571 
    572     // Given a sample point, generate a color by bilerping the four filter points.
    573     Sk4f bilerpSamplePoint(SkPoint sample) {
    574         Sk4i iXs, iYs;
    575         filterPoints(sample, &iXs, &iYs);
    576         Sk4f px00, px10, px01, px11;
    577         fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
    578         return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
    579     }
    580 
    581     // Get two pixels at x from row0 and row1.
    582     void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
    583         *px0 = fAccessor.getPixelFromRow(row0, x);
    584         *px1 = fAccessor.getPixelFromRow(row1, x);
    585     }
    586 
    587     // |dx| == 0. This code assumes that length is zero.
    588     void spanZeroRate(Span span) {
    589         SkPoint start; SkScalar length; int count;
    590         std::tie(start, length, count) = span;
    591         SkASSERT(length == 0.0f);
    592 
    593         // Filter for the blending of the top and bottom pixels.
    594         SkScalar filterY = sample_to_filter(Y(start));
    595 
    596         // Generate the four filter points from the sample point start. Generate the row* values.
    597         Sk4i iXs, iYs;
    598         this->filterPoints(start, &iXs, &iYs);
    599         const void* const row0 = fAccessor.row(iYs[0]);
    600         const void* const row1 = fAccessor.row(iYs[2]);
    601 
    602         // Get the two pixels that make up the clamping pixel.
    603         Sk4f pxTop, pxBottom;
    604         this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
    605         Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
    606 
    607         while (count >= 4) {
    608             fNext->blend4Pixels(pixel, pixel, pixel, pixel);
    609             count -= 4;
    610         }
    611         while (count > 0) {
    612             fNext->blendPixel(pixel);
    613             count -= 1;
    614         }
    615     }
    616 
    617     // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
    618     // computation. In particular, several destination pixels maybe generated from the same four
    619     // source pixels.
    620     // In the following code a "part" is a combination of two pixels from the same column of the
    621     // filter.
    622     void spanSlowRate(Span span) {
    623         SkPoint start; SkScalar length; int count;
    624         std::tie(start, length, count) = span;
    625 
    626         // Calculate the distance between each sample point.
    627         const SkScalar dx = length / (count - 1);
    628         SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
    629 
    630         // Generate the filter values for the top-left corner.
    631         // Note: these values are in filter space; this has implications about how to adjust
    632         // these values at each step. For example, as the sample point increases, the filter
    633         // value decreases, this is because the filter and position are related by
    634         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
    635         // direction of the sample point which is increasing by dx.
    636         SkScalar filterX = sample_to_filter(X(start));
    637         SkScalar filterY = sample_to_filter(Y(start));
    638 
    639         // Generate the four filter points from the sample point start. Generate the row* values.
    640         Sk4i iXs, iYs;
    641         this->filterPoints(start, &iXs, &iYs);
    642         const void* const row0 = fAccessor.row(iYs[0]);
    643         const void* const row1 = fAccessor.row(iYs[2]);
    644 
    645         // Generate part of the filter value at xColumn.
    646         auto partAtColumn = [&](int xColumn) {
    647             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
    648             Sk4f pxTop, pxBottom;
    649             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
    650             return pxTop * filterY + (1.0f - filterY) * pxBottom;
    651         };
    652 
    653         // The leftPart is made up of two pixels from the left column of the filter, right part
    654         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
    655         // the top and bottom pixels using filterY. See the partAtColumn function above.
    656         Sk4f leftPart  = partAtColumn(iXs[0]);
    657         Sk4f rightPart = partAtColumn(iXs[1]);
    658 
    659         // Create a destination color by blending together a left and right part using filterX.
    660         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
    661             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
    662             return check_pixel(pixel);
    663         };
    664 
    665         // Send the first pixel to the destination. This simplifies the loop structure so that no
    666         // extra pixels are fetched for the last iteration of the loop.
    667         fNext->blendPixel(bilerp(leftPart, rightPart));
    668         count -= 1;
    669 
    670         if (dx > 0.0f) {
    671             // * positive direction - generate destination pixels by sliding the filter from left
    672             //                        to right.
    673             int rightPartCursor = iXs[1];
    674 
    675             // Advance the filter from left to right. Remember that moving the top-left corner of
    676             // the filter to the right actually makes the filter value smaller.
    677             auto advanceFilter = [&]() {
    678                 filterX -= dx;
    679                 if (filterX <= 0.0f) {
    680                     filterX += 1.0f;
    681                     leftPart = rightPart;
    682                     rightPartCursor += 1;
    683                     rightPart = partAtColumn(rightPartCursor);
    684                 }
    685                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    686 
    687                 return bilerp(leftPart, rightPart);
    688             };
    689 
    690             while (count >= 4) {
    691                 Sk4f px0 = advanceFilter(),
    692                      px1 = advanceFilter(),
    693                      px2 = advanceFilter(),
    694                      px3 = advanceFilter();
    695                 fNext->blend4Pixels(px0, px1, px2, px3);
    696                 count -= 4;
    697             }
    698 
    699             while (count > 0) {
    700                 fNext->blendPixel(advanceFilter());
    701                 count -= 1;
    702             }
    703         } else {
    704             // * negative direction - generate destination pixels by sliding the filter from
    705             //                        right to left.
    706             int leftPartCursor = iXs[0];
    707 
    708             // Advance the filter from right to left. Remember that moving the top-left corner of
    709             // the filter to the left actually makes the filter value larger.
    710             auto advanceFilter = [&]() {
    711                 // Remember, dx < 0 therefore this adds |dx| to filterX.
    712                 filterX -= dx;
    713                 // At this point filterX may be > 1, and needs to be wrapped back on to the filter
    714                 // interval, and the next column in the filter is calculated.
    715                 if (filterX > 1.0f) {
    716                     filterX -= 1.0f;
    717                     rightPart = leftPart;
    718                     leftPartCursor -= 1;
    719                     leftPart = partAtColumn(leftPartCursor);
    720                 }
    721                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    722 
    723                 return bilerp(leftPart, rightPart);
    724             };
    725 
    726             while (count >= 4) {
    727                 Sk4f px0 = advanceFilter(),
    728                      px1 = advanceFilter(),
    729                      px2 = advanceFilter(),
    730                      px3 = advanceFilter();
    731                 fNext->blend4Pixels(px0, px1, px2, px3);
    732                 count -= 4;
    733             }
    734 
    735             while (count > 0) {
    736                 fNext->blendPixel(advanceFilter());
    737                 count -= 1;
    738             }
    739         }
    740     }
    741 
    742     // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
    743     // Every filter part is used for two destination pixels, and the code can bulk load four
    744     // pixels at a time.
    745     void spanUnitRate(Span span) {
    746         SkPoint start; SkScalar length; int count;
    747         std::tie(start, length, count) = span;
    748         SkASSERT(SkScalarAbs(length) == (count - 1));
    749 
    750         // Calculate the four filter points of start, and use the two different Y values to
    751         // generate the row pointers.
    752         Sk4i iXs, iYs;
    753         filterPoints(start, &iXs, &iYs);
    754         const void* row0 = fAccessor.row(iYs[0]);
    755         const void* row1 = fAccessor.row(iYs[2]);
    756 
    757         // Calculate the filter values for the top-left filter element.
    758         const SkScalar filterX = sample_to_filter(X(start));
    759         const SkScalar filterY = sample_to_filter(Y(start));
    760 
    761         // Generate part of the filter value at xColumn.
    762         auto partAtColumn = [&](int xColumn) {
    763             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
    764             Sk4f pxTop, pxBottom;
    765             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
    766             return pxTop * filterY + (1.0f - filterY) * pxBottom;
    767         };
    768 
    769         auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
    770             // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
    771             // otherwise be careful.
    772             if (0 <= ix && ix <= fXMax - 3) {
    773                 Sk4f px00, px10, px20, px30,
    774                      px01, px11, px21, px31;
    775                 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
    776                 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
    777                 *part0 = filterY * px00 + (1.0f - filterY) * px01;
    778                 *part1 = filterY * px10 + (1.0f - filterY) * px11;
    779                 *part2 = filterY * px20 + (1.0f - filterY) * px21;
    780                 *part3 = filterY * px30 + (1.0f - filterY) * px31;
    781             } else {
    782                 *part0 = partAtColumn(ix + 0);
    783                 *part1 = partAtColumn(ix + 1);
    784                 *part2 = partAtColumn(ix + 2);
    785                 *part3 = partAtColumn(ix + 3);
    786             }
    787         };
    788 
    789         auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
    790             return part0 * filterX + part1 * (1.0f - filterX);
    791         };
    792 
    793         if (length > 0) {
    794             // * positive direction - generate destination pixels by sliding the filter from left
    795             //                        to right.
    796 
    797             // overlapPart is the filter part from the end of the previous four pixels used at
    798             // the start of the next four pixels.
    799             Sk4f overlapPart = partAtColumn(iXs[0]);
    800             int rightColumnCursor = iXs[1];
    801             while (count >= 4) {
    802                 Sk4f part0, part1, part2, part3;
    803                 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
    804                 Sk4f px0 = bilerp(overlapPart, part0);
    805                 Sk4f px1 = bilerp(part0, part1);
    806                 Sk4f px2 = bilerp(part1, part2);
    807                 Sk4f px3 = bilerp(part2, part3);
    808                 overlapPart = part3;
    809                 fNext->blend4Pixels(px0, px1, px2, px3);
    810                 rightColumnCursor += 4;
    811                 count -= 4;
    812             }
    813 
    814             while (count > 0) {
    815                 Sk4f rightPart = partAtColumn(rightColumnCursor);
    816 
    817                 fNext->blendPixel(bilerp(overlapPart, rightPart));
    818                 overlapPart = rightPart;
    819                 rightColumnCursor += 1;
    820                 count -= 1;
    821             }
    822         } else {
    823             // * negative direction - generate destination pixels by sliding the filter from
    824             //                        right to left.
    825             Sk4f overlapPart = partAtColumn(iXs[1]);
    826             int leftColumnCursor = iXs[0];
    827 
    828             while (count >= 4) {
    829                 Sk4f part0, part1, part2, part3;
    830                 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
    831                 Sk4f px0 = bilerp(part0, overlapPart);
    832                 Sk4f px1 = bilerp(part1, part0);
    833                 Sk4f px2 = bilerp(part2, part1);
    834                 Sk4f px3 = bilerp(part3, part2);
    835                 overlapPart = part3;
    836                 fNext->blend4Pixels(px0, px1, px2, px3);
    837                 leftColumnCursor -= 4;
    838                 count -= 4;
    839             }
    840 
    841             while (count > 0) {
    842                 Sk4f leftPart = partAtColumn(leftColumnCursor);
    843 
    844                 fNext->blendPixel(bilerp(leftPart, overlapPart));
    845                 overlapPart = leftPart;
    846                 leftColumnCursor -= 1;
    847                 count -= 1;
    848             }
    849         }
    850     }
    851 
    852     // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
    853     // still slow enough to take advantage of previous calculations.
    854     void spanMediumRate(Span span) {
    855         SkPoint start; SkScalar length; int count;
    856         std::tie(start, length, count) = span;
    857 
    858         // Calculate the distance between each sample point.
    859         const SkScalar dx = length / (count - 1);
    860         SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
    861 
    862         // Generate the filter values for the top-left corner.
    863         // Note: these values are in filter space; this has implications about how to adjust
    864         // these values at each step. For example, as the sample point increases, the filter
    865         // value decreases, this is because the filter and position are related by
    866         // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
    867         // direction of the sample point which is increasing by dx.
    868         SkScalar filterX = sample_to_filter(X(start));
    869         SkScalar filterY = sample_to_filter(Y(start));
    870 
    871         // Generate the four filter points from the sample point start. Generate the row* values.
    872         Sk4i iXs, iYs;
    873         this->filterPoints(start, &iXs, &iYs);
    874         const void* const row0 = fAccessor.row(iYs[0]);
    875         const void* const row1 = fAccessor.row(iYs[2]);
    876 
    877         // Generate part of the filter value at xColumn.
    878         auto partAtColumn = [&](int xColumn) {
    879             int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
    880             Sk4f pxTop, pxBottom;
    881             this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
    882             return pxTop * filterY + (1.0f - filterY) * pxBottom;
    883         };
    884 
    885         // The leftPart is made up of two pixels from the left column of the filter, right part
    886         // is similar. The top and bottom pixels in the *Part are created as a linear blend of
    887         // the top and bottom pixels using filterY. See the nextPart function below.
    888         Sk4f leftPart  = partAtColumn(iXs[0]);
    889         Sk4f rightPart = partAtColumn(iXs[1]);
    890 
    891         // Create a destination color by blending together a left and right part using filterX.
    892         auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
    893             Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
    894             return check_pixel(pixel);
    895         };
    896 
    897         // Send the first pixel to the destination. This simplifies the loop structure so that no
    898         // extra pixels are fetched for the last iteration of the loop.
    899         fNext->blendPixel(bilerp(leftPart, rightPart));
    900         count -= 1;
    901 
    902         if (dx > 0.0f) {
    903             // * positive direction - generate destination pixels by sliding the filter from left
    904             //                        to right.
    905             int rightPartCursor = iXs[1];
    906 
    907             // Advance the filter from left to right. Remember that moving the top-left corner of
    908             // the filter to the right actually makes the filter value smaller.
    909             auto advanceFilter = [&]() {
    910                 filterX -= dx;
    911                 // At this point filterX is less than zero, but might actually be less than -1.
    912                 if (filterX > -1.0f) {
    913                     filterX += 1.0f;
    914                     leftPart = rightPart;
    915                     rightPartCursor += 1;
    916                     rightPart = partAtColumn(rightPartCursor);
    917                 } else {
    918                     filterX += 2.0f;
    919                     rightPartCursor += 2;
    920                     leftPart = partAtColumn(rightPartCursor - 1);
    921                     rightPart = partAtColumn(rightPartCursor);
    922                 }
    923                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    924 
    925                 return bilerp(leftPart, rightPart);
    926             };
    927 
    928             while (count >= 4) {
    929                 Sk4f px0 = advanceFilter(),
    930                      px1 = advanceFilter(),
    931                      px2 = advanceFilter(),
    932                      px3 = advanceFilter();
    933                 fNext->blend4Pixels(px0, px1, px2, px3);
    934                 count -= 4;
    935             }
    936 
    937             while (count > 0) {
    938                 fNext->blendPixel(advanceFilter());
    939                 count -= 1;
    940             }
    941         } else {
    942             // * negative direction - generate destination pixels by sliding the filter from
    943             //                        right to left.
    944             int leftPartCursor = iXs[0];
    945 
    946             auto advanceFilter = [&]() {
    947                 // Remember, dx < 0 therefore this adds |dx| to filterX.
    948                 filterX -= dx;
    949                 // At this point, filterX is greater than one, but may actually be greater than two.
    950                 if (filterX < 2.0f) {
    951                     filterX -= 1.0f;
    952                     rightPart = leftPart;
    953                     leftPartCursor -= 1;
    954                     leftPart = partAtColumn(leftPartCursor);
    955                 } else {
    956                     filterX -= 2.0f;
    957                     leftPartCursor -= 2;
    958                     rightPart = partAtColumn(leftPartCursor - 1);
    959                     leftPart = partAtColumn(leftPartCursor);
    960                 }
    961                 SkASSERT(0.0f < filterX && filterX <= 1.0f);
    962                 return bilerp(leftPart, rightPart);
    963             };
    964 
    965             while (count >= 4) {
    966                 Sk4f px0 = advanceFilter(),
    967                      px1 = advanceFilter(),
    968                      px2 = advanceFilter(),
    969                      px3 = advanceFilter();
    970                 fNext->blend4Pixels(px0, px1, px2, px3);
    971                 count -= 4;
    972             }
    973 
    974             while (count > 0) {
    975                 fNext->blendPixel(advanceFilter());
    976                 count -= 1;
    977             }
    978         }
    979     }
    980 
    981     // We're moving through source space faster than dst (zoomed out),
    982     // so we'll never reuse a source pixel or be able to do contiguous loads.
    983     void spanFastRate(Span span) {
    984         SkPoint start; SkScalar length; int count;
    985         std::tie(start, length, count) = span;
    986         SkScalar x = X(start);
    987         SkScalar y = Y(start);
    988 
    989         SkScalar dx = length / (count - 1);
    990         while (count > 0) {
    991             fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
    992             x += dx;
    993             count -= 1;
    994         }
    995     }
    996 
    997     Next* const              fNext;       // Receives the blendPixel()/blend4Pixels() output.
    998     const SkShader::TileMode fXEdgeType;  // Tile mode handed to adjust_edge() for x columns.
    999     const int                fXMax;       // x limit for adjust_edge(); bounds 4-pixel loads.
   1000     const SkShader::TileMode fYEdgeType;  // presumably the y analog of fXEdgeType - confirm.
   1001     const int                fYMax;       // presumably the y analog of fXMax - confirm.
   1002     Accessor                 fAccessor;   // Pixel source: supplies row() and get4Pixels().
   1003 };
   1004 
   1005 }  // namespace
   1006 
   1007 #endif  // SkLinearBitmapPipeline_sampler_DEFINED
   1008