Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColorPriv_DEFINED
      9 #define SkColorPriv_DEFINED
     10 
     11 // turn this own for extra debug checking when blending onto 565
     12 #ifdef SK_DEBUG
     13     #define CHECK_FOR_565_OVERFLOW
     14 #endif
     15 
     16 #include "SkColor.h"
     17 #include "SkMath.h"
     18 
     19 //////////////////////////////////////////////////////////////////////////////
     20 
     21 #define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
     22 
/*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
 *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take
 *  endianness into account.
 *
 *  Here we enforce this constraint.
 */
     34 
     35 #ifdef SK_CPU_BENDIAN
     36     #define SK_RGBA_R32_SHIFT   24
     37     #define SK_RGBA_G32_SHIFT   16
     38     #define SK_RGBA_B32_SHIFT   8
     39     #define SK_RGBA_A32_SHIFT   0
     40 
     41     #define SK_BGRA_B32_SHIFT   24
     42     #define SK_BGRA_G32_SHIFT   16
     43     #define SK_BGRA_R32_SHIFT   8
     44     #define SK_BGRA_A32_SHIFT   0
     45 #else
     46     #define SK_RGBA_R32_SHIFT   0
     47     #define SK_RGBA_G32_SHIFT   8
     48     #define SK_RGBA_B32_SHIFT   16
     49     #define SK_RGBA_A32_SHIFT   24
     50 
     51     #define SK_BGRA_B32_SHIFT   0
     52     #define SK_BGRA_G32_SHIFT   8
     53     #define SK_BGRA_R32_SHIFT   16
     54     #define SK_BGRA_A32_SHIFT   24
     55 #endif
     56 
     57 #if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
     58     #error "can't define PMCOLOR to be RGBA and BGRA"
     59 #endif
     60 
     61 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
     62     (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     63      SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     64      SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     65      SK_B32_SHIFT == SK_RGBA_B32_SHIFT)
     66 
     67 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
     68     (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     69      SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     70      SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     71      SK_B32_SHIFT == SK_BGRA_B32_SHIFT)
     72 
     73 
     74 #if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     75     #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
     76 #endif
     77 
     78 #if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     79     #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
     80 #endif
     81 
     82 #if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
     83     // deduce which to define from the _SHIFT defines
     84 
     85     #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     86         #define SK_PMCOLOR_IS_RGBA
     87     #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     88         #define SK_PMCOLOR_IS_BGRA
     89     #else
     90         #error "need 32bit packing to be either RGBA or BGRA"
     91     #endif
     92 #endif
     93 
     94 // hide these now that we're done
     95 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     96 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     97 
     98 //////////////////////////////////////////////////////////////////////////////
     99 
    100 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
    101 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
    102 // no need to pass in the colortype to this function.
    103 static inline uint32_t SkSwizzle_RB(uint32_t c) {
    104     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
    105 
    106     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    107     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    108     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
    109 }
    110 
    111 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    112     SkASSERT_IS_BYTE(a);
    113     SkASSERT_IS_BYTE(r);
    114     SkASSERT_IS_BYTE(g);
    115     SkASSERT_IS_BYTE(b);
    116     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
    117            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
    118 }
    119 
    120 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    121     SkASSERT_IS_BYTE(a);
    122     SkASSERT_IS_BYTE(r);
    123     SkASSERT_IS_BYTE(g);
    124     SkASSERT_IS_BYTE(b);
    125     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
    126            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
    127 }
    128 
    129 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
    130 #ifdef SK_PMCOLOR_IS_RGBA
    131     return c;
    132 #else
    133     return SkSwizzle_RB(c);
    134 #endif
    135 }
    136 
    137 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
    138 #ifdef SK_PMCOLOR_IS_BGRA
    139     return c;
    140 #else
    141     return SkSwizzle_RB(c);
    142 #endif
    143 }
    144 
    145 //////////////////////////////////////////////////////////////////////////////
    146 
    147 ///@{
    148 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
    149 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
    150 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
    151 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
    152 ///@}
    153 
    154 ///@{
    155 /** A float value which specifies this channel's contribution to luminance. */
    156 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
    157 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
    158 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
    159 ///@}
    160 
    161 /** Computes the luminance from the given r, g, and b in accordance with
    162     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
    163 */
    164 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    165     //The following is
    166     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    167     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    168     return (r * 54 + g * 183 + b * 19) >> 8;
    169 }
    170 
    171 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    172     byte into a scale value, so that we can say scale * value >> 8 instead of
    173     alpha * value / 255.
    174 
    175     In debugging, asserts that alpha is 0..255
    176 */
    177 static inline unsigned SkAlpha255To256(U8CPU alpha) {
    178     SkASSERT(SkToU8(alpha) == alpha);
    179     // this one assues that blending on top of an opaque dst keeps it that way
    180     // even though it is less accurate than a+(a>>7) for non-opaque dsts
    181     return alpha + 1;
    182 }
    183 
    184 /**
    185  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
    186  *  This is slightly more accurate than SkAlpha255To256.
    187  */
    188 static inline unsigned Sk255To256(U8CPU value) {
    189     SkASSERT(SkToU8(value) == value);
    190     return value + (value >> 7);
    191 }
    192 
    193 /** Multiplify value by 0..256, and shift the result down 8
    194     (i.e. return (value * alpha256) >> 8)
    195  */
    196 #define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
    197 
    198 //  The caller may want negative values, so keep all params signed (int)
    199 //  so we don't accidentally slip into unsigned math and lose the sign
    200 //  extension when we shift (in SkAlphaMul)
    201 static inline int SkAlphaBlend(int src, int dst, int scale256) {
    202     SkASSERT((unsigned)scale256 <= 256);
    203     return dst + SkAlphaMul(src - dst, scale256);
    204 }
    205 
    206 /**
    207  *  Returns (src * alpha + dst * (255 - alpha)) / 255
    208  *
    209  *  This is more accurate than SkAlphaBlend, but slightly slower
    210  */
    211 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    212     SkASSERT((int16_t)src == src);
    213     SkASSERT((int16_t)dst == dst);
    214     SkASSERT((uint8_t)alpha == alpha);
    215 
    216     int prod = SkMulS16(src - dst, alpha) + 128;
    217     prod = (prod + (prod >> 8)) >> 8;
    218     return dst + prod;
    219 }
    220 
    221 #define SK_R16_BITS     5
    222 #define SK_G16_BITS     6
    223 #define SK_B16_BITS     5
    224 
    225 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
    226 #define SK_G16_SHIFT    (SK_B16_BITS)
    227 #define SK_B16_SHIFT    0
    228 
    229 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
    230 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
    231 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
    232 
    233 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
    234 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    235 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    236 
    237 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    238 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    239 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    240 
    241 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    242     SkASSERT(r <= SK_R16_MASK);
    243     SkASSERT(g <= SK_G16_MASK);
    244     SkASSERT(b <= SK_B16_MASK);
    245 
    246     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    247 }
    248 
    249 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    250 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    251 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    252 
    253 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    254     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    255 */
    256 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    257     SkASSERT(c == (uint16_t)c);
    258 
    259     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    260 }
    261 
    262 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    263     color value. The computation yields only 16bits of valid data, but we claim
    264     to return 32bits, so that the compiler won't generate extra instructions to
    265     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    266     up to the caller to safely ignore them.
    267 */
    268 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    269     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    270 }
    271 
    272 /** Scale the 16bit color value by the 0..256 scale parameter.
    273     The computation yields only 16bits of valid data, but we claim
    274     to return 32bits, so that the compiler won't generate extra instructions to
    275     "clean" the top 16bits.
    276 */
    277 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    278     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    279 }
    280 
    281 // this helper explicitly returns a clean 16bit value (but slower)
    282 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    283 
    284 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    285     The computation yields only 16bits of valid data, but we claim
    286     to return 32bits, so that the compiler won't generate extra instructions to
    287     "clean" the top 16bits.
    288 */
    289 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    290     SkASSERT((unsigned)srcScale <= 256);
    291 
    292     srcScale >>= 3;
    293 
    294     uint32_t src32 = SkExpand_rgb_16(src);
    295     uint32_t dst32 = SkExpand_rgb_16(dst);
    296     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    297 }
    298 
    299 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    300                                 int srcScale, int count) {
    301     SkASSERT(count > 0);
    302     SkASSERT((unsigned)srcScale <= 256);
    303 
    304     srcScale >>= 3;
    305 
    306     do {
    307         uint32_t src32 = SkExpand_rgb_16(*src++);
    308         uint32_t dst32 = SkExpand_rgb_16(*dst);
    309         *dst++ = static_cast<uint16_t>(
    310             SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
    311     } while (--count > 0);
    312 }
    313 
    314 #ifdef SK_DEBUG
    315     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    316         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    317         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    318         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    319 
    320         return a + b;
    321     }
    322 #else
    323     #define SkRGB16Add(a, b)  ((a) + (b))
    324 #endif
    325 
    326 ///////////////////////////////////////////////////////////////////////////////
    327 
    328 #define SK_A32_BITS     8
    329 #define SK_R32_BITS     8
    330 #define SK_G32_BITS     8
    331 #define SK_B32_BITS     8
    332 
    333 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    334 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    335 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    336 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    337 
    338 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    339 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    340 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    341 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    342 
    343 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    344 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    345 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    346 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    347 
    348 #ifdef SK_DEBUG
    349     #define SkPMColorAssert(color_value)                                    \
    350         do {                                                                \
    351             SkPMColor pm_color_value = (color_value);                       \
    352             uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
    353             SkA32Assert(alpha_color_value);                                 \
    354             SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
    355             SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
    356             SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
    357         } while (false)
    358 #else
    359     #define SkPMColorAssert(c)
    360 #endif
    361 
    362 /**
    363  *  Pack the components into a SkPMColor, checking (in the debug version) that
    364  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    365  */
    366 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    367     SkA32Assert(a);
    368     SkASSERT(r <= a);
    369     SkASSERT(g <= a);
    370     SkASSERT(b <= a);
    371 
    372     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    373            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    374 }
    375 
    376 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    377     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
    378                               SkGetPackedG32(c), SkGetPackedB32(c));
    379 }
    380 
    381 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    382     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
    383                               SkGetPackedG32(c), SkGetPackedB32(c));
    384 }
    385 
    386 /**
    387  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    388  * utility functions. Third parameter controls blending of the first two:
    389  *   (src, dst, 0) returns dst
    390  *   (src, dst, 0xFF) returns src
    391  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    392  */
    393 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    394                                          unsigned scale) {
    395     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    396     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    397     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    398     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    399 
    400     return SkPackARGB32(a, r, g, b);
    401 }
    402 
    403 /**
    404  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    405  * utility functions. Third parameter controls blending of the first two:
    406  *   (src, dst, 0) returns dst
    407  *   (src, dst, 0xFF) returns src
    408  */
    409 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    410                                          U8CPU srcWeight) {
    411     unsigned scale = SkAlpha255To256(srcWeight);
    412     return SkFourByteInterp256(src, dst, scale);
    413 }
    414 
    415 /**
    416  * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
    417  */
    418 static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    419     const uint32_t mask = 0x00FF00FF;
    420     *ag = (color >> 8) & mask;
    421     *rb = color & mask;
    422 }
    423 
    424 /**
    425  * 0xAARRGGBB -> 0x00AA00GG00RR00BB
    426  * (note, ARGB -> AGRB)
    427  */
    428 static inline uint64_t SkSplay(uint32_t color) {
    429     const uint32_t mask = 0x00FF00FF;
    430     uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    431     agrb <<= 32;                          // 0x00AA00GG00000000
    432     agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    433     return agrb;
    434 }
    435 
    436 /**
    437  * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
    438  */
    439 static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    440     const uint32_t mask = 0xFF00FF00;
    441     return (ag & mask) | ((rb & mask) >> 8);
    442 }
    443 
    444 /**
    445  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
    446  * (note, AGRB -> ARGB)
    447  */
    448 static inline uint32_t SkUnsplay(uint64_t agrb) {
    449     const uint32_t mask = 0xFF00FF00;
    450     return SkPMColor(
    451         ((agrb & mask) >> 8) |   // 0x00RR00BB
    452         ((agrb >> 32) & mask));  // 0xAARRGGBB
    453 }
    454 
    455 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    456     SkASSERT(scale <= 256);
    457 
    458     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    459     uint32_t src_ag, src_rb, dst_ag, dst_rb;
    460     SkSplay(src, &src_ag, &src_rb);
    461     SkSplay(dst, &dst_ag, &dst_rb);
    462 
    463     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    464     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    465 
    466     return SkUnsplay(ret_ag, ret_rb);
    467 }
    468 
    469 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    470     SkASSERT(scale <= 256);
    471     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    472     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
    473 }
    474 
    475 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
    476 
    477 /**
    478  * Same as SkFourByteInterp256, but faster.
    479  */
    480 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    481     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    482     if (sizeof(void*) == 4) {
    483         return SkFastFourByteInterp256_32(src, dst, scale);
    484     } else {
    485         return SkFastFourByteInterp256_64(src, dst, scale);
    486     }
    487 }
    488 
    489 /**
    490  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
    491  * srcWeight scaling to [0, 256].
    492  */
    493 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    494                                              SkPMColor dst,
    495                                              U8CPU srcWeight) {
    496     SkASSERT(srcWeight <= 255);
    497     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    498     // scale = srcWeight + 1, but 7% slower
    499     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    500 }
    501 
    502 /**
    503  *  Same as SkPackARGB32, but this version guarantees to not check that the
    504  *  values are premultiplied in the debug version.
    505  */
    506 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    507     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    508            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    509 }
    510 
    511 static inline
    512 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    513     SkA32Assert(a);
    514     SkR32Assert(r);
    515     SkG32Assert(g);
    516     SkB32Assert(b);
    517 
    518     if (a != 255) {
    519         r = SkMulDiv255Round(r, a);
    520         g = SkMulDiv255Round(g, a);
    521         b = SkMulDiv255Round(b, a);
    522     }
    523     return SkPackARGB32(a, r, g, b);
    524 }
    525 
    526 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
    527 // inlined; forcing inlining significantly improves performance.
    528 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    529     uint32_t mask = 0xFF00FF;
    530 
    531     uint32_t rb = ((c & mask) * scale) >> 8;
    532     uint32_t ag = ((c >> 8) & mask) * scale;
    533     return (rb & mask) | (ag & ~mask);
    534 }
    535 
    536 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    537     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    538 }
    539 
    540 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    541     SkASSERT((unsigned)aa <= 255);
    542 
    543     unsigned src_scale = SkAlpha255To256(aa);
    544     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    545 
    546     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    547 }
    548 
    549 ////////////////////////////////////////////////////////////////////////////////////////////
    550 // Convert a 32bit pixel to a 16bit pixel (no dither)
    551 
    552 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    553 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    554 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    555 
    556 #ifdef SK_DEBUG
    557     static inline unsigned SkR32ToR16(unsigned r) {
    558         SkR32Assert(r);
    559         return SkR32ToR16_MACRO(r);
    560     }
    561     static inline unsigned SkG32ToG16(unsigned g) {
    562         SkG32Assert(g);
    563         return SkG32ToG16_MACRO(g);
    564     }
    565     static inline unsigned SkB32ToB16(unsigned b) {
    566         SkB32Assert(b);
    567         return SkB32ToB16_MACRO(b);
    568     }
    569 #else
    570     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    571     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    572     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    573 #endif
    574 
    575 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    576 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    577 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    578 
    579 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    580     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    581     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    582     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    583     return r | g | b;
    584 }
    585 
    586 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    587     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    588             (SkG32ToG16(g) << SK_G16_SHIFT) |
    589             (SkB32ToB16(b) << SK_B16_SHIFT);
    590 }
    591 
    592 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    593 
    594 /////////////////////////////////////////////////////////////////////////////////////////
    595 // Fast dither from 32->16
    596 
    597 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    598 
    599 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    600     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    601     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    602     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    603 
    604     return SkPackRGB16(r, g, b);
    605 }
    606 
    607 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    608     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    609 }
    610 
    611 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    612     It is now suitable for combining with a scaled expanded_rgb_16 color
    613     as in SkSrcOver32To16().
    614     We must do this 565 high-bit replication, in order for the subsequent add
    615     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    616     possible to overflow.
    617 */
    618 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    619     unsigned sr = SkPacked32ToR16(c);
    620     unsigned sg = SkPacked32ToG16(c);
    621     unsigned sb = SkPacked32ToB16(c);
    622 
    623     sr = (sr << 5) | sr;
    624     sg = (sg << 5) | (sg >> 1);
    625     sb = (sb << 5) | sb;
    626     return (sr << 11) | (sg << 21) | (sb << 0);
    627 }
    628 
    629 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    630     (with dirt in the high 16bits, so caller beware).
    631 */
    632 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    633     unsigned sr = SkGetPackedR32(src);
    634     unsigned sg = SkGetPackedG32(src);
    635     unsigned sb = SkGetPackedB32(src);
    636 
    637     unsigned dr = SkGetPackedR16(dst);
    638     unsigned dg = SkGetPackedG16(dst);
    639     unsigned db = SkGetPackedB16(dst);
    640 
    641     unsigned isa = 255 - SkGetPackedA32(src);
    642 
    643     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    644     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    645     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    646 
    647     return SkPackRGB16(dr, dg, db);
    648 }
    649 
    650 ////////////////////////////////////////////////////////////////////////////////////////////
    651 // Convert a 16bit pixel to a 32bit pixel
    652 
    653 static inline unsigned SkR16ToR32(unsigned r) {
    654     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    655 }
    656 
    657 static inline unsigned SkG16ToG32(unsigned g) {
    658     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    659 }
    660 
    661 static inline unsigned SkB16ToB32(unsigned b) {
    662     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    663 }
    664 
    665 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    666 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    667 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    668 
    669 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    670     SkASSERT(src == SkToU16(src));
    671 
    672     unsigned    r = SkPacked16ToR32(src);
    673     unsigned    g = SkPacked16ToG32(src);
    674     unsigned    b = SkPacked16ToB32(src);
    675 
    676     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    677     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    678     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    679 
    680     return SkPackARGB32(0xFF, r, g, b);
    681 }
    682 
    683 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    684 static inline SkColor SkPixel16ToColor(U16CPU src) {
    685     SkASSERT(src == SkToU16(src));
    686 
    687     unsigned    r = SkPacked16ToR32(src);
    688     unsigned    g = SkPacked16ToG32(src);
    689     unsigned    b = SkPacked16ToB32(src);
    690 
    691     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    692     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    693     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    694 
    695     return SkColorSetRGB(r, g, b);
    696 }
    697 
    698 ///////////////////////////////////////////////////////////////////////////////
    699 
    700 typedef uint16_t SkPMColor16;
    701 
    702 // Put in OpenGL order (r g b a)
    703 #define SK_A4444_SHIFT    0
    704 #define SK_R4444_SHIFT    12
    705 #define SK_G4444_SHIFT    8
    706 #define SK_B4444_SHIFT    4
    707 
    708 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    709 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    710 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    711 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    712 
    713 static inline U8CPU SkReplicateNibble(unsigned nib) {
    714     SkASSERT(nib <= 0xF);
    715     return (nib << 4) | nib;
    716 }
    717 
    718 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    719 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    720 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    721 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    722 
    723 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    724 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    725 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    726 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    727 
    728 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    729 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    730 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    731 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    732 
    733 #ifdef SK_DEBUG
    734 static inline void SkPMColor16Assert(U16CPU c) {
    735     unsigned a = SkGetPackedA4444(c);
    736     unsigned r = SkGetPackedR4444(c);
    737     unsigned g = SkGetPackedG4444(c);
    738     unsigned b = SkGetPackedB4444(c);
    739 
    740     SkASSERT(a <= 0xF);
    741     SkASSERT(r <= a);
    742     SkASSERT(g <= a);
    743     SkASSERT(b <= a);
    744 }
    745 #else
    746 #define SkPMColor16Assert(c)
    747 #endif
    748 
    749 static inline unsigned SkAlpha15To16(unsigned a) {
    750     SkASSERT(a <= 0xF);
    751     return a + (a >> 3);
    752 }
    753 
    754 #ifdef SK_DEBUG
    755     static inline int SkAlphaMul4(int value, int scale) {
    756         SkASSERT((unsigned)scale <= 0x10);
    757         return value * scale >> 4;
    758     }
    759 #else
    760     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    761 #endif
    762 
    763 static inline unsigned SkR4444ToR565(unsigned r) {
    764     SkASSERT(r <= 0xF);
    765     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    766 }
    767 
    768 static inline unsigned SkG4444ToG565(unsigned g) {
    769     SkASSERT(g <= 0xF);
    770     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    771 }
    772 
    773 static inline unsigned SkB4444ToB565(unsigned b) {
    774     SkASSERT(b <= 0xF);
    775     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    776 }
    777 
    778 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    779                                          unsigned g, unsigned b) {
    780     SkASSERT(a <= 0xF);
    781     SkASSERT(r <= a);
    782     SkASSERT(g <= a);
    783     SkASSERT(b <= a);
    784 
    785     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    786                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    787 }
    788 
    789 static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
    790     SkASSERT(scale <= 16);
    791 
    792     const unsigned mask = 0xF0F;    //gMask_0F0F;
    793 
    794 #if 0
    795     unsigned rb = ((c & mask) * scale) >> 4;
    796     unsigned ag = ((c >> 4) & mask) * scale;
    797     return (rb & mask) | (ag & ~mask);
    798 #else
    799     unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
    800     unsigned scaled_c = (expanded_c * scale) >> 4;
    801     return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
    802 #endif
    803 }
    804 
    805 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    806     once by a value up to 16.
    807 */
    808 static inline uint32_t SkExpand_4444(U16CPU c) {
    809     SkASSERT(c == (uint16_t)c);
    810 
    811     const unsigned mask = 0xF0F;    //gMask_0F0F;
    812     return (c & mask) | ((c & ~mask) << 12);
    813 }
    814 
    815 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    816     unsigned sa = SkGetPackedA4444(s);
    817     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    818     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    819     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    820 
    821     // To avoid overflow, we have to clear the low bit of the synthetic sg
    822     // if the src alpha is <= 7.
    823     // to see why, try blending 0x4444 on top of 565-white and watch green
    824     // overflow (sum == 64)
    825     sg &= ~(~(sa >> 3) & 1);
    826 
    827     unsigned scale = SkAlpha15To16(15 - sa);
    828     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    829     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    830     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    831 
    832 #if 0
    833     if (sg + dg > 63) {
    834         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    835     }
    836 #endif
    837     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    838 }
    839 
    840 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    841     SkASSERT((unsigned)scale16 <= 16);
    842 
    843     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    844 }
    845 
    846 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    847     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    848                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    849                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    850                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    851     return d | (d << 4);
    852 }
    853 
    854 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    855     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    856     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    857     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    858     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    859 }
    860 
    861 // cheap 2x2 dither
    862 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    863                                                U8CPU g, U8CPU b) {
    864     // to ensure that we stay a legal premultiplied color, we take the max()
    865     // of the truncated and dithered alpha values. If we didn't, cases like
    866     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    867     // which is not legal premultiplied, since a < color
    868     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    869     a = SkMax32(a >> 4, dithered_a);
    870     // these we just dither in place
    871     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    872     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    873     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    874 
    875     return SkPackARGB4444(a, r, g, b);
    876 }
    877 
    878 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    879     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    880                                 SkGetPackedG32(c), SkGetPackedB32(c));
    881 }
    882 
    883 /*  Assumes 16bit is in standard RGBA order.
    884     Transforms a normal ARGB_8888 into the same byte order as
    885     expanded ARGB_4444, but keeps each component 8bits
    886 */
    887 static inline uint32_t SkExpand_8888(SkPMColor c) {
    888     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    889             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    890             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    891             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    892 }
    893 
    894 /*  Undo the operation of SkExpand_8888, turning the argument back into
    895     a SkPMColor.
    896 */
    897 static inline SkPMColor SkCompact_8888(uint32_t c) {
    898     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    899             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    900             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    901             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    902 }
    903 
    904 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    905     but this routine just keeps the high 4bits of each component in the low
    906     4bits of the result (just like a newly expanded PMColor16).
    907 */
    908 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    909     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    910             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    911             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    912             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    913 }
    914 
    915 // takes two values and alternates them as part of a memset16
    916 // used for cheap 2x2 dithering when the colors are opaque
    917 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    918 
    919 ///////////////////////////////////////////////////////////////////////////////
    920 
    921 static inline int SkUpscale31To32(int value) {
    922     SkASSERT((unsigned)value <= 31);
    923     return value + (value >> 4);
    924 }
    925 
    926 static inline int SkBlend32(int src, int dst, int scale) {
    927     SkASSERT((unsigned)src <= 0xFF);
    928     SkASSERT((unsigned)dst <= 0xFF);
    929     SkASSERT((unsigned)scale <= 32);
    930     return dst + ((src - dst) * scale >> 5);
    931 }
    932 
    933 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    934                                      SkPMColor dst, uint16_t mask) {
    935     if (mask == 0) {
    936         return dst;
    937     }
    938 
    939     /*  We want all of these in 5bits, hence the shifts in case one of them
    940      *  (green) is 6bits.
    941      */
    942     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    943     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    944     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    945 
    946     // Now upscale them to 0..32, so we can use blend32
    947     maskR = SkUpscale31To32(maskR);
    948     maskG = SkUpscale31To32(maskG);
    949     maskB = SkUpscale31To32(maskB);
    950 
    951     // srcA has been upscaled to 256 before passed into this function
    952     maskR = maskR * srcA >> 8;
    953     maskG = maskG * srcA >> 8;
    954     maskB = maskB * srcA >> 8;
    955 
    956     int dstR = SkGetPackedR32(dst);
    957     int dstG = SkGetPackedG32(dst);
    958     int dstB = SkGetPackedB32(dst);
    959 
    960     // LCD blitting is only supported if the dst is known/required
    961     // to be opaque
    962     return SkPackARGB32(0xFF,
    963                         SkBlend32(srcR, dstR, maskR),
    964                         SkBlend32(srcG, dstG, maskG),
    965                         SkBlend32(srcB, dstB, maskB));
    966 }
    967 
    968 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    969                                            SkPMColor dst, uint16_t mask,
    970                                            SkPMColor opaqueDst) {
    971     if (mask == 0) {
    972         return dst;
    973     }
    974 
    975     if (0xFFFF == mask) {
    976         return opaqueDst;
    977     }
    978 
    979     /*  We want all of these in 5bits, hence the shifts in case one of them
    980      *  (green) is 6bits.
    981      */
    982     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    983     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    984     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    985 
    986     // Now upscale them to 0..32, so we can use blend32
    987     maskR = SkUpscale31To32(maskR);
    988     maskG = SkUpscale31To32(maskG);
    989     maskB = SkUpscale31To32(maskB);
    990 
    991     int dstR = SkGetPackedR32(dst);
    992     int dstG = SkGetPackedG32(dst);
    993     int dstB = SkGetPackedB32(dst);
    994 
    995     // LCD blitting is only supported if the dst is known/required
    996     // to be opaque
    997     return SkPackARGB32(0xFF,
    998                         SkBlend32(srcR, dstR, maskR),
    999                         SkBlend32(srcG, dstG, maskG),
   1000                         SkBlend32(srcB, dstB, maskB));
   1001 }
   1002 
   1003 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
   1004                                   SkColor src, int width, SkPMColor) {
   1005     int srcA = SkColorGetA(src);
   1006     int srcR = SkColorGetR(src);
   1007     int srcG = SkColorGetG(src);
   1008     int srcB = SkColorGetB(src);
   1009 
   1010     srcA = SkAlpha255To256(srcA);
   1011 
   1012     for (int i = 0; i < width; i++) {
   1013         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
   1014     }
   1015 }
   1016 
   1017 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
   1018                                         SkColor src, int width,
   1019                                         SkPMColor opaqueDst) {
   1020     int srcR = SkColorGetR(src);
   1021     int srcG = SkColorGetG(src);
   1022     int srcB = SkColorGetB(src);
   1023 
   1024     for (int i = 0; i < width; i++) {
   1025         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
   1026                                     opaqueDst);
   1027     }
   1028 }
   1029 
   1030 #endif
   1031