Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColorPriv_DEFINED
      9 #define SkColorPriv_DEFINED
     10 
// turn this on for extra debug checking when blending onto 565
     12 #ifdef SK_DEBUG
     13     #define CHECK_FOR_565_OVERFLOW
     14 #endif
     15 
     16 #include "SkColor.h"
     17 #include "SkMath.h"
     18 
     19 //////////////////////////////////////////////////////////////////////////////
     20 
     21 #define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
     22 
     23 /*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
     25  *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
     26  *
     27  *  For easier compatibility with Skia's GPU backend, we further restrict these
     28  *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take endianness
     30  *  into account.
     31  *
     32  *  Here we enforce this constraint.
     33  */
     34 
     35 #ifdef SK_CPU_BENDIAN
     36     #define SK_RGBA_R32_SHIFT   24
     37     #define SK_RGBA_G32_SHIFT   16
     38     #define SK_RGBA_B32_SHIFT   8
     39     #define SK_RGBA_A32_SHIFT   0
     40 
     41     #define SK_BGRA_B32_SHIFT   24
     42     #define SK_BGRA_G32_SHIFT   16
     43     #define SK_BGRA_R32_SHIFT   8
     44     #define SK_BGRA_A32_SHIFT   0
     45 #else
     46     #define SK_RGBA_R32_SHIFT   0
     47     #define SK_RGBA_G32_SHIFT   8
     48     #define SK_RGBA_B32_SHIFT   16
     49     #define SK_RGBA_A32_SHIFT   24
     50 
     51     #define SK_BGRA_B32_SHIFT   0
     52     #define SK_BGRA_G32_SHIFT   8
     53     #define SK_BGRA_R32_SHIFT   16
     54     #define SK_BGRA_A32_SHIFT   24
     55 #endif
     56 
     57 #if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
     58     #error "can't define PMCOLOR to be RGBA and BGRA"
     59 #endif
     60 
     61 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
     62     (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     63      SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     64      SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     65      SK_B32_SHIFT == SK_RGBA_B32_SHIFT)
     66 
     67 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
     68     (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     69      SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     70      SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     71      SK_B32_SHIFT == SK_BGRA_B32_SHIFT)
     72 
     73 
     74 #if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     75     #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
     76 #endif
     77 
     78 #if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     79     #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
     80 #endif
     81 
     82 #if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
     83     // deduce which to define from the _SHIFT defines
     84 
     85     #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     86         #define SK_PMCOLOR_IS_RGBA
     87     #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     88         #define SK_PMCOLOR_IS_BGRA
     89     #else
     90         #error "need 32bit packing to be either RGBA or BGRA"
     91     #endif
     92 #endif
     93 
     94 // hide these now that we're done
     95 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     96 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     97 
     98 //////////////////////////////////////////////////////////////////////////////
     99 
    100 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
    101 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
    102 // no need to pass in the colortype to this function.
    103 static inline uint32_t SkSwizzle_RB(uint32_t c) {
    104     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
    105 
    106     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    107     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    108     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
    109 }
    110 
    111 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    112     SkASSERT_IS_BYTE(a);
    113     SkASSERT_IS_BYTE(r);
    114     SkASSERT_IS_BYTE(g);
    115     SkASSERT_IS_BYTE(b);
    116     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
    117            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
    118 }
    119 
    120 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    121     SkASSERT_IS_BYTE(a);
    122     SkASSERT_IS_BYTE(r);
    123     SkASSERT_IS_BYTE(g);
    124     SkASSERT_IS_BYTE(b);
    125     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
    126            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
    127 }
    128 
    129 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
    130 #ifdef SK_PMCOLOR_IS_RGBA
    131     return c;
    132 #else
    133     return SkSwizzle_RB(c);
    134 #endif
    135 }
    136 
    137 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
    138 #ifdef SK_PMCOLOR_IS_BGRA
    139     return c;
    140 #else
    141     return SkSwizzle_RB(c);
    142 #endif
    143 }
    144 
    145 //////////////////////////////////////////////////////////////////////////////
    146 
    147 ///@{
    148 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
    149 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
    150 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
    151 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
    152 ///@}
    153 
    154 ///@{
    155 /** A float value which specifies this channel's contribution to luminance. */
    156 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
    157 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
    158 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
    159 ///@}
    160 
    161 /** Computes the luminance from the given r, g, and b in accordance with
    162     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
    163 */
    164 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    165     //The following is
    166     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    167     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    168     return (r * 54 + g * 183 + b * 19) >> 8;
    169 }
    170 
    171 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    172     byte into a scale value, so that we can say scale * value >> 8 instead of
    173     alpha * value / 255.
    174 
    175     In debugging, asserts that alpha is 0..255
    176 */
    177 static inline unsigned SkAlpha255To256(U8CPU alpha) {
    178     SkASSERT(SkToU8(alpha) == alpha);
    179     // this one assues that blending on top of an opaque dst keeps it that way
    180     // even though it is less accurate than a+(a>>7) for non-opaque dsts
    181     return alpha + 1;
    182 }
    183 
    184 /**
    185  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
    186  *  This is slightly more accurate than SkAlpha255To256.
    187  */
    188 static inline unsigned Sk255To256(U8CPU value) {
    189     SkASSERT(SkToU8(value) == value);
    190     return value + (value >> 7);
    191 }
    192 
/** Multiply value by 0..256, and shift the result down 8
    194     (i.e. return (value * alpha256) >> 8)
    195  */
    196 #define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
    197 
    198 //  The caller may want negative values, so keep all params signed (int)
    199 //  so we don't accidentally slip into unsigned math and lose the sign
    200 //  extension when we shift (in SkAlphaMul)
    201 static inline int SkAlphaBlend(int src, int dst, int scale256) {
    202     SkASSERT((unsigned)scale256 <= 256);
    203     return dst + SkAlphaMul(src - dst, scale256);
    204 }
    205 
    206 /**
    207  *  Returns (src * alpha + dst * (255 - alpha)) / 255
    208  *
    209  *  This is more accurate than SkAlphaBlend, but slightly slower
    210  */
    211 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    212     SkASSERT((int16_t)src == src);
    213     SkASSERT((int16_t)dst == dst);
    214     SkASSERT((uint8_t)alpha == alpha);
    215 
    216     int prod = SkMulS16(src - dst, alpha) + 128;
    217     prod = (prod + (prod >> 8)) >> 8;
    218     return dst + prod;
    219 }
    220 
    221 #define SK_R16_BITS     5
    222 #define SK_G16_BITS     6
    223 #define SK_B16_BITS     5
    224 
    225 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
    226 #define SK_G16_SHIFT    (SK_B16_BITS)
    227 #define SK_B16_SHIFT    0
    228 
    229 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
    230 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
    231 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
    232 
    233 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
    234 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    235 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    236 
    237 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    238 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    239 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    240 
    241 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    242     SkASSERT(r <= SK_R16_MASK);
    243     SkASSERT(g <= SK_G16_MASK);
    244     SkASSERT(b <= SK_B16_MASK);
    245 
    246     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    247 }
    248 
    249 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    250 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    251 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    252 
    253 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    254     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    255 */
    256 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    257     SkASSERT(c == (uint16_t)c);
    258 
    259     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    260 }
    261 
    262 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    263     color value. The computation yields only 16bits of valid data, but we claim
    264     to return 32bits, so that the compiler won't generate extra instructions to
    265     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    266     up to the caller to safely ignore them.
    267 */
    268 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    269     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    270 }
    271 
    272 /** Scale the 16bit color value by the 0..256 scale parameter.
    273     The computation yields only 16bits of valid data, but we claim
    274     to return 32bits, so that the compiler won't generate extra instructions to
    275     "clean" the top 16bits.
    276 */
    277 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    278     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    279 }
    280 
    281 // this helper explicitly returns a clean 16bit value (but slower)
    282 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    283 
    284 /** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter.
    285     The computation yields only 16bits of valid data, but we claim to return
    286     32bits, so that the compiler won't generate extra instructions to "clean"
    287     the top 16bits.
    288 */
    289 static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
    290     uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
    291     return SkCompact_rgb_16((src_expand + dst_expand) >> 5);
    292 }
    293 
    294 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    295     The computation yields only 16bits of valid data, but we claim
    296     to return 32bits, so that the compiler won't generate extra instructions to
    297     "clean" the top 16bits.
    298 */
    299 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    300     SkASSERT((unsigned)srcScale <= 256);
    301 
    302     srcScale >>= 3;
    303 
    304     uint32_t src32 = SkExpand_rgb_16(src);
    305     uint32_t dst32 = SkExpand_rgb_16(dst);
    306     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    307 }
    308 
    309 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    310                                 int srcScale, int count) {
    311     SkASSERT(count > 0);
    312     SkASSERT((unsigned)srcScale <= 256);
    313 
    314     srcScale >>= 3;
    315 
    316     do {
    317         uint32_t src32 = SkExpand_rgb_16(*src++);
    318         uint32_t dst32 = SkExpand_rgb_16(*dst);
    319         *dst++ = static_cast<uint16_t>(
    320             SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
    321     } while (--count > 0);
    322 }
    323 
    324 #ifdef SK_DEBUG
    325     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    326         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    327         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    328         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    329 
    330         return a + b;
    331     }
    332 #else
    333     #define SkRGB16Add(a, b)  ((a) + (b))
    334 #endif
    335 
    336 ///////////////////////////////////////////////////////////////////////////////
    337 
    338 #define SK_A32_BITS     8
    339 #define SK_R32_BITS     8
    340 #define SK_G32_BITS     8
    341 #define SK_B32_BITS     8
    342 
    343 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    344 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    345 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    346 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    347 
    348 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    349 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    350 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    351 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    352 
    353 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    354 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    355 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    356 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    357 
    358 #ifdef SK_DEBUG
    359     #define SkPMColorAssert(color_value)                                    \
    360         do {                                                                \
    361             SkPMColor pm_color_value = (color_value);                       \
    362             uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
    363             SkA32Assert(alpha_color_value);                                 \
    364             SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
    365             SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
    366             SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
    367         } while (false)
    368 #else
    369     #define SkPMColorAssert(c)
    370 #endif
    371 
    372 /**
    373  *  Pack the components into a SkPMColor, checking (in the debug version) that
    374  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    375  */
    376 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    377     SkA32Assert(a);
    378     SkASSERT(r <= a);
    379     SkASSERT(g <= a);
    380     SkASSERT(b <= a);
    381 
    382     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    383            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    384 }
    385 
    386 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    387     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
    388                               SkGetPackedG32(c), SkGetPackedB32(c));
    389 }
    390 
    391 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    392     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
    393                               SkGetPackedG32(c), SkGetPackedB32(c));
    394 }
    395 
    396 /**
    397  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    398  * utility functions. Third parameter controls blending of the first two:
    399  *   (src, dst, 0) returns dst
    400  *   (src, dst, 0xFF) returns src
    401  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    402  */
    403 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    404                                          unsigned scale) {
    405     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    406     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    407     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    408     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    409 
    410     return SkPackARGB32(a, r, g, b);
    411 }
    412 
    413 /**
    414  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    415  * utility functions. Third parameter controls blending of the first two:
    416  *   (src, dst, 0) returns dst
    417  *   (src, dst, 0xFF) returns src
    418  */
    419 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    420                                          U8CPU srcWeight) {
    421     unsigned scale = SkAlpha255To256(srcWeight);
    422     return SkFourByteInterp256(src, dst, scale);
    423 }
    424 
    425 /**
    426  * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
    427  */
    428 static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    429     const uint32_t mask = 0x00FF00FF;
    430     *ag = (color >> 8) & mask;
    431     *rb = color & mask;
    432 }
    433 
    434 /**
    435  * 0xAARRGGBB -> 0x00AA00GG00RR00BB
    436  * (note, ARGB -> AGRB)
    437  */
    438 static inline uint64_t SkSplay(uint32_t color) {
    439     const uint32_t mask = 0x00FF00FF;
    440     uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    441     agrb <<= 32;                          // 0x00AA00GG00000000
    442     agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    443     return agrb;
    444 }
    445 
    446 /**
    447  * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
    448  */
    449 static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    450     const uint32_t mask = 0xFF00FF00;
    451     return (ag & mask) | ((rb & mask) >> 8);
    452 }
    453 
    454 /**
    455  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
    456  * (note, AGRB -> ARGB)
    457  */
    458 static inline uint32_t SkUnsplay(uint64_t agrb) {
    459     const uint32_t mask = 0xFF00FF00;
    460     return SkPMColor(
    461         ((agrb & mask) >> 8) |   // 0x00RR00BB
    462         ((agrb >> 32) & mask));  // 0xAARRGGBB
    463 }
    464 
    465 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    466     SkASSERT(scale <= 256);
    467 
    468     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    469     uint32_t src_ag, src_rb, dst_ag, dst_rb;
    470     SkSplay(src, &src_ag, &src_rb);
    471     SkSplay(dst, &dst_ag, &dst_rb);
    472 
    473     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    474     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    475 
    476     return SkUnsplay(ret_ag, ret_rb);
    477 }
    478 
    479 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    480     SkASSERT(scale <= 256);
    481     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    482     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
    483 }
    484 
    485 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
    486 
    487 /**
    488  * Same as SkFourByteInterp256, but faster.
    489  */
    490 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    491     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    492     if (sizeof(void*) == 4) {
    493         return SkFastFourByteInterp256_32(src, dst, scale);
    494     } else {
    495         return SkFastFourByteInterp256_64(src, dst, scale);
    496     }
    497 }
    498 
    499 /**
    500  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
    501  * srcWeight scaling to [0, 256].
    502  */
    503 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    504                                              SkPMColor dst,
    505                                              U8CPU srcWeight) {
    506     SkASSERT(srcWeight <= 255);
    507     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    508     // scale = srcWeight + 1, but 7% slower
    509     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    510 }
    511 
    512 /**
    513  *  Same as SkPackARGB32, but this version guarantees to not check that the
    514  *  values are premultiplied in the debug version.
    515  */
    516 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    517     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    518            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    519 }
    520 
    521 static inline
    522 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    523     SkA32Assert(a);
    524     SkR32Assert(r);
    525     SkG32Assert(g);
    526     SkB32Assert(b);
    527 
    528     if (a != 255) {
    529         r = SkMulDiv255Round(r, a);
    530         g = SkMulDiv255Round(g, a);
    531         b = SkMulDiv255Round(b, a);
    532     }
    533     return SkPackARGB32(a, r, g, b);
    534 }
    535 
    536 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
    537 // inlined; forcing inlining significantly improves performance.
    538 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    539     uint32_t mask = 0xFF00FF;
    540 
    541     uint32_t rb = ((c & mask) * scale) >> 8;
    542     uint32_t ag = ((c >> 8) & mask) * scale;
    543     return (rb & mask) | (ag & ~mask);
    544 }
    545 
    546 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    547     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    548 }
    549 
    550 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    551     SkASSERT((unsigned)aa <= 255);
    552 
    553     unsigned src_scale = SkAlpha255To256(aa);
    554     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    555 
    556     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    557 }
    558 
    559 ////////////////////////////////////////////////////////////////////////////////////////////
    560 // Convert a 32bit pixel to a 16bit pixel (no dither)
    561 
    562 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    563 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    564 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    565 
    566 #ifdef SK_DEBUG
    567     static inline unsigned SkR32ToR16(unsigned r) {
    568         SkR32Assert(r);
    569         return SkR32ToR16_MACRO(r);
    570     }
    571     static inline unsigned SkG32ToG16(unsigned g) {
    572         SkG32Assert(g);
    573         return SkG32ToG16_MACRO(g);
    574     }
    575     static inline unsigned SkB32ToB16(unsigned b) {
    576         SkB32Assert(b);
    577         return SkB32ToB16_MACRO(b);
    578     }
    579 #else
    580     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    581     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    582     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    583 #endif
    584 
    585 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    586 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    587 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    588 
    589 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    590     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    591     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    592     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    593     return r | g | b;
    594 }
    595 
    596 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    597     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    598             (SkG32ToG16(g) << SK_G16_SHIFT) |
    599             (SkB32ToB16(b) << SK_B16_SHIFT);
    600 }
    601 
    602 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    603 
    604 /////////////////////////////////////////////////////////////////////////////////////////
    605 // Fast dither from 32->16
    606 
    607 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    608 
    609 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    610     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    611     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    612     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    613 
    614     return SkPackRGB16(r, g, b);
    615 }
    616 
    617 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    618     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    619 }
    620 
    621 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    622     It is now suitable for combining with a scaled expanded_rgb_16 color
    623     as in SkSrcOver32To16().
    624     We must do this 565 high-bit replication, in order for the subsequent add
    625     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    626     possible to overflow.
    627 */
    628 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    629     unsigned sr = SkPacked32ToR16(c);
    630     unsigned sg = SkPacked32ToG16(c);
    631     unsigned sb = SkPacked32ToB16(c);
    632 
    633     sr = (sr << 5) | sr;
    634     sg = (sg << 5) | (sg >> 1);
    635     sb = (sb << 5) | sb;
    636     return (sr << 11) | (sg << 21) | (sb << 0);
    637 }
    638 
    639 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    640     (with dirt in the high 16bits, so caller beware).
    641 */
    642 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    643     unsigned sr = SkGetPackedR32(src);
    644     unsigned sg = SkGetPackedG32(src);
    645     unsigned sb = SkGetPackedB32(src);
    646 
    647     unsigned dr = SkGetPackedR16(dst);
    648     unsigned dg = SkGetPackedG16(dst);
    649     unsigned db = SkGetPackedB16(dst);
    650 
    651     unsigned isa = 255 - SkGetPackedA32(src);
    652 
    653     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    654     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    655     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    656 
    657     return SkPackRGB16(dr, dg, db);
    658 }
    659 
    660 ////////////////////////////////////////////////////////////////////////////////////////////
    661 // Convert a 16bit pixel to a 32bit pixel
    662 
    663 static inline unsigned SkR16ToR32(unsigned r) {
    664     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    665 }
    666 
    667 static inline unsigned SkG16ToG32(unsigned g) {
    668     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    669 }
    670 
    671 static inline unsigned SkB16ToB32(unsigned b) {
    672     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    673 }
    674 
    675 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    676 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    677 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    678 
    679 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    680     SkASSERT(src == SkToU16(src));
    681 
    682     unsigned    r = SkPacked16ToR32(src);
    683     unsigned    g = SkPacked16ToG32(src);
    684     unsigned    b = SkPacked16ToB32(src);
    685 
    686     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    687     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    688     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    689 
    690     return SkPackARGB32(0xFF, r, g, b);
    691 }
    692 
    693 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    694 static inline SkColor SkPixel16ToColor(U16CPU src) {
    695     SkASSERT(src == SkToU16(src));
    696 
    697     unsigned    r = SkPacked16ToR32(src);
    698     unsigned    g = SkPacked16ToG32(src);
    699     unsigned    b = SkPacked16ToB32(src);
    700 
    701     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    702     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    703     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    704 
    705     return SkColorSetRGB(r, g, b);
    706 }
    707 
    708 ///////////////////////////////////////////////////////////////////////////////
    709 
    710 typedef uint16_t SkPMColor16;
    711 
    712 // Put in OpenGL order (r g b a)
    713 #define SK_A4444_SHIFT    0
    714 #define SK_R4444_SHIFT    12
    715 #define SK_G4444_SHIFT    8
    716 #define SK_B4444_SHIFT    4
    717 
    718 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    719 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    720 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    721 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    722 
    723 static inline U8CPU SkReplicateNibble(unsigned nib) {
    724     SkASSERT(nib <= 0xF);
    725     return (nib << 4) | nib;
    726 }
    727 
    728 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    729 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    730 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    731 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    732 
    733 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    734 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    735 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    736 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    737 
    738 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    739 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    740 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    741 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    742 
    743 #ifdef SK_DEBUG
    744 static inline void SkPMColor16Assert(U16CPU c) {
    745     unsigned a = SkGetPackedA4444(c);
    746     unsigned r = SkGetPackedR4444(c);
    747     unsigned g = SkGetPackedG4444(c);
    748     unsigned b = SkGetPackedB4444(c);
    749 
    750     SkASSERT(a <= 0xF);
    751     SkASSERT(r <= a);
    752     SkASSERT(g <= a);
    753     SkASSERT(b <= a);
    754 }
    755 #else
    756 #define SkPMColor16Assert(c)
    757 #endif
    758 
    759 static inline unsigned SkAlpha15To16(unsigned a) {
    760     SkASSERT(a <= 0xF);
    761     return a + (a >> 3);
    762 }
    763 
    764 #ifdef SK_DEBUG
    765     static inline int SkAlphaMul4(int value, int scale) {
    766         SkASSERT((unsigned)scale <= 0x10);
    767         return value * scale >> 4;
    768     }
    769 #else
    770     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    771 #endif
    772 
    773 static inline unsigned SkR4444ToR565(unsigned r) {
    774     SkASSERT(r <= 0xF);
    775     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    776 }
    777 
    778 static inline unsigned SkG4444ToG565(unsigned g) {
    779     SkASSERT(g <= 0xF);
    780     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    781 }
    782 
    783 static inline unsigned SkB4444ToB565(unsigned b) {
    784     SkASSERT(b <= 0xF);
    785     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    786 }
    787 
    788 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    789                                          unsigned g, unsigned b) {
    790     SkASSERT(a <= 0xF);
    791     SkASSERT(r <= a);
    792     SkASSERT(g <= a);
    793     SkASSERT(b <= a);
    794 
    795     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    796                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    797 }
    798 
    799 static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
    800     SkASSERT(scale <= 16);
    801 
    802     const unsigned mask = 0xF0F;    //gMask_0F0F;
    803 
    804 #if 0
    805     unsigned rb = ((c & mask) * scale) >> 4;
    806     unsigned ag = ((c >> 4) & mask) * scale;
    807     return (rb & mask) | (ag & ~mask);
    808 #else
    809     unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
    810     unsigned scaled_c = (expanded_c * scale) >> 4;
    811     return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
    812 #endif
    813 }
    814 
    815 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    816     once by a value up to 16.
    817 */
    818 static inline uint32_t SkExpand_4444(U16CPU c) {
    819     SkASSERT(c == (uint16_t)c);
    820 
    821     const unsigned mask = 0xF0F;    //gMask_0F0F;
    822     return (c & mask) | ((c & ~mask) << 12);
    823 }
    824 
    825 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    826     unsigned sa = SkGetPackedA4444(s);
    827     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    828     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    829     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    830 
    831     // To avoid overflow, we have to clear the low bit of the synthetic sg
    832     // if the src alpha is <= 7.
    833     // to see why, try blending 0x4444 on top of 565-white and watch green
    834     // overflow (sum == 64)
    835     sg &= ~(~(sa >> 3) & 1);
    836 
    837     unsigned scale = SkAlpha15To16(15 - sa);
    838     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    839     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    840     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    841 
    842 #if 0
    843     if (sg + dg > 63) {
    844         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    845     }
    846 #endif
    847     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    848 }
    849 
    850 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    851     SkASSERT((unsigned)scale16 <= 16);
    852 
    853     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    854 }
    855 
    856 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    857     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    858                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    859                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    860                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    861     return d | (d << 4);
    862 }
    863 
    864 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    865     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    866     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    867     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    868     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    869 }
    870 
    871 // cheap 2x2 dither
    872 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    873                                                U8CPU g, U8CPU b) {
    874     // to ensure that we stay a legal premultiplied color, we take the max()
    875     // of the truncated and dithered alpha values. If we didn't, cases like
    876     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    877     // which is not legal premultiplied, since a < color
    878     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    879     a = SkMax32(a >> 4, dithered_a);
    880     // these we just dither in place
    881     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    882     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    883     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    884 
    885     return SkPackARGB4444(a, r, g, b);
    886 }
    887 
    888 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    889     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    890                                 SkGetPackedG32(c), SkGetPackedB32(c));
    891 }
    892 
    893 /*  Assumes 16bit is in standard RGBA order.
    894     Transforms a normal ARGB_8888 into the same byte order as
    895     expanded ARGB_4444, but keeps each component 8bits
    896 */
    897 static inline uint32_t SkExpand_8888(SkPMColor c) {
    898     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    899             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    900             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    901             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    902 }
    903 
    904 /*  Undo the operation of SkExpand_8888, turning the argument back into
    905     a SkPMColor.
    906 */
    907 static inline SkPMColor SkCompact_8888(uint32_t c) {
    908     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    909             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    910             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    911             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    912 }
    913 
    914 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    915     but this routine just keeps the high 4bits of each component in the low
    916     4bits of the result (just like a newly expanded PMColor16).
    917 */
    918 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    919     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    920             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    921             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    922             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    923 }
    924 
    925 // takes two values and alternamtes them as part of a memset16
    926 // used for cheap 2x2 dithering when the colors are opaque
    927 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    928 
    929 ///////////////////////////////////////////////////////////////////////////////
    930 
    931 static inline int SkUpscale31To32(int value) {
    932     SkASSERT((unsigned)value <= 31);
    933     return value + (value >> 4);
    934 }
    935 
    936 static inline int SkBlend32(int src, int dst, int scale) {
    937     SkASSERT((unsigned)src <= 0xFF);
    938     SkASSERT((unsigned)dst <= 0xFF);
    939     SkASSERT((unsigned)scale <= 32);
    940     return dst + ((src - dst) * scale >> 5);
    941 }
    942 
    943 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    944                                      SkPMColor dst, uint16_t mask) {
    945     if (mask == 0) {
    946         return dst;
    947     }
    948 
    949     /*  We want all of these in 5bits, hence the shifts in case one of them
    950      *  (green) is 6bits.
    951      */
    952     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    953     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    954     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    955 
    956     // Now upscale them to 0..32, so we can use blend32
    957     maskR = SkUpscale31To32(maskR);
    958     maskG = SkUpscale31To32(maskG);
    959     maskB = SkUpscale31To32(maskB);
    960 
    961     // srcA has been upscaled to 256 before passed into this function
    962     maskR = maskR * srcA >> 8;
    963     maskG = maskG * srcA >> 8;
    964     maskB = maskB * srcA >> 8;
    965 
    966     int dstR = SkGetPackedR32(dst);
    967     int dstG = SkGetPackedG32(dst);
    968     int dstB = SkGetPackedB32(dst);
    969 
    970     // LCD blitting is only supported if the dst is known/required
    971     // to be opaque
    972     return SkPackARGB32(0xFF,
    973                         SkBlend32(srcR, dstR, maskR),
    974                         SkBlend32(srcG, dstG, maskG),
    975                         SkBlend32(srcB, dstB, maskB));
    976 }
    977 
    978 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    979                                            SkPMColor dst, uint16_t mask,
    980                                            SkPMColor opaqueDst) {
    981     if (mask == 0) {
    982         return dst;
    983     }
    984 
    985     if (0xFFFF == mask) {
    986         return opaqueDst;
    987     }
    988 
    989     /*  We want all of these in 5bits, hence the shifts in case one of them
    990      *  (green) is 6bits.
    991      */
    992     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    993     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    994     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    995 
    996     // Now upscale them to 0..32, so we can use blend32
    997     maskR = SkUpscale31To32(maskR);
    998     maskG = SkUpscale31To32(maskG);
    999     maskB = SkUpscale31To32(maskB);
   1000 
   1001     int dstR = SkGetPackedR32(dst);
   1002     int dstG = SkGetPackedG32(dst);
   1003     int dstB = SkGetPackedB32(dst);
   1004 
   1005     // LCD blitting is only supported if the dst is known/required
   1006     // to be opaque
   1007     return SkPackARGB32(0xFF,
   1008                         SkBlend32(srcR, dstR, maskR),
   1009                         SkBlend32(srcG, dstG, maskG),
   1010                         SkBlend32(srcB, dstB, maskB));
   1011 }
   1012 
   1013 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
   1014                                   SkColor src, int width, SkPMColor) {
   1015     int srcA = SkColorGetA(src);
   1016     int srcR = SkColorGetR(src);
   1017     int srcG = SkColorGetG(src);
   1018     int srcB = SkColorGetB(src);
   1019 
   1020     srcA = SkAlpha255To256(srcA);
   1021 
   1022     for (int i = 0; i < width; i++) {
   1023         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
   1024     }
   1025 }
   1026 
   1027 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
   1028                                         SkColor src, int width,
   1029                                         SkPMColor opaqueDst) {
   1030     int srcR = SkColorGetR(src);
   1031     int srcG = SkColorGetG(src);
   1032     int srcB = SkColorGetB(src);
   1033 
   1034     for (int i = 0; i < width; i++) {
   1035         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
   1036                                     opaqueDst);
   1037     }
   1038 }
   1039 
   1040 #endif
   1041