Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColorPriv_DEFINED
      9 #define SkColorPriv_DEFINED
     10 
     11 // turn this own for extra debug checking when blending onto 565
     12 #ifdef SK_DEBUG
     13     #define CHECK_FOR_565_OVERFLOW
     14 #endif
     15 
     16 #include "SkColor.h"
     17 #include "SkMath.h"
     18 
     19 //////////////////////////////////////////////////////////////////////////////
     20 
     21 #define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
     22 
/*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
 *  This is specified by 4 defines: SK_A32_SHIFT, SK_R32_SHIFT, SK_G32_SHIFT and
 *  SK_B32_SHIFT.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take
 *  endianness into account.
 *
 *  Here we enforce this constraint.
 */
     34 
     35 #ifdef SK_CPU_BENDIAN
     36     #define SK_RGBA_R32_SHIFT   24
     37     #define SK_RGBA_G32_SHIFT   16
     38     #define SK_RGBA_B32_SHIFT   8
     39     #define SK_RGBA_A32_SHIFT   0
     40 
     41     #define SK_BGRA_B32_SHIFT   24
     42     #define SK_BGRA_G32_SHIFT   16
     43     #define SK_BGRA_R32_SHIFT   8
     44     #define SK_BGRA_A32_SHIFT   0
     45 #else
     46     #define SK_RGBA_R32_SHIFT   0
     47     #define SK_RGBA_G32_SHIFT   8
     48     #define SK_RGBA_B32_SHIFT   16
     49     #define SK_RGBA_A32_SHIFT   24
     50 
     51     #define SK_BGRA_B32_SHIFT   0
     52     #define SK_BGRA_G32_SHIFT   8
     53     #define SK_BGRA_R32_SHIFT   16
     54     #define SK_BGRA_A32_SHIFT   24
     55 #endif
     56 
     57 #if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
     58     #error "can't define PMCOLOR to be RGBA and BGRA"
     59 #endif
     60 
     61 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
     62     (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     63      SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     64      SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     65      SK_B32_SHIFT == SK_RGBA_B32_SHIFT)
     66 
     67 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
     68     (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     69      SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     70      SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     71      SK_B32_SHIFT == SK_BGRA_B32_SHIFT)
     72 
     73 
     74 #define SK_A_INDEX  (SK_A32_SHIFT/8)
     75 #define SK_R_INDEX  (SK_R32_SHIFT/8)
     76 #define SK_G_INDEX  (SK_G32_SHIFT/8)
     77 #define SK_B_INDEX  (SK_B32_SHIFT/8)
     78 
     79 #if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     80     #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
     81 #endif
     82 
     83 #if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     84     #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
     85 #endif
     86 
     87 #if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
     88     // deduce which to define from the _SHIFT defines
     89 
     90     #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     91         #define SK_PMCOLOR_IS_RGBA
     92     #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     93         #define SK_PMCOLOR_IS_BGRA
     94     #else
     95         #error "need 32bit packing to be either RGBA or BGRA"
     96     #endif
     97 #endif
     98 
     99 // hide these now that we're done
    100 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
    101 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    102 
    103 //////////////////////////////////////////////////////////////////////////////
    104 
    105 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
    106 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
    107 // no need to pass in the colortype to this function.
    108 static inline uint32_t SkSwizzle_RB(uint32_t c) {
    109     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
    110 
    111     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    112     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    113     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
    114 }
    115 
    116 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    117     SkASSERT_IS_BYTE(a);
    118     SkASSERT_IS_BYTE(r);
    119     SkASSERT_IS_BYTE(g);
    120     SkASSERT_IS_BYTE(b);
    121     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
    122            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
    123 }
    124 
    125 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    126     SkASSERT_IS_BYTE(a);
    127     SkASSERT_IS_BYTE(r);
    128     SkASSERT_IS_BYTE(g);
    129     SkASSERT_IS_BYTE(b);
    130     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
    131            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
    132 }
    133 
    134 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
    135 #ifdef SK_PMCOLOR_IS_RGBA
    136     return c;
    137 #else
    138     return SkSwizzle_RB(c);
    139 #endif
    140 }
    141 
    142 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
    143 #ifdef SK_PMCOLOR_IS_BGRA
    144     return c;
    145 #else
    146     return SkSwizzle_RB(c);
    147 #endif
    148 }
    149 
    150 //////////////////////////////////////////////////////////////////////////////
    151 
    152 ///@{
    153 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
    154 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
    155 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
    156 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
    157 ///@}
    158 
    159 ///@{
    160 /** A float value which specifies this channel's contribution to luminance. */
    161 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
    162 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
    163 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
    164 ///@}
    165 
    166 /** Computes the luminance from the given r, g, and b in accordance with
    167     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
    168 */
    169 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    170     //The following is
    171     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    172     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    173     return (r * 54 + g * 183 + b * 19) >> 8;
    174 }
    175 
    176 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    177     byte into a scale value, so that we can say scale * value >> 8 instead of
    178     alpha * value / 255.
    179 
    180     In debugging, asserts that alpha is 0..255
    181 */
    182 static inline unsigned SkAlpha255To256(U8CPU alpha) {
    183     SkASSERT(SkToU8(alpha) == alpha);
    184     // this one assues that blending on top of an opaque dst keeps it that way
    185     // even though it is less accurate than a+(a>>7) for non-opaque dsts
    186     return alpha + 1;
    187 }
    188 
    189 /**
    190  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
    191  *  This is slightly more accurate than SkAlpha255To256.
    192  */
    193 static inline unsigned Sk255To256(U8CPU value) {
    194     SkASSERT(SkToU8(value) == value);
    195     return value + (value >> 7);
    196 }
    197 
    198 /** Multiplify value by 0..256, and shift the result down 8
    199     (i.e. return (value * alpha256) >> 8)
    200  */
    201 #define SkAlphaMul(value, alpha256)     (((value) * (alpha256)) >> 8)
    202 
    203 //  The caller may want negative values, so keep all params signed (int)
    204 //  so we don't accidentally slip into unsigned math and lose the sign
    205 //  extension when we shift (in SkAlphaMul)
    206 static inline int SkAlphaBlend(int src, int dst, int scale256) {
    207     SkASSERT((unsigned)scale256 <= 256);
    208     return dst + SkAlphaMul(src - dst, scale256);
    209 }
    210 
    211 /**
    212  *  Returns (src * alpha + dst * (255 - alpha)) / 255
    213  *
    214  *  This is more accurate than SkAlphaBlend, but slightly slower
    215  */
    216 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    217     SkASSERT((int16_t)src == src);
    218     SkASSERT((int16_t)dst == dst);
    219     SkASSERT((uint8_t)alpha == alpha);
    220 
    221     int prod = (src - dst) * alpha + 128;
    222     prod = (prod + (prod >> 8)) >> 8;
    223     return dst + prod;
    224 }
    225 
    226 static inline U8CPU SkUnitScalarClampToByte(SkScalar x) {
    227     return static_cast<U8CPU>(SkScalarPin(x, 0, 1) * 255 + 0.5);
    228 }
    229 
    230 #define SK_R16_BITS     5
    231 #define SK_G16_BITS     6
    232 #define SK_B16_BITS     5
    233 
    234 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
    235 #define SK_G16_SHIFT    (SK_B16_BITS)
    236 #define SK_B16_SHIFT    0
    237 
    238 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
    239 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
    240 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
    241 
    242 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
    243 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    244 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    245 
    246 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    247 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    248 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    249 
    250 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    251     SkASSERT(r <= SK_R16_MASK);
    252     SkASSERT(g <= SK_G16_MASK);
    253     SkASSERT(b <= SK_B16_MASK);
    254 
    255     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    256 }
    257 
    258 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    259 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    260 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    261 
    262 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    263     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    264 */
    265 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    266     SkASSERT(c == (uint16_t)c);
    267 
    268     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    269 }
    270 
    271 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    272     color value. The computation yields only 16bits of valid data, but we claim
    273     to return 32bits, so that the compiler won't generate extra instructions to
    274     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    275     up to the caller to safely ignore them.
    276 */
    277 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    278     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    279 }
    280 
    281 /** Scale the 16bit color value by the 0..256 scale parameter.
    282     The computation yields only 16bits of valid data, but we claim
    283     to return 32bits, so that the compiler won't generate extra instructions to
    284     "clean" the top 16bits.
    285 */
    286 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    287     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    288 }
    289 
    290 // this helper explicitly returns a clean 16bit value (but slower)
    291 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    292 
    293 /** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter.
    294     The computation yields only 16bits of valid data, but we claim to return
    295     32bits, so that the compiler won't generate extra instructions to "clean"
    296     the top 16bits.
    297 */
    298 static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
    299     uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
    300     return SkCompact_rgb_16((src_expand + dst_expand) >> 5);
    301 }
    302 
    303 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    304     The computation yields only 16bits of valid data, but we claim
    305     to return 32bits, so that the compiler won't generate extra instructions to
    306     "clean" the top 16bits.
    307 */
    308 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    309     SkASSERT((unsigned)srcScale <= 256);
    310 
    311     srcScale >>= 3;
    312 
    313     uint32_t src32 = SkExpand_rgb_16(src);
    314     uint32_t dst32 = SkExpand_rgb_16(dst);
    315     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    316 }
    317 
    318 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    319                                 int srcScale, int count) {
    320     SkASSERT(count > 0);
    321     SkASSERT((unsigned)srcScale <= 256);
    322 
    323     srcScale >>= 3;
    324 
    325     do {
    326         uint32_t src32 = SkExpand_rgb_16(*src++);
    327         uint32_t dst32 = SkExpand_rgb_16(*dst);
    328         *dst++ = static_cast<uint16_t>(
    329             SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
    330     } while (--count > 0);
    331 }
    332 
    333 #ifdef SK_DEBUG
    334     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    335         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    336         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    337         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    338 
    339         return a + b;
    340     }
    341 #else
    342     #define SkRGB16Add(a, b)  ((a) + (b))
    343 #endif
    344 
    345 ///////////////////////////////////////////////////////////////////////////////
    346 
    347 #define SK_A32_BITS     8
    348 #define SK_R32_BITS     8
    349 #define SK_G32_BITS     8
    350 #define SK_B32_BITS     8
    351 
    352 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    353 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    354 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    355 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    356 
    357 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    358 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    359 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    360 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    361 
    362 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    363 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    364 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    365 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    366 
    367 #ifdef SK_DEBUG
    368     #define SkPMColorAssert(color_value)                                    \
    369         do {                                                                \
    370             SkPMColor pm_color_value = (color_value);                       \
    371             uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
    372             SkA32Assert(alpha_color_value);                                 \
    373             SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
    374             SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
    375             SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
    376         } while (false)
    377 #else
    378     #define SkPMColorAssert(c)
    379 #endif
    380 
    381 static inline bool SkPMColorValid(SkPMColor c) {
    382     auto a = SkGetPackedA32(c);
    383     bool valid = a <= SK_A32_MASK
    384               && SkGetPackedR32(c) <= a
    385               && SkGetPackedG32(c) <= a
    386               && SkGetPackedB32(c) <= a;
    387     if (valid) {
    388         SkPMColorAssert(c);  // Make sure we're consistent when it counts.
    389     }
    390     return valid;
    391 }
    392 
    393 /**
    394  *  Pack the components into a SkPMColor, checking (in the debug version) that
    395  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    396  */
    397 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    398     SkA32Assert(a);
    399     SkASSERT(r <= a);
    400     SkASSERT(g <= a);
    401     SkASSERT(b <= a);
    402 
    403     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    404            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    405 }
    406 
    407 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    408     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
    409                               SkGetPackedG32(c), SkGetPackedB32(c));
    410 }
    411 
    412 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    413     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
    414                               SkGetPackedG32(c), SkGetPackedB32(c));
    415 }
    416 
    417 /**
    418  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    419  * utility functions. Third parameter controls blending of the first two:
    420  *   (src, dst, 0) returns dst
    421  *   (src, dst, 0xFF) returns src
    422  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    423  */
    424 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    425                                          unsigned scale) {
    426     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    427     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    428     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    429     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    430 
    431     return SkPackARGB32(a, r, g, b);
    432 }
    433 
    434 /**
    435  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    436  * utility functions. Third parameter controls blending of the first two:
    437  *   (src, dst, 0) returns dst
    438  *   (src, dst, 0xFF) returns src
    439  */
    440 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    441                                          U8CPU srcWeight) {
    442     unsigned scale = SkAlpha255To256(srcWeight);
    443     return SkFourByteInterp256(src, dst, scale);
    444 }
    445 
    446 /**
    447  * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
    448  */
    449 static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    450     const uint32_t mask = 0x00FF00FF;
    451     *ag = (color >> 8) & mask;
    452     *rb = color & mask;
    453 }
    454 
    455 /**
    456  * 0xAARRGGBB -> 0x00AA00GG00RR00BB
    457  * (note, ARGB -> AGRB)
    458  */
    459 static inline uint64_t SkSplay(uint32_t color) {
    460     const uint32_t mask = 0x00FF00FF;
    461     uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    462     agrb <<= 32;                          // 0x00AA00GG00000000
    463     agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    464     return agrb;
    465 }
    466 
    467 /**
    468  * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
    469  */
    470 static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    471     const uint32_t mask = 0xFF00FF00;
    472     return (ag & mask) | ((rb & mask) >> 8);
    473 }
    474 
    475 /**
    476  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
    477  * (note, AGRB -> ARGB)
    478  */
    479 static inline uint32_t SkUnsplay(uint64_t agrb) {
    480     const uint32_t mask = 0xFF00FF00;
    481     return SkPMColor(
    482         ((agrb & mask) >> 8) |   // 0x00RR00BB
    483         ((agrb >> 32) & mask));  // 0xAARRGGBB
    484 }
    485 
    486 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    487     SkASSERT(scale <= 256);
    488 
    489     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    490     uint32_t src_ag, src_rb, dst_ag, dst_rb;
    491     SkSplay(src, &src_ag, &src_rb);
    492     SkSplay(dst, &dst_ag, &dst_rb);
    493 
    494     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    495     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    496 
    497     return SkUnsplay(ret_ag, ret_rb);
    498 }
    499 
    500 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    501     SkASSERT(scale <= 256);
    502     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    503     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
    504 }
    505 
    506 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
    507 
    508 /**
    509  * Same as SkFourByteInterp256, but faster.
    510  */
    511 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    512     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    513     if (sizeof(void*) == 4) {
    514         return SkFastFourByteInterp256_32(src, dst, scale);
    515     } else {
    516         return SkFastFourByteInterp256_64(src, dst, scale);
    517     }
    518 }
    519 
    520 /**
    521  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
    522  * srcWeight scaling to [0, 256].
    523  */
    524 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    525                                              SkPMColor dst,
    526                                              U8CPU srcWeight) {
    527     SkASSERT(srcWeight <= 255);
    528     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    529     // scale = srcWeight + 1, but 7% slower
    530     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    531 }
    532 
    533 /**
    534  *  Same as SkPackARGB32, but this version guarantees to not check that the
    535  *  values are premultiplied in the debug version.
    536  */
    537 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    538     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    539            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    540 }
    541 
    542 static inline
    543 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    544     SkA32Assert(a);
    545     SkR32Assert(r);
    546     SkG32Assert(g);
    547     SkB32Assert(b);
    548 
    549     if (a != 255) {
    550         r = SkMulDiv255Round(r, a);
    551         g = SkMulDiv255Round(g, a);
    552         b = SkMulDiv255Round(b, a);
    553     }
    554     return SkPackARGB32(a, r, g, b);
    555 }
    556 
    557 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
    558 // inlined; forcing inlining significantly improves performance.
    559 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    560     uint32_t mask = 0xFF00FF;
    561 
    562     uint32_t rb = ((c & mask) * scale) >> 8;
    563     uint32_t ag = ((c >> 8) & mask) * scale;
    564     return (rb & mask) | (ag & ~mask);
    565 }
    566 
    567 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    568     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    569 }
    570 
    571 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    572     SkASSERT((unsigned)aa <= 255);
    573 
    574     unsigned src_scale = SkAlpha255To256(aa);
    575     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    576 
    577     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    578 }
    579 
    580 ////////////////////////////////////////////////////////////////////////////////////////////
    581 // Convert a 32bit pixel to a 16bit pixel (no dither)
    582 
    583 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    584 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    585 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    586 
    587 #ifdef SK_DEBUG
    588     static inline unsigned SkR32ToR16(unsigned r) {
    589         SkR32Assert(r);
    590         return SkR32ToR16_MACRO(r);
    591     }
    592     static inline unsigned SkG32ToG16(unsigned g) {
    593         SkG32Assert(g);
    594         return SkG32ToG16_MACRO(g);
    595     }
    596     static inline unsigned SkB32ToB16(unsigned b) {
    597         SkB32Assert(b);
    598         return SkB32ToB16_MACRO(b);
    599     }
    600 #else
    601     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    602     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    603     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    604 #endif
    605 
    606 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    607 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    608 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    609 
    610 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    611     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    612     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    613     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    614     return r | g | b;
    615 }
    616 
    617 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    618     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    619             (SkG32ToG16(g) << SK_G16_SHIFT) |
    620             (SkB32ToB16(b) << SK_B16_SHIFT);
    621 }
    622 
    623 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    624 
    625 /////////////////////////////////////////////////////////////////////////////////////////
    626 // Fast dither from 32->16
    627 
    628 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    629 
    630 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    631     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    632     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    633     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    634 
    635     return SkPackRGB16(r, g, b);
    636 }
    637 
    638 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    639     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    640 }
    641 
    642 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    643     It is now suitable for combining with a scaled expanded_rgb_16 color
    644     as in SkSrcOver32To16().
    645     We must do this 565 high-bit replication, in order for the subsequent add
    646     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    647     possible to overflow.
    648 */
    649 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    650     unsigned sr = SkPacked32ToR16(c);
    651     unsigned sg = SkPacked32ToG16(c);
    652     unsigned sb = SkPacked32ToB16(c);
    653 
    654     sr = (sr << 5) | sr;
    655     sg = (sg << 5) | (sg >> 1);
    656     sb = (sb << 5) | sb;
    657     return (sr << 11) | (sg << 21) | (sb << 0);
    658 }
    659 
    660 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    661     (with dirt in the high 16bits, so caller beware).
    662 */
    663 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    664     unsigned sr = SkGetPackedR32(src);
    665     unsigned sg = SkGetPackedG32(src);
    666     unsigned sb = SkGetPackedB32(src);
    667 
    668     unsigned dr = SkGetPackedR16(dst);
    669     unsigned dg = SkGetPackedG16(dst);
    670     unsigned db = SkGetPackedB16(dst);
    671 
    672     unsigned isa = 255 - SkGetPackedA32(src);
    673 
    674     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    675     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    676     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    677 
    678     return SkPackRGB16(dr, dg, db);
    679 }
    680 
    681 ////////////////////////////////////////////////////////////////////////////////////////////
    682 // Convert a 16bit pixel to a 32bit pixel
    683 
    684 static inline unsigned SkR16ToR32(unsigned r) {
    685     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    686 }
    687 
    688 static inline unsigned SkG16ToG32(unsigned g) {
    689     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    690 }
    691 
    692 static inline unsigned SkB16ToB32(unsigned b) {
    693     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    694 }
    695 
    696 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    697 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    698 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    699 
    700 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    701     SkASSERT(src == SkToU16(src));
    702 
    703     unsigned    r = SkPacked16ToR32(src);
    704     unsigned    g = SkPacked16ToG32(src);
    705     unsigned    b = SkPacked16ToB32(src);
    706 
    707     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    708     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    709     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    710 
    711     return SkPackARGB32(0xFF, r, g, b);
    712 }
    713 
    714 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    715 static inline SkColor SkPixel16ToColor(U16CPU src) {
    716     SkASSERT(src == SkToU16(src));
    717 
    718     unsigned    r = SkPacked16ToR32(src);
    719     unsigned    g = SkPacked16ToG32(src);
    720     unsigned    b = SkPacked16ToB32(src);
    721 
    722     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    723     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    724     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    725 
    726     return SkColorSetRGB(r, g, b);
    727 }
    728 
    729 ///////////////////////////////////////////////////////////////////////////////
    730 
    731 typedef uint16_t SkPMColor16;
    732 
    733 // Put in OpenGL order (r g b a)
    734 #define SK_A4444_SHIFT    0
    735 #define SK_R4444_SHIFT    12
    736 #define SK_G4444_SHIFT    8
    737 #define SK_B4444_SHIFT    4
    738 
    739 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    740 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    741 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    742 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    743 
    744 static inline U8CPU SkReplicateNibble(unsigned nib) {
    745     SkASSERT(nib <= 0xF);
    746     return (nib << 4) | nib;
    747 }
    748 
    749 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    750 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    751 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    752 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    753 
    754 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    755 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    756 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    757 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    758 
    759 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    760 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    761 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    762 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    763 
    764 #ifdef SK_DEBUG
    765 static inline void SkPMColor16Assert(U16CPU c) {
    766     unsigned a = SkGetPackedA4444(c);
    767     unsigned r = SkGetPackedR4444(c);
    768     unsigned g = SkGetPackedG4444(c);
    769     unsigned b = SkGetPackedB4444(c);
    770 
    771     SkASSERT(a <= 0xF);
    772     SkASSERT(r <= a);
    773     SkASSERT(g <= a);
    774     SkASSERT(b <= a);
    775 }
    776 #else
    777 #define SkPMColor16Assert(c)
    778 #endif
    779 
    780 static inline unsigned SkAlpha15To16(unsigned a) {
    781     SkASSERT(a <= 0xF);
    782     return a + (a >> 3);
    783 }
    784 
    785 #ifdef SK_DEBUG
    786     static inline int SkAlphaMul4(int value, int scale) {
    787         SkASSERT((unsigned)scale <= 0x10);
    788         return value * scale >> 4;
    789     }
    790 #else
    791     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    792 #endif
    793 
    794 static inline unsigned SkR4444ToR565(unsigned r) {
    795     SkASSERT(r <= 0xF);
    796     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    797 }
    798 
    799 static inline unsigned SkG4444ToG565(unsigned g) {
    800     SkASSERT(g <= 0xF);
    801     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    802 }
    803 
    804 static inline unsigned SkB4444ToB565(unsigned b) {
    805     SkASSERT(b <= 0xF);
    806     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    807 }
    808 
    809 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    810                                          unsigned g, unsigned b) {
    811     SkASSERT(a <= 0xF);
    812     SkASSERT(r <= a);
    813     SkASSERT(g <= a);
    814     SkASSERT(b <= a);
    815 
    816     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    817                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    818 }
    819 
    820 static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
    821     SkASSERT(scale <= 16);
    822 
    823     const unsigned mask = 0xF0F;    //gMask_0F0F;
    824 
    825 #if 0
    826     unsigned rb = ((c & mask) * scale) >> 4;
    827     unsigned ag = ((c >> 4) & mask) * scale;
    828     return (rb & mask) | (ag & ~mask);
    829 #else
    830     unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
    831     unsigned scaled_c = (expanded_c * scale) >> 4;
    832     return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
    833 #endif
    834 }
    835 
    836 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    837     once by a value up to 16.
    838 */
    839 static inline uint32_t SkExpand_4444(U16CPU c) {
    840     SkASSERT(c == (uint16_t)c);
    841 
    842     const unsigned mask = 0xF0F;    //gMask_0F0F;
    843     return (c & mask) | ((c & ~mask) << 12);
    844 }
    845 
    846 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    847     unsigned sa = SkGetPackedA4444(s);
    848     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    849     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    850     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    851 
    852     // To avoid overflow, we have to clear the low bit of the synthetic sg
    853     // if the src alpha is <= 7.
    854     // to see why, try blending 0x4444 on top of 565-white and watch green
    855     // overflow (sum == 64)
    856     sg &= ~(~(sa >> 3) & 1);
    857 
    858     unsigned scale = SkAlpha15To16(15 - sa);
    859     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    860     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    861     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    862 
    863 #if 0
    864     if (sg + dg > 63) {
    865         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    866     }
    867 #endif
    868     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    869 }
    870 
    871 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    872     SkASSERT((unsigned)scale16 <= 16);
    873 
    874     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    875 }
    876 
    877 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    878     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    879                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    880                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    881                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    882     return d | (d << 4);
    883 }
    884 
    885 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    886     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    887     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    888     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    889     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    890 }
    891 
    892 // cheap 2x2 dither
    893 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    894                                                U8CPU g, U8CPU b) {
    895     // to ensure that we stay a legal premultiplied color, we take the max()
    896     // of the truncated and dithered alpha values. If we didn't, cases like
    897     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    898     // which is not legal premultiplied, since a < color
    899     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    900     a = SkMax32(a >> 4, dithered_a);
    901     // these we just dither in place
    902     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    903     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    904     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    905 
    906     return SkPackARGB4444(a, r, g, b);
    907 }
    908 
    909 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    910     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    911                                 SkGetPackedG32(c), SkGetPackedB32(c));
    912 }
    913 
    914 /*  Assumes 16bit is in standard RGBA order.
    915     Transforms a normal ARGB_8888 into the same byte order as
    916     expanded ARGB_4444, but keeps each component 8bits
    917 */
    918 static inline uint32_t SkExpand_8888(SkPMColor c) {
    919     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    920             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    921             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    922             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    923 }
    924 
    925 /*  Undo the operation of SkExpand_8888, turning the argument back into
    926     a SkPMColor.
    927 */
    928 static inline SkPMColor SkCompact_8888(uint32_t c) {
    929     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    930             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    931             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    932             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    933 }
    934 
    935 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    936     but this routine just keeps the high 4bits of each component in the low
    937     4bits of the result (just like a newly expanded PMColor16).
    938 */
    939 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    940     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    941             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    942             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    943             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    944 }
    945 
    946 // takes two values and alternates them as part of a memset16
    947 // used for cheap 2x2 dithering when the colors are opaque
    948 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    949 
    950 ///////////////////////////////////////////////////////////////////////////////
    951 
    952 static inline int SkUpscale31To32(int value) {
    953     SkASSERT((unsigned)value <= 31);
    954     return value + (value >> 4);
    955 }
    956 
    957 static inline int SkBlend32(int src, int dst, int scale) {
    958     SkASSERT((unsigned)src <= 0xFF);
    959     SkASSERT((unsigned)dst <= 0xFF);
    960     SkASSERT((unsigned)scale <= 32);
    961     return dst + ((src - dst) * scale >> 5);
    962 }
    963 
    964 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    965                                      SkPMColor dst, uint16_t mask) {
    966     if (mask == 0) {
    967         return dst;
    968     }
    969 
    970     /*  We want all of these in 5bits, hence the shifts in case one of them
    971      *  (green) is 6bits.
    972      */
    973     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    974     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    975     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    976 
    977     // Now upscale them to 0..32, so we can use blend32
    978     maskR = SkUpscale31To32(maskR);
    979     maskG = SkUpscale31To32(maskG);
    980     maskB = SkUpscale31To32(maskB);
    981 
    982     // srcA has been upscaled to 256 before passed into this function
    983     maskR = maskR * srcA >> 8;
    984     maskG = maskG * srcA >> 8;
    985     maskB = maskB * srcA >> 8;
    986 
    987     int dstR = SkGetPackedR32(dst);
    988     int dstG = SkGetPackedG32(dst);
    989     int dstB = SkGetPackedB32(dst);
    990 
    991     // LCD blitting is only supported if the dst is known/required
    992     // to be opaque
    993     return SkPackARGB32(0xFF,
    994                         SkBlend32(srcR, dstR, maskR),
    995                         SkBlend32(srcG, dstG, maskG),
    996                         SkBlend32(srcB, dstB, maskB));
    997 }
    998 
    999 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
   1000                                            SkPMColor dst, uint16_t mask,
   1001                                            SkPMColor opaqueDst) {
   1002     if (mask == 0) {
   1003         return dst;
   1004     }
   1005 
   1006     if (0xFFFF == mask) {
   1007         return opaqueDst;
   1008     }
   1009 
   1010     /*  We want all of these in 5bits, hence the shifts in case one of them
   1011      *  (green) is 6bits.
   1012      */
   1013     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
   1014     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
   1015     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
   1016 
   1017     // Now upscale them to 0..32, so we can use blend32
   1018     maskR = SkUpscale31To32(maskR);
   1019     maskG = SkUpscale31To32(maskG);
   1020     maskB = SkUpscale31To32(maskB);
   1021 
   1022     int dstR = SkGetPackedR32(dst);
   1023     int dstG = SkGetPackedG32(dst);
   1024     int dstB = SkGetPackedB32(dst);
   1025 
   1026     // LCD blitting is only supported if the dst is known/required
   1027     // to be opaque
   1028     return SkPackARGB32(0xFF,
   1029                         SkBlend32(srcR, dstR, maskR),
   1030                         SkBlend32(srcG, dstG, maskG),
   1031                         SkBlend32(srcB, dstB, maskB));
   1032 }
   1033 
   1034 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
   1035                                   SkColor src, int width, SkPMColor) {
   1036     int srcA = SkColorGetA(src);
   1037     int srcR = SkColorGetR(src);
   1038     int srcG = SkColorGetG(src);
   1039     int srcB = SkColorGetB(src);
   1040 
   1041     srcA = SkAlpha255To256(srcA);
   1042 
   1043     for (int i = 0; i < width; i++) {
   1044         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
   1045     }
   1046 }
   1047 
   1048 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
   1049                                         SkColor src, int width,
   1050                                         SkPMColor opaqueDst) {
   1051     int srcR = SkColorGetR(src);
   1052     int srcG = SkColorGetG(src);
   1053     int srcB = SkColorGetB(src);
   1054 
   1055     for (int i = 0; i < width; i++) {
   1056         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
   1057                                     opaqueDst);
   1058     }
   1059 }
   1060 
   1061 #endif
   1062