Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColorPriv_DEFINED
      9 #define SkColorPriv_DEFINED
     10 
// Turn this on for extra debug checking when blending onto 565.
// It is enabled automatically whenever SK_DEBUG is defined.
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif
     15 
     16 #include "SkColor.h"
     17 #include "SkMath.h"
     18 
     19 //////////////////////////////////////////////////////////////////////////////
     20 
// Asserts (via SkASSERT) that x has no bits set outside the low 8 bits.
#define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
     22 
/*
 *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
 *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
 *
 *  For easier compatibility with Skia's GPU backend, we further restrict these
 *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
 *  not directly correspond to the same shift-order, since we have to take endianness
 *  into account.
 *
 *  Here we enforce this constraint.
 */

#ifdef SK_CPU_BENDIAN
    // Big-endian: the first channel named in the order gets the highest shift (24).
    #define SK_RGBA_R32_SHIFT   24
    #define SK_RGBA_G32_SHIFT   16
    #define SK_RGBA_B32_SHIFT   8
    #define SK_RGBA_A32_SHIFT   0

    #define SK_BGRA_B32_SHIFT   24
    #define SK_BGRA_G32_SHIFT   16
    #define SK_BGRA_R32_SHIFT   8
    #define SK_BGRA_A32_SHIFT   0
#else
    // Otherwise: the first channel named in the order gets the lowest shift (0).
    #define SK_RGBA_R32_SHIFT   0
    #define SK_RGBA_G32_SHIFT   8
    #define SK_RGBA_B32_SHIFT   16
    #define SK_RGBA_A32_SHIFT   24

    #define SK_BGRA_B32_SHIFT   0
    #define SK_BGRA_G32_SHIFT   8
    #define SK_BGRA_R32_SHIFT   16
    #define SK_BGRA_A32_SHIFT   24
#endif
     56 
// The two orderings are mutually exclusive.
#if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
    #error "can't define PMCOLOR to be RGBA and BGRA"
#endif

// True when all four SK_*32_SHIFT values equal the SK_RGBA_*32_SHIFT values.
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
    (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_RGBA_B32_SHIFT)

// True when all four SK_*32_SHIFT values equal the SK_BGRA_*32_SHIFT values.
#define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
    (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     SK_B32_SHIFT == SK_BGRA_B32_SHIFT)


// Index of each channel, computed as its bit shift divided by 8.
#define SK_A_INDEX  (SK_A32_SHIFT/8)
#define SK_R_INDEX  (SK_R32_SHIFT/8)
#define SK_G_INDEX  (SK_G32_SHIFT/8)
#define SK_B_INDEX  (SK_B32_SHIFT/8)

// If the ordering was chosen explicitly, it must agree with the shift values.
#if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
    #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
#endif

#if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
#endif

#if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_BGRA)
    // deduce which to define from the _SHIFT defines

    #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
        #define SK_PMCOLOR_IS_RGBA
    #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
        #define SK_PMCOLOR_IS_BGRA
    #else
        #error "need 32bit packing to be either RGBA or BGRA"
    #endif
#endif

// hide these now that we're done
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
#undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
    102 
    103 //////////////////////////////////////////////////////////////////////////////
    104 
    105 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
    106 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
    107 // no need to pass in the colortype to this function.
    108 static inline uint32_t SkSwizzle_RB(uint32_t c) {
    109     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
    110 
    111     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    112     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    113     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
    114 }
    115 
    116 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    117     SkASSERT_IS_BYTE(a);
    118     SkASSERT_IS_BYTE(r);
    119     SkASSERT_IS_BYTE(g);
    120     SkASSERT_IS_BYTE(b);
    121     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
    122            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
    123 }
    124 
    125 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    126     SkASSERT_IS_BYTE(a);
    127     SkASSERT_IS_BYTE(r);
    128     SkASSERT_IS_BYTE(g);
    129     SkASSERT_IS_BYTE(b);
    130     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
    131            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
    132 }
    133 
    134 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
    135 #ifdef SK_PMCOLOR_IS_RGBA
    136     return c;
    137 #else
    138     return SkSwizzle_RB(c);
    139 #endif
    140 }
    141 
    142 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
    143 #ifdef SK_PMCOLOR_IS_BGRA
    144     return c;
    145 #else
    146     return SkSwizzle_RB(c);
    147 #endif
    148 }
    149 
    150 //////////////////////////////////////////////////////////////////////////////
    151 
///@{
/** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .
    The three float coefficients below sum to 1. */
#define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
#define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
#define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
///@}

///@{
/** A float value which specifies this channel's contribution to luminance.
    Each is an alias for the corresponding SK_ITU_BT709_LUM_COEFF_X. */
#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
///@}
    165 
    166 /** Computes the luminance from the given r, g, and b in accordance with
    167     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
    168 */
    169 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    170     //The following is
    171     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    172     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    173     return (r * 54 + g * 183 + b * 19) >> 8;
    174 }
    175 
    176 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    177     byte into a scale value, so that we can say scale * value >> 8 instead of
    178     alpha * value / 255.
    179 
    180     In debugging, asserts that alpha is 0..255
    181 */
    182 static inline unsigned SkAlpha255To256(U8CPU alpha) {
    183     SkASSERT(SkToU8(alpha) == alpha);
    184     // this one assues that blending on top of an opaque dst keeps it that way
    185     // even though it is less accurate than a+(a>>7) for non-opaque dsts
    186     return alpha + 1;
    187 }
    188 
    189 /**
    190  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
    191  *  This is slightly more accurate than SkAlpha255To256.
    192  */
    193 static inline unsigned Sk255To256(U8CPU value) {
    194     SkASSERT(SkToU8(value) == value);
    195     return value + (value >> 7);
    196 }
    197 
    198 /** Multiplify value by 0..256, and shift the result down 8
    199     (i.e. return (value * alpha256) >> 8)
    200  */
    201 #define SkAlphaMul(value, alpha256)     (((value) * (alpha256)) >> 8)
    202 
    203 /** Calculates 256 - (value * alpha256) / 255 in range [0,256],
    204  *  for [0,255] value and [0,256] alpha256.
    205  */
    206 static inline U16CPU SkAlphaMulInv256(U16CPU value, U16CPU alpha256) {
    207     unsigned prod = 0xFFFF - value * alpha256;
    208     return (prod + (prod >> 8)) >> 8;
    209 }
    210 
    211 //  The caller may want negative values, so keep all params signed (int)
    212 //  so we don't accidentally slip into unsigned math and lose the sign
    213 //  extension when we shift (in SkAlphaMul)
    214 static inline int SkAlphaBlend(int src, int dst, int scale256) {
    215     SkASSERT((unsigned)scale256 <= 256);
    216     return dst + SkAlphaMul(src - dst, scale256);
    217 }
    218 
    219 /**
    220  *  Returns (src * alpha + dst * (255 - alpha)) / 255
    221  *
    222  *  This is more accurate than SkAlphaBlend, but slightly slower
    223  */
    224 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    225     SkASSERT((int16_t)src == src);
    226     SkASSERT((int16_t)dst == dst);
    227     SkASSERT((uint8_t)alpha == alpha);
    228 
    229     int prod = (src - dst) * alpha + 128;
    230     prod = (prod + (prod >> 8)) >> 8;
    231     return dst + prod;
    232 }
    233 
    234 static inline U8CPU SkUnitScalarClampToByte(SkScalar x) {
    235     return static_cast<U8CPU>(SkScalarPin(x, 0, 1) * 255 + 0.5);
    236 }
    237 
// Bit widths of the three channels of a 16-bit (5-6-5) pixel.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit positions: blue occupies the low bits, green sits above it, red on top.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Largest value each channel can hold (all of its bits set).
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one channel from a packed 16-bit color.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Assert that a channel value does not exceed its mask.
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    257 
    258 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    259     SkASSERT(r <= SK_R16_MASK);
    260     SkASSERT(g <= SK_G16_MASK);
    261     SkASSERT(b <= SK_B16_MASK);
    262 
    263     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    264 }
    265 
// Each channel's mask shifted to that channel's position inside a 16-bit pixel.
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    269 
    270 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    271     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    272 */
    273 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    274     SkASSERT(c == (uint16_t)c);
    275 
    276     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    277 }
    278 
    279 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    280     color value. The computation yields only 16bits of valid data, but we claim
    281     to return 32bits, so that the compiler won't generate extra instructions to
    282     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    283     up to the caller to safely ignore them.
    284 */
    285 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    286     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    287 }
    288 
    289 /** Scale the 16bit color value by the 0..256 scale parameter.
    290     The computation yields only 16bits of valid data, but we claim
    291     to return 32bits, so that the compiler won't generate extra instructions to
    292     "clean" the top 16bits.
    293 */
    294 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    295     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    296 }
    297 
    298 // this helper explicitly returns a clean 16bit value (but slower)
    299 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    300 
    301 /** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter.
    302     The computation yields only 16bits of valid data, but we claim to return
    303     32bits, so that the compiler won't generate extra instructions to "clean"
    304     the top 16bits.
    305 */
    306 static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
    307     uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
    308     return SkCompact_rgb_16((src_expand + dst_expand) >> 5);
    309 }
    310 
    311 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    312     The computation yields only 16bits of valid data, but we claim
    313     to return 32bits, so that the compiler won't generate extra instructions to
    314     "clean" the top 16bits.
    315 */
    316 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    317     SkASSERT((unsigned)srcScale <= 256);
    318 
    319     srcScale >>= 3;
    320 
    321     uint32_t src32 = SkExpand_rgb_16(src);
    322     uint32_t dst32 = SkExpand_rgb_16(dst);
    323     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    324 }
    325 
    326 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    327                                 int srcScale, int count) {
    328     SkASSERT(count > 0);
    329     SkASSERT((unsigned)srcScale <= 256);
    330 
    331     srcScale >>= 3;
    332 
    333     do {
    334         uint32_t src32 = SkExpand_rgb_16(*src++);
    335         uint32_t dst32 = SkExpand_rgb_16(*dst);
    336         *dst++ = static_cast<uint16_t>(
    337             SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
    338     } while (--count > 0);
    339 }
    340 
    341 #ifdef SK_DEBUG
    342     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    343         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    344         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    345         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    346 
    347         return a + b;
    348     }
    349 #else
    350     #define SkRGB16Add(a, b)  ((a) + (b))
    351 #endif
    352 
    353 ///////////////////////////////////////////////////////////////////////////////
    354 
// Every channel of a 32-bit pixel is 8 bits wide.
#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

// Largest value each channel can hold (all of its bits set).
#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

// Extract one channel: shift it up so it occupies bits 24..31, convert to
// uint32_t, then shift it down to bits 0..7.
#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)

// Assert that a channel value does not exceed its mask.
#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    374 
// SkPMColorAssert(c): in debug builds, asserts that the alpha of c is within
// SK_A32_MASK and that none of its red, green, or blue channels exceeds that
// alpha. In non-debug builds it expands to nothing.
// NOTE(review): the verbose local names inside the macro are presumably chosen
// to avoid colliding with identifiers at the call site -- confirm.
#ifdef SK_DEBUG
    #define SkPMColorAssert(color_value)                                    \
        do {                                                                \
            SkPMColor pm_color_value = (color_value);                       \
            uint32_t alpha_color_value = SkGetPackedA32(pm_color_value);    \
            SkA32Assert(alpha_color_value);                                 \
            SkASSERT(SkGetPackedR32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedG32(pm_color_value) <= alpha_color_value);  \
            SkASSERT(SkGetPackedB32(pm_color_value) <= alpha_color_value);  \
        } while (false)
#else
    #define SkPMColorAssert(c)
#endif
    388 
    389 static inline bool SkPMColorValid(SkPMColor c) {
    390     auto a = SkGetPackedA32(c);
    391     bool valid = a <= SK_A32_MASK
    392               && SkGetPackedR32(c) <= a
    393               && SkGetPackedG32(c) <= a
    394               && SkGetPackedB32(c) <= a;
    395     if (valid) {
    396         SkPMColorAssert(c);  // Make sure we're consistent when it counts.
    397     }
    398     return valid;
    399 }
    400 
    401 /**
    402  *  Pack the components into a SkPMColor, checking (in the debug version) that
    403  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    404  */
    405 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    406     SkA32Assert(a);
    407     SkASSERT(r <= a);
    408     SkASSERT(g <= a);
    409     SkASSERT(b <= a);
    410 
    411     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    412            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    413 }
    414 
    415 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    416     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
    417                               SkGetPackedG32(c), SkGetPackedB32(c));
    418 }
    419 
    420 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    421     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
    422                               SkGetPackedG32(c), SkGetPackedB32(c));
    423 }
    424 
    425 /**
    426  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    427  * utility functions. Third parameter controls blending of the first two:
    428  *   (src, dst, 0) returns dst
    429  *   (src, dst, 0xFF) returns src
    430  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    431  */
    432 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    433                                          unsigned scale) {
    434     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    435     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    436     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    437     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    438 
    439     return SkPackARGB32(a, r, g, b);
    440 }
    441 
    442 /**
    443  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    444  * utility functions. Third parameter controls blending of the first two:
    445  *   (src, dst, 0) returns dst
    446  *   (src, dst, 0xFF) returns src
    447  */
    448 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    449                                          U8CPU srcWeight) {
    450     unsigned scale = SkAlpha255To256(srcWeight);
    451     return SkFourByteInterp256(src, dst, scale);
    452 }
    453 
    454 /**
    455  * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
    456  */
    457 static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    458     const uint32_t mask = 0x00FF00FF;
    459     *ag = (color >> 8) & mask;
    460     *rb = color & mask;
    461 }
    462 
    463 /**
    464  * 0xAARRGGBB -> 0x00AA00GG00RR00BB
    465  * (note, ARGB -> AGRB)
    466  */
    467 static inline uint64_t SkSplay(uint32_t color) {
    468     const uint32_t mask = 0x00FF00FF;
    469     uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    470     agrb <<= 32;                          // 0x00AA00GG00000000
    471     agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    472     return agrb;
    473 }
    474 
    475 /**
    476  * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
    477  */
    478 static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    479     const uint32_t mask = 0xFF00FF00;
    480     return (ag & mask) | ((rb & mask) >> 8);
    481 }
    482 
    483 /**
    484  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
    485  * (note, AGRB -> ARGB)
    486  */
    487 static inline uint32_t SkUnsplay(uint64_t agrb) {
    488     const uint32_t mask = 0xFF00FF00;
    489     return SkPMColor(
    490         ((agrb & mask) >> 8) |   // 0x00RR00BB
    491         ((agrb >> 32) & mask));  // 0xAARRGGBB
    492 }
    493 
    494 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    495     SkASSERT(scale <= 256);
    496 
    497     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    498     uint32_t src_ag, src_rb, dst_ag, dst_rb;
    499     SkSplay(src, &src_ag, &src_rb);
    500     SkSplay(dst, &dst_ag, &dst_rb);
    501 
    502     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    503     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    504 
    505     return SkUnsplay(ret_ag, ret_rb);
    506 }
    507 
    508 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    509     SkASSERT(scale <= 256);
    510     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    511     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
    512 }
    513 
    514 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
    515 
    516 /**
    517  * Same as SkFourByteInterp256, but faster.
    518  */
    519 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    520     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    521     if (sizeof(void*) == 4) {
    522         return SkFastFourByteInterp256_32(src, dst, scale);
    523     } else {
    524         return SkFastFourByteInterp256_64(src, dst, scale);
    525     }
    526 }
    527 
    528 /**
    529  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
    530  * srcWeight scaling to [0, 256].
    531  */
    532 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    533                                              SkPMColor dst,
    534                                              U8CPU srcWeight) {
    535     SkASSERT(srcWeight <= 255);
    536     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    537     // scale = srcWeight + 1, but 7% slower
    538     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    539 }
    540 
    541 /**
    542  *  Same as SkPackARGB32, but this version guarantees to not check that the
    543  *  values are premultiplied in the debug version.
    544  */
    545 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    546     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    547            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    548 }
    549 
    550 static inline
    551 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    552     SkA32Assert(a);
    553     SkR32Assert(r);
    554     SkG32Assert(g);
    555     SkB32Assert(b);
    556 
    557     if (a != 255) {
    558         r = SkMulDiv255Round(r, a);
    559         g = SkMulDiv255Round(g, a);
    560         b = SkMulDiv255Round(b, a);
    561     }
    562     return SkPackARGB32(a, r, g, b);
    563 }
    564 
    565 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
    566 // inlined; forcing inlining significantly improves performance.
    567 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    568     uint32_t mask = 0xFF00FF;
    569 
    570     uint32_t rb = ((c & mask) * scale) >> 8;
    571     uint32_t ag = ((c >> 8) & mask) * scale;
    572     return (rb & mask) | (ag & ~mask);
    573 }
    574 
    575 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    576     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    577 }
    578 
    579 /**
    580  * Interpolates between colors src and dst using [0,256] scale.
    581  */
    582 static inline SkPMColor SkPMLerp(SkPMColor src, SkPMColor dst, unsigned scale) {
    583     return SkFastFourByteInterp256(src, dst, scale);
    584 }
    585 
    586 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    587     SkASSERT((unsigned)aa <= 255);
    588 
    589     unsigned src_scale = SkAlpha255To256(aa);
    590     unsigned dst_scale = SkAlphaMulInv256(SkGetPackedA32(src), src_scale);
    591 
    592     const uint32_t mask = 0xFF00FF;
    593 
    594     uint32_t src_rb = (src & mask) * src_scale;
    595     uint32_t src_ag = ((src >> 8) & mask) * src_scale;
    596 
    597     uint32_t dst_rb = (dst & mask) * dst_scale;
    598     uint32_t dst_ag = ((dst >> 8) & mask) * dst_scale;
    599 
    600     return (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & ~mask);
    601 }
    602 
    603 ////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

// Reduce an 8-bit channel to its 16-bit-pixel width by dropping the low bits
// (the difference between the 32-bit and 16-bit channel widths).
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    609 
    610 #ifdef SK_DEBUG
    611     static inline unsigned SkR32ToR16(unsigned r) {
    612         SkR32Assert(r);
    613         return SkR32ToR16_MACRO(r);
    614     }
    615     static inline unsigned SkG32ToG16(unsigned g) {
    616         SkG32Assert(g);
    617         return SkG32ToG16_MACRO(g);
    618     }
    619     static inline unsigned SkB32ToB16(unsigned b) {
    620         SkB32Assert(b);
    621         return SkB32ToB16_MACRO(b);
    622     }
    623 #else
    624     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    625     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    626     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    627 #endif
    628 
// Pull one channel out of a packed 32-bit pixel already reduced to its 16-bit
// width: shift past the channel's position plus its dropped low bits, then
// mask to the 16-bit channel width.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    632 
    633 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    634     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    635     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    636     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    637     return r | g | b;
    638 }
    639 
    640 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    641     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    642             (SkG32ToG16(g) << SK_G16_SHIFT) |
    643             (SkB32ToB16(b) << SK_B16_SHIFT);
    644 }
    645 
// SkPixel32ToPixel16 with its result passed through SkToU16.
#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    647 
    648 /////////////////////////////////////////////////////////////////////////////////////////
    649 // Fast dither from 32->16
    650 
    651 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    652 
    653 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    654     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    655     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    656     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    657 
    658     return SkPackRGB16(r, g, b);
    659 }
    660 
    661 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    662     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    663 }
    664 
    665 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    666     It is now suitable for combining with a scaled expanded_rgb_16 color
    667     as in SkSrcOver32To16().
    668     We must do this 565 high-bit replication, in order for the subsequent add
    669     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    670     possible to overflow.
    671 */
    672 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    673     unsigned sr = SkPacked32ToR16(c);
    674     unsigned sg = SkPacked32ToG16(c);
    675     unsigned sb = SkPacked32ToB16(c);
    676 
    677     sr = (sr << 5) | sr;
    678     sg = (sg << 5) | (sg >> 1);
    679     sb = (sb << 5) | sb;
    680     return (sr << 11) | (sg << 21) | (sb << 0);
    681 }
    682 
    683 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    684     (with dirt in the high 16bits, so caller beware).
    685 */
    686 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    687     unsigned sr = SkGetPackedR32(src);
    688     unsigned sg = SkGetPackedG32(src);
    689     unsigned sb = SkGetPackedB32(src);
    690 
    691     unsigned dr = SkGetPackedR16(dst);
    692     unsigned dg = SkGetPackedG16(dst);
    693     unsigned db = SkGetPackedB16(dst);
    694 
    695     unsigned isa = 255 - SkGetPackedA32(src);
    696 
    697     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    698     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    699     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    700 
    701     return SkPackRGB16(dr, dg, db);
    702 }
    703 
    704 ////////////////////////////////////////////////////////////////////////////////////////////
    705 // Convert a 16bit pixel to a 32bit pixel
    706 
    707 static inline unsigned SkR16ToR32(unsigned r) {
    708     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    709 }
    710 
    711 static inline unsigned SkG16ToG32(unsigned g) {
    712     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    713 }
    714 
    715 static inline unsigned SkB16ToB32(unsigned b) {
    716     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    717 }
    718 
// Extract one channel from a packed 16-bit pixel and widen it to 8 bits.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    722 
    723 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    724     SkASSERT(src == SkToU16(src));
    725 
    726     unsigned    r = SkPacked16ToR32(src);
    727     unsigned    g = SkPacked16ToG32(src);
    728     unsigned    b = SkPacked16ToB32(src);
    729 
    730     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    731     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    732     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    733 
    734     return SkPackARGB32(0xFF, r, g, b);
    735 }
    736 
    737 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    738 static inline SkColor SkPixel16ToColor(U16CPU src) {
    739     SkASSERT(src == SkToU16(src));
    740 
    741     unsigned    r = SkPacked16ToR32(src);
    742     unsigned    g = SkPacked16ToG32(src);
    743     unsigned    b = SkPacked16ToB32(src);
    744 
    745     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    746     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    747     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    748 
    749     return SkColorSetRGB(r, g, b);
    750 }
    751 
    752 ///////////////////////////////////////////////////////////////////////////////
    753 
    754 typedef uint16_t SkPMColor16;
    755 
    756 // Put in OpenGL order (r g b a)
    757 #define SK_A4444_SHIFT    0
    758 #define SK_R4444_SHIFT    12
    759 #define SK_G4444_SHIFT    8
    760 #define SK_B4444_SHIFT    4
    761 
    762 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    763 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    764 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    765 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    766 
    767 static inline U8CPU SkReplicateNibble(unsigned nib) {
    768     SkASSERT(nib <= 0xF);
    769     return (nib << 4) | nib;
    770 }
    771 
    772 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    773 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    774 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    775 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    776 
    777 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    778 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    779 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    780 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    781 
    782 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    783 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    784 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    785 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    786 
    787 #ifdef SK_DEBUG
    788 static inline void SkPMColor16Assert(U16CPU c) {
    789     unsigned a = SkGetPackedA4444(c);
    790     unsigned r = SkGetPackedR4444(c);
    791     unsigned g = SkGetPackedG4444(c);
    792     unsigned b = SkGetPackedB4444(c);
    793 
    794     SkASSERT(a <= 0xF);
    795     SkASSERT(r <= a);
    796     SkASSERT(g <= a);
    797     SkASSERT(b <= a);
    798 }
    799 #else
    800 #define SkPMColor16Assert(c)
    801 #endif
    802 
    803 static inline unsigned SkAlpha15To16(unsigned a) {
    804     SkASSERT(a <= 0xF);
    805     return a + (a >> 3);
    806 }
    807 
    808 #ifdef SK_DEBUG
    809     static inline int SkAlphaMul4(int value, int scale) {
    810         SkASSERT((unsigned)scale <= 0x10);
    811         return value * scale >> 4;
    812     }
    813 #else
    814     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    815 #endif
    816 
    817 static inline unsigned SkR4444ToR565(unsigned r) {
    818     SkASSERT(r <= 0xF);
    819     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    820 }
    821 
    822 static inline unsigned SkG4444ToG565(unsigned g) {
    823     SkASSERT(g <= 0xF);
    824     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    825 }
    826 
    827 static inline unsigned SkB4444ToB565(unsigned b) {
    828     SkASSERT(b <= 0xF);
    829     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    830 }
    831 
    832 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    833                                          unsigned g, unsigned b) {
    834     SkASSERT(a <= 0xF);
    835     SkASSERT(r <= a);
    836     SkASSERT(g <= a);
    837     SkASSERT(b <= a);
    838 
    839     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    840                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    841 }
    842 
    843 static inline SkPMColor16 SkAlphaMulQ4(SkPMColor16 c, int scale) {
    844     SkASSERT(scale <= 16);
    845 
    846     const unsigned mask = 0xF0F;    //gMask_0F0F;
    847 
    848 #if 0
    849     unsigned rb = ((c & mask) * scale) >> 4;
    850     unsigned ag = ((c >> 4) & mask) * scale;
    851     return (rb & mask) | (ag & ~mask);
    852 #else
    853     unsigned expanded_c = (c & mask) | ((c & (mask << 4)) << 12);
    854     unsigned scaled_c = (expanded_c * scale) >> 4;
    855     return (scaled_c & mask) | ((scaled_c >> 12) & (mask << 4));
    856 #endif
    857 }
    858 
    859 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    860     once by a value up to 16.
    861 */
    862 static inline uint32_t SkExpand_4444(U16CPU c) {
    863     SkASSERT(c == (uint16_t)c);
    864 
    865     const unsigned mask = 0xF0F;    //gMask_0F0F;
    866     return (c & mask) | ((c & ~mask) << 12);
    867 }
    868 
    869 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    870     unsigned sa = SkGetPackedA4444(s);
    871     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    872     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    873     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    874 
    875     // To avoid overflow, we have to clear the low bit of the synthetic sg
    876     // if the src alpha is <= 7.
    877     // to see why, try blending 0x4444 on top of 565-white and watch green
    878     // overflow (sum == 64)
    879     sg &= ~(~(sa >> 3) & 1);
    880 
    881     unsigned scale = SkAlpha15To16(15 - sa);
    882     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    883     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    884     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    885 
    886 #if 0
    887     if (sg + dg > 63) {
    888         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    889     }
    890 #endif
    891     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    892 }
    893 
    894 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    895     SkASSERT((unsigned)scale16 <= 16);
    896 
    897     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    898 }
    899 
    900 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    901     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    902                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    903                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    904                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    905     return d | (d << 4);
    906 }
    907 
    908 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    909     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    910     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    911     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    912     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    913 }
    914 
    915 // cheap 2x2 dither
    916 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    917                                                U8CPU g, U8CPU b) {
    918     // to ensure that we stay a legal premultiplied color, we take the max()
    919     // of the truncated and dithered alpha values. If we didn't, cases like
    920     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    921     // which is not legal premultiplied, since a < color
    922     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    923     a = SkMax32(a >> 4, dithered_a);
    924     // these we just dither in place
    925     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    926     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    927     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    928 
    929     return SkPackARGB4444(a, r, g, b);
    930 }
    931 
    932 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    933     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    934                                 SkGetPackedG32(c), SkGetPackedB32(c));
    935 }
    936 
    937 /*  Assumes 16bit is in standard RGBA order.
    938     Transforms a normal ARGB_8888 into the same byte order as
    939     expanded ARGB_4444, but keeps each component 8bits
    940 */
    941 static inline uint32_t SkExpand_8888(SkPMColor c) {
    942     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    943             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    944             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    945             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    946 }
    947 
    948 /*  Undo the operation of SkExpand_8888, turning the argument back into
    949     a SkPMColor.
    950 */
    951 static inline SkPMColor SkCompact_8888(uint32_t c) {
    952     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    953             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    954             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    955             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    956 }
    957 
    958 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    959     but this routine just keeps the high 4bits of each component in the low
    960     4bits of the result (just like a newly expanded PMColor16).
    961 */
    962 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    963     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    964             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    965             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    966             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    967 }
    968 
    969 // takes two values and alternamtes them as part of a memset16
    970 // used for cheap 2x2 dithering when the colors are opaque
    971 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    972 
    973 ///////////////////////////////////////////////////////////////////////////////
    974 
    975 static inline int SkUpscale31To32(int value) {
    976     SkASSERT((unsigned)value <= 31);
    977     return value + (value >> 4);
    978 }
    979 
    980 static inline int SkBlend32(int src, int dst, int scale) {
    981     SkASSERT((unsigned)src <= 0xFF);
    982     SkASSERT((unsigned)dst <= 0xFF);
    983     SkASSERT((unsigned)scale <= 32);
    984     return dst + ((src - dst) * scale >> 5);
    985 }
    986 
    987 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    988                                      SkPMColor dst, uint16_t mask) {
    989     if (mask == 0) {
    990         return dst;
    991     }
    992 
    993     /*  We want all of these in 5bits, hence the shifts in case one of them
    994      *  (green) is 6bits.
    995      */
    996     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    997     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    998     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    999 
   1000     // Now upscale them to 0..32, so we can use blend32
   1001     maskR = SkUpscale31To32(maskR);
   1002     maskG = SkUpscale31To32(maskG);
   1003     maskB = SkUpscale31To32(maskB);
   1004 
   1005     // srcA has been upscaled to 256 before passed into this function
   1006     maskR = maskR * srcA >> 8;
   1007     maskG = maskG * srcA >> 8;
   1008     maskB = maskB * srcA >> 8;
   1009 
   1010     int dstR = SkGetPackedR32(dst);
   1011     int dstG = SkGetPackedG32(dst);
   1012     int dstB = SkGetPackedB32(dst);
   1013 
   1014     // LCD blitting is only supported if the dst is known/required
   1015     // to be opaque
   1016     return SkPackARGB32(0xFF,
   1017                         SkBlend32(srcR, dstR, maskR),
   1018                         SkBlend32(srcG, dstG, maskG),
   1019                         SkBlend32(srcB, dstB, maskB));
   1020 }
   1021 
   1022 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
   1023                                            SkPMColor dst, uint16_t mask,
   1024                                            SkPMColor opaqueDst) {
   1025     if (mask == 0) {
   1026         return dst;
   1027     }
   1028 
   1029     if (0xFFFF == mask) {
   1030         return opaqueDst;
   1031     }
   1032 
   1033     /*  We want all of these in 5bits, hence the shifts in case one of them
   1034      *  (green) is 6bits.
   1035      */
   1036     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
   1037     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
   1038     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
   1039 
   1040     // Now upscale them to 0..32, so we can use blend32
   1041     maskR = SkUpscale31To32(maskR);
   1042     maskG = SkUpscale31To32(maskG);
   1043     maskB = SkUpscale31To32(maskB);
   1044 
   1045     int dstR = SkGetPackedR32(dst);
   1046     int dstG = SkGetPackedG32(dst);
   1047     int dstB = SkGetPackedB32(dst);
   1048 
   1049     // LCD blitting is only supported if the dst is known/required
   1050     // to be opaque
   1051     return SkPackARGB32(0xFF,
   1052                         SkBlend32(srcR, dstR, maskR),
   1053                         SkBlend32(srcG, dstG, maskG),
   1054                         SkBlend32(srcB, dstB, maskB));
   1055 }
   1056 
   1057 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
   1058                                   SkColor src, int width, SkPMColor) {
   1059     int srcA = SkColorGetA(src);
   1060     int srcR = SkColorGetR(src);
   1061     int srcG = SkColorGetG(src);
   1062     int srcB = SkColorGetB(src);
   1063 
   1064     srcA = SkAlpha255To256(srcA);
   1065 
   1066     for (int i = 0; i < width; i++) {
   1067         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
   1068     }
   1069 }
   1070 
   1071 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
   1072                                         SkColor src, int width,
   1073                                         SkPMColor opaqueDst) {
   1074     int srcR = SkColorGetR(src);
   1075     int srcG = SkColorGetG(src);
   1076     int srcB = SkColorGetB(src);
   1077 
   1078     for (int i = 0; i < width; i++) {
   1079         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
   1080                                     opaqueDst);
   1081     }
   1082 }
   1083 
   1084 #endif
   1085