Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColorPriv_DEFINED
      9 #define SkColorPriv_DEFINED
     10 
     11 // turn this on for extra debug checking when blending onto 565
     12 #ifdef SK_DEBUG
     13     #define CHECK_FOR_565_OVERFLOW
     14 #endif
     15 
     16 #include "SkColor.h"
     17 #include "SkMath.h"
     18 
     19 //////////////////////////////////////////////////////////////////////////////
     20 
     21 #define SkASSERT_IS_BYTE(x)     SkASSERT(0 == ((x) & ~0xFF))
     22 
     23 /*
     24  *  Skia's 32bit backend only supports 1 swizzle order at a time (compile-time).
     25  *  This is specified by 4 defines SK_A32_SHIFT, SK_R32_SHIFT, ... for G and B.
     26  *
     27  *  For easier compatibility with Skia's GPU backend, we further restrict these
     28  *  to either (in memory-byte-order) RGBA or BGRA. Note that this "order" does
     29  *  not directly correspond to the same shift-order, since we have to take endianness
     30  *  into account.
     31  *
     32  *  Here we enforce this constraint.
     33  */
     34 
     35 #ifdef SK_CPU_BENDIAN
     36     #define SK_RGBA_R32_SHIFT   24
     37     #define SK_RGBA_G32_SHIFT   16
     38     #define SK_RGBA_B32_SHIFT   8
     39     #define SK_RGBA_A32_SHIFT   0
     40 
     41     #define SK_BGRA_B32_SHIFT   24
     42     #define SK_BGRA_G32_SHIFT   16
     43     #define SK_BGRA_R32_SHIFT   8
     44     #define SK_BGRA_A32_SHIFT   0
     45 #else
     46     #define SK_RGBA_R32_SHIFT   0
     47     #define SK_RGBA_G32_SHIFT   8
     48     #define SK_RGBA_B32_SHIFT   16
     49     #define SK_RGBA_A32_SHIFT   24
     50 
     51     #define SK_BGRA_B32_SHIFT   0
     52     #define SK_BGRA_G32_SHIFT   8
     53     #define SK_BGRA_R32_SHIFT   16
     54     #define SK_BGRA_A32_SHIFT   24
     55 #endif
     56 
     57 #if defined(SK_PMCOLOR_IS_RGBA) && defined(SK_PMCOLOR_IS_BGRA)
     58     #error "can't define PMCOLOR to be RGBA and BGRA"
     59 #endif
     60 
     61 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA  \
     62     (SK_A32_SHIFT == SK_RGBA_A32_SHIFT &&    \
     63      SK_R32_SHIFT == SK_RGBA_R32_SHIFT &&    \
     64      SK_G32_SHIFT == SK_RGBA_G32_SHIFT &&    \
     65      SK_B32_SHIFT == SK_RGBA_B32_SHIFT)
     66 
     67 #define LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA  \
     68     (SK_A32_SHIFT == SK_BGRA_A32_SHIFT &&    \
     69      SK_R32_SHIFT == SK_BGRA_R32_SHIFT &&    \
     70      SK_G32_SHIFT == SK_BGRA_G32_SHIFT &&    \
     71      SK_B32_SHIFT == SK_BGRA_B32_SHIFT)
     72 
     73 
     74 #if defined(SK_PMCOLOR_IS_RGBA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     75     #error "SK_PMCOLOR_IS_RGBA does not match SK_*32_SHIFT values"
     76 #endif
     77 
     78 #if defined(SK_PMCOLOR_IS_BGRA) && !LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     79     #error "SK_PMCOLOR_IS_BGRA does not match SK_*32_SHIFT values"
     80 #endif
     81 
     82 #if !defined(SK_PMCOLOR_IS_RGBA) && !defined(SK_PMCOLOR_IS_RGBA)
     83     // deduce which to define from the _SHIFT defines
     84 
     85     #if LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     86         #define SK_PMCOLOR_IS_RGBA
     87     #elif LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     88         #define SK_PMCOLOR_IS_BGRA
     89     #else
     90         #error "need 32bit packing to be either RGBA or BGRA"
     91     #endif
     92 #endif
     93 
     94 // hide these now that we're done
     95 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_RGBA
     96 #undef LOCAL_PMCOLOR_SHIFTS_EQUIVALENT_TO_BGRA
     97 
     98 //////////////////////////////////////////////////////////////////////////////
     99 
    100 // Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the
    101 // pair of them are in the same 2 slots in both RGBA and BGRA, thus there is
    102 // no need to pass in the colortype to this function.
    103 static inline uint32_t SkSwizzle_RB(uint32_t c) {
    104     static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT);
    105 
    106     unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF;
    107     unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF;
    108     return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT);
    109 }
    110 
    111 static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    112     SkASSERT_IS_BYTE(a);
    113     SkASSERT_IS_BYTE(r);
    114     SkASSERT_IS_BYTE(g);
    115     SkASSERT_IS_BYTE(b);
    116     return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) |
    117            (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT);
    118 }
    119 
    120 static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    121     SkASSERT_IS_BYTE(a);
    122     SkASSERT_IS_BYTE(r);
    123     SkASSERT_IS_BYTE(g);
    124     SkASSERT_IS_BYTE(b);
    125     return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) |
    126            (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT);
    127 }
    128 
    129 static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) {
    130 #ifdef SK_PMCOLOR_IS_RGBA
    131     return c;
    132 #else
    133     return SkSwizzle_RB(c);
    134 #endif
    135 }
    136 
    137 static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) {
    138 #ifdef SK_PMCOLOR_IS_BGRA
    139     return c;
    140 #else
    141     return SkSwizzle_RB(c);
    142 #endif
    143 }
    144 
    145 //////////////////////////////////////////////////////////////////////////////
    146 
    147 ///@{
    148 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
    149 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
    150 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
    151 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
    152 ///@}
    153 
    154 ///@{
    155 /** A float value which specifies this channel's contribution to luminance. */
    156 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
    157 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
    158 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
    159 ///@}
    160 
    161 /** Computes the luminance from the given r, g, and b in accordance with
    162     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
    163 */
    164 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
    165     //The following is
    166     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
    167     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
    168     return (r * 54 + g * 183 + b * 19) >> 8;
    169 }
    170 
    171 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    172     byte into a scale value, so that we can say scale * value >> 8 instead of
    173     alpha * value / 255.
    174 
    175     In debugging, asserts that alpha is 0..255
    176 */
    177 static inline unsigned SkAlpha255To256(U8CPU alpha) {
    178     SkASSERT(SkToU8(alpha) == alpha);
    179     // this one assues that blending on top of an opaque dst keeps it that way
    180     // even though it is less accurate than a+(a>>7) for non-opaque dsts
    181     return alpha + 1;
    182 }
    183 
    184 /**
    185  *  Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80.
    186  *  This is slightly more accurate than SkAlpha255To256.
    187  */
    188 static inline unsigned Sk255To256(U8CPU value) {
    189     SkASSERT(SkToU8(value) == value);
    190     return value + (value >> 7);
    191 }
    192 
    193 /** Multiply value by 0..256, and shift the result down 8
    194     (i.e. return (value * alpha256) >> 8)
    195  */
    196 #define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
    197 
    198 //  The caller may want negative values, so keep all params signed (int)
    199 //  so we don't accidentally slip into unsigned math and lose the sign
    200 //  extension when we shift (in SkAlphaMul)
    201 static inline int SkAlphaBlend(int src, int dst, int scale256) {
    202     SkASSERT((unsigned)scale256 <= 256);
    203     return dst + SkAlphaMul(src - dst, scale256);
    204 }
    205 
    206 /**
    207  *  Returns (src * alpha + dst * (255 - alpha)) / 255
    208  *
    209  *  This is more accurate than SkAlphaBlend, but slightly slower
    210  */
    211 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    212     SkASSERT((int16_t)src == src);
    213     SkASSERT((int16_t)dst == dst);
    214     SkASSERT((uint8_t)alpha == alpha);
    215 
    216     int prod = SkMulS16(src - dst, alpha) + 128;
    217     prod = (prod + (prod >> 8)) >> 8;
    218     return dst + prod;
    219 }
    220 
    221 #define SK_R16_BITS     5
    222 #define SK_G16_BITS     6
    223 #define SK_B16_BITS     5
    224 
    225 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
    226 #define SK_G16_SHIFT    (SK_B16_BITS)
    227 #define SK_B16_SHIFT    0
    228 
    229 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
    230 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
    231 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
    232 
    233 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
    234 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    235 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    236 
    237 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    238 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    239 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    240 
    241 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    242     SkASSERT(r <= SK_R16_MASK);
    243     SkASSERT(g <= SK_G16_MASK);
    244     SkASSERT(b <= SK_B16_MASK);
    245 
    246     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    247 }
    248 
    249 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    250 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    251 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    252 
    253 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    254     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    255 */
    256 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    257     SkASSERT(c == (uint16_t)c);
    258 
    259     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    260 }
    261 
    262 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    263     color value. The computation yields only 16bits of valid data, but we claim
    264     to return 32bits, so that the compiler won't generate extra instructions to
    265     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    266     up to the caller to safely ignore them.
    267 */
    268 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    269     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    270 }
    271 
    272 /** Scale the 16bit color value by the 0..256 scale parameter.
    273     The computation yields only 16bits of valid data, but we claim
    274     to return 32bits, so that the compiler won't generate extra instructions to
    275     "clean" the top 16bits.
    276 */
    277 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    278     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    279 }
    280 
    281 // this helper explicitly returns a clean 16bit value (but slower)
    282 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    283 
    284 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    285     The computation yields only 16bits of valid data, but we claim
    286     to return 32bits, so that the compiler won't generate extra instructions to
    287     "clean" the top 16bits.
    288 */
    289 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    290     SkASSERT((unsigned)srcScale <= 256);
    291 
    292     srcScale >>= 3;
    293 
    294     uint32_t src32 = SkExpand_rgb_16(src);
    295     uint32_t dst32 = SkExpand_rgb_16(dst);
    296     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    297 }
    298 
    299 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    300                                 int srcScale, int count) {
    301     SkASSERT(count > 0);
    302     SkASSERT((unsigned)srcScale <= 256);
    303 
    304     srcScale >>= 3;
    305 
    306     do {
    307         uint32_t src32 = SkExpand_rgb_16(*src++);
    308         uint32_t dst32 = SkExpand_rgb_16(*dst);
    309         *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    310     } while (--count > 0);
    311 }
    312 
    313 #ifdef SK_DEBUG
    314     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    315         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    316         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    317         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    318 
    319         return a + b;
    320     }
    321 #else
    322     #define SkRGB16Add(a, b)  ((a) + (b))
    323 #endif
    324 
    325 ///////////////////////////////////////////////////////////////////////////////
    326 
    327 #define SK_A32_BITS     8
    328 #define SK_R32_BITS     8
    329 #define SK_G32_BITS     8
    330 #define SK_B32_BITS     8
    331 
    332 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    333 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    334 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    335 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    336 
    337 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    338 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    339 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    340 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    341 
    342 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    343 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    344 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    345 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    346 
    347 #ifdef SK_DEBUG
    348     static inline void SkPMColorAssert(SkPMColor c) {
    349         unsigned a = SkGetPackedA32(c);
    350         unsigned r = SkGetPackedR32(c);
    351         unsigned g = SkGetPackedG32(c);
    352         unsigned b = SkGetPackedB32(c);
    353 
    354         SkA32Assert(a);
    355         SkASSERT(r <= a);
    356         SkASSERT(g <= a);
    357         SkASSERT(b <= a);
    358     }
    359 #else
    360     #define SkPMColorAssert(c)
    361 #endif
    362 
    363 /**
    364  *  Pack the components into a SkPMColor, checking (in the debug version) that
    365  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    366  */
    367 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    368     SkA32Assert(a);
    369     SkASSERT(r <= a);
    370     SkASSERT(g <= a);
    371     SkASSERT(b <= a);
    372 
    373     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    374            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    375 }
    376 
    377 static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
    378     return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
    379                               SkGetPackedG32(c), SkGetPackedB32(c));
    380 }
    381 
    382 static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
    383     return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
    384                               SkGetPackedG32(c), SkGetPackedB32(c));
    385 }
    386 
    387 /**
    388  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    389  * utility functions. Third parameter controls blending of the first two:
    390  *   (src, dst, 0) returns dst
    391  *   (src, dst, 0xFF) returns src
    392  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    393  */
    394 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    395                                          unsigned scale) {
    396     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    397     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    398     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    399     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    400 
    401     return SkPackARGB32(a, r, g, b);
    402 }
    403 
    404 /**
    405  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    406  * utility functions. Third parameter controls blending of the first two:
    407  *   (src, dst, 0) returns dst
    408  *   (src, dst, 0xFF) returns src
    409  */
    410 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    411                                          U8CPU srcWeight) {
    412     unsigned scale = SkAlpha255To256(srcWeight);
    413     return SkFourByteInterp256(src, dst, scale);
    414 }
    415 
    416 /**
    417  * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
    418  */
    419 static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) {
    420     const uint32_t mask = 0x00FF00FF;
    421     *ag = (color >> 8) & mask;
    422     *rb = color & mask;
    423 }
    424 
    425 /**
    426  * 0xAARRGGBB -> 0x00AA00GG00RR00BB
    427  * (note, ARGB -> AGRB)
    428  */
    429 static inline uint64_t SkSplay(uint32_t color) {
    430     const uint32_t mask = 0x00FF00FF;
    431     uint64_t agrb = (color >> 8) & mask;  // 0x0000000000AA00GG
    432     agrb <<= 32;                          // 0x00AA00GG00000000
    433     agrb |= color & mask;                 // 0x00AA00GG00RR00BB
    434     return agrb;
    435 }
    436 
    437 /**
    438  * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
    439  */
    440 static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) {
    441     const uint32_t mask = 0xFF00FF00;
    442     return (ag & mask) | ((rb & mask) >> 8);
    443 }
    444 
    445 /**
    446  * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
    447  * (note, AGRB -> ARGB)
    448  */
    449 static inline uint32_t SkUnsplay(uint64_t agrb) {
    450     const uint32_t mask = 0xFF00FF00;
    451     return SkPMColor(
    452         ((agrb & mask) >> 8) |   // 0x00RR00BB
    453         ((agrb >> 32) & mask));  // 0xAARRGGBB
    454 }
    455 
    456 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    457     SkASSERT(scale <= 256);
    458 
    459     // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
    460     uint32_t src_ag, src_rb, dst_ag, dst_rb;
    461     SkSplay(src, &src_ag, &src_rb);
    462     SkSplay(dst, &dst_ag, &dst_rb);
    463 
    464     const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    465     const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    466 
    467     return SkUnsplay(ret_ag, ret_rb);
    468 }
    469 
    470 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    471     SkASSERT(scale <= 256);
    472     // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
    473     return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
    474 }
    475 
    476 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
    477 
    478 /**
    479  * Same as SkFourByteInterp256, but faster.
    480  */
    481 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    482     // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
    483     if (sizeof(void*) == 4) {
    484         return SkFastFourByteInterp256_32(src, dst, scale);
    485     } else {
    486         return SkFastFourByteInterp256_64(src, dst, scale);
    487     }
    488 }
    489 
    490 /**
    491  * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
    492  * srcWeight scaling to [0, 256].
    493  */
    494 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    495                                              SkPMColor dst,
    496                                              U8CPU srcWeight) {
    497     SkASSERT(srcWeight <= 255);
    498     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    499     // scale = srcWeight + 1, but 7% slower
    500     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    501 }
    502 
    503 /**
    504  *  Same as SkPackARGB32, but this version guarantees to not check that the
    505  *  values are premultiplied in the debug version.
    506  */
    507 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    508     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    509            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    510 }
    511 
    512 static inline
    513 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    514     SkA32Assert(a);
    515     SkR32Assert(r);
    516     SkG32Assert(g);
    517     SkB32Assert(b);
    518 
    519     if (a != 255) {
    520         r = SkMulDiv255Round(r, a);
    521         g = SkMulDiv255Round(g, a);
    522         b = SkMulDiv255Round(b, a);
    523     }
    524     return SkPackARGB32(a, r, g, b);
    525 }
    526 
    527 // When Android is compiled optimizing for size, SkAlphaMulQ doesn't get
    528 // inlined; forcing inlining significantly improves performance.
    529 static SK_ALWAYS_INLINE uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    530     uint32_t mask = 0xFF00FF;
    531 
    532     uint32_t rb = ((c & mask) * scale) >> 8;
    533     uint32_t ag = ((c >> 8) & mask) * scale;
    534     return (rb & mask) | (ag & ~mask);
    535 }
    536 
    537 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    538     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    539 }
    540 
    541 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    542     SkASSERT((unsigned)aa <= 255);
    543 
    544     unsigned src_scale = SkAlpha255To256(aa);
    545     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    546 
    547     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    548 }
    549 
    550 ////////////////////////////////////////////////////////////////////////////////////////////
    551 // Convert a 32bit pixel to a 16bit pixel (no dither)
    552 
    553 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    554 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    555 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    556 
    557 #ifdef SK_DEBUG
    558     static inline unsigned SkR32ToR16(unsigned r) {
    559         SkR32Assert(r);
    560         return SkR32ToR16_MACRO(r);
    561     }
    562     static inline unsigned SkG32ToG16(unsigned g) {
    563         SkG32Assert(g);
    564         return SkG32ToG16_MACRO(g);
    565     }
    566     static inline unsigned SkB32ToB16(unsigned b) {
    567         SkB32Assert(b);
    568         return SkB32ToB16_MACRO(b);
    569     }
    570 #else
    571     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    572     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    573     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    574 #endif
    575 
    576 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    577 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    578 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    579 
    580 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    581     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    582     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    583     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    584     return r | g | b;
    585 }
    586 
    587 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    588     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    589             (SkG32ToG16(g) << SK_G16_SHIFT) |
    590             (SkB32ToB16(b) << SK_B16_SHIFT);
    591 }
    592 
    593 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    594 
    595 /////////////////////////////////////////////////////////////////////////////////////////
    596 // Fast dither from 32->16
    597 
    598 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    599 
    600 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    601     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    602     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    603     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    604 
    605     return SkPackRGB16(r, g, b);
    606 }
    607 
    608 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    609     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    610 }
    611 
    612 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    613     It is now suitable for combining with a scaled expanded_rgb_16 color
    614     as in SkSrcOver32To16().
    615     We must do this 565 high-bit replication, in order for the subsequent add
    616     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    617     possible to overflow.
    618 */
    619 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    620     unsigned sr = SkPacked32ToR16(c);
    621     unsigned sg = SkPacked32ToG16(c);
    622     unsigned sb = SkPacked32ToB16(c);
    623 
    624     sr = (sr << 5) | sr;
    625     sg = (sg << 5) | (sg >> 1);
    626     sb = (sb << 5) | sb;
    627     return (sr << 11) | (sg << 21) | (sb << 0);
    628 }
    629 
    630 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    631     (with dirt in the high 16bits, so caller beware).
    632 */
    633 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    634     unsigned sr = SkGetPackedR32(src);
    635     unsigned sg = SkGetPackedG32(src);
    636     unsigned sb = SkGetPackedB32(src);
    637 
    638     unsigned dr = SkGetPackedR16(dst);
    639     unsigned dg = SkGetPackedG16(dst);
    640     unsigned db = SkGetPackedB16(dst);
    641 
    642     unsigned isa = 255 - SkGetPackedA32(src);
    643 
    644     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    645     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    646     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    647 
    648     return SkPackRGB16(dr, dg, db);
    649 }
    650 
    651 ////////////////////////////////////////////////////////////////////////////////////////////
    652 // Convert a 16bit pixel to a 32bit pixel
    653 
    654 static inline unsigned SkR16ToR32(unsigned r) {
    655     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    656 }
    657 
    658 static inline unsigned SkG16ToG32(unsigned g) {
    659     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    660 }
    661 
    662 static inline unsigned SkB16ToB32(unsigned b) {
    663     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    664 }
    665 
    666 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    667 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    668 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    669 
    670 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    671     SkASSERT(src == SkToU16(src));
    672 
    673     unsigned    r = SkPacked16ToR32(src);
    674     unsigned    g = SkPacked16ToG32(src);
    675     unsigned    b = SkPacked16ToB32(src);
    676 
    677     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    678     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    679     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    680 
    681     return SkPackARGB32(0xFF, r, g, b);
    682 }
    683 
    684 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    685 static inline SkColor SkPixel16ToColor(U16CPU src) {
    686     SkASSERT(src == SkToU16(src));
    687 
    688     unsigned    r = SkPacked16ToR32(src);
    689     unsigned    g = SkPacked16ToG32(src);
    690     unsigned    b = SkPacked16ToB32(src);
    691 
    692     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    693     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    694     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    695 
    696     return SkColorSetRGB(r, g, b);
    697 }
    698 
    699 ///////////////////////////////////////////////////////////////////////////////
    700 
    701 typedef uint16_t SkPMColor16;
    702 
    703 // Put in OpenGL order (r g b a)
    704 #define SK_A4444_SHIFT    0
    705 #define SK_R4444_SHIFT    12
    706 #define SK_G4444_SHIFT    8
    707 #define SK_B4444_SHIFT    4
    708 
    709 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    710 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    711 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    712 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    713 
    714 static inline U8CPU SkReplicateNibble(unsigned nib) {
    715     SkASSERT(nib <= 0xF);
    716     return (nib << 4) | nib;
    717 }
    718 
    719 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    720 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    721 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    722 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    723 
    724 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    725 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    726 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    727 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    728 
    729 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    730 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    731 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    732 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    733 
    #ifdef SK_DEBUG
    /**
     *  Debug-only sanity check for a packed 4444 color: asserts that none of
     *  the red, green or blue nibbles exceeds the alpha nibble. Expands to
     *  nothing when SK_DEBUG is not defined.
     */
    static inline void SkPMColor16Assert(U16CPU c) {
        const unsigned alpha = SkGetPackedA4444(c);
        const unsigned red   = SkGetPackedR4444(c);
        const unsigned green = SkGetPackedG4444(c);
        const unsigned blue  = SkGetPackedB4444(c);

        SkASSERT(alpha <= 0xF);
        SkASSERT(red <= alpha);
        SkASSERT(green <= alpha);
        SkASSERT(blue <= alpha);
    }
    #else
    #define SkPMColor16Assert(c)
    #endif
    749 
    750 static inline unsigned SkAlpha15To16(unsigned a) {
    751     SkASSERT(a <= 0xF);
    752     return a + (a >> 3);
    753 }
    754 
    #ifdef SK_DEBUG
        /**
         *  Returns (value * scale) >> 4, i.e. value scaled by scale/16.
         *  The debug build is a real function so it can assert that scale
         *  lies in 0..16; the release build is a plain macro.
         */
        static inline int SkAlphaMul4(int value, int scale) {
            SkASSERT((unsigned)scale <= 0x10);
            const int product = value * scale;
            return product >> 4;
        }
    #else
        #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    #endif
    763 
    764 static inline unsigned SkR4444ToR565(unsigned r) {
    765     SkASSERT(r <= 0xF);
    766     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    767 }
    768 
    769 static inline unsigned SkG4444ToG565(unsigned g) {
    770     SkASSERT(g <= 0xF);
    771     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    772 }
    773 
    774 static inline unsigned SkB4444ToB565(unsigned b) {
    775     SkASSERT(b <= 0xF);
    776     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    777 }
    778 
    779 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    780                                          unsigned g, unsigned b) {
    781     SkASSERT(a <= 0xF);
    782     SkASSERT(r <= a);
    783     SkASSERT(g <= a);
    784     SkASSERT(b <= a);
    785 
    786     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    787                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    788 }
    789 
    790 static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    791     SkASSERT(scale <= 16);
    792 
    793     const unsigned mask = 0xF0F;    //gMask_0F0F;
    794 
    795 #if 0
    796     unsigned rb = ((c & mask) * scale) >> 4;
    797     unsigned ag = ((c >> 4) & mask) * scale;
    798     return (rb & mask) | (ag & ~mask);
    799 #else
    800     c = (c & mask) | ((c & (mask << 4)) << 12);
    801     c = c * scale >> 4;
    802     return (c & mask) | ((c >> 12) & (mask << 4));
    803 #endif
    804 }
    805 
    806 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    807     once by a value up to 16. Used in conjunction with SkCompact_4444.
    808 */
    809 static inline uint32_t SkExpand_4444(U16CPU c) {
    810     SkASSERT(c == (uint16_t)c);
    811 
    812     const unsigned mask = 0xF0F;    //gMask_0F0F;
    813     return (c & mask) | ((c & ~mask) << 12);
    814 }
    815 
    816 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    817     NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    818     It does this for speed, since if it is being written directly to 16bits of
    819     memory, the top 16bits will be ignored. Casting the result to uint16_t here
    820     would add 2 more instructions, slow us down. It is up to the caller to
    821     perform the cast if needed.
    822 */
    823 static inline U16CPU SkCompact_4444(uint32_t c) {
    824     const unsigned mask = 0xF0F;    //gMask_0F0F;
    825     return (c & mask) | ((c >> 12) & ~mask);
    826 }
    827 
    828 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    829     unsigned sa = SkGetPackedA4444(s);
    830     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    831     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    832     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    833 
    834     // To avoid overflow, we have to clear the low bit of the synthetic sg
    835     // if the src alpha is <= 7.
    836     // to see why, try blending 0x4444 on top of 565-white and watch green
    837     // overflow (sum == 64)
    838     sg &= ~(~(sa >> 3) & 1);
    839 
    840     unsigned scale = SkAlpha15To16(15 - sa);
    841     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    842     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    843     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    844 
    845 #if 0
    846     if (sg + dg > 63) {
    847         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    848     }
    849 #endif
    850     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    851 }
    852 
    853 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    854     SkASSERT((unsigned)scale16 <= 16);
    855 
    856     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    857 }
    858 
    859 static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    860     SkASSERT((unsigned)scale16 <= 16);
    861 
    862     uint32_t src32 = SkExpand_4444(src) * scale16;
    863     // the scaled srcAlpha is the bottom byte
    864 #ifdef SK_DEBUG
    865     {
    866         unsigned srcA = SkGetPackedA4444(src) * scale16;
    867         SkASSERT(srcA == (src32 & 0xFF));
    868     }
    869 #endif
    870     unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    871     uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    872     return SkCompact_4444((src32 + dst32) >> 4);
    873 }
    874 
    875 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    876     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    877                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    878                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    879                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    880     return d | (d << 4);
    881 }
    882 
    883 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    884     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    885     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    886     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    887     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    888 }
    889 
    890 // cheap 2x2 dither
    891 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    892                                                U8CPU g, U8CPU b) {
    893     // to ensure that we stay a legal premultiplied color, we take the max()
    894     // of the truncated and dithered alpha values. If we didn't, cases like
    895     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    896     // which is not legal premultiplied, since a < color
    897     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    898     a = SkMax32(a >> 4, dithered_a);
    899     // these we just dither in place
    900     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    901     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    902     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    903 
    904     return SkPackARGB4444(a, r, g, b);
    905 }
    906 
    907 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    908     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    909                                 SkGetPackedG32(c), SkGetPackedB32(c));
    910 }
    911 
    912 /*  Assumes 16bit is in standard RGBA order.
    913     Transforms a normal ARGB_8888 into the same byte order as
    914     expanded ARGB_4444, but keeps each component 8bits
    915 */
    916 static inline uint32_t SkExpand_8888(SkPMColor c) {
    917     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    918             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    919             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    920             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    921 }
    922 
    923 /*  Undo the operation of SkExpand_8888, turning the argument back into
    924     a SkPMColor.
    925 */
    926 static inline SkPMColor SkCompact_8888(uint32_t c) {
    927     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    928             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    929             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    930             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    931 }
    932 
    933 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    934     but this routine just keeps the high 4bits of each component in the low
    935     4bits of the result (just like a newly expanded PMColor16).
    936 */
    937 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    938     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    939             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    940             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    941             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    942 }
    943 
    944 // takes two values and alternates them as part of a memset16
    945 // used for cheap 2x2 dithering when the colors are opaque
    // NOTE(review): only declared here; n is presumably the number of uint16_t
    // entries to write into dst -- confirm against the definition.
    946 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    947 
    948 ///////////////////////////////////////////////////////////////////////////////
    949 
    950 static inline int SkUpscale31To32(int value) {
    951     SkASSERT((unsigned)value <= 31);
    952     return value + (value >> 4);
    953 }
    954 
    955 static inline int SkBlend32(int src, int dst, int scale) {
    956     SkASSERT((unsigned)src <= 0xFF);
    957     SkASSERT((unsigned)dst <= 0xFF);
    958     SkASSERT((unsigned)scale <= 32);
    959     return dst + ((src - dst) * scale >> 5);
    960 }
    961 
    962 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    963                                      SkPMColor dst, uint16_t mask) {
    964     if (mask == 0) {
    965         return dst;
    966     }
    967 
    968     /*  We want all of these in 5bits, hence the shifts in case one of them
    969      *  (green) is 6bits.
    970      */
    971     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    972     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    973     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    974 
    975     // Now upscale them to 0..32, so we can use blend32
    976     maskR = SkUpscale31To32(maskR);
    977     maskG = SkUpscale31To32(maskG);
    978     maskB = SkUpscale31To32(maskB);
    979 
    980     // srcA has been upscaled to 256 before passed into this function
    981     maskR = maskR * srcA >> 8;
    982     maskG = maskG * srcA >> 8;
    983     maskB = maskB * srcA >> 8;
    984 
    985     int dstR = SkGetPackedR32(dst);
    986     int dstG = SkGetPackedG32(dst);
    987     int dstB = SkGetPackedB32(dst);
    988 
    989     // LCD blitting is only supported if the dst is known/required
    990     // to be opaque
    991     return SkPackARGB32(0xFF,
    992                         SkBlend32(srcR, dstR, maskR),
    993                         SkBlend32(srcG, dstG, maskG),
    994                         SkBlend32(srcB, dstB, maskB));
    995 }
    996 
    997 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    998                                            SkPMColor dst, uint16_t mask,
    999                                            SkPMColor opaqueDst) {
   1000     if (mask == 0) {
   1001         return dst;
   1002     }
   1003 
   1004     if (0xFFFF == mask) {
   1005         return opaqueDst;
   1006     }
   1007 
   1008     /*  We want all of these in 5bits, hence the shifts in case one of them
   1009      *  (green) is 6bits.
   1010      */
   1011     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
   1012     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
   1013     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
   1014 
   1015     // Now upscale them to 0..32, so we can use blend32
   1016     maskR = SkUpscale31To32(maskR);
   1017     maskG = SkUpscale31To32(maskG);
   1018     maskB = SkUpscale31To32(maskB);
   1019 
   1020     int dstR = SkGetPackedR32(dst);
   1021     int dstG = SkGetPackedG32(dst);
   1022     int dstB = SkGetPackedB32(dst);
   1023 
   1024     // LCD blitting is only supported if the dst is known/required
   1025     // to be opaque
   1026     return SkPackARGB32(0xFF,
   1027                         SkBlend32(srcR, dstR, maskR),
   1028                         SkBlend32(srcG, dstG, maskG),
   1029                         SkBlend32(srcB, dstB, maskB));
   1030 }
   1031 
   1032 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
   1033                                   SkColor src, int width, SkPMColor) {
   1034     int srcA = SkColorGetA(src);
   1035     int srcR = SkColorGetR(src);
   1036     int srcG = SkColorGetG(src);
   1037     int srcB = SkColorGetB(src);
   1038 
   1039     srcA = SkAlpha255To256(srcA);
   1040 
   1041     for (int i = 0; i < width; i++) {
   1042         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
   1043     }
   1044 }
   1045 
   1046 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
   1047                                         SkColor src, int width,
   1048                                         SkPMColor opaqueDst) {
   1049     int srcR = SkColorGetR(src);
   1050     int srcG = SkColorGetG(src);
   1051     int srcB = SkColorGetB(src);
   1052 
   1053     for (int i = 0; i < width; i++) {
   1054         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
   1055                                     opaqueDst);
   1056     }
   1057 }
   1058 
   1059 #endif
   1060