Home | History | Annotate | Download | only in core
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #ifndef SkColorPriv_DEFINED
     11 #define SkColorPriv_DEFINED
     12 
      13 // turn this on for extra debug checking when blending onto 565
     14 #ifdef SK_DEBUG
     15     #define CHECK_FOR_565_OVERFLOW
     16 #endif
     17 
     18 #include "SkColor.h"
     19 #include "SkMath.h"
     20 
     21 ///@{
     22 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
     23 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
     24 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
     25 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
     26 ///@}
     27 
     28 ///@{
     29 /** A float value which specifies this channel's contribution to luminance. */
     30 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
     31 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
     32 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
     33 ///@}
     34 
     35 /** Computes the luminance from the given r, g, and b in accordance with
     36     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
     37 */
     38 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
     39     //The following is
     40     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
     41     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
     42     return (r * 54 + g * 183 + b * 19) >> 8;
     43 }
     44 
     45 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
     46     byte into a scale value, so that we can say scale * value >> 8 instead of
     47     alpha * value / 255.
     48 
     49     In debugging, asserts that alpha is 0..255
     50 */
     51 static inline unsigned SkAlpha255To256(U8CPU alpha) {
     52     SkASSERT(SkToU8(alpha) == alpha);
     53     // this one assues that blending on top of an opaque dst keeps it that way
     54     // even though it is less accurate than a+(a>>7) for non-opaque dsts
     55     return alpha + 1;
     56 }
     57 
      58 /** Multiply value by 0..256, and shift the result down 8
     59     (i.e. return (value * alpha256) >> 8)
     60  */
     61 #define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
     62 
     63 //  The caller may want negative values, so keep all params signed (int)
     64 //  so we don't accidentally slip into unsigned math and lose the sign
     65 //  extension when we shift (in SkAlphaMul)
     66 static inline int SkAlphaBlend(int src, int dst, int scale256) {
     67     SkASSERT((unsigned)scale256 <= 256);
     68     return dst + SkAlphaMul(src - dst, scale256);
     69 }
     70 
     71 /**
     72  *  Returns (src * alpha + dst * (255 - alpha)) / 255
     73  *
     74  *  This is more accurate than SkAlphaBlend, but slightly slower
     75  */
     76 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
     77     SkASSERT((int16_t)src == src);
     78     SkASSERT((int16_t)dst == dst);
     79     SkASSERT((uint8_t)alpha == alpha);
     80 
     81     int prod = SkMulS16(src - dst, alpha) + 128;
     82     prod = (prod + (prod >> 8)) >> 8;
     83     return dst + prod;
     84 }
     85 
     86 #define SK_R16_BITS     5
     87 #define SK_G16_BITS     6
     88 #define SK_B16_BITS     5
     89 
     90 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
     91 #define SK_G16_SHIFT    (SK_B16_BITS)
     92 #define SK_B16_SHIFT    0
     93 
     94 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
     95 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
     96 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
     97 
     98 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
     99 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    100 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    101 
    102 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    103 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    104 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    105 
    106 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    107     SkASSERT(r <= SK_R16_MASK);
    108     SkASSERT(g <= SK_G16_MASK);
    109     SkASSERT(b <= SK_B16_MASK);
    110 
    111     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    112 }
    113 
    114 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    115 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    116 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    117 
    118 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    119     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    120 */
    121 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    122     SkASSERT(c == (uint16_t)c);
    123 
    124     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    125 }
    126 
    127 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    128     color value. The computation yields only 16bits of valid data, but we claim
    129     to return 32bits, so that the compiler won't generate extra instructions to
    130     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    131     up to the caller to safely ignore them.
    132 */
    133 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    134     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    135 }
    136 
    137 /** Scale the 16bit color value by the 0..256 scale parameter.
    138     The computation yields only 16bits of valid data, but we claim
    139     to return 32bits, so that the compiler won't generate extra instructions to
    140     "clean" the top 16bits.
    141 */
    142 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    143     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    144 }
    145 
    146 // this helper explicitly returns a clean 16bit value (but slower)
    147 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    148 
    149 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    150     The computation yields only 16bits of valid data, but we claim
    151     to return 32bits, so that the compiler won't generate extra instructions to
    152     "clean" the top 16bits.
    153 */
    154 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    155     SkASSERT((unsigned)srcScale <= 256);
    156 
    157     srcScale >>= 3;
    158 
    159     uint32_t src32 = SkExpand_rgb_16(src);
    160     uint32_t dst32 = SkExpand_rgb_16(dst);
    161     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    162 }
    163 
    164 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    165                                 int srcScale, int count) {
    166     SkASSERT(count > 0);
    167     SkASSERT((unsigned)srcScale <= 256);
    168 
    169     srcScale >>= 3;
    170 
    171     do {
    172         uint32_t src32 = SkExpand_rgb_16(*src++);
    173         uint32_t dst32 = SkExpand_rgb_16(*dst);
    174         *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    175     } while (--count > 0);
    176 }
    177 
    178 #ifdef SK_DEBUG
    179     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    180         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    181         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    182         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    183 
    184         return a + b;
    185     }
    186 #else
    187     #define SkRGB16Add(a, b)  ((a) + (b))
    188 #endif
    189 
    190 ///////////////////////////////////////////////////////////////////////////////
    191 
    192 #define SK_A32_BITS     8
    193 #define SK_R32_BITS     8
    194 #define SK_G32_BITS     8
    195 #define SK_B32_BITS     8
    196 
    197 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    198 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    199 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    200 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    201 
    202 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    203 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    204 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    205 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    206 
    207 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    208 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    209 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    210 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    211 
    212 #ifdef SK_DEBUG
    213     static inline void SkPMColorAssert(SkPMColor c) {
    214         unsigned a = SkGetPackedA32(c);
    215         unsigned r = SkGetPackedR32(c);
    216         unsigned g = SkGetPackedG32(c);
    217         unsigned b = SkGetPackedB32(c);
    218 
    219         SkA32Assert(a);
    220         SkASSERT(r <= a);
    221         SkASSERT(g <= a);
    222         SkASSERT(b <= a);
    223     }
    224 #else
    225     #define SkPMColorAssert(c)
    226 #endif
    227 
    228 /**
    229  *  Pack the components into a SkPMColor, checking (in the debug version) that
    230  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    231  */
    232 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    233     SkA32Assert(a);
    234     SkASSERT(r <= a);
    235     SkASSERT(g <= a);
    236     SkASSERT(b <= a);
    237 
    238     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    239            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    240 }
    241 
    242 /**
    243  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    244  * utility functions. Third parameter controls blending of the first two:
    245  *   (src, dst, 0) returns dst
    246  *   (src, dst, 0xFF) returns src
    247  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    248  */
    249 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    250                                          unsigned scale) {
    251     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    252     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    253     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    254     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    255 
    256     return SkPackARGB32(a, r, g, b);
    257 }
    258 
    259 /**
    260  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    261  * utility functions. Third parameter controls blending of the first two:
    262  *   (src, dst, 0) returns dst
    263  *   (src, dst, 0xFF) returns src
    264  */
    265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    266                                          U8CPU srcWeight) {
    267     unsigned scale = SkAlpha255To256(srcWeight);
    268     return SkFourByteInterp256(src, dst, scale);
    269 }
    270 
    271 /**
    272  * 32b optimized version; currently appears to be 10% faster even on 64b
    273  * architectures than an equivalent 64b version and 30% faster than
    274  * SkFourByteInterp(). Third parameter controls blending of the first two:
    275  *   (src, dst, 0) returns dst
    276  *   (src, dst, 0xFF) returns src
    277  * ** Does not match the results of SkFourByteInterp() because we use
    278  * a more accurate scale computation!
    279  * TODO: migrate Skia function to using an accurate 255->266 alpha
    280  * conversion.
    281  */
    282 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    283                                              SkPMColor dst,
    284                                              U8CPU srcWeight) {
    285     SkASSERT(srcWeight < 256);
    286 
    287     // Reorders ARGB to AG-RB in order to reduce the number of operations.
    288     const uint32_t mask = 0xFF00FF;
    289     uint32_t src_rb = src & mask;
    290     uint32_t src_ag = (src >> 8) & mask;
    291     uint32_t dst_rb = dst & mask;
    292     uint32_t dst_ag = (dst >> 8) & mask;
    293 
    294     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    295     // scale = srcWeight + 1, but 7% slower
    296     int scale = srcWeight + (srcWeight >> 7);
    297 
    298     uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    299     uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    300 
    301     return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
    302 }
    303 
    304 /**
    305  *  Same as SkPackARGB32, but this version guarantees to not check that the
    306  *  values are premultiplied in the debug version.
    307  */
    308 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    309     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    310            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    311 }
    312 
    313 static inline
    314 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    315     SkA32Assert(a);
    316     SkA32Assert(r);
    317     SkA32Assert(g);
    318     SkA32Assert(b);
    319 
    320     if (a != 255) {
    321         r = SkMulDiv255Round(r, a);
    322         g = SkMulDiv255Round(g, a);
    323         b = SkMulDiv255Round(b, a);
    324     }
    325     return SkPackARGB32(a, r, g, b);
    326 }
    327 
    328 SK_API extern const uint32_t gMask_00FF00FF;
    329 
    330 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    331     uint32_t mask = gMask_00FF00FF;
    332 //    uint32_t mask = 0xFF00FF;
    333 
    334     uint32_t rb = ((c & mask) * scale) >> 8;
    335     uint32_t ag = ((c >> 8) & mask) * scale;
    336     return (rb & mask) | (ag & ~mask);
    337 }
    338 
    339 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    340     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    341 }
    342 
    343 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    344     SkASSERT((unsigned)aa <= 255);
    345 
    346     unsigned src_scale = SkAlpha255To256(aa);
    347     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    348 
    349     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    350 }
    351 
    352 ////////////////////////////////////////////////////////////////////////////////////////////
    353 // Convert a 32bit pixel to a 16bit pixel (no dither)
    354 
    355 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    356 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    357 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    358 
    359 #ifdef SK_DEBUG
    360     static inline unsigned SkR32ToR16(unsigned r) {
    361         SkR32Assert(r);
    362         return SkR32ToR16_MACRO(r);
    363     }
    364     static inline unsigned SkG32ToG16(unsigned g) {
    365         SkG32Assert(g);
    366         return SkG32ToG16_MACRO(g);
    367     }
    368     static inline unsigned SkB32ToB16(unsigned b) {
    369         SkB32Assert(b);
    370         return SkB32ToB16_MACRO(b);
    371     }
    372 #else
    373     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    374     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    375     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    376 #endif
    377 
    378 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    379 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    380 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    381 
    382 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    383     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    384     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    385     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    386     return r | g | b;
    387 }
    388 
    389 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    390     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    391             (SkG32ToG16(g) << SK_G16_SHIFT) |
    392             (SkB32ToB16(b) << SK_B16_SHIFT);
    393 }
    394 
    395 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    396 
    397 /////////////////////////////////////////////////////////////////////////////////////////
    398 // Fast dither from 32->16
    399 
    400 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    401 
    402 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    403     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    404     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    405     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    406 
    407     return SkPackRGB16(r, g, b);
    408 }
    409 
    410 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    411     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    412 }
    413 
    414 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    415     It is now suitable for combining with a scaled expanded_rgb_16 color
    416     as in SkSrcOver32To16().
    417     We must do this 565 high-bit replication, in order for the subsequent add
    418     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    419     possible to overflow.
    420 */
    421 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    422     unsigned sr = SkPacked32ToR16(c);
    423     unsigned sg = SkPacked32ToG16(c);
    424     unsigned sb = SkPacked32ToB16(c);
    425 
    426     sr = (sr << 5) | sr;
    427     sg = (sg << 5) | (sg >> 1);
    428     sb = (sb << 5) | sb;
    429     return (sr << 11) | (sg << 21) | (sb << 0);
    430 }
    431 
    432 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    433     (with dirt in the high 16bits, so caller beware).
    434 */
    435 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    436     unsigned sr = SkGetPackedR32(src);
    437     unsigned sg = SkGetPackedG32(src);
    438     unsigned sb = SkGetPackedB32(src);
    439 
    440     unsigned dr = SkGetPackedR16(dst);
    441     unsigned dg = SkGetPackedG16(dst);
    442     unsigned db = SkGetPackedB16(dst);
    443 
    444     unsigned isa = 255 - SkGetPackedA32(src);
    445 
    446     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    447     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    448     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    449 
    450     return SkPackRGB16(dr, dg, db);
    451 }
    452 
    453 ////////////////////////////////////////////////////////////////////////////////////////////
    454 // Convert a 16bit pixel to a 32bit pixel
    455 
    456 static inline unsigned SkR16ToR32(unsigned r) {
    457     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    458 }
    459 
    460 static inline unsigned SkG16ToG32(unsigned g) {
    461     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    462 }
    463 
    464 static inline unsigned SkB16ToB32(unsigned b) {
    465     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    466 }
    467 
    468 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    469 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    470 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    471 
    472 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    473     SkASSERT(src == SkToU16(src));
    474 
    475     unsigned    r = SkPacked16ToR32(src);
    476     unsigned    g = SkPacked16ToG32(src);
    477     unsigned    b = SkPacked16ToB32(src);
    478 
    479     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    480     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    481     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    482 
    483     return SkPackARGB32(0xFF, r, g, b);
    484 }
    485 
    486 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    487 static inline SkColor SkPixel16ToColor(U16CPU src) {
    488     SkASSERT(src == SkToU16(src));
    489 
    490     unsigned    r = SkPacked16ToR32(src);
    491     unsigned    g = SkPacked16ToG32(src);
    492     unsigned    b = SkPacked16ToB32(src);
    493 
    494     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    495     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    496     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    497 
    498     return SkColorSetRGB(r, g, b);
    499 }
    500 
    501 ///////////////////////////////////////////////////////////////////////////////
    502 
    503 typedef uint16_t SkPMColor16;
    504 
    505 // Put in OpenGL order (r g b a)
    506 #define SK_A4444_SHIFT    0
    507 #define SK_R4444_SHIFT    12
    508 #define SK_G4444_SHIFT    8
    509 #define SK_B4444_SHIFT    4
    510 
    511 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    512 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    513 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    514 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    515 
    516 static inline U8CPU SkReplicateNibble(unsigned nib) {
    517     SkASSERT(nib <= 0xF);
    518     return (nib << 4) | nib;
    519 }
    520 
    521 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    522 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    523 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    524 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    525 
    526 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    527 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    528 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    529 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    530 
    531 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    532 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    533 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    534 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    535 
    536 #ifdef SK_DEBUG
    537 static inline void SkPMColor16Assert(U16CPU c) {
    538     unsigned a = SkGetPackedA4444(c);
    539     unsigned r = SkGetPackedR4444(c);
    540     unsigned g = SkGetPackedG4444(c);
    541     unsigned b = SkGetPackedB4444(c);
    542 
    543     SkASSERT(a <= 0xF);
    544     SkASSERT(r <= a);
    545     SkASSERT(g <= a);
    546     SkASSERT(b <= a);
    547 }
    548 #else
    549 #define SkPMColor16Assert(c)
    550 #endif
    551 
    552 static inline unsigned SkAlpha15To16(unsigned a) {
    553     SkASSERT(a <= 0xF);
    554     return a + (a >> 3);
    555 }
    556 
    557 #ifdef SK_DEBUG
    558     static inline int SkAlphaMul4(int value, int scale) {
    559         SkASSERT((unsigned)scale <= 0x10);
    560         return value * scale >> 4;
    561     }
    562 #else
    563     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    564 #endif
    565 
    566 static inline unsigned SkR4444ToR565(unsigned r) {
    567     SkASSERT(r <= 0xF);
    568     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    569 }
    570 
    571 static inline unsigned SkG4444ToG565(unsigned g) {
    572     SkASSERT(g <= 0xF);
    573     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    574 }
    575 
    576 static inline unsigned SkB4444ToB565(unsigned b) {
    577     SkASSERT(b <= 0xF);
    578     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    579 }
    580 
    581 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    582                                          unsigned g, unsigned b) {
    583     SkASSERT(a <= 0xF);
    584     SkASSERT(r <= a);
    585     SkASSERT(g <= a);
    586     SkASSERT(b <= a);
    587 
    588     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    589                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    590 }
    591 
    592 extern const uint16_t gMask_0F0F;
    593 
    594 static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    595     SkASSERT(scale <= 16);
    596 
    597     const unsigned mask = 0xF0F;    //gMask_0F0F;
    598 
    599 #if 0
    600     unsigned rb = ((c & mask) * scale) >> 4;
    601     unsigned ag = ((c >> 4) & mask) * scale;
    602     return (rb & mask) | (ag & ~mask);
    603 #else
    604     c = (c & mask) | ((c & (mask << 4)) << 12);
    605     c = c * scale >> 4;
    606     return (c & mask) | ((c >> 12) & (mask << 4));
    607 #endif
    608 }
    609 
    610 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    611     once by a value up to 16. Used in conjunction with SkCompact_4444.
    612 */
    613 static inline uint32_t SkExpand_4444(U16CPU c) {
    614     SkASSERT(c == (uint16_t)c);
    615 
    616     const unsigned mask = 0xF0F;    //gMask_0F0F;
    617     return (c & mask) | ((c & ~mask) << 12);
    618 }
    619 
    620 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    621     NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    622     It does this for speed, since if it is being written directly to 16bits of
    623     memory, the top 16bits will be ignored. Casting the result to uint16_t here
    624     would add 2 more instructions, slow us down. It is up to the caller to
    625     perform the cast if needed.
    626 */
    627 static inline U16CPU SkCompact_4444(uint32_t c) {
    628     const unsigned mask = 0xF0F;    //gMask_0F0F;
    629     return (c & mask) | ((c >> 12) & ~mask);
    630 }
    631 
    632 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    633     unsigned sa = SkGetPackedA4444(s);
    634     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    635     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    636     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    637 
    638     // To avoid overflow, we have to clear the low bit of the synthetic sg
    639     // if the src alpha is <= 7.
    640     // to see why, try blending 0x4444 on top of 565-white and watch green
    641     // overflow (sum == 64)
    642     sg &= ~(~(sa >> 3) & 1);
    643 
    644     unsigned scale = SkAlpha15To16(15 - sa);
    645     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    646     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    647     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    648 
    649 #if 0
    650     if (sg + dg > 63) {
    651         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    652     }
    653 #endif
    654     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    655 }
    656 
    657 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    658     SkASSERT((unsigned)scale16 <= 16);
    659 
    660     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    661 }
    662 
    663 static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    664     SkASSERT((unsigned)scale16 <= 16);
    665 
    666     uint32_t src32 = SkExpand_4444(src) * scale16;
    667     // the scaled srcAlpha is the bottom byte
    668 #ifdef SK_DEBUG
    669     {
    670         unsigned srcA = SkGetPackedA4444(src) * scale16;
    671         SkASSERT(srcA == (src32 & 0xFF));
    672     }
    673 #endif
    674     unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    675     uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    676     return SkCompact_4444((src32 + dst32) >> 4);
    677 }
    678 
    679 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    680     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    681                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    682                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    683                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    684     return d | (d << 4);
    685 }
    686 
    687 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    688     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    689     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    690     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    691     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    692 }
    693 
    694 // cheap 2x2 dither
    695 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    696                                                U8CPU g, U8CPU b) {
    697     // to ensure that we stay a legal premultiplied color, we take the max()
    698     // of the truncated and dithered alpha values. If we didn't, cases like
    699     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    700     // which is not legal premultiplied, since a < color
    701     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    702     a = SkMax32(a >> 4, dithered_a);
    703     // these we just dither in place
    704     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    705     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    706     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    707 
    708     return SkPackARGB4444(a, r, g, b);
    709 }
    710 
    711 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    712     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    713                                 SkGetPackedG32(c), SkGetPackedB32(c));
    714 }
    715 
    716 /*  Assumes 16bit is in standard RGBA order.
    717     Transforms a normal ARGB_8888 into the same byte order as
    718     expanded ARGB_4444, but keeps each component 8bits
    719 */
    720 static inline uint32_t SkExpand_8888(SkPMColor c) {
    721     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    722             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    723             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    724             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    725 }
    726 
    727 /*  Undo the operation of SkExpand_8888, turning the argument back into
    728     a SkPMColor.
    729 */
    730 static inline SkPMColor SkCompact_8888(uint32_t c) {
    731     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    732             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    733             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    734             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    735 }
    736 
    737 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    738     but this routine just keeps the high 4bits of each component in the low
    739     4bits of the result (just like a newly expanded PMColor16).
    740 */
    741 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    742     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    743             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    744             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    745             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    746 }
    747 
    748 // takes two values and alternates them as part of a memset16
    749 // used for cheap 2x2 dithering when the colors are opaque
    750 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    751 
    752 ///////////////////////////////////////////////////////////////////////////////
    753 
    754 static inline int SkUpscale31To32(int value) {
    755     SkASSERT((unsigned)value <= 31);
    756     return value + (value >> 4);
    757 }
    758 
    759 static inline int SkBlend32(int src, int dst, int scale) {
    760     SkASSERT((unsigned)src <= 0xFF);
    761     SkASSERT((unsigned)dst <= 0xFF);
    762     SkASSERT((unsigned)scale <= 32);
    763     return dst + ((src - dst) * scale >> 5);
    764 }
    765 
    766 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    767                                      SkPMColor dst, uint16_t mask) {
    768     if (mask == 0) {
    769         return dst;
    770     }
    771 
    772     /*  We want all of these in 5bits, hence the shifts in case one of them
    773      *  (green) is 6bits.
    774      */
    775     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    776     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    777     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    778 
    779     // Now upscale them to 0..32, so we can use blend32
    780     maskR = SkUpscale31To32(maskR);
    781     maskG = SkUpscale31To32(maskG);
    782     maskB = SkUpscale31To32(maskB);
    783 
    784     // srcA has been upscaled to 256 before passed into this function
    785     maskR = maskR * srcA >> 8;
    786     maskG = maskG * srcA >> 8;
    787     maskB = maskB * srcA >> 8;
    788 
    789     int dstR = SkGetPackedR32(dst);
    790     int dstG = SkGetPackedG32(dst);
    791     int dstB = SkGetPackedB32(dst);
    792 
    793     // LCD blitting is only supported if the dst is known/required
    794     // to be opaque
    795     return SkPackARGB32(0xFF,
    796                         SkBlend32(srcR, dstR, maskR),
    797                         SkBlend32(srcG, dstG, maskG),
    798                         SkBlend32(srcB, dstB, maskB));
    799 }
    800 
    801 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    802                                            SkPMColor dst, uint16_t mask,
    803                                            SkPMColor opaqueDst) {
    804     if (mask == 0) {
    805         return dst;
    806     }
    807 
    808     if (0xFFFF == mask) {
    809         return opaqueDst;
    810     }
    811 
    812     /*  We want all of these in 5bits, hence the shifts in case one of them
    813      *  (green) is 6bits.
    814      */
    815     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    816     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    817     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    818 
    819     // Now upscale them to 0..32, so we can use blend32
    820     maskR = SkUpscale31To32(maskR);
    821     maskG = SkUpscale31To32(maskG);
    822     maskB = SkUpscale31To32(maskB);
    823 
    824     int dstR = SkGetPackedR32(dst);
    825     int dstG = SkGetPackedG32(dst);
    826     int dstB = SkGetPackedB32(dst);
    827 
    828     // LCD blitting is only supported if the dst is known/required
    829     // to be opaque
    830     return SkPackARGB32(0xFF,
    831                         SkBlend32(srcR, dstR, maskR),
    832                         SkBlend32(srcG, dstG, maskG),
    833                         SkBlend32(srcB, dstB, maskB));
    834 }
    835 
    836 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
    837                                   SkColor color, int width, SkPMColor) {
    838     int srcA = SkColorGetA(color);
    839     int srcR = SkColorGetR(color);
    840     int srcG = SkColorGetG(color);
    841     int srcB = SkColorGetB(color);
    842 
    843     srcA = SkAlpha255To256(srcA);
    844 
    845     for (int i = 0; i < width; i++) {
    846         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], src[i]);
    847     }
    848 }
    849 
    850 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
    851                                         SkColor color, int width,
    852                                         SkPMColor opaqueDst) {
    853     int srcR = SkColorGetR(color);
    854     int srcG = SkColorGetG(color);
    855     int srcB = SkColorGetB(color);
    856 
    857     for (int i = 0; i < width; i++) {
    858         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], src[i],
    859                                     opaqueDst);
    860     }
    861 }
    862 
    863 #endif
    864