Home | History | Annotate | Download | only in core
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #ifndef SkColorPriv_DEFINED
     11 #define SkColorPriv_DEFINED
     12 
     13 // turn this own for extra debug checking when blending onto 565
     14 #ifdef SK_DEBUG
     15     #define CHECK_FOR_565_OVERFLOW
     16 #endif
     17 
     18 #include "SkColor.h"
     19 #include "SkMath.h"
     20 
     21 ///@{
     22 /** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/
     23 #define SK_ITU_BT709_LUM_COEFF_R (0.2126f)
     24 #define SK_ITU_BT709_LUM_COEFF_G (0.7152f)
     25 #define SK_ITU_BT709_LUM_COEFF_B (0.0722f)
     26 ///@}
     27 
     28 ///@{
     29 /** A float value which specifies this channel's contribution to luminance. */
     30 #define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R
     31 #define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G
     32 #define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B
     33 ///@}
     34 
     35 /** Computes the luminance from the given r, g, and b in accordance with
     36     SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
     37 */
     38 static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) {
     39     //The following is
     40     //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
     41     //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
     42     return (r * 54 + g * 183 + b * 19) >> 8;
     43 }
     44 
     45 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
     46     byte into a scale value, so that we can say scale * value >> 8 instead of
     47     alpha * value / 255.
     48 
     49     In debugging, asserts that alpha is 0..255
     50 */
     51 static inline unsigned SkAlpha255To256(U8CPU alpha) {
     52     SkASSERT(SkToU8(alpha) == alpha);
     53     // this one assues that blending on top of an opaque dst keeps it that way
     54     // even though it is less accurate than a+(a>>7) for non-opaque dsts
     55     return alpha + 1;
     56 }
     57 
     58 /** Multiplify value by 0..256, and shift the result down 8
     59     (i.e. return (value * alpha256) >> 8)
     60  */
     61 #define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
     62 
     63 //  The caller may want negative values, so keep all params signed (int)
     64 //  so we don't accidentally slip into unsigned math and lose the sign
     65 //  extension when we shift (in SkAlphaMul)
     66 static inline int SkAlphaBlend(int src, int dst, int scale256) {
     67     SkASSERT((unsigned)scale256 <= 256);
     68     return dst + SkAlphaMul(src - dst, scale256);
     69 }
     70 
     71 /**
     72  *  Returns (src * alpha + dst * (255 - alpha)) / 255
     73  *
     74  *  This is more accurate than SkAlphaBlend, but slightly slower
     75  */
     76 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
     77     SkASSERT((int16_t)src == src);
     78     SkASSERT((int16_t)dst == dst);
     79     SkASSERT((uint8_t)alpha == alpha);
     80 
     81     int prod = SkMulS16(src - dst, alpha) + 128;
     82     prod = (prod + (prod >> 8)) >> 8;
     83     return dst + prod;
     84 }
     85 
     86 #define SK_R16_BITS     5
     87 #define SK_G16_BITS     6
     88 #define SK_B16_BITS     5
     89 
     90 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
     91 #define SK_G16_SHIFT    (SK_B16_BITS)
     92 #define SK_B16_SHIFT    0
     93 
     94 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
     95 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
     96 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
     97 
     98 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
     99 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
    100 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
    101 
    102 #define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
    103 #define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
    104 #define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
    105 
    106 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    107     SkASSERT(r <= SK_R16_MASK);
    108     SkASSERT(g <= SK_G16_MASK);
    109     SkASSERT(b <= SK_B16_MASK);
    110 
    111     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
    112 }
    113 
    114 #define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
    115 #define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
    116 #define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
    117 
    118 /** Expand the 16bit color into a 32bit value that can be scaled all at once
    119     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
    120 */
    121 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    122     SkASSERT(c == (uint16_t)c);
    123 
    124     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    125 }
    126 
    127 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    128     color value. The computation yields only 16bits of valid data, but we claim
    129     to return 32bits, so that the compiler won't generate extra instructions to
    130     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    131     up to the caller to safely ignore them.
    132 */
    133 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    134     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    135 }
    136 
    137 /** Scale the 16bit color value by the 0..256 scale parameter.
    138     The computation yields only 16bits of valid data, but we claim
    139     to return 32bits, so that the compiler won't generate extra instructions to
    140     "clean" the top 16bits.
    141 */
    142 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    143     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    144 }
    145 
    146 // this helper explicitly returns a clean 16bit value (but slower)
    147 #define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    148 
    149 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    150     The computation yields only 16bits of valid data, but we claim
    151     to return 32bits, so that the compiler won't generate extra instructions to
    152     "clean" the top 16bits.
    153 */
    154 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    155     SkASSERT((unsigned)srcScale <= 256);
    156 
    157     srcScale >>= 3;
    158 
    159     uint32_t src32 = SkExpand_rgb_16(src);
    160     uint32_t dst32 = SkExpand_rgb_16(dst);
    161     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    162 }
    163 
    164 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    165                                 int srcScale, int count) {
    166     SkASSERT(count > 0);
    167     SkASSERT((unsigned)srcScale <= 256);
    168 
    169     srcScale >>= 3;
    170 
    171     do {
    172         uint32_t src32 = SkExpand_rgb_16(*src++);
    173         uint32_t dst32 = SkExpand_rgb_16(*dst);
    174         *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    175     } while (--count > 0);
    176 }
    177 
    178 #ifdef SK_DEBUG
    179     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    180         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    181         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    182         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    183 
    184         return a + b;
    185     }
    186 #else
    187     #define SkRGB16Add(a, b)  ((a) + (b))
    188 #endif
    189 
    190 ///////////////////////////////////////////////////////////////////////////////
    191 
    192 #define SK_A32_BITS     8
    193 #define SK_R32_BITS     8
    194 #define SK_G32_BITS     8
    195 #define SK_B32_BITS     8
    196 
    197 #define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
    198 #define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
    199 #define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
    200 #define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)
    201 
    202 #define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
    203 #define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
    204 #define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
    205 #define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
    206 
    207 #define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
    208 #define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
    209 #define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
    210 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    211 
    212 #ifdef SK_DEBUG
    213     static inline void SkPMColorAssert(SkPMColor c) {
    214         unsigned a = SkGetPackedA32(c);
    215         unsigned r = SkGetPackedR32(c);
    216         unsigned g = SkGetPackedG32(c);
    217         unsigned b = SkGetPackedB32(c);
    218 
    219         SkA32Assert(a);
    220         SkASSERT(r <= a);
    221         SkASSERT(g <= a);
    222         SkASSERT(b <= a);
    223     }
    224 #else
    225     #define SkPMColorAssert(c)
    226 #endif
    227 
    228 /**
    229  *  Pack the components into a SkPMColor, checking (in the debug version) that
    230  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    231  */
    232 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    233     SkA32Assert(a);
    234     SkASSERT(r <= a);
    235     SkASSERT(g <= a);
    236     SkASSERT(b <= a);
    237 
    238     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    239            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    240 }
    241 
    242 /**
    243  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    244  * utility functions. Third parameter controls blending of the first two:
    245  *   (src, dst, 0) returns dst
    246  *   (src, dst, 0xFF) returns src
    247  *   srcWeight is [0..256], unlike SkFourByteInterp which takes [0..255]
    248  */
    249 static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst,
    250                                          unsigned scale) {
    251     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    252     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    253     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    254     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    255 
    256     return SkPackARGB32(a, r, g, b);
    257 }
    258 
    259 /**
    260  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    261  * utility functions. Third parameter controls blending of the first two:
    262  *   (src, dst, 0) returns dst
    263  *   (src, dst, 0xFF) returns src
    264  */
    265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    266                                          U8CPU srcWeight) {
    267     unsigned scale = SkAlpha255To256(srcWeight);
    268     return SkFourByteInterp256(src, dst, scale);
    269 }
    270 
    271 /**
    272  * 32b optimized version; currently appears to be 10% faster even on 64b
    273  * architectures than an equivalent 64b version and 30% faster than
    274  * SkFourByteInterp(). Third parameter controls blending of the first two:
    275  *   (src, dst, 0) returns dst
    276  *   (src, dst, 256) returns src
    277  * ** Does not match the results of SkFourByteInterp256() because we use
    278  * a more accurate scale computation!
    279  * TODO: migrate Skia function to using an accurate 255->266 alpha
    280  * conversion.
    281  */
    282 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src,
    283                                                 SkPMColor dst,
    284                                                 unsigned scale) {
    285     SkASSERT(scale <= 256);
    286 
    287     // Reorders ARGB to AG-RB in order to reduce the number of operations.
    288     const uint32_t mask = 0xFF00FF;
    289     uint32_t src_rb = src & mask;
    290     uint32_t src_ag = (src >> 8) & mask;
    291     uint32_t dst_rb = dst & mask;
    292     uint32_t dst_ag = (dst >> 8) & mask;
    293 
    294     uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    295     uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    296 
    297     return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
    298 }
    299 
    300 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    301                                              SkPMColor dst,
    302                                              U8CPU srcWeight) {
    303     SkASSERT(srcWeight <= 255);
    304     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    305     // scale = srcWeight + 1, but 7% slower
    306     return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
    307 }
    308 
    309 /**
    310  *  Same as SkPackARGB32, but this version guarantees to not check that the
    311  *  values are premultiplied in the debug version.
    312  */
    313 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    314     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    315            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    316 }
    317 
    318 static inline
    319 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    320     SkA32Assert(a);
    321     SkR32Assert(r);
    322     SkG32Assert(g);
    323     SkB32Assert(b);
    324 
    325     if (a != 255) {
    326         r = SkMulDiv255Round(r, a);
    327         g = SkMulDiv255Round(g, a);
    328         b = SkMulDiv255Round(b, a);
    329     }
    330     return SkPackARGB32(a, r, g, b);
    331 }
    332 
    333 SK_API extern const uint32_t gMask_00FF00FF;
    334 
    335 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    336     uint32_t mask = gMask_00FF00FF;
    337 
    338     uint32_t rb = ((c & mask) * scale) >> 8;
    339     uint32_t ag = ((c >> 8) & mask) * scale;
    340     return (rb & mask) | (ag & ~mask);
    341 }
    342 
    343 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    344     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    345 }
    346 
    347 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    348     SkASSERT((unsigned)aa <= 255);
    349 
    350     unsigned src_scale = SkAlpha255To256(aa);
    351     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    352 
    353     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    354 }
    355 
    356 ////////////////////////////////////////////////////////////////////////////////////////////
    357 // Convert a 32bit pixel to a 16bit pixel (no dither)
    358 
    359 #define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
    360 #define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
    361 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    362 
    363 #ifdef SK_DEBUG
    364     static inline unsigned SkR32ToR16(unsigned r) {
    365         SkR32Assert(r);
    366         return SkR32ToR16_MACRO(r);
    367     }
    368     static inline unsigned SkG32ToG16(unsigned g) {
    369         SkG32Assert(g);
    370         return SkG32ToG16_MACRO(g);
    371     }
    372     static inline unsigned SkB32ToB16(unsigned b) {
    373         SkB32Assert(b);
    374         return SkB32ToB16_MACRO(b);
    375     }
    376 #else
    377     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    378     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    379     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    380 #endif
    381 
    382 #define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
    383 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
    384 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    385 
    386 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    387     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    388     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    389     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    390     return r | g | b;
    391 }
    392 
    393 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    394     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    395             (SkG32ToG16(g) << SK_G16_SHIFT) |
    396             (SkB32ToB16(b) << SK_B16_SHIFT);
    397 }
    398 
    399 #define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    400 
    401 /////////////////////////////////////////////////////////////////////////////////////////
    402 // Fast dither from 32->16
    403 
    404 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    405 
    406 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    407     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    408     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    409     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    410 
    411     return SkPackRGB16(r, g, b);
    412 }
    413 
    414 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    415     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    416 }
    417 
    418 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    419     It is now suitable for combining with a scaled expanded_rgb_16 color
    420     as in SkSrcOver32To16().
    421     We must do this 565 high-bit replication, in order for the subsequent add
    422     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    423     possible to overflow.
    424 */
    425 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    426     unsigned sr = SkPacked32ToR16(c);
    427     unsigned sg = SkPacked32ToG16(c);
    428     unsigned sb = SkPacked32ToB16(c);
    429 
    430     sr = (sr << 5) | sr;
    431     sg = (sg << 5) | (sg >> 1);
    432     sb = (sb << 5) | sb;
    433     return (sr << 11) | (sg << 21) | (sb << 0);
    434 }
    435 
    436 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    437     (with dirt in the high 16bits, so caller beware).
    438 */
    439 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    440     unsigned sr = SkGetPackedR32(src);
    441     unsigned sg = SkGetPackedG32(src);
    442     unsigned sb = SkGetPackedB32(src);
    443 
    444     unsigned dr = SkGetPackedR16(dst);
    445     unsigned dg = SkGetPackedG16(dst);
    446     unsigned db = SkGetPackedB16(dst);
    447 
    448     unsigned isa = 255 - SkGetPackedA32(src);
    449 
    450     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    451     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    452     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    453 
    454     return SkPackRGB16(dr, dg, db);
    455 }
    456 
    457 ////////////////////////////////////////////////////////////////////////////////////////////
    458 // Convert a 16bit pixel to a 32bit pixel
    459 
    460 static inline unsigned SkR16ToR32(unsigned r) {
    461     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    462 }
    463 
    464 static inline unsigned SkG16ToG32(unsigned g) {
    465     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    466 }
    467 
    468 static inline unsigned SkB16ToB32(unsigned b) {
    469     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    470 }
    471 
    472 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
    473 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
    474 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    475 
    476 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    477     SkASSERT(src == SkToU16(src));
    478 
    479     unsigned    r = SkPacked16ToR32(src);
    480     unsigned    g = SkPacked16ToG32(src);
    481     unsigned    b = SkPacked16ToB32(src);
    482 
    483     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    484     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    485     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    486 
    487     return SkPackARGB32(0xFF, r, g, b);
    488 }
    489 
    490 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    491 static inline SkColor SkPixel16ToColor(U16CPU src) {
    492     SkASSERT(src == SkToU16(src));
    493 
    494     unsigned    r = SkPacked16ToR32(src);
    495     unsigned    g = SkPacked16ToG32(src);
    496     unsigned    b = SkPacked16ToB32(src);
    497 
    498     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    499     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    500     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    501 
    502     return SkColorSetRGB(r, g, b);
    503 }
    504 
    505 ///////////////////////////////////////////////////////////////////////////////
    506 
    507 typedef uint16_t SkPMColor16;
    508 
    509 // Put in OpenGL order (r g b a)
    510 #define SK_A4444_SHIFT    0
    511 #define SK_R4444_SHIFT    12
    512 #define SK_G4444_SHIFT    8
    513 #define SK_B4444_SHIFT    4
    514 
    515 #define SkA32To4444(a)  ((unsigned)(a) >> 4)
    516 #define SkR32To4444(r)  ((unsigned)(r) >> 4)
    517 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
    518 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
    519 
    520 static inline U8CPU SkReplicateNibble(unsigned nib) {
    521     SkASSERT(nib <= 0xF);
    522     return (nib << 4) | nib;
    523 }
    524 
    525 #define SkA4444ToA32(a)     SkReplicateNibble(a)
    526 #define SkR4444ToR32(r)     SkReplicateNibble(r)
    527 #define SkG4444ToG32(g)     SkReplicateNibble(g)
    528 #define SkB4444ToB32(b)     SkReplicateNibble(b)
    529 
    530 #define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
    531 #define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
    532 #define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
    533 #define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
    534 
    535 #define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
    536 #define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
    537 #define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
    538 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    539 
    540 #ifdef SK_DEBUG
    541 static inline void SkPMColor16Assert(U16CPU c) {
    542     unsigned a = SkGetPackedA4444(c);
    543     unsigned r = SkGetPackedR4444(c);
    544     unsigned g = SkGetPackedG4444(c);
    545     unsigned b = SkGetPackedB4444(c);
    546 
    547     SkASSERT(a <= 0xF);
    548     SkASSERT(r <= a);
    549     SkASSERT(g <= a);
    550     SkASSERT(b <= a);
    551 }
    552 #else
    553 #define SkPMColor16Assert(c)
    554 #endif
    555 
    556 static inline unsigned SkAlpha15To16(unsigned a) {
    557     SkASSERT(a <= 0xF);
    558     return a + (a >> 3);
    559 }
    560 
    561 #ifdef SK_DEBUG
    562     static inline int SkAlphaMul4(int value, int scale) {
    563         SkASSERT((unsigned)scale <= 0x10);
    564         return value * scale >> 4;
    565     }
    566 #else
    567     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    568 #endif
    569 
    570 static inline unsigned SkR4444ToR565(unsigned r) {
    571     SkASSERT(r <= 0xF);
    572     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    573 }
    574 
    575 static inline unsigned SkG4444ToG565(unsigned g) {
    576     SkASSERT(g <= 0xF);
    577     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    578 }
    579 
    580 static inline unsigned SkB4444ToB565(unsigned b) {
    581     SkASSERT(b <= 0xF);
    582     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    583 }
    584 
    585 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    586                                          unsigned g, unsigned b) {
    587     SkASSERT(a <= 0xF);
    588     SkASSERT(r <= a);
    589     SkASSERT(g <= a);
    590     SkASSERT(b <= a);
    591 
    592     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    593                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    594 }
    595 
    596 extern const uint16_t gMask_0F0F;
    597 
    598 static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    599     SkASSERT(scale <= 16);
    600 
    601     const unsigned mask = 0xF0F;    //gMask_0F0F;
    602 
    603 #if 0
    604     unsigned rb = ((c & mask) * scale) >> 4;
    605     unsigned ag = ((c >> 4) & mask) * scale;
    606     return (rb & mask) | (ag & ~mask);
    607 #else
    608     c = (c & mask) | ((c & (mask << 4)) << 12);
    609     c = c * scale >> 4;
    610     return (c & mask) | ((c >> 12) & (mask << 4));
    611 #endif
    612 }
    613 
    614 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    615     once by a value up to 16. Used in conjunction with SkCompact_4444.
    616 */
    617 static inline uint32_t SkExpand_4444(U16CPU c) {
    618     SkASSERT(c == (uint16_t)c);
    619 
    620     const unsigned mask = 0xF0F;    //gMask_0F0F;
    621     return (c & mask) | ((c & ~mask) << 12);
    622 }
    623 
    624 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    625     NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    626     It does this for speed, since if it is being written directly to 16bits of
    627     memory, the top 16bits will be ignored. Casting the result to uint16_t here
    628     would add 2 more instructions, slow us down. It is up to the caller to
    629     perform the cast if needed.
    630 */
    631 static inline U16CPU SkCompact_4444(uint32_t c) {
    632     const unsigned mask = 0xF0F;    //gMask_0F0F;
    633     return (c & mask) | ((c >> 12) & ~mask);
    634 }
    635 
    636 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    637     unsigned sa = SkGetPackedA4444(s);
    638     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    639     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    640     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    641 
    642     // To avoid overflow, we have to clear the low bit of the synthetic sg
    643     // if the src alpha is <= 7.
    644     // to see why, try blending 0x4444 on top of 565-white and watch green
    645     // overflow (sum == 64)
    646     sg &= ~(~(sa >> 3) & 1);
    647 
    648     unsigned scale = SkAlpha15To16(15 - sa);
    649     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    650     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    651     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    652 
    653 #if 0
    654     if (sg + dg > 63) {
    655         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    656     }
    657 #endif
    658     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    659 }
    660 
    661 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    662     SkASSERT((unsigned)scale16 <= 16);
    663 
    664     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    665 }
    666 
    667 static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    668     SkASSERT((unsigned)scale16 <= 16);
    669 
    670     uint32_t src32 = SkExpand_4444(src) * scale16;
    671     // the scaled srcAlpha is the bottom byte
    672 #ifdef SK_DEBUG
    673     {
    674         unsigned srcA = SkGetPackedA4444(src) * scale16;
    675         SkASSERT(srcA == (src32 & 0xFF));
    676     }
    677 #endif
    678     unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    679     uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    680     return SkCompact_4444((src32 + dst32) >> 4);
    681 }
    682 
    683 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    684     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    685                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    686                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    687                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    688     return d | (d << 4);
    689 }
    690 
    691 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    692     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    693     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    694     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    695     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    696 }
    697 
    698 // cheap 2x2 dither
    699 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    700                                                U8CPU g, U8CPU b) {
    701     // to ensure that we stay a legal premultiplied color, we take the max()
    702     // of the truncated and dithered alpha values. If we didn't, cases like
    703     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    704     // which is not legal premultiplied, since a < color
    705     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    706     a = SkMax32(a >> 4, dithered_a);
    707     // these we just dither in place
    708     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    709     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    710     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    711 
    712     return SkPackARGB4444(a, r, g, b);
    713 }
    714 
    715 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    716     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    717                                 SkGetPackedG32(c), SkGetPackedB32(c));
    718 }
    719 
    720 /*  Assumes 16bit is in standard RGBA order.
    721     Transforms a normal ARGB_8888 into the same byte order as
    722     expanded ARGB_4444, but keeps each component 8bits
    723 */
    724 static inline uint32_t SkExpand_8888(SkPMColor c) {
    725     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    726             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    727             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    728             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    729 }
    730 
    731 /*  Undo the operation of SkExpand_8888, turning the argument back into
    732     a SkPMColor.
    733 */
    734 static inline SkPMColor SkCompact_8888(uint32_t c) {
    735     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    736             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    737             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    738             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    739 }
    740 
    741 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    742     but this routine just keeps the high 4bits of each component in the low
    743     4bits of the result (just like a newly expanded PMColor16).
    744 */
    745 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    746     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    747             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    748             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    749             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    750 }
    751 
    752 // takes two values and alternates them as part of a memset16
    753 // used for cheap 2x2 dithering when the colors are opaque
    754 void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    755 
    756 ///////////////////////////////////////////////////////////////////////////////
    757 
    758 static inline int SkUpscale31To32(int value) {
    759     SkASSERT((unsigned)value <= 31);
    760     return value + (value >> 4);
    761 }
    762 
    763 static inline int SkBlend32(int src, int dst, int scale) {
    764     SkASSERT((unsigned)src <= 0xFF);
    765     SkASSERT((unsigned)dst <= 0xFF);
    766     SkASSERT((unsigned)scale <= 32);
    767     return dst + ((src - dst) * scale >> 5);
    768 }
    769 
    770 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    771                                      SkPMColor dst, uint16_t mask) {
    772     if (mask == 0) {
    773         return dst;
    774     }
    775 
    776     /*  We want all of these in 5bits, hence the shifts in case one of them
    777      *  (green) is 6bits.
    778      */
    779     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    780     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    781     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    782 
    783     // Now upscale them to 0..32, so we can use blend32
    784     maskR = SkUpscale31To32(maskR);
    785     maskG = SkUpscale31To32(maskG);
    786     maskB = SkUpscale31To32(maskB);
    787 
    788     // srcA has been upscaled to 256 before passed into this function
    789     maskR = maskR * srcA >> 8;
    790     maskG = maskG * srcA >> 8;
    791     maskB = maskB * srcA >> 8;
    792 
    793     int dstR = SkGetPackedR32(dst);
    794     int dstG = SkGetPackedG32(dst);
    795     int dstB = SkGetPackedB32(dst);
    796 
    797     // LCD blitting is only supported if the dst is known/required
    798     // to be opaque
    799     return SkPackARGB32(0xFF,
    800                         SkBlend32(srcR, dstR, maskR),
    801                         SkBlend32(srcG, dstG, maskG),
    802                         SkBlend32(srcB, dstB, maskB));
    803 }
    804 
    805 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    806                                            SkPMColor dst, uint16_t mask,
    807                                            SkPMColor opaqueDst) {
    808     if (mask == 0) {
    809         return dst;
    810     }
    811 
    812     if (0xFFFF == mask) {
    813         return opaqueDst;
    814     }
    815 
    816     /*  We want all of these in 5bits, hence the shifts in case one of them
    817      *  (green) is 6bits.
    818      */
    819     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    820     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    821     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    822 
    823     // Now upscale them to 0..32, so we can use blend32
    824     maskR = SkUpscale31To32(maskR);
    825     maskG = SkUpscale31To32(maskG);
    826     maskB = SkUpscale31To32(maskB);
    827 
    828     int dstR = SkGetPackedR32(dst);
    829     int dstG = SkGetPackedG32(dst);
    830     int dstB = SkGetPackedB32(dst);
    831 
    832     // LCD blitting is only supported if the dst is known/required
    833     // to be opaque
    834     return SkPackARGB32(0xFF,
    835                         SkBlend32(srcR, dstR, maskR),
    836                         SkBlend32(srcG, dstG, maskG),
    837                         SkBlend32(srcB, dstB, maskB));
    838 }
    839 
    840 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t mask[],
    841                                   SkColor src, int width, SkPMColor) {
    842     int srcA = SkColorGetA(src);
    843     int srcR = SkColorGetR(src);
    844     int srcG = SkColorGetG(src);
    845     int srcB = SkColorGetB(src);
    846 
    847     srcA = SkAlpha255To256(srcA);
    848 
    849     for (int i = 0; i < width; i++) {
    850         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
    851     }
    852 }
    853 
    854 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t mask[],
    855                                         SkColor src, int width,
    856                                         SkPMColor opaqueDst) {
    857     int srcR = SkColorGetR(src);
    858     int srcG = SkColorGetG(src);
    859     int srcB = SkColorGetB(src);
    860 
    861     for (int i = 0; i < width; i++) {
    862         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
    863                                     opaqueDst);
    864     }
    865 }
    866 
    867 #endif
    868