Home | History | Annotate | Download | only in core
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #ifndef SkColorPriv_DEFINED
     11 #define SkColorPriv_DEFINED
     12 
// turn this on for extra debug checking when blending onto 565
     14 #ifdef SK_DEBUG
     15     #define CHECK_FOR_565_OVERFLOW
     16 #endif
     17 
     18 #include "SkColor.h"
     19 #include "SkMath.h"
     20 
     21 /** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
     22     byte into a scale value, so that we can say scale * value >> 8 instead of
     23     alpha * value / 255.
     24 
     25     In debugging, asserts that alpha is 0..255
     26 */
     27 static inline unsigned SkAlpha255To256(U8CPU alpha) {
     28     SkASSERT(SkToU8(alpha) == alpha);
     29     // this one assues that blending on top of an opaque dst keeps it that way
     30     // even though it is less accurate than a+(a>>7) for non-opaque dsts
     31     return alpha + 1;
     32 }
     33 
/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
 */
#define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)
     38 
     39 //  The caller may want negative values, so keep all params signed (int)
     40 //  so we don't accidentally slip into unsigned math and lose the sign
     41 //  extension when we shift (in SkAlphaMul)
     42 static inline int SkAlphaBlend(int src, int dst, int scale256) {
     43     SkASSERT((unsigned)scale256 <= 256);
     44     return dst + SkAlphaMul(src - dst, scale256);
     45 }
     46 
     47 /**
     48  *  Returns (src * alpha + dst * (255 - alpha)) / 255
     49  *
     50  *  This is more accurate than SkAlphaBlend, but slightly slower
     51  */
     52 static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
     53     SkASSERT((int16_t)src == src);
     54     SkASSERT((int16_t)dst == dst);
     55     SkASSERT((uint8_t)alpha == alpha);
     56 
     57     int prod = SkMulS16(src - dst, alpha) + 128;
     58     prod = (prod + (prod >> 8)) >> 8;
     59     return dst + prod;
     60 }
     61 
// Number of bits used by each component of a packed 16-bit (565) color.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each component: blue in the low bits, green above it,
// red on top.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Right-aligned (unshifted) mask covering each component's bits.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one component from a packed 16-bit color, right-aligned.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Assert (via SkASSERT) that a right-aligned component fits in its field.
#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)
     81 
     82 static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
     83     SkASSERT(r <= SK_R16_MASK);
     84     SkASSERT(g <= SK_G16_MASK);
     85     SkASSERT(b <= SK_B16_MASK);
     86 
     87     return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
     88 }
     89 
// Each component's mask shifted to its position inside a packed 16-bit color.
#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)
     93 
     94 /** Expand the 16bit color into a 32bit value that can be scaled all at once
     95     by a value up to 32. Used in conjunction with SkCompact_rgb_16.
     96 */
     97 static inline uint32_t SkExpand_rgb_16(U16CPU c) {
     98     SkASSERT(c == (uint16_t)c);
     99 
    100     return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
    101 }
    102 
    103 /** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    104     color value. The computation yields only 16bits of valid data, but we claim
    105     to return 32bits, so that the compiler won't generate extra instructions to
    106     "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    107     up to the caller to safely ignore them.
    108 */
    109 static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    110     return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
    111 }
    112 
    113 /** Scale the 16bit color value by the 0..256 scale parameter.
    114     The computation yields only 16bits of valid data, but we claim
    115     to return 32bits, so that the compiler won't generate extra instructions to
    116     "clean" the top 16bits.
    117 */
    118 static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    119     return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
    120 }
    121 
// this helper explicitly returns a clean 16bit value (but slower): the cast
// to uint16_t drops whatever SkAlphaMulRGB16 left in the top 16 bits
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)
    124 
    125 /** Blend src and dst 16bit colors by the 0..256 scale parameter.
    126     The computation yields only 16bits of valid data, but we claim
    127     to return 32bits, so that the compiler won't generate extra instructions to
    128     "clean" the top 16bits.
    129 */
    130 static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    131     SkASSERT((unsigned)srcScale <= 256);
    132 
    133     srcScale >>= 3;
    134 
    135     uint32_t src32 = SkExpand_rgb_16(src);
    136     uint32_t dst32 = SkExpand_rgb_16(dst);
    137     return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    138 }
    139 
    140 static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
    141                                 int srcScale, int count) {
    142     SkASSERT(count > 0);
    143     SkASSERT((unsigned)srcScale <= 256);
    144 
    145     srcScale >>= 3;
    146 
    147     do {
    148         uint32_t src32 = SkExpand_rgb_16(*src++);
    149         uint32_t dst32 = SkExpand_rgb_16(*dst);
    150         *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    151     } while (--count > 0);
    152 }
    153 
    154 #ifdef SK_DEBUG
    155     static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
    156         SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
    157         SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
    158         SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
    159 
    160         return a + b;
    161     }
    162 #else
    163     #define SkRGB16Add(a, b)  ((a) + (b))
    164 #endif
    165 
    166 ///////////////////////////////////////////////////////////////////////////////
    167 
// Number of bits used by each component of a packed 32-bit color.
#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

// Right-aligned (unshifted) mask covering each component's bits.
#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

// Extract one component from a packed 32-bit color: shift it up into the top
// byte, then back down to the bottom byte. The SK_*32_SHIFT positions are
// not defined in this part of the file and must be provided elsewhere.
#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)

// Assert (via SkASSERT) that a right-aligned component fits in 8 bits.
#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
    187 
    188 #ifdef SK_DEBUG
    189     static inline void SkPMColorAssert(SkPMColor c) {
    190         unsigned a = SkGetPackedA32(c);
    191         unsigned r = SkGetPackedR32(c);
    192         unsigned g = SkGetPackedG32(c);
    193         unsigned b = SkGetPackedB32(c);
    194 
    195         SkA32Assert(a);
    196         SkASSERT(r <= a);
    197         SkASSERT(g <= a);
    198         SkASSERT(b <= a);
    199     }
    200 #else
    201     #define SkPMColorAssert(c)
    202 #endif
    203 
    204 /**
    205  *  Pack the components into a SkPMColor, checking (in the debug version) that
    206  *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
    207  */
    208 static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    209     SkA32Assert(a);
    210     SkASSERT(r <= a);
    211     SkASSERT(g <= a);
    212     SkASSERT(b <= a);
    213 
    214     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    215            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    216 }
    217 
    218 /**
    219  * Abstract 4-byte interpolation, implemented on top of SkPMColor
    220  * utility functions. Third parameter controls blending of the first two:
    221  *   (src, dst, 0) returns dst
    222  *   (src, dst, 0xFF) returns src
    223  */
    224 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
    225                                          U8CPU srcWeight) {
    226     unsigned scale = SkAlpha255To256(srcWeight);
    227 
    228     unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    229     unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    230     unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    231     unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);
    232 
    233     return SkPackARGB32(a, r, g, b);
    234 }
    235 
    236 /**
    237  * 32b optimized version; currently appears to be 10% faster even on 64b
    238  * architectures than an equivalent 64b version and 30% faster than
    239  * SkFourByteInterp(). Third parameter controls blending of the first two:
    240  *   (src, dst, 0) returns dst
    241  *   (src, dst, 0xFF) returns src
    242  * ** Does not match the results of SkFourByteInterp() because we use
    243  * a more accurate scale computation!
    244  * TODO: migrate Skia function to using an accurate 255->266 alpha
    245  * conversion.
    246  */
    247 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
    248                                              SkPMColor dst,
    249                                              U8CPU srcWeight) {
    250     SkASSERT(srcWeight < 256);
    251 
    252     // Reorders ARGB to AG-RB in order to reduce the number of operations.
    253     const uint32_t mask = 0xFF00FF;
    254     uint32_t src_rb = src & mask;
    255     uint32_t src_ag = (src >> 8) & mask;
    256     uint32_t dst_rb = dst & mask;
    257     uint32_t dst_ag = (dst >> 8) & mask;
    258 
    259     // scale = srcWeight + (srcWeight >> 7) is more accurate than
    260     // scale = srcWeight + 1, but 7% slower
    261     int scale = srcWeight + (srcWeight >> 7);
    262 
    263     uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    264     uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
    265 
    266     return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
    267 }
    268 
    269 /**
    270  *  Same as SkPackARGB32, but this version guarantees to not check that the
    271  *  values are premultiplied in the debug version.
    272  */
    273 static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    274     return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
    275            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
    276 }
    277 
    278 static inline
    279 SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    280     SkA32Assert(a);
    281     SkA32Assert(r);
    282     SkA32Assert(g);
    283     SkA32Assert(b);
    284 
    285     if (a != 255) {
    286         r = SkMulDiv255Round(r, a);
    287         g = SkMulDiv255Round(g, a);
    288         b = SkMulDiv255Round(b, a);
    289     }
    290     return SkPackARGB32(a, r, g, b);
    291 }
    292 
// Mask constant defined elsewhere (presumably 0x00FF00FF, going by its name
// -- confirm at the definition). SkAlphaMulQ loads its mask from it.
SK_API extern const uint32_t gMask_00FF00FF;
    294 
    295 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    296     uint32_t mask = gMask_00FF00FF;
    297 //    uint32_t mask = 0xFF00FF;
    298 
    299     uint32_t rb = ((c & mask) * scale) >> 8;
    300     uint32_t ag = ((c >> 8) & mask) * scale;
    301     return (rb & mask) | (ag & ~mask);
    302 }
    303 
    304 static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    305     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
    306 }
    307 
    308 static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    309     SkASSERT((unsigned)aa <= 255);
    310 
    311     unsigned src_scale = SkAlpha255To256(aa);
    312     unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
    313 
    314     return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
    315 }
    316 
    317 ////////////////////////////////////////////////////////////////////////////////////////////
    318 // Convert a 32bit pixel to a 16bit pixel (no dither)
    319 
// Reduce an 8-bit component to its 565 width by shifting out the low bits
// (truncation, no rounding).
#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
    323 
    324 #ifdef SK_DEBUG
    325     static inline unsigned SkR32ToR16(unsigned r) {
    326         SkR32Assert(r);
    327         return SkR32ToR16_MACRO(r);
    328     }
    329     static inline unsigned SkG32ToG16(unsigned g) {
    330         SkG32Assert(g);
    331         return SkG32ToG16_MACRO(g);
    332     }
    333     static inline unsigned SkB32ToB16(unsigned b) {
    334         SkB32Assert(b);
    335         return SkB32ToB16_MACRO(b);
    336     }
    337 #else
    338     #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    339     #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    340     #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
    341 #endif
    342 
// Pull one component straight out of a packed 32-bit color at 565 precision:
// shift so that the component's top bits land at bit 0, then mask.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
    346 
    347 static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    348     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    349     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    350     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    351     return r | g | b;
    352 }
    353 
    354 static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    355     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
    356             (SkG32ToG16(g) << SK_G16_SHIFT) |
    357             (SkB32ToB16(b) << SK_B16_SHIFT);
    358 }
    359 
// Same as SkPixel32ToPixel16, but passes the result through SkToU16
#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))
    361 
    362 /////////////////////////////////////////////////////////////////////////////////////////
    363 // Fast dither from 32->16
    364 
// Nonzero when the low bits of x and y differ, i.e. on every other pixel in
// a checkerboard pattern
#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
    366 
    367 static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    368     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    369     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    370     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
    371 
    372     return SkPackRGB16(r, g, b);
    373 }
    374 
    375 static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    376     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
    377 }
    378 
    379 /*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    380     It is now suitable for combining with a scaled expanded_rgb_16 color
    381     as in SkSrcOver32To16().
    382     We must do this 565 high-bit replication, in order for the subsequent add
    383     to saturate properly (and not overflow). If we take the 8 bits as is, it is
    384     possible to overflow.
    385 */
    386 static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    387     unsigned sr = SkPacked32ToR16(c);
    388     unsigned sg = SkPacked32ToG16(c);
    389     unsigned sb = SkPacked32ToB16(c);
    390 
    391     sr = (sr << 5) | sr;
    392     sg = (sg << 5) | (sg >> 1);
    393     sb = (sb << 5) | sb;
    394     return (sr << 11) | (sg << 21) | (sb << 0);
    395 }
    396 
    397 /*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    398     (with dirt in the high 16bits, so caller beware).
    399 */
    400 static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    401     unsigned sr = SkGetPackedR32(src);
    402     unsigned sg = SkGetPackedG32(src);
    403     unsigned sb = SkGetPackedB32(src);
    404 
    405     unsigned dr = SkGetPackedR16(dst);
    406     unsigned dg = SkGetPackedG16(dst);
    407     unsigned db = SkGetPackedB16(dst);
    408 
    409     unsigned isa = 255 - SkGetPackedA32(src);
    410 
    411     dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    412     dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    413     db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
    414 
    415     return SkPackRGB16(dr, dg, db);
    416 }
    417 
    418 ////////////////////////////////////////////////////////////////////////////////////////////
    419 // Convert a 16bit pixel to a 32bit pixel
    420 
    421 static inline unsigned SkR16ToR32(unsigned r) {
    422     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
    423 }
    424 
    425 static inline unsigned SkG16ToG32(unsigned g) {
    426     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
    427 }
    428 
    429 static inline unsigned SkB16ToB32(unsigned b) {
    430     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
    431 }
    432 
// Extract one component from a packed 16-bit color and widen it to 8 bits
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
    436 
    437 static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    438     SkASSERT(src == SkToU16(src));
    439 
    440     unsigned    r = SkPacked16ToR32(src);
    441     unsigned    g = SkPacked16ToG32(src);
    442     unsigned    b = SkPacked16ToB32(src);
    443 
    444     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    445     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    446     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    447 
    448     return SkPackARGB32(0xFF, r, g, b);
    449 }
    450 
    451 // similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
    452 static inline SkColor SkPixel16ToColor(U16CPU src) {
    453     SkASSERT(src == SkToU16(src));
    454 
    455     unsigned    r = SkPacked16ToR32(src);
    456     unsigned    g = SkPacked16ToG32(src);
    457     unsigned    b = SkPacked16ToB32(src);
    458 
    459     SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    460     SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    461     SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
    462 
    463     return SkColorSetRGB(r, g, b);
    464 }
    465 
    466 ///////////////////////////////////////////////////////////////////////////////
    467 
// 16-bit color with 4 bits per component (the "4444" format used by the
// helpers below, whose asserts treat it as premultiplied)
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a): red occupies the top nibble, alpha the
// bottom one
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

// Narrow an 8-bit component to 4 bits by dropping its low nibble
#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)
    480 
    481 static inline U8CPU SkReplicateNibble(unsigned nib) {
    482     SkASSERT(nib <= 0xF);
    483     return (nib << 4) | nib;
    484 }
    485 
// Widen a right-aligned 4-bit component to 8 bits
#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

// Extract one right-aligned 4-bit component from a packed 4444 color
#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

// Extract one component from a packed 4444 color and widen it to 8 bits
#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
    500 
    501 #ifdef SK_DEBUG
    502 static inline void SkPMColor16Assert(U16CPU c) {
    503     unsigned a = SkGetPackedA4444(c);
    504     unsigned r = SkGetPackedR4444(c);
    505     unsigned g = SkGetPackedG4444(c);
    506     unsigned b = SkGetPackedB4444(c);
    507 
    508     SkASSERT(a <= 0xF);
    509     SkASSERT(r <= a);
    510     SkASSERT(g <= a);
    511     SkASSERT(b <= a);
    512 }
    513 #else
    514 #define SkPMColor16Assert(c)
    515 #endif
    516 
    517 static inline unsigned SkAlpha15To16(unsigned a) {
    518     SkASSERT(a <= 0xF);
    519     return a + (a >> 3);
    520 }
    521 
    522 #ifdef SK_DEBUG
    523     static inline int SkAlphaMul4(int value, int scale) {
    524         SkASSERT((unsigned)scale <= 0x10);
    525         return value * scale >> 4;
    526     }
    527 #else
    528     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
    529 #endif
    530 
    531 static inline unsigned SkR4444ToR565(unsigned r) {
    532     SkASSERT(r <= 0xF);
    533     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
    534 }
    535 
    536 static inline unsigned SkG4444ToG565(unsigned g) {
    537     SkASSERT(g <= 0xF);
    538     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
    539 }
    540 
    541 static inline unsigned SkB4444ToB565(unsigned b) {
    542     SkASSERT(b <= 0xF);
    543     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
    544 }
    545 
    546 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
    547                                          unsigned g, unsigned b) {
    548     SkASSERT(a <= 0xF);
    549     SkASSERT(r <= a);
    550     SkASSERT(g <= a);
    551     SkASSERT(b <= a);
    552 
    553     return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
    554                          (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
    555 }
    556 
// 16-bit mask constant defined elsewhere (presumably 0x0F0F, going by its
// name -- confirm at the definition). The 4444 helpers in this part of the
// file use the literal 0xF0F and mention this name only in comments.
extern const uint16_t gMask_0F0F;
    558 
    559 static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    560     SkASSERT(scale <= 16);
    561 
    562     const unsigned mask = 0xF0F;    //gMask_0F0F;
    563 
    564 #if 0
    565     unsigned rb = ((c & mask) * scale) >> 4;
    566     unsigned ag = ((c >> 4) & mask) * scale;
    567     return (rb & mask) | (ag & ~mask);
    568 #else
    569     c = (c & mask) | ((c & (mask << 4)) << 12);
    570     c = c * scale >> 4;
    571     return (c & mask) | ((c >> 12) & (mask << 4));
    572 #endif
    573 }
    574 
    575 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    576     once by a value up to 16. Used in conjunction with SkCompact_4444.
    577 */
    578 static inline uint32_t SkExpand_4444(U16CPU c) {
    579     SkASSERT(c == (uint16_t)c);
    580 
    581     const unsigned mask = 0xF0F;    //gMask_0F0F;
    582     return (c & mask) | ((c & ~mask) << 12);
    583 }
    584 
    585 /** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    586     NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    587     It does this for speed, since if it is being written directly to 16bits of
    588     memory, the top 16bits will be ignored. Casting the result to uint16_t here
    589     would add 2 more instructions, slow us down. It is up to the caller to
    590     perform the cast if needed.
    591 */
    592 static inline U16CPU SkCompact_4444(uint32_t c) {
    593     const unsigned mask = 0xF0F;    //gMask_0F0F;
    594     return (c & mask) | ((c >> 12) & ~mask);
    595 }
    596 
    597 static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    598     unsigned sa = SkGetPackedA4444(s);
    599     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    600     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    601     unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
    602 
    603     // To avoid overflow, we have to clear the low bit of the synthetic sg
    604     // if the src alpha is <= 7.
    605     // to see why, try blending 0x4444 on top of 565-white and watch green
    606     // overflow (sum == 64)
    607     sg &= ~(~(sa >> 3) & 1);
    608 
    609     unsigned scale = SkAlpha15To16(15 - sa);
    610     unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    611     unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    612     unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
    613 
    614 #if 0
    615     if (sg + dg > 63) {
    616         SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    617     }
    618 #endif
    619     return SkPackRGB16(sr + dr, sg + dg, sb + db);
    620 }
    621 
    622 static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    623     SkASSERT((unsigned)scale16 <= 16);
    624 
    625     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
    626 }
    627 
    628 static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    629     SkASSERT((unsigned)scale16 <= 16);
    630 
    631     uint32_t src32 = SkExpand_4444(src) * scale16;
    632     // the scaled srcAlpha is the bottom byte
    633 #ifdef SK_DEBUG
    634     {
    635         unsigned srcA = SkGetPackedA4444(src) * scale16;
    636         SkASSERT(srcA == (src32 & 0xFF));
    637     }
    638 #endif
    639     unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    640     uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    641     return SkCompact_4444((src32 + dst32) >> 4);
    642 }
    643 
    644 static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    645     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
    646                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
    647                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
    648                  (SkGetPackedB4444(c) << SK_B32_SHIFT);
    649     return d | (d << 4);
    650 }
    651 
    652 static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    653     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
    654     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
    655     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
    656     (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
    657 }
    658 
    659 // cheap 2x2 dither
    660 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
    661                                                U8CPU g, U8CPU b) {
    662     // to ensure that we stay a legal premultiplied color, we take the max()
    663     // of the truncated and dithered alpha values. If we didn't, cases like
    664     // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    665     // which is not legal premultiplied, since a < color
    666     unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    667     a = SkMax32(a >> 4, dithered_a);
    668     // these we just dither in place
    669     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    670     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    671     b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;
    672 
    673     return SkPackARGB4444(a, r, g, b);
    674 }
    675 
    676 static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    677     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
    678                                 SkGetPackedG32(c), SkGetPackedB32(c));
    679 }
    680 
    681 /*  Assumes 16bit is in standard RGBA order.
    682     Transforms a normal ARGB_8888 into the same byte order as
    683     expanded ARGB_4444, but keeps each component 8bits
    684 */
    685 static inline uint32_t SkExpand_8888(SkPMColor c) {
    686     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
    687             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
    688             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
    689             (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
    690 }
    691 
    692 /*  Undo the operation of SkExpand_8888, turning the argument back into
    693     a SkPMColor.
    694 */
    695 static inline SkPMColor SkCompact_8888(uint32_t c) {
    696     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
    697             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
    698             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
    699             (((c >>  0) & 0xFF) << SK_A32_SHIFT);
    700 }
    701 
    702 /*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    703     but this routine just keeps the high 4bits of each component in the low
    704     4bits of the result (just like a newly expanded PMColor16).
    705 */
    706 static inline uint32_t SkExpand32_4444(SkPMColor c) {
    707     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
    708             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
    709             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
    710             (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
    711 }
    712 
// takes two values and alternates them as part of a memset16
// used for cheap 2x2 dithering when the colors are opaque
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
    716 
    717 ///////////////////////////////////////////////////////////////////////////////
    718 
    719 static inline int SkUpscale31To32(int value) {
    720     SkASSERT((unsigned)value <= 31);
    721     return value + (value >> 4);
    722 }
    723 
    724 static inline int SkBlend32(int src, int dst, int scale) {
    725     SkASSERT((unsigned)src <= 0xFF);
    726     SkASSERT((unsigned)dst <= 0xFF);
    727     SkASSERT((unsigned)scale <= 32);
    728     return dst + ((src - dst) * scale >> 5);
    729 }
    730 
    731 static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
    732                                      SkPMColor dst, uint16_t mask) {
    733     if (mask == 0) {
    734         return dst;
    735     }
    736 
    737     /*  We want all of these in 5bits, hence the shifts in case one of them
    738      *  (green) is 6bits.
    739      */
    740     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    741     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    742     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    743 
    744     // Now upscale them to 0..32, so we can use blend32
    745     maskR = SkUpscale31To32(maskR);
    746     maskG = SkUpscale31To32(maskG);
    747     maskB = SkUpscale31To32(maskB);
    748 
    749     // srcA has been upscaled to 256 before passed into this function
    750     maskR = maskR * srcA >> 8;
    751     maskG = maskG * srcA >> 8;
    752     maskB = maskB * srcA >> 8;
    753 
    754     int dstR = SkGetPackedR32(dst);
    755     int dstG = SkGetPackedG32(dst);
    756     int dstB = SkGetPackedB32(dst);
    757 
    758     // LCD blitting is only supported if the dst is known/required
    759     // to be opaque
    760     return SkPackARGB32(0xFF,
    761                         SkBlend32(srcR, dstR, maskR),
    762                         SkBlend32(srcG, dstG, maskG),
    763                         SkBlend32(srcB, dstB, maskB));
    764 }
    765 
    766 static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
    767                                            SkPMColor dst, uint16_t mask,
    768                                            SkPMColor opaqueDst) {
    769     if (mask == 0) {
    770         return dst;
    771     }
    772 
    773     if (0xFFFF == mask) {
    774         return opaqueDst;
    775     }
    776 
    777     /*  We want all of these in 5bits, hence the shifts in case one of them
    778      *  (green) is 6bits.
    779      */
    780     int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    781     int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    782     int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
    783 
    784     // Now upscale them to 0..32, so we can use blend32
    785     maskR = SkUpscale31To32(maskR);
    786     maskG = SkUpscale31To32(maskG);
    787     maskB = SkUpscale31To32(maskB);
    788 
    789     int dstR = SkGetPackedR32(dst);
    790     int dstG = SkGetPackedG32(dst);
    791     int dstB = SkGetPackedB32(dst);
    792 
    793     // LCD blitting is only supported if the dst is known/required
    794     // to be opaque
    795     return SkPackARGB32(0xFF,
    796                         SkBlend32(srcR, dstR, maskR),
    797                         SkBlend32(srcG, dstG, maskG),
    798                         SkBlend32(srcB, dstB, maskB));
    799 }
    800 
    801 static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
    802                                   SkColor color, int width, SkPMColor) {
    803     int srcA = SkColorGetA(color);
    804     int srcR = SkColorGetR(color);
    805     int srcG = SkColorGetG(color);
    806     int srcB = SkColorGetB(color);
    807 
    808     srcA = SkAlpha255To256(srcA);
    809 
    810     for (int i = 0; i < width; i++) {
    811         dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], src[i]);
    812     }
    813 }
    814 
    815 static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
    816                                         SkColor color, int width,
    817                                         SkPMColor opaqueDst) {
    818     int srcR = SkColorGetR(color);
    819     int srcG = SkColorGetG(color);
    820     int srcB = SkColorGetB(color);
    821 
    822     for (int i = 0; i < width; i++) {
    823         dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], src[i],
    824                                     opaqueDst);
    825     }
    826 }
    827 
    828 #endif
    829 
    830