/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */


#ifndef SkColorPriv_DEFINED
#define SkColorPriv_DEFINED

// turn this on for extra debug checking when blending onto 565
#ifdef SK_DEBUG
    #define CHECK_FOR_565_OVERFLOW
#endif

#include "SkColor.h"
#include "SkMath.h"

/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
    byte into a scale value, so that we can say scale * value >> 8 instead of
    alpha * value / 255.

    In debugging, asserts that alpha is 0..255
*/
static inline unsigned SkAlpha255To256(U8CPU alpha) {
    SkASSERT(SkToU8(alpha) == alpha);
    // this one assumes that blending on top of an opaque dst keeps it that way
    // even though it is less accurate than a+(a>>7) for non-opaque dsts
    return alpha + 1;
}

/** Multiply value by 0..256, and shift the result down 8
    (i.e. return (value * alpha256) >> 8)
*/
#define SkAlphaMul(value, alpha256)     (SkMulS16(value, alpha256) >> 8)

// The caller may want negative values, so keep all params signed (int)
// so we don't accidentally slip into unsigned math and lose the sign
// extension when we shift (in SkAlphaMul)
static inline int SkAlphaBlend(int src, int dst, int scale256) {
    SkASSERT((unsigned)scale256 <= 256);
    return dst + SkAlphaMul(src - dst, scale256);
}

/**
 *  Returns (src * alpha + dst * (255 - alpha)) / 255
 *
 *  This is more accurate than SkAlphaBlend, but slightly slower
 */
static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) {
    SkASSERT((int16_t)src == src);
    SkASSERT((int16_t)dst == dst);
    SkASSERT((uint8_t)alpha == alpha);

    // (prod + (prod >> 8)) >> 8 approximates a divide by 255 with rounding
    // (the +128 above provides the rounding bias).
    int prod = SkMulS16(src - dst, alpha) + 128;
    prod = (prod + (prod >> 8)) >> 8;
    return dst + prod;
}

// 565 layout: 5 bits red, 6 bits green, 5 bits blue.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

#define SkR16Assert(r)  SkASSERT((unsigned)(r) <= SK_R16_MASK)
#define SkG16Assert(g)  SkASSERT((unsigned)(g) <= SK_G16_MASK)
#define SkB16Assert(b)  SkASSERT((unsigned)(b) <= SK_B16_MASK)

/** Pack already-narrowed 5/6/5 components into a 16bit 565 pixel.
    Components must already be within their respective masks (asserted).
*/
static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
    SkASSERT(r <= SK_R16_MASK);
    SkASSERT(g <= SK_G16_MASK);
    SkASSERT(b <= SK_B16_MASK);

    return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT));
}

#define SK_R16_MASK_IN_PLACE        (SK_R16_MASK << SK_R16_SHIFT)
#define SK_G16_MASK_IN_PLACE        (SK_G16_MASK << SK_G16_SHIFT)
#define SK_B16_MASK_IN_PLACE        (SK_B16_MASK << SK_B16_SHIFT)

/** Expand the 16bit color into a 32bit value that can be scaled all at once
    by a value up to 32. Used in conjunction with SkCompact_rgb_16.
*/
static inline uint32_t SkExpand_rgb_16(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    // green moves to the high 16 bits; red/blue stay in the low 16 bits,
    // leaving 5 bits of headroom between every component
    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
}

/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
    color value. The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits. However, the top 16 can contain garbage, so it is
    up to the caller to safely ignore them.
*/
static inline U16CPU SkCompact_rgb_16(uint32_t c) {
    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
}

/** Scale the 16bit color value by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
    // scale is reduced from 0..256 to 0..32 to fit the 5 bits of headroom
    // created by SkExpand_rgb_16
    return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
}

// this helper explicitly returns a clean 16bit value (but slower)
#define SkAlphaMulRGB16_ToU16(c, s)  (uint16_t)SkAlphaMulRGB16(c, s)

/** Blend src and dst 16bit colors by the 0..256 scale parameter.
    The computation yields only 16bits of valid data, but we claim
    to return 32bits, so that the compiler won't generate extra instructions to
    "clean" the top 16bits.
*/
static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    uint32_t src32 = SkExpand_rgb_16(src);
    uint32_t dst32 = SkExpand_rgb_16(dst);
    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
}

/** Array variant of SkBlendRGB16: blends count src pixels into dst in place,
    all with the same 0..256 srcScale. count must be > 0 (asserted).
*/
static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
                                int srcScale, int count) {
    SkASSERT(count > 0);
    SkASSERT((unsigned)srcScale <= 256);

    srcScale >>= 3;

    do {
        uint32_t src32 = SkExpand_rgb_16(*src++);
        uint32_t dst32 = SkExpand_rgb_16(*dst);
        *dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
    } while (--count > 0);
}

// Add two 565 pixels; in debug builds, asserts that no component overflows.
#ifdef SK_DEBUG
    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);

        return a + b;
    }
#else
    #define SkRGB16Add(a, b)  ((a) + (b))
#endif

///////////////////////////////////////////////////////////////////////////////

#define SK_A32_BITS     8
#define SK_R32_BITS     8
#define SK_G32_BITS     8
#define SK_B32_BITS     8

#define SK_A32_MASK     ((1 << SK_A32_BITS) - 1)
#define SK_R32_MASK     ((1 << SK_R32_BITS) - 1)
#define SK_G32_MASK     ((1 << SK_G32_BITS) - 1)
#define SK_B32_MASK     ((1 << SK_B32_BITS) - 1)

// The *_SHIFT values are assumed to come from SkColor.h / build config;
// the shift-left-then-right form extracts one byte without needing a mask.
#define SkGetPackedA32(packed)      ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
#define SkGetPackedR32(packed)      ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
#define SkGetPackedG32(packed)      ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
#define SkGetPackedB32(packed)      ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)

#define SkA32Assert(a)  SkASSERT((unsigned)(a) <= SK_A32_MASK)
#define SkR32Assert(r)  SkASSERT((unsigned)(r) <= SK_R32_MASK)
#define SkG32Assert(g)  SkASSERT((unsigned)(g) <= SK_G32_MASK)
#define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)

// Debug-only: assert that c is a valid premultiplied color (each component <= alpha).
#ifdef SK_DEBUG
static inline void SkPMColorAssert(SkPMColor c) {
    unsigned a = SkGetPackedA32(c);
    unsigned r = SkGetPackedR32(c);
    unsigned g = SkGetPackedG32(c);
    unsigned b = SkGetPackedB32(c);

    SkA32Assert(a);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);
}
#else
#define SkPMColorAssert(c)
#endif

/**
 *  Pack the components into a SkPMColor, checking (in the debug version) that
 *  the components are 0..255, and are already premultiplied (i.e. alpha >= color)
 */
static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkA32Assert(a);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}

/**
 *  Abstract 4-byte interpolation, implemented on top of SkPMColor
 *  utility functions. Third parameter controls blending of the first two:
 *      (src, dst, 0) returns dst
 *      (src, dst, 0xFF) returns src
 */
static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
                                         U8CPU srcWeight) {
    unsigned scale = SkAlpha255To256(srcWeight);

    unsigned a = SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale);
    unsigned r = SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale);
    unsigned g = SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale);
    unsigned b = SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale);

    return SkPackARGB32(a, r, g, b);
}

/**
 * 32b optimized version; currently appears to be 10% faster even on 64b
 * architectures than an equivalent 64b version and 30% faster than
 * SkFourByteInterp(). Third parameter controls blending of the first two:
 *      (src, dst, 0) returns dst
 *      (src, dst, 0xFF) returns src
 * ** Does not match the results of SkFourByteInterp() because we use
 * a more accurate scale computation!
 * TODO: migrate Skia function to using an accurate 255->256 alpha
 * conversion.
 */
static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
                                             SkPMColor dst,
                                             U8CPU srcWeight) {
    SkASSERT(srcWeight < 256);

    // Reorders ARGB to AG-RB in order to reduce the number of operations.
    const uint32_t mask = 0xFF00FF;
    uint32_t src_rb = src & mask;
    uint32_t src_ag = (src >> 8) & mask;
    uint32_t dst_rb = dst & mask;
    uint32_t dst_ag = (dst >> 8) & mask;

    // scale = srcWeight + (srcWeight >> 7) is more accurate than
    // scale = srcWeight + 1, but 7% slower
    int scale = srcWeight + (srcWeight >> 7);

    uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
    uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;

    return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
}

/**
 * Same as SkPackARGB32, but this version guarantees to not check that the
 * values are premultiplied in the debug version.
 */
static inline SkPMColor SkPackARGB32NoCheck(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    return (a << SK_A32_SHIFT) | (r << SK_R32_SHIFT) |
           (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
}

/** Premultiply unpremultiplied a/r/g/b components and pack into a SkPMColor.
    Skips the multiplies entirely when the color is already opaque (a == 255).
*/
static inline
SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
    SkA32Assert(a);
    SkA32Assert(r);
    SkA32Assert(g);
    SkA32Assert(b);

    if (a != 255) {
        r = SkMulDiv255Round(r, a);
        g = SkMulDiv255Round(g, a);
        b = SkMulDiv255Round(b, a);
    }
    return SkPackARGB32(a, r, g, b);
}

SK_API extern const uint32_t gMask_00FF00FF;

/** Scale all four components of c by the 0..256 scale value, operating on the
    AG and RB byte pairs in parallel.
*/
static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
    uint32_t mask = gMask_00FF00FF;
//    uint32_t mask = 0xFF00FF;

    uint32_t rb = ((c & mask) * scale) >> 8;
    uint32_t ag = ((c >> 8) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
}

// Classic premultiplied src-over: src + dst * (1 - srcAlpha)
static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
    return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
}

// Src-over of src onto dst, with src additionally modulated by the 0..255
// coverage value aa (e.g. antialiasing coverage).
static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
    SkASSERT((unsigned)aa <= 255);

    unsigned src_scale = SkAlpha255To256(aa);
    unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));

    return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
}

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 32bit pixel to a 16bit pixel (no dither)

#define SkR32ToR16_MACRO(r)   ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
#define SkG32ToG16_MACRO(g)   ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
#define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))

#ifdef SK_DEBUG
    static inline unsigned SkR32ToR16(unsigned r) {
        SkR32Assert(r);
        return SkR32ToR16_MACRO(r);
    }
    static inline unsigned SkG32ToG16(unsigned g) {
        SkG32Assert(g);
        return SkG32ToG16_MACRO(g);
    }
    static inline unsigned SkB32ToB16(unsigned b) {
        SkB32Assert(b);
        return SkB32ToB16_MACRO(b);
    }
#else
    #define SkR32ToR16(r)   SkR32ToR16_MACRO(r)
    #define SkG32ToG16(g)   SkG32ToG16_MACRO(g)
    #define SkB32ToB16(b)   SkB32ToB16_MACRO(b)
#endif

// Extract a 565 component directly from a packed 32bit pixel.
#define SkPacked32ToR16(c)  (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
#define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
#define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)

/** Truncate a 32bit pixel down to a 565 pixel (no dithering).
    Returns U16CPU, so the top 16 bits may be dirty; see SkPixel32ToPixel16_ToU16
    for a guaranteed-clean result.
*/
static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
    unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
    unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
    unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
    return r | g | b;
}

// Pack separate 8bit r/g/b values into a 565 pixel (no dithering).
static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    return  (SkR32ToR16(r) << SK_R16_SHIFT) |
            (SkG32ToG16(g) << SK_G16_SHIFT) |
            (SkB32ToB16(b) << SK_B16_SHIFT);
}

#define SkPixel32ToPixel16_ToU16(src)   SkToU16(SkPixel32ToPixel16(src))

/////////////////////////////////////////////////////////////////////////////////////////
// Fast dither from 32->16

// 2x2 dither pattern: dither on every other pixel in a checkerboard.
#define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)

/** Dither 8bit r/g/b components down to 565, adding the truncation error of
    each component back in before truncating (cheap ordered dither).
*/
static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
    r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
    g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
    b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);

    return SkPackRGB16(r, g, b);
}

static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
    return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
}

/*  Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
    It is now suitable for combining with a scaled expanded_rgb_16 color
    as in SkSrcOver32To16().
    We must do this 565 high-bit replication, in order for the subsequent add
    to saturate properly (and not overflow). If we take the 8 bits as is, it is
    possible to overflow.
*/
static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
    unsigned sr = SkPacked32ToR16(c);
    unsigned sg = SkPacked32ToG16(c);
    unsigned sb = SkPacked32ToB16(c);

    sr = (sr << 5) | sr;
    sg = (sg << 5) | (sg >> 1);
    sb = (sb << 5) | sb;
    return (sr << 11) | (sg << 21) | (sb << 0);
}

/*  SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
    (with dirt in the high 16bits, so caller beware).
*/
static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
    unsigned sr = SkGetPackedR32(src);
    unsigned sg = SkGetPackedG32(src);
    unsigned sb = SkGetPackedB32(src);

    unsigned dr = SkGetPackedR16(dst);
    unsigned dg = SkGetPackedG16(dst);
    unsigned db = SkGetPackedB16(dst);

    unsigned isa = 255 - SkGetPackedA32(src);

    dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
    dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
    db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);

    return SkPackRGB16(dr, dg, db);
}

////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 16bit pixel to a 32bit pixel

// Widen a 5bit component to 8 bits by replicating the top bits into the
// low bits, so 0x1F maps to 0xFF exactly.
static inline unsigned SkR16ToR32(unsigned r) {
    return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
}

// Widen a 6bit component to 8 bits (same replication trick as SkR16ToR32).
static inline unsigned SkG16ToG32(unsigned g) {
    return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
}
// Widen a 5bit component to 8 bits (same replication trick as SkR16ToR32).
static inline unsigned SkB16ToB32(unsigned b) {
    return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
}

#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))

/** Convert a 565 pixel to an opaque (alpha = 0xFF) premultiplied 32bit pixel. */
static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkPackARGB32(0xFF, r, g, b);
}

// similar to SkPixel16ToPixel32, but returns SkColor instead of SkPMColor
static inline SkColor SkPixel16ToColor(U16CPU src) {
    SkASSERT(src == SkToU16(src));

    unsigned    r = SkPacked16ToR32(src);
    unsigned    g = SkPacked16ToG32(src);
    unsigned    b = SkPacked16ToB32(src);

    SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
    SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
    SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));

    return SkColorSetRGB(r, g, b);
}

///////////////////////////////////////////////////////////////////////////////

// Premultiplied 4444 pixel (4 bits per component).
typedef uint16_t SkPMColor16;

// Put in OpenGL order (r g b a)
#define SK_A4444_SHIFT    0
#define SK_R4444_SHIFT    12
#define SK_G4444_SHIFT    8
#define SK_B4444_SHIFT    4

#define SkA32To4444(a)  ((unsigned)(a) >> 4)
#define SkR32To4444(r)  ((unsigned)(r) >> 4)
#define SkG32To4444(g)  ((unsigned)(g) >> 4)
#define SkB32To4444(b)  ((unsigned)(b) >> 4)

// Widen a 4bit value to 8 bits by duplicating it (0xF -> 0xFF).
static inline U8CPU SkReplicateNibble(unsigned nib) {
    SkASSERT(nib <= 0xF);
    return (nib << 4) | nib;
}

#define SkA4444ToA32(a)     SkReplicateNibble(a)
#define SkR4444ToR32(r)     SkReplicateNibble(r)
#define SkG4444ToG32(g)     SkReplicateNibble(g)
#define SkB4444ToB32(b)     SkReplicateNibble(b)

#define SkGetPackedA4444(c)     (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
#define SkGetPackedR4444(c)     (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
#define SkGetPackedG4444(c)     (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
#define SkGetPackedB4444(c)     (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)

#define SkPacked4444ToA32(c)    SkReplicateNibble(SkGetPackedA4444(c))
#define SkPacked4444ToR32(c)    SkReplicateNibble(SkGetPackedR4444(c))
#define SkPacked4444ToG32(c)    SkReplicateNibble(SkGetPackedG4444(c))
#define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))

// Debug-only: assert that c is a valid premultiplied 4444 color.
#ifdef SK_DEBUG
static inline void SkPMColor16Assert(U16CPU c) {
    unsigned a = SkGetPackedA4444(c);
    unsigned r = SkGetPackedR4444(c);
    unsigned g = SkGetPackedG4444(c);
    unsigned b = SkGetPackedB4444(c);

    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);
}
#else
#define SkPMColor16Assert(c)
#endif

// Turn a 0..15 alpha into a 0..16 scale (so that 15 maps to 16).
static inline unsigned SkAlpha15To16(unsigned a) {
    SkASSERT(a <= 0xF);
    return a + (a >> 3);
}

// Multiply value by a 0..16 scale and shift the result down 4.
#ifdef SK_DEBUG
    static inline int SkAlphaMul4(int value, int scale) {
        SkASSERT((unsigned)scale <= 0x10);
        return value * scale >> 4;
    }
#else
    #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
#endif

// Widen a 4bit component to its 565 width by bit replication.
static inline unsigned SkR4444ToR565(unsigned r) {
    SkASSERT(r <= 0xF);
    return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
}

static inline unsigned SkG4444ToG565(unsigned g) {
    SkASSERT(g <= 0xF);
    return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
}

static inline unsigned SkB4444ToB565(unsigned b) {
    SkASSERT(b <= 0xF);
    return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
}

/** Pack 0..15 components into a SkPMColor16, asserting (debug only) that they
    are premultiplied (each component <= alpha).
*/
static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
                                         unsigned g, unsigned b) {
    SkASSERT(a <= 0xF);
    SkASSERT(r <= a);
    SkASSERT(g <= a);
    SkASSERT(b <= a);

    return (SkPMColor16)((a << SK_A4444_SHIFT) | (r << SK_R4444_SHIFT) |
                         (g << SK_G4444_SHIFT) | (b << SK_B4444_SHIFT));
}

extern const uint16_t gMask_0F0F;

/** Scale all four 4bit components of c by the 0..16 scale value. */
static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
    SkASSERT(scale <= 16);

    const unsigned mask = 0xF0F;    //gMask_0F0F;

#if 0
    unsigned rb = ((c & mask) * scale) >> 4;
    unsigned ag = ((c >> 4) & mask) * scale;
    return (rb & mask) | (ag & ~mask);
#else
    c = (c & mask) | ((c & (mask << 4)) << 12);
    c = c * scale >> 4;
    return (c & mask) | ((c >> 12) & (mask << 4));
#endif
}

/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
    once by a value up to 16. Used in conjunction with SkCompact_4444.
*/
static inline uint32_t SkExpand_4444(U16CPU c) {
    SkASSERT(c == (uint16_t)c);

    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c & ~mask) << 12);
}

/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
    NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
    It does this for speed, since if it is being written directly to 16bits of
    memory, the top 16bits will be ignored. Casting the result to uint16_t here
    would add 2 more instructions, slow us down. It is up to the caller to
    perform the cast if needed.
*/
static inline U16CPU SkCompact_4444(uint32_t c) {
    const unsigned mask = 0xF0F;    //gMask_0F0F;
    return (c & mask) | ((c >> 12) & ~mask);
}

/** SrcOver a premultiplied 4444 src onto a 565 dst, returning 565. */
static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
    unsigned sa = SkGetPackedA4444(s);
    unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
    unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
    unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));

    // To avoid overflow, we have to clear the low bit of the synthetic sg
    // if the src alpha is <= 7.
    // to see why, try blending 0x4444 on top of 565-white and watch green
    // overflow (sum == 64)
    sg &= ~(~(sa >> 3) & 1);

    unsigned scale = SkAlpha15To16(15 - sa);
    unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
    unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
    unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);

#if 0
    if (sg + dg > 63) {
        SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
    }
#endif
    return SkPackRGB16(sr + dr, sg + dg, sb + db);
}

/** Like SkSrcOver4444To16, but src is first scaled by the 0..16 scale16. */
static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
}

/** Blend a 4444 src over a 4444 dst, with src scaled by the 0..16 scale16. */
static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
    SkASSERT((unsigned)scale16 <= 16);

    uint32_t src32 = SkExpand_4444(src) * scale16;
    // the scaled srcAlpha is the bottom byte
#ifdef SK_DEBUG
    {
        unsigned srcA = SkGetPackedA4444(src) * scale16;
        SkASSERT(srcA == (src32 & 0xFF));
    }
#endif
    unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
    uint32_t dst32 = SkExpand_4444(dst) * dstScale;
    return SkCompact_4444((src32 + dst32) >> 4);
}

/** Convert a 4444 pixel to a 32bit pixel, replicating each nibble into a
    full byte (via d | (d << 4)).
*/
static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
    uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
                 (SkGetPackedR4444(c) << SK_R32_SHIFT) |
                 (SkGetPackedG4444(c) << SK_G32_SHIFT) |
                 (SkGetPackedB4444(c) << SK_B32_SHIFT);
    return d | (d << 4);
}

/** Truncate a 32bit pixel to a 4444 pixel, keeping the high nibble of each
    component (no dithering).
*/
static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
    return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
            (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
}

// cheap 2x2 dither
static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
                                               U8CPU g, U8CPU b) {
    // to ensure that we stay a legal premultiplied color, we take the max()
    // of the truncated and dithered alpha values. If we didn't, cases like
    // SkDitherARGB32To4444(0x31, 0x2E, ...) would generate SkPackARGB4444(2, 3, ...)
    // which is not legal premultiplied, since a < color
    unsigned dithered_a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
    a = SkMax32(a >> 4, dithered_a);
    // these we just dither in place
    r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
    g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
    b = ((b << 1) - ((b >> 4 << 4) | (b >> 4))) >> 4;

    return SkPackARGB4444(a, r, g, b);
}

static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
    return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
                                SkGetPackedG32(c), SkGetPackedB32(c));
}

/*  Assumes 16bit is in standard RGBA order.
    Transforms a normal ARGB_8888 into the same byte order as
    expanded ARGB_4444, but keeps each component 8bits
*/
static inline uint32_t SkExpand_8888(SkPMColor c) {
    return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
            (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
            (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
            (((c >> SK_A32_SHIFT) & 0xFF) <<  0);
}

/*  Undo the operation of SkExpand_8888, turning the argument back into
    a SkPMColor.
*/
static inline SkPMColor SkCompact_8888(uint32_t c) {
    return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
            (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
            (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
            (((c >>  0) & 0xFF) << SK_A32_SHIFT);
}

/*  Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
    but this routine just keeps the high 4bits of each component in the low
    4bits of the result (just like a newly expanded PMColor16).
*/
static inline uint32_t SkExpand32_4444(SkPMColor c) {
    return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
            (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
            (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
            (((c >> (SK_A32_SHIFT + 4)) & 0xF) <<  0);
}

// takes two values and alternates them as part of a memset16
// used for cheap 2x2 dithering when the colors are opaque
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);

///////////////////////////////////////////////////////////////////////////////

// Turn a 0..31 value into a 0..32 scale (so that 31 maps to 32).
static inline int SkUpscale31To32(int value) {
    SkASSERT((unsigned)value <= 31);
    return value + (value >> 4);
}

// Blend two 8bit values by a 0..32 scale: dst + (src - dst) * scale / 32.
static inline int SkBlend32(int src, int dst, int scale) {
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    return dst + ((src - dst) * scale >> 5);
}

/** Blend the src color into the 32bit dst using the per-channel 565 LCD
    coverage mask, modulated by srcA. srcA must already be upscaled to 0..256
    (see SkBlitLCD16Row). Returns an opaque result.
*/
static inline SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
                                     SkPMColor dst, uint16_t mask) {
    if (mask == 0) {
        return dst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    // srcA has been upscaled to 256 before passed into this function
    maskR = maskR * srcA >> 8;
    maskG = maskG * srcA >> 8;
    maskB = maskB * srcA >> 8;

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}

/** Like SkBlendLCD16, but for an opaque src color: a fully-on mask (0xFFFF)
    short-circuits to the precomputed opaqueDst.
*/
static inline SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
                                           SkPMColor dst, uint16_t mask,
                                           SkPMColor opaqueDst) {
    if (mask == 0) {
        return dst;
    }

    if (0xFFFF == mask) {
        return opaqueDst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = SkUpscale31To32(maskR);
    maskG = SkUpscale31To32(maskG);
    maskB = SkUpscale31To32(maskB);

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, dstR, maskR),
                        SkBlend32(srcG, dstG, maskG),
                        SkBlend32(srcB, dstB, maskB));
}

/** Blend one row of LCD16 coverage masks (src) over dst using color.
    The final (unnamed) SkPMColor parameter exists only to match the signature
    of SkBlitLCD16OpaqueRow; it is unused here.
*/
static inline void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
                                  SkColor color, int width, SkPMColor) {
    int srcA = SkColorGetA(color);
    int srcR = SkColorGetR(color);
    int srcG = SkColorGetG(color);
    int srcB = SkColorGetB(color);

    // upscale alpha to 0..256 once, outside the loop (SkBlendLCD16 expects this)
    srcA = SkAlpha255To256(srcA);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16(srcA, srcR, srcG, srcB, dst[i], src[i]);
    }
}

/** Opaque-color variant of SkBlitLCD16Row: opaqueDst is the precomputed
    result for a fully-covered pixel.
*/
static inline void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
                                        SkColor color, int width,
                                        SkPMColor opaqueDst) {
    int srcR = SkColorGetR(color);
    int srcG = SkColorGetG(color);
    int srcB = SkColorGetB(color);

    for (int i = 0; i < width; i++) {
        dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], src[i],
                                    opaqueDst);
    }
}

#endif