1 2 #include "SkBlitMask.h" 3 #include "SkColor_opts_neon.h" 4 5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB, 6 const void* SK_RESTRICT maskPtr, size_t maskRB, 7 SkColor, int width, int height) { 8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; 9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; 10 11 maskRB -= width; 12 dstRB -= (width << 2); 13 do { 14 int w = width; 15 while (w >= 8) { 16 uint8x8_t vmask = vld1_u8(mask); 17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); 18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); 19 20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale); 21 vdevice.val[NEON_A] += vmask; 22 23 vst4_u8((uint8_t*)device, vdevice); 24 25 mask += 8; 26 device += 8; 27 w -= 8; 28 } 29 while (w-- > 0) { 30 unsigned aa = *mask++; 31 *device = (aa << SK_A32_SHIFT) 32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); 33 device += 1; 34 }; 35 device = (uint32_t*)((char*)device + dstRB); 36 mask += maskRB; 37 } while (--height != 0); 38 } 39 40 template <bool isColor> 41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, 42 const void* SK_RESTRICT maskPtr, size_t maskRB, 43 SkColor color, int width, int height) { 44 SkPMColor pmc = SkPreMultiplyColor(color); 45 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; 46 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; 47 uint8x8x4_t vpmc; 48 49 maskRB -= width; 50 dstRB -= (width << 2); 51 52 if (width >= 8) { 53 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); 54 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); 55 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); 56 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); 57 } 58 do { 59 int w = width; 60 while (w >= 8) { 61 uint8x8_t vmask = vld1_u8(mask); 62 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); 63 if (isColor) { 64 vscale = vsubw_u8(vdupq_n_u16(256), 65 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); 66 } else { 67 vscale = vsubw_u8(vdupq_n_u16(256), vmask); 68 } 69 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); 70 71 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) 72 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); 73 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) 74 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); 75 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) 76 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); 77 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) 78 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); 79 80 vst4_u8((uint8_t*)device, vdev); 81 82 mask += 8; 83 device += 8; 84 w -= 8; 85 } 86 87 while (w--) { 88 unsigned aa = *mask++; 89 if (isColor) { 90 *device = SkBlendARGB32(pmc, *device, aa); 91 } else { 92 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) 93 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); 94 } 95 device += 1; 96 }; 97 98 device = (uint32_t*)((char*)device + dstRB); 99 mask += maskRB; 100 101 } while (--height != 0); 102 } 103 104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB, 105 const void* SK_RESTRICT maskPtr, size_t maskRB, 106 SkColor color, int width, int height) { 107 D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height); 108 } 109 110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB, 111 const void* SK_RESTRICT maskPtr, size_t maskRB, 112 SkColor color, int width, int height) { 113 D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height); 114 } 115 116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) { 117 if (SK_ColorBLACK == color) { 118 return D32_A8_Black_neon; 119 } else if (0xFF == SkColorGetA(color)) { 120 return D32_A8_Opaque_neon; 121 } else { 122 return D32_A8_Color_neon; 123 } 124 } 125 126 //////////////////////////////////////////////////////////////////////////////// 127 128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], 129 SkColor color, int width, 130 SkPMColor opaqueDst) { 131 int colR = SkColorGetR(color); 132 int colG = SkColorGetG(color); 133 int colB = SkColorGetB(color); 134 135 uint8x8_t vcolR, vcolG, vcolB; 136 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB; 137 138 if (width >= 8) { 139 vcolR = vdup_n_u8(colR); 140 vcolG = vdup_n_u8(colG); 141 vcolB = vdup_n_u8(colB); 142 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst)); 143 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst)); 144 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst)); 145 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst)); 146 } 147 148 while (width >= 8) { 149 uint8x8x4_t vdst; 150 uint16x8_t vmask; 151 uint16x8_t vmaskR, vmaskG, vmaskB; 152 uint8x8_t vsel_trans, vsel_opq; 153 154 vdst = vld4_u8((uint8_t*)dst); 155 vmask = vld1q_u16(src); 156 157 // Prepare compare masks 158 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0))); 159 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF))); 160 161 // Get all the color masks on 5 bits 162 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); 163 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), 164 SK_B16_BITS + SK_R16_BITS + 1); 165 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); 166 167 // Upscale to 0..32 168 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); 169 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); 170 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); 171 172 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF)); 173 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]); 174 175 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); 176 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); 177 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); 178 179 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]); 180 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]); 181 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]); 182 183 vst4_u8((uint8_t*)dst, vdst); 184 185 dst += 8; 186 src += 8; 187 width -= 8; 188 } 189 190 // Leftovers 191 for (int i = 0; i < width; i++) { 192 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i], 193 opaqueDst); 194 } 195 } 196 197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], 198 SkColor color, int width, SkPMColor) { 199 int colA = SkColorGetA(color); 200 int colR = SkColorGetR(color); 201 int colG = SkColorGetG(color); 202 int colB = SkColorGetB(color); 203 204 colA = SkAlpha255To256(colA); 205 206 uint8x8_t vcolR, vcolG, vcolB; 207 uint16x8_t vcolA; 208 209 if (width >= 8) { 210 vcolA = vdupq_n_u16(colA); 211 vcolR = vdup_n_u8(colR); 212 vcolG = vdup_n_u8(colG); 213 vcolB = vdup_n_u8(colB); 214 } 215 216 while (width >= 8) { 217 uint8x8x4_t vdst; 218 uint16x8_t vmask; 219 uint16x8_t vmaskR, vmaskG, vmaskB; 220 221 vdst = vld4_u8((uint8_t*)dst); 222 vmask = vld1q_u16(src); 223 224 // Get all the color masks on 5 bits 225 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); 226 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), 227 SK_B16_BITS + SK_R16_BITS + 1); 228 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); 229 230 // Upscale to 0..32 231 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); 232 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); 233 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); 234 235 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8); 236 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8); 237 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8); 238 239 vdst.val[NEON_A] = vdup_n_u8(0xFF); 240 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); 241 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); 242 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); 243 244 vst4_u8((uint8_t*)dst, vdst); 245 246 dst += 8; 247 src += 8; 248 width -= 8; 249 } 250 251 for (int i = 0; i < width; i++) { 252 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); 253 } 254 } 255