Home | History | Annotate | Download | only in opts
      1 
      2 #include "SkBlitMask.h"
      3 #include "SkColor_opts_neon.h"
      4 
      5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
      6                               const void* SK_RESTRICT maskPtr, size_t maskRB,
      7                               SkColor, int width, int height) {
      8     SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
      9     const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
     10 
     11     maskRB -= width;
     12     dstRB -= (width << 2);
     13     do {
     14         int w = width;
     15         while (w >= 8) {
     16             uint8x8_t vmask = vld1_u8(mask);
     17             uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
     18             uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
     19 
     20             vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
     21             vdevice.val[NEON_A] += vmask;
     22 
     23             vst4_u8((uint8_t*)device, vdevice);
     24 
     25             mask += 8;
     26             device += 8;
     27             w -= 8;
     28         }
     29         while (w-- > 0) {
     30             unsigned aa = *mask++;
     31             *device = (aa << SK_A32_SHIFT)
     32                         + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
     33             device += 1;
     34         };
     35         device = (uint32_t*)((char*)device + dstRB);
     36         mask += maskRB;
     37     } while (--height != 0);
     38 }
     39 
     40 template <bool isColor>
     41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
     42                                      const void* SK_RESTRICT maskPtr, size_t maskRB,
     43                                      SkColor color, int width, int height) {
     44     SkPMColor pmc = SkPreMultiplyColor(color);
     45     SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
     46     const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
     47     uint8x8x4_t vpmc;
     48 
     49     maskRB -= width;
     50     dstRB -= (width << 2);
     51 
     52     if (width >= 8) {
     53         vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
     54         vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
     55         vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
     56         vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
     57     }
     58     do {
     59         int w = width;
     60         while (w >= 8) {
     61             uint8x8_t vmask = vld1_u8(mask);
     62             uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
     63             if (isColor) {
     64                 vscale = vsubw_u8(vdupq_n_u16(256),
     65                             SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
     66             } else {
     67                 vscale = vsubw_u8(vdupq_n_u16(256), vmask);
     68             }
     69             uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
     70 
     71             vdev.val[NEON_A] =   SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
     72                                + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
     73             vdev.val[NEON_R] =   SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
     74                                + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
     75             vdev.val[NEON_G] =   SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
     76                                + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
     77             vdev.val[NEON_B] =   SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
     78                                + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
     79 
     80             vst4_u8((uint8_t*)device, vdev);
     81 
     82             mask += 8;
     83             device += 8;
     84             w -= 8;
     85         }
     86 
     87         while (w--) {
     88             unsigned aa = *mask++;
     89             if (isColor) {
     90                 *device = SkBlendARGB32(pmc, *device, aa);
     91             } else {
     92                 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
     93                             + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
     94             }
     95             device += 1;
     96         };
     97 
     98         device = (uint32_t*)((char*)device + dstRB);
     99         mask += maskRB;
    100 
    101     } while (--height != 0);
    102 }
    103 
    104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
    105                                const void* SK_RESTRICT maskPtr, size_t maskRB,
    106                                SkColor color, int width, int height) {
    107     D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
    108 }
    109 
    110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
    111                               const void* SK_RESTRICT maskPtr, size_t maskRB,
    112                               SkColor color, int width, int height) {
    113     D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
    114 }
    115 
    116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
    117     if (SK_ColorBLACK == color) {
    118         return D32_A8_Black_neon;
    119     } else if (0xFF == SkColorGetA(color)) {
    120         return D32_A8_Opaque_neon;
    121     } else {
    122         return D32_A8_Color_neon;
    123     }
    124 }
    125 
    126 ////////////////////////////////////////////////////////////////////////////////
    127 
    128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
    129                                         SkColor color, int width,
    130                                         SkPMColor opaqueDst) {
    131     int colR = SkColorGetR(color);
    132     int colG = SkColorGetG(color);
    133     int colB = SkColorGetB(color);
    134 
    135     uint8x8_t vcolR, vcolG, vcolB;
    136     uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
    137 
    138     if (width >= 8) {
    139         vcolR = vdup_n_u8(colR);
    140         vcolG = vdup_n_u8(colG);
    141         vcolB = vdup_n_u8(colB);
    142         vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
    143         vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
    144         vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
    145         vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
    146     }
    147 
    148     while (width >= 8) {
    149         uint8x8x4_t vdst;
    150         uint16x8_t vmask;
    151         uint16x8_t vmaskR, vmaskG, vmaskB;
    152         uint8x8_t vsel_trans, vsel_opq;
    153 
    154         vdst = vld4_u8((uint8_t*)dst);
    155         vmask = vld1q_u16(src);
    156 
    157         // Prepare compare masks
    158         vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
    159         vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
    160 
    161         // Get all the color masks on 5 bits
    162         vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
    163         vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
    164                              SK_B16_BITS + SK_R16_BITS + 1);
    165         vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
    166 
    167         // Upscale to 0..32
    168         vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
    169         vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
    170         vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
    171 
    172         vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
    173         vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
    174 
    175         vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
    176         vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
    177         vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
    178 
    179         vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
    180         vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
    181         vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
    182 
    183         vst4_u8((uint8_t*)dst, vdst);
    184 
    185         dst += 8;
    186         src += 8;
    187         width -= 8;
    188     }
    189 
    190     // Leftovers
    191     for (int i = 0; i < width; i++) {
    192         dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
    193                                     opaqueDst);
    194     }
    195 }
    196 
    197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
    198                                    SkColor color, int width, SkPMColor) {
    199     int colA = SkColorGetA(color);
    200     int colR = SkColorGetR(color);
    201     int colG = SkColorGetG(color);
    202     int colB = SkColorGetB(color);
    203 
    204     colA = SkAlpha255To256(colA);
    205 
    206     uint8x8_t vcolR, vcolG, vcolB;
    207     uint16x8_t vcolA;
    208 
    209     if (width >= 8) {
    210         vcolA = vdupq_n_u16(colA);
    211         vcolR = vdup_n_u8(colR);
    212         vcolG = vdup_n_u8(colG);
    213         vcolB = vdup_n_u8(colB);
    214     }
    215 
    216     while (width >= 8) {
    217         uint8x8x4_t vdst;
    218         uint16x8_t vmask;
    219         uint16x8_t vmaskR, vmaskG, vmaskB;
    220 
    221         vdst = vld4_u8((uint8_t*)dst);
    222         vmask = vld1q_u16(src);
    223 
    224         // Get all the color masks on 5 bits
    225         vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
    226         vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
    227                              SK_B16_BITS + SK_R16_BITS + 1);
    228         vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
    229 
    230         // Upscale to 0..32
    231         vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
    232         vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
    233         vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
    234 
    235         vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
    236         vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
    237         vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
    238 
    239         vdst.val[NEON_A] = vdup_n_u8(0xFF);
    240         vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
    241         vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
    242         vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
    243 
    244         vst4_u8((uint8_t*)dst, vdst);
    245 
    246         dst += 8;
    247         src += 8;
    248         width -= 8;
    249     }
    250 
    251     for (int i = 0; i < width; i++) {
    252         dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
    253     }
    254 }
    255