1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkBlitMask.h" 9 #include "SkColor_opts_neon.h" 10 11 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], 12 SkColor color, int width, 13 SkPMColor opaqueDst) { 14 int colR = SkColorGetR(color); 15 int colG = SkColorGetG(color); 16 int colB = SkColorGetB(color); 17 18 uint8x8_t vcolR = vdup_n_u8(colR); 19 uint8x8_t vcolG = vdup_n_u8(colG); 20 uint8x8_t vcolB = vdup_n_u8(colB); 21 uint8x8_t vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst)); 22 uint8x8_t vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst)); 23 uint8x8_t vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst)); 24 uint8x8_t vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst)); 25 26 while (width >= 8) { 27 uint8x8x4_t vdst; 28 uint16x8_t vmask; 29 uint16x8_t vmaskR, vmaskG, vmaskB; 30 uint8x8_t vsel_trans, vsel_opq; 31 32 vdst = vld4_u8((uint8_t*)dst); 33 vmask = vld1q_u16(src); 34 35 // Prepare compare masks 36 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0))); 37 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF))); 38 39 // Get all the color masks on 5 bits 40 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); 41 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), 42 SK_B16_BITS + SK_R16_BITS + 1); 43 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); 44 45 // Upscale to 0..32 46 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); 47 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); 48 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); 49 50 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF)); 51 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]); 52 53 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); 54 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); 55 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); 56 57 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]); 58 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]); 59 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]); 60 61 vst4_u8((uint8_t*)dst, vdst); 62 63 dst += 8; 64 src += 8; 65 width -= 8; 66 } 67 68 // Leftovers 69 for (int i = 0; i < width; i++) { 70 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i], 71 opaqueDst); 72 } 73 } 74 75 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], 76 SkColor color, int width, SkPMColor) { 77 int colA = SkColorGetA(color); 78 int colR = SkColorGetR(color); 79 int colG = SkColorGetG(color); 80 int colB = SkColorGetB(color); 81 82 colA = SkAlpha255To256(colA); 83 84 uint16x8_t vcolA = vdupq_n_u16(colA); 85 uint8x8_t vcolR = vdup_n_u8(colR); 86 uint8x8_t vcolG = vdup_n_u8(colG); 87 uint8x8_t vcolB = vdup_n_u8(colB); 88 89 while (width >= 8) { 90 uint8x8x4_t vdst; 91 uint16x8_t vmask; 92 uint16x8_t vmaskR, vmaskG, vmaskB; 93 94 vdst = vld4_u8((uint8_t*)dst); 95 vmask = vld1q_u16(src); 96 97 // Get all the color masks on 5 bits 98 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); 99 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), 100 SK_B16_BITS + SK_R16_BITS + 1); 101 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); 102 103 // Upscale to 0..32 104 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); 105 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); 106 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); 107 108 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8); 109 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8); 110 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8); 111 112 vdst.val[NEON_A] = vdup_n_u8(0xFF); 113 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); 114 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); 115 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); 116 117 vst4_u8((uint8_t*)dst, vdst); 118 119 dst += 8; 120 src += 8; 121 width -= 8; 122 } 123 124 for (int i = 0; i < width; i++) { 125 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); 126 } 127 } 128 129 #define LOAD_LANE_16(reg, n) \ 130 reg = vld1q_lane_u16(device, reg, n); \ 131 device = (uint16_t*)((char*)device + deviceRB); 132 133 #define STORE_LANE_16(reg, n) \ 134 vst1_lane_u16(dst, reg, n); \ 135 dst = (uint16_t*)((char*)dst + deviceRB); 136 137 void SkRGB16BlitterBlitV_neon(uint16_t* device, 138 int height, 139 size_t deviceRB, 140 unsigned scale, 141 uint32_t src32) { 142 if (height >= 8) 143 { 144 uint16_t* dst = device; 145 146 // prepare constants 147 uint16x8_t vdev = vdupq_n_u16(0); 148 uint16x8_t vmaskq_g16 = vdupq_n_u16(SK_G16_MASK_IN_PLACE); 149 uint16x8_t vmaskq_ng16 = vdupq_n_u16(~SK_G16_MASK_IN_PLACE); 150 uint32x4_t vsrc32 = vdupq_n_u32(src32); 151 uint32x4_t vscale5 = vdupq_n_u32((uint32_t)scale); 152 153 while (height >= 8){ 154 LOAD_LANE_16(vdev, 0) 155 LOAD_LANE_16(vdev, 1) 156 LOAD_LANE_16(vdev, 2) 157 LOAD_LANE_16(vdev, 3) 158 LOAD_LANE_16(vdev, 4) 159 LOAD_LANE_16(vdev, 5) 160 LOAD_LANE_16(vdev, 6) 161 LOAD_LANE_16(vdev, 7) 162 163 // Expand_rgb_16 164 uint16x8x2_t vdst = vzipq_u16((vdev & vmaskq_ng16), (vdev & vmaskq_g16)); 165 uint32x4_t vdst32_lo = vmulq_u32(vreinterpretq_u32_u16(vdst.val[0]), vscale5); 166 uint32x4_t vdst32_hi = vmulq_u32(vreinterpretq_u32_u16(vdst.val[1]), vscale5); 167 168 // Compact_rgb_16 169 vdst32_lo = vaddq_u32(vdst32_lo, vsrc32); 170 vdst32_hi = vaddq_u32(vdst32_hi, vsrc32); 171 vdst32_lo = vshrq_n_u32(vdst32_lo, 5); 172 vdst32_hi = vshrq_n_u32(vdst32_hi, 5); 173 174 uint16x4_t vtmp_lo = vmovn_u32(vdst32_lo) & vget_low_u16(vmaskq_ng16); 175 uint16x4_t vtmp_hi = vshrn_n_u32(vdst32_lo, 16) & vget_low_u16(vmaskq_g16); 176 uint16x4_t vdst16_lo = vorr_u16(vtmp_lo, vtmp_hi); 177 vtmp_lo = vmovn_u32(vdst32_hi) & vget_low_u16(vmaskq_ng16); 178 vtmp_hi = vshrn_n_u32(vdst32_hi, 16) & vget_low_u16(vmaskq_g16); 179 uint16x4_t vdst16_hi = vorr_u16(vtmp_lo, vtmp_hi); 180 181 STORE_LANE_16(vdst16_lo, 0) 182 STORE_LANE_16(vdst16_lo, 1) 183 STORE_LANE_16(vdst16_lo, 2) 184 STORE_LANE_16(vdst16_lo, 3) 185 STORE_LANE_16(vdst16_hi, 0) 186 STORE_LANE_16(vdst16_hi, 1) 187 STORE_LANE_16(vdst16_hi, 2) 188 STORE_LANE_16(vdst16_hi, 3) 189 height -= 8; 190 } 191 } 192 while (height != 0){ 193 uint32_t dst32 = SkExpand_rgb_16(*device) * scale; 194 *device = SkCompact_rgb_16((src32 + dst32) >> 5); 195 device = (uint16_t*)((char*)device + deviceRB); 196 height--; 197 } 198 } 199 200 #undef LOAD_LANE_16 201 #undef STORE_LANE_16 202