1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkColor_opts_neon_DEFINED 9 #define SkColor_opts_neon_DEFINED 10 11 #include "SkTypes.h" 12 #include "SkColorPriv.h" 13 14 #include <arm_neon.h> 15 16 #define NEON_A (SK_A32_SHIFT / 8) 17 #define NEON_R (SK_R32_SHIFT / 8) 18 #define NEON_G (SK_G32_SHIFT / 8) 19 #define NEON_B (SK_B32_SHIFT / 8) 20 21 static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) { 22 return vaddw_u8(vdupq_n_u16(1), alpha); 23 } 24 25 static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) { 26 return vshrn_n_u16(vmovl_u8(color) * scale, 8); 27 } 28 29 static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) { 30 uint8x8x4_t ret; 31 32 ret.val[NEON_A] = SkAlphaMul_neon8(color.val[NEON_A], scale); 33 ret.val[NEON_R] = SkAlphaMul_neon8(color.val[NEON_R], scale); 34 ret.val[NEON_G] = SkAlphaMul_neon8(color.val[NEON_G], scale); 35 ret.val[NEON_B] = SkAlphaMul_neon8(color.val[NEON_B], scale); 36 37 return ret; 38 } 39 40 /* This function expands 8 pixels from RGB565 (R, G, B from high to low) to 41 * SkPMColor (all possible configurations supported) in the exact same way as 42 * SkPixel16ToPixel32. 43 */ 44 static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) { 45 46 uint8x8x4_t ret; 47 uint8x8_t vr, vg, vb; 48 49 vr = vmovn_u16(vshrq_n_u16(vsrc, SK_R16_SHIFT)); 50 vg = vmovn_u16(vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS)); 51 vb = vmovn_u16(vsrc & vdupq_n_u16(SK_B16_MASK)); 52 53 ret.val[NEON_A] = vdup_n_u8(0xFF); 54 ret.val[NEON_R] = vshl_n_u8(vr, 8 - SK_R16_BITS) | vshr_n_u8(vr, 2 * SK_R16_BITS - 8); 55 ret.val[NEON_G] = vshl_n_u8(vg, 8 - SK_G16_BITS) | vshr_n_u8(vg, 2 * SK_G16_BITS - 8); 56 ret.val[NEON_B] = vshl_n_u8(vb, 8 - SK_B16_BITS) | vshr_n_u8(vb, 2 * SK_B16_BITS - 8); 57 58 return ret; 59 } 60 61 /* This function packs 8 pixels from SkPMColor (all possible configurations 62 * supported) to RGB565 (R, G, B from high to low) in the exact same way as 63 * SkPixel32ToPixel16. 64 */ 65 static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) { 66 67 uint16x8_t ret; 68 69 ret = vshll_n_u8(vsrc.val[NEON_R], 8); 70 ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_G], 8), SK_R16_BITS); 71 ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_B], 8), SK_R16_BITS + SK_G16_BITS); 72 73 return ret; 74 } 75 76 /* This function blends 8 pixels of the same channel in the exact same way as 77 * SkBlend32. 78 */ 79 static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) { 80 int16x8_t src_wide, dst_wide; 81 82 src_wide = vreinterpretq_s16_u16(vmovl_u8(src)); 83 dst_wide = vreinterpretq_s16_u16(vmovl_u8(dst)); 84 85 src_wide = (src_wide - dst_wide) * vreinterpretq_s16_u16(scale); 86 87 dst_wide += vshrq_n_s16(src_wide, 5); 88 89 return vmovn_u16(vreinterpretq_u16_s16(dst_wide)); 90 } 91 92 static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst, 93 unsigned srcScale) { 94 SkASSERT(srcScale <= 256); 95 int16x8_t vscale = vdupq_n_s16(srcScale); 96 int16x8_t vsrc_wide, vdst_wide, vdiff; 97 uint8x8_t res; 98 99 vsrc_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src)))); 100 vdst_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst)))); 101 102 vdiff = vsrc_wide - vdst_wide; 103 vdiff *= vscale; 104 105 vdiff = vshrq_n_s16(vdiff, 8); 106 107 vdst_wide += vdiff; 108 109 res = vmovn_u16(vreinterpretq_u16_s16(vdst_wide)); 110 111 return vget_lane_u32(vreinterpret_u32_u8(res), 0); 112 } 113 114 static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst, 115 U8CPU srcWeight) { 116 SkASSERT(srcWeight <= 255); 117 unsigned scale = SkAlpha255To256(srcWeight); 118 return SkFourByteInterp256_neon(src, dst, scale); 119 } 120 121 #endif /* #ifndef SkColor_opts_neon_DEFINED */ 122