Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2015 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkColor_opts_neon_DEFINED
      9 #define SkColor_opts_neon_DEFINED
     10 
     11 #include "SkTypes.h"
     12 #include "SkColorPriv.h"
     13 
     14 #include <arm_neon.h>
     15 
     /* Plane index of each colour channel inside a uint8x8x4_t: the functions in
      * this file use these values to subscript `.val[]`. Each one is the
      * corresponding SK_*32_SHIFT divided by 8 (presumably the channel's byte
      * position in a packed 32-bit pixel; the shifts are defined outside this
      * file, so confirm there).
      */
     #define NEON_A      (SK_A32_SHIFT / 8)
     #define NEON_R      (SK_R32_SHIFT / 8)
     #define NEON_G      (SK_G32_SHIFT / 8)
     #define NEON_B      (SK_B32_SHIFT / 8)
     20 
     /* Widens eight 8-bit alpha values to 16 bits and adds 1 to every lane, so an
      * input range of [0, 255] becomes [1, 256].
      */
     static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
         const uint16x8_t wide_alpha = vmovl_u8(alpha);

         return vaddq_u16(wide_alpha, vdupq_n_u16(1));
     }
     24 
     /* Scales eight 8-bit values: every lane of `color` is widened to 16 bits,
      * multiplied by the matching lane of `scale`, shifted right by 8 and
      * narrowed back to 8 bits.
      */
     static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
         const uint16x8_t wide_color = vmovl_u8(color);
         const uint16x8_t product = vmulq_u16(wide_color, scale);

         /* Shift-right-narrow: drops the low byte of the product and keeps the
          * next 8 bits of each lane. */
         return vshrn_n_u16(product, 8);
     }
     28 
     /* Runs SkAlphaMul_neon8 on each of the four channel planes of `color`
      * (selected with NEON_A/R/G/B), using the same `scale` vector for all of
      * them, and returns the scaled planes.
      */
     static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
         uint8x8x4_t scaled;

         /* The four planes are independent of each other. */
         scaled.val[NEON_B] = SkAlphaMul_neon8(color.val[NEON_B], scale);
         scaled.val[NEON_G] = SkAlphaMul_neon8(color.val[NEON_G], scale);
         scaled.val[NEON_R] = SkAlphaMul_neon8(color.val[NEON_R], scale);
         scaled.val[NEON_A] = SkAlphaMul_neon8(color.val[NEON_A], scale);

         return scaled;
     }
     39 
     /* Expands 8 RGB565 pixels (R, G, B from high to low bits) into four 8-bit
      * channel planes indexed by NEON_A/R/G/B, so any SkPMColor channel order is
      * handled. Intended to give the same result as the scalar
      * SkPixel16ToPixel32. The alpha plane is filled with 0xFF.
      */
     static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
         /* Bring each colour field down to the low bits of its lane, then
          * narrow the lanes to 8 bits. */
         const uint8x8_t red = vmovn_u16(vshrq_n_u16(vsrc, SK_R16_SHIFT));
         /* The left shift pushes the red bits out of the lane before the right
          * shift drops the blue bits, leaving only green. */
         const uint8x8_t green = vmovn_u16(
                 vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS));
         const uint8x8_t blue = vmovn_u16(vandq_u16(vsrc, vdupq_n_u16(SK_B16_MASK)));

         uint8x8x4_t expanded;

         expanded.val[NEON_A] = vdup_n_u8(0xFF);

         /* Stretch each field to 8 bits: move it to the top of the byte and copy
          * its own high bits into the vacated low bits. */
         expanded.val[NEON_R] = vorr_u8(vshl_n_u8(red, 8 - SK_R16_BITS),
                                        vshr_n_u8(red, 2 * SK_R16_BITS - 8));
         expanded.val[NEON_G] = vorr_u8(vshl_n_u8(green, 8 - SK_G16_BITS),
                                        vshr_n_u8(green, 2 * SK_G16_BITS - 8));
         expanded.val[NEON_B] = vorr_u8(vshl_n_u8(blue, 8 - SK_B16_BITS),
                                        vshr_n_u8(blue, 2 * SK_B16_BITS - 8));

         return expanded;
     }
     60 
     /* Packs 8 pixels, given as separate 8-bit channel planes indexed by
      * NEON_A/R/G/B (so any SkPMColor channel order is handled), into RGB565
      * with R, G, B from high to low bits. Intended to give the same result as
      * the scalar SkPixel32ToPixel16. The alpha plane is not read.
      */
     static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
         /* Place every channel in the top byte of a 16-bit lane. */
         const uint16x8_t red_hi = vshll_n_u8(vsrc.val[NEON_R], 8);
         const uint16x8_t green_hi = vshll_n_u8(vsrc.val[NEON_G], 8);
         const uint16x8_t blue_hi = vshll_n_u8(vsrc.val[NEON_B], 8);

         /* Shift-right-and-insert keeps the top bits already in the destination
          * and fills the bits below them from the shifted source: green goes in
          * below red, then blue below red and green. */
         uint16x8_t packed = vsriq_n_u16(red_hi, green_hi, SK_R16_BITS);
         packed = vsriq_n_u16(packed, blue_hi, SK_R16_BITS + SK_G16_BITS);

         return packed;
     }
     75 
     /* Blends 8 values of one channel; intended to give the same result as the
      * scalar SkBlend32. Per lane the result is
      *     dst + (((src - dst) * scale) >> 5)
      * computed in signed 16-bit arithmetic and narrowed to 8 bits.
      * NOTE(review): the shift by 5 suggests `scale` is expected in [0, 32];
      * confirm against callers.
      */
     static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
         /* Widen to signed 16-bit lanes so that src - dst may be negative. */
         const int16x8_t wide_src = vreinterpretq_s16_u16(vmovl_u8(src));
         const int16x8_t wide_dst = vreinterpretq_s16_u16(vmovl_u8(dst));
         const int16x8_t weight = vreinterpretq_s16_u16(scale);

         const int16x8_t weighted_diff = vmulq_s16(vsubq_s16(wide_src, wide_dst), weight);

         /* Arithmetic shift, so negative differences keep their sign. */
         const int16x8_t blended = vaddq_s16(wide_dst, vshrq_n_s16(weighted_diff, 5));

         return vmovn_u16(vreinterpretq_u16_s16(blended));
     }
     91 
     /* Interpolates the four bytes of two packed pixels. For each byte the
      * result is
      *     dst + (((src - dst) * srcScale) >> 8)
      * so srcScale == 0 returns dst and srcScale == 256 returns src.
      *
      * src, dst  packed 32-bit pixels
      * srcScale  weight of src; asserted to be at most 256
      */
     static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
             unsigned srcScale) {
         SkASSERT(srcScale <= 256);

         const int16x8_t weight = vdupq_n_s16(srcScale);

         /* Spread the bytes of each pixel over signed 16-bit lanes. The pixel is
          * duplicated into both halves of the vector; only the first four lanes
          * contribute to the returned value. */
         const int16x8_t wide_src =
                 vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
         const int16x8_t wide_dst =
                 vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));

         /* The product may not fit in 16 bits (e.g. a difference of 255 with
          * srcScale 256), in which case only its low 16 bits are kept. That
          * changes the shifted value by a multiple of 256 at most, and the final
          * narrowing keeps just the low 8 bits of each lane, so the returned
          * bytes are unaffected. */
         int16x8_t delta = vmulq_s16(vsubq_s16(wide_src, wide_dst), weight);
         delta = vshrq_n_s16(delta, 8);

         const uint8x8_t packed = vmovn_u16(vreinterpretq_u16_s16(vaddq_s16(wide_dst, delta)));

         return vget_lane_u32(vreinterpret_u32_u8(packed), 0);
     }
    113 
    /* Interpolates the four bytes of two packed pixels using a weight that is
     * asserted to be at most 255. The weight is converted with SkAlpha255To256
     * (defined outside this file) and handed to SkFourByteInterp256_neon.
     */
    static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
            U8CPU srcWeight) {
        SkASSERT(srcWeight <= 255);

        return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
    }
    120 
    121 #endif /* #ifndef SkColor_opts_neon_DEFINED */
    122