Home | History | Annotate | Download | only in core
      1 
      2 /*
      3  * Copyright 2009 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #include "SkColorPriv.h"
     11 
/*
    Filter_32_opaque

    There is no hard-and-fast rule that the filtering must produce
    exact results for the color components, but if the 4 incoming colors are
    all opaque, then the output color must also be opaque. Subsequent parts of
    the drawing pipeline may rely on this (e.g. which blitrow proc to use).
 */
     20 
     21 #if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
/*
 *  NEON version of the bilinear filter for four packed 32-bit colors.
 *
 *  Each 8-bit channel of the result is
 *      (a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x) >> 8
 *  and the blended pixel is stored to *dst. The four weights always sum to
 *  256, so a channel that is 0xFF in all four inputs stays 0xFF.
 *
 *  x, y : sub-pixel weights. Presumably in [0, 15], as the portable variant
 *         asserts; nothing here checks it -- TODO confirm with callers.
 *  a00, a01, a10, a11 : the four source colors being blended.
 *  dst  : receives one 32-bit blended color.
 *
 *  Register map: q0 = d0:d1, q2 = d4:d5, q3 = d6:d7.
 *
 *  NOTE(review): "r4" and "d2" appear in the clobber list although no
 *  instruction below names them (directly or through a q register); they
 *  look unnecessary -- confirm before removing.
 */
static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
                                    SkPMColor a00, SkPMColor a01,
                                    SkPMColor a10, SkPMColor a11,
                                    SkPMColor *dst) {
    asm volatile(
                 // Vertical weights, one per byte lane.
                 "vdup.8         d0, %[y]                \n\t"   // d0 = y in every byte lane
                 "vmov.u8        d16, #16                \n\t"   // d16 = 16 in every byte lane
                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y

                 // Pack the two rows: d4 = [a01|a00], d5 = [a11|a10].
                 "vdup.32        d4, %[a00]              \n\t"   // both halves of d4 = a00
                 "vdup.32        d5, %[a10]              \n\t"   // both halves of d5 = a10
                 "vmov.32        d4[1], %[a01]           \n\t"   // upper half of d4 = a01
                 "vmov.32        d5[1], %[a11]           \n\t"   // upper half of d5 = a11

                 // Widening multiplies: every 8-bit channel becomes a 16-bit product.
                 "vmull.u8       q3, d4, d1              \n\t"   // d6 = a00*(16-y), d7 = a01*(16-y)
                 "vmull.u8       q0, d5, d0              \n\t"   // d0 = a10*y,      d1 = a11*y

                 // Horizontal weights, one per 16-bit lane (d5 is reused here).
                 "vdup.16        d5, %[x]                \n\t"   // d5 = x in every 16-bit lane
                 "vmov.u16       d16, #16                \n\t"   // d16 = 16 in every 16-bit lane
                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x

                 // Accumulate the four weighted corners in d4 (16 bits per channel).
                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01*(16-y) * x
                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11*y * x
                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00*(16-y) * (16-x)
                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10*y * (16-x)
                 // Narrow q2 (d4:d5) to bytes; only the four bytes that come from d4 are used.
                 "vshrn.i16      d0, q2, #8              \n\t"   // d0 = (q2 >> 8) narrowed to 8 bits
                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // *dst = low 32 bits of d0
                 :
                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
                 );
}
     54 
/*
 *  NEON version of the bilinear filter for four packed 32-bit colors, with
 *  an extra scale applied to the blended result.
 *
 *  Each 8-bit channel is first blended as
 *      c = (a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x) >> 8
 *  and then scaled as (c * scale) >> 8 before being stored to *dst.
 *
 *  x, y  : sub-pixel weights. Presumably in [0, 15], as the portable variant
 *          asserts; nothing here checks it -- TODO confirm with callers.
 *  a00, a01, a10, a11 : the four source colors being blended.
 *  dst   : receives one 32-bit blended color.
 *  scale : multiplier applied to every channel. Presumably in [0, 256], as
 *          the portable variant asserts -- TODO confirm with callers.
 *
 *  Register map: q0 = d0:d1, q2 = d4:d5, q3 = d6:d7.
 *
 *  NOTE(review): "r4" and "d2" appear in the clobber list although no
 *  instruction below names them (directly or through a q register); they
 *  look unnecessary -- confirm before removing.
 */
static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
                                          SkPMColor a00, SkPMColor a01,
                                          SkPMColor a10, SkPMColor a11,
                                          SkPMColor *dst, uint16_t scale) {
    asm volatile(
                 // Vertical weights, one per byte lane.
                 "vdup.8         d0, %[y]                \n\t"   // d0 = y in every byte lane
                 "vmov.u8        d16, #16                \n\t"   // d16 = 16 in every byte lane
                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y

                 // Pack the two rows: d4 = [a01|a00], d5 = [a11|a10].
                 "vdup.32        d4, %[a00]              \n\t"   // both halves of d4 = a00
                 "vdup.32        d5, %[a10]              \n\t"   // both halves of d5 = a10
                 "vmov.32        d4[1], %[a01]           \n\t"   // upper half of d4 = a01
                 "vmov.32        d5[1], %[a11]           \n\t"   // upper half of d5 = a11

                 // Widening multiplies: every 8-bit channel becomes a 16-bit product.
                 "vmull.u8       q3, d4, d1              \n\t"   // d6 = a00*(16-y), d7 = a01*(16-y)
                 "vmull.u8       q0, d5, d0              \n\t"   // d0 = a10*y,      d1 = a11*y

                 // Horizontal weights, one per 16-bit lane (d5 is reused here).
                 "vdup.16        d5, %[x]                \n\t"   // d5 = x in every 16-bit lane
                 "vmov.u16       d16, #16                \n\t"   // d16 = 16 in every 16-bit lane
                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x

                 // Accumulate the four weighted corners in d4 (16 bits per channel).
                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01*(16-y) * x
                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11*y * x
                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00*(16-y) * (16-x)
                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10*y * (16-x)
                 // Apply the scale (d3 is reused for it).
                 "vdup.16        d3, %[scale]            \n\t"   // d3 = scale in every 16-bit lane
                 "vshr.u16       d4, d4, #8              \n\t"   // d4 >>= 8: blended channels, 8 significant bits
                 "vmul.i16       d4, d4, d3              \n\t"   // d4 *= scale
                 // Narrow q2 (d4:d5) to bytes; only the four bytes that come from d4 are used.
                 "vshrn.i16      d0, q2, #8              \n\t"   // d0 = (q2 >> 8) narrowed to 8 bits
                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // *dst = low 32 bits of d0
                 :
                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
                 );
}
// Map the generic filter names onto the NEON implementations for this build.
#define Filter_32_opaque    Filter_32_opaque_neon
#define Filter_32_alpha     Filter_32_alpha_neon
     92 #else
     93 static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
     94                                              SkPMColor a00, SkPMColor a01,
     95                                              SkPMColor a10, SkPMColor a11,
     96                                              SkPMColor* dstColor) {
     97     SkASSERT((unsigned)x <= 0xF);
     98     SkASSERT((unsigned)y <= 0xF);
     99 
    100     int xy = x * y;
    101     static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;
    102 
    103     int scale = 256 - 16*y - 16*x + xy;
    104     uint32_t lo = (a00 & mask) * scale;
    105     uint32_t hi = ((a00 >> 8) & mask) * scale;
    106 
    107     scale = 16*x - xy;
    108     lo += (a01 & mask) * scale;
    109     hi += ((a01 >> 8) & mask) * scale;
    110 
    111     scale = 16*y - xy;
    112     lo += (a10 & mask) * scale;
    113     hi += ((a10 >> 8) & mask) * scale;
    114 
    115     lo += (a11 & mask) * xy;
    116     hi += ((a11 >> 8) & mask) * xy;
    117 
    118     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
    119 }
    120 
    121 static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
    122                                             SkPMColor a00, SkPMColor a01,
    123                                             SkPMColor a10, SkPMColor a11,
    124                                             SkPMColor* dstColor,
    125                                             unsigned alphaScale) {
    126     SkASSERT((unsigned)x <= 0xF);
    127     SkASSERT((unsigned)y <= 0xF);
    128     SkASSERT(alphaScale <= 256);
    129 
    130     int xy = x * y;
    131     static const uint32_t mask = gMask_00FF00FF; //0xFF00FF;
    132 
    133     int scale = 256 - 16*y - 16*x + xy;
    134     uint32_t lo = (a00 & mask) * scale;
    135     uint32_t hi = ((a00 >> 8) & mask) * scale;
    136 
    137     scale = 16*x - xy;
    138     lo += (a01 & mask) * scale;
    139     hi += ((a01 >> 8) & mask) * scale;
    140 
    141     scale = 16*y - xy;
    142     lo += (a10 & mask) * scale;
    143     hi += ((a10 >> 8) & mask) * scale;
    144 
    145     lo += (a11 & mask) * xy;
    146     hi += ((a11 >> 8) & mask) * xy;
    147 
    148     lo = ((lo >> 8) & mask) * alphaScale;
    149     hi = ((hi >> 8) & mask) * alphaScale;
    150 
    151     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
    152 }
// Map the generic filter names onto the portable C implementations for this build.
#define Filter_32_opaque    Filter_32_opaque_portable
#define Filter_32_alpha     Filter_32_alpha_portable
    155 #endif
    156 
    157