Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifdef __arm__
     18 #ifdef ANDROID
     19     #include <machine/cpu-features.h>
     20 #endif
     21 #endif
     22 
     23 #include "SkColorPriv.h"
     24 
     25 /*
     26     Filter_32_opaque
     27 
     28     There is no hard-n-fast rule that the filtering must produce
     29     exact results for the color components, but if the 4 incoming colors are
     30     all opaque, then the output color must also be opaque. Subsequent parts of
     31     the drawing pipeline may rely on this (e.g. which blitrow proc to use).
     32  */
     33 
     34 #if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
     35 static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
     36                                     SkPMColor a00, SkPMColor a01,
     37                                     SkPMColor a10, SkPMColor a11,
     38                                     SkPMColor *dst) {
     39     asm volatile(
     40                  "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
     41                  "vmov.u8        d16, #16                \n\t"   // set up constant in d16
     42                  "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
     43 
     44                  "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
     45                  "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
     46                  "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
     47                  "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
     48 
     49                  "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
     50                  "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
     51 
     52                  "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
     53                  "vmov.u16       d16, #16                \n\t"   // set up constant in d16
     54                  "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
     55 
     56                  "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
     57                  "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
     58                  "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
     59                  "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
     60                  "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
     61                  "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
     62                  :
     63                  : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
     64                  : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
     65                  );
     66 }
     67 
     68 static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
     69                                           SkPMColor a00, SkPMColor a01,
     70                                           SkPMColor a10, SkPMColor a11,
     71                                           SkPMColor *dst, uint16_t scale) {
     72     asm volatile(
     73                  "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
     74                  "vmov.u8        d16, #16                \n\t"   // set up constant in d16
     75                  "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
     76 
     77                  "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
     78                  "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
     79                  "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
     80                  "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
     81 
     82                  "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
     83                  "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
     84 
     85                  "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
     86                  "vmov.u16       d16, #16                \n\t"   // set up constant in d16
     87                  "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
     88 
     89                  "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
     90                  "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
     91                  "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
     92                  "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
     93                  "vdup.16        d3, %[scale]            \n\t"   // duplicate scale into d3
     94                  "vshr.u16       d4, d4, #8              \n\t"   // shift down result by 8
     95                  "vmul.i16       d4, d4, d3              \n\t"   // multiply result by scale
     96                  "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
     97                  "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
     98                  :
     99                  : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
    100                  : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
    101                  );
    102 }
// NEON build: the generic Filter_32_* names resolve to the NEON routines above.
#define Filter_32_opaque    Filter_32_opaque_neon
#define Filter_32_alpha     Filter_32_alpha_neon
    105 #else
    106 static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
    107                                              SkPMColor a00, SkPMColor a01,
    108                                              SkPMColor a10, SkPMColor a11,
    109                                              SkPMColor* dstColor) {
    110     SkASSERT((unsigned)x <= 0xF);
    111     SkASSERT((unsigned)y <= 0xF);
    112 
    113     int xy = x * y;
    114     uint32_t mask = gMask_00FF00FF; //0xFF00FF;
    115 
    116     int scale = 256 - 16*y - 16*x + xy;
    117     uint32_t lo = (a00 & mask) * scale;
    118     uint32_t hi = ((a00 >> 8) & mask) * scale;
    119 
    120     scale = 16*x - xy;
    121     lo += (a01 & mask) * scale;
    122     hi += ((a01 >> 8) & mask) * scale;
    123 
    124     scale = 16*y - xy;
    125     lo += (a10 & mask) * scale;
    126     hi += ((a10 >> 8) & mask) * scale;
    127 
    128     lo += (a11 & mask) * xy;
    129     hi += ((a11 >> 8) & mask) * xy;
    130 
    131     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
    132 }
    133 
    134 static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
    135                                             SkPMColor a00, SkPMColor a01,
    136                                             SkPMColor a10, SkPMColor a11,
    137                                             SkPMColor* dstColor,
    138                                             unsigned alphaScale) {
    139     SkASSERT((unsigned)x <= 0xF);
    140     SkASSERT((unsigned)y <= 0xF);
    141     SkASSERT(alphaScale <= 256);
    142 
    143     int xy = x * y;
    144     uint32_t mask = gMask_00FF00FF; //0xFF00FF;
    145 
    146     int scale = 256 - 16*y - 16*x + xy;
    147     uint32_t lo = (a00 & mask) * scale;
    148     uint32_t hi = ((a00 >> 8) & mask) * scale;
    149 
    150     scale = 16*x - xy;
    151     lo += (a01 & mask) * scale;
    152     hi += ((a01 >> 8) & mask) * scale;
    153 
    154     scale = 16*y - xy;
    155     lo += (a10 & mask) * scale;
    156     hi += ((a10 >> 8) & mask) * scale;
    157 
    158     lo += (a11 & mask) * xy;
    159     hi += ((a11 >> 8) & mask) * xy;
    160 
    161     lo = ((lo >> 8) & mask) * alphaScale;
    162     hi = ((hi >> 8) & mask) * alphaScale;
    163 
    164     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
    165 }
// Non-NEON build: the generic Filter_32_* names resolve to the portable routines above.
#define Filter_32_opaque    Filter_32_opaque_portable
#define Filter_32_alpha     Filter_32_alpha_portable
    168 #endif
    169 
    170