1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifdef __arm__ 18 #ifdef ANDROID 19 #include <machine/cpu-features.h> 20 #endif 21 #endif 22 23 #include "SkColorPriv.h" 24 25 /* 26 Filter_32_opaque 27 28 There is no hard-n-fast rule that the filtering must produce 29 exact results for the color components, but if the 4 incoming colors are 30 all opaque, then the output color must also be opaque. Subsequent parts of 31 the drawing pipeline may rely on this (e.g. which blitrow proc to use). 32 */ 33 34 #if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) 35 static inline void Filter_32_opaque_neon(unsigned x, unsigned y, 36 SkPMColor a00, SkPMColor a01, 37 SkPMColor a10, SkPMColor a11, 38 SkPMColor *dst) { 39 asm volatile( 40 "vdup.8 d0, %[y] \n\t" // duplicate y into d0 41 "vmov.u8 d16, #16 \n\t" // set up constant in d16 42 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y 43 44 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 45 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 46 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 47 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 48 49 "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) 50 "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y 51 52 "vdup.16 d5, %[x] \n\t" // duplicate x into d5 53 "vmov.u16 d16, #16 \n\t" // set up constant in d16 54 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x 55 56 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x 57 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x 58 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) 59 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) 60 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 61 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result 62 : 63 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst) 64 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" 65 ); 66 } 67 68 static inline void Filter_32_alpha_neon(unsigned x, unsigned y, 69 SkPMColor a00, SkPMColor a01, 70 SkPMColor a10, SkPMColor a11, 71 SkPMColor *dst, uint16_t scale) { 72 asm volatile( 73 "vdup.8 d0, %[y] \n\t" // duplicate y into d0 74 "vmov.u8 d16, #16 \n\t" // set up constant in d16 75 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y 76 77 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 78 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 79 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 80 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 81 82 "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) 83 "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y 84 85 "vdup.16 d5, %[x] \n\t" // duplicate x into d5 86 "vmov.u16 d16, #16 \n\t" // set up constant in d16 87 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x 88 89 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x 90 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x 91 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) 92 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) 93 "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3 94 "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8 95 "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale 96 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 97 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result 98 : 99 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale) 100 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" 101 ); 102 } 103 #define Filter_32_opaque Filter_32_opaque_neon 104 #define Filter_32_alpha Filter_32_alpha_neon 105 #else 106 static inline void Filter_32_opaque_portable(unsigned x, unsigned y, 107 SkPMColor a00, SkPMColor a01, 108 SkPMColor a10, SkPMColor a11, 109 SkPMColor* dstColor) { 110 SkASSERT((unsigned)x <= 0xF); 111 SkASSERT((unsigned)y <= 0xF); 112 113 int xy = x * y; 114 uint32_t mask = gMask_00FF00FF; //0xFF00FF; 115 116 int scale = 256 - 16*y - 16*x + xy; 117 uint32_t lo = (a00 & mask) * scale; 118 uint32_t hi = ((a00 >> 8) & mask) * scale; 119 120 scale = 16*x - xy; 121 lo += (a01 & mask) * scale; 122 hi += ((a01 >> 8) & mask) * scale; 123 124 scale = 16*y - xy; 125 lo += (a10 & mask) * scale; 126 hi += ((a10 >> 8) & mask) * scale; 127 128 lo += (a11 & mask) * xy; 129 hi += ((a11 >> 8) & mask) * xy; 130 131 *dstColor = ((lo >> 8) & mask) | (hi & ~mask); 132 } 133 134 static inline void Filter_32_alpha_portable(unsigned x, unsigned y, 135 SkPMColor a00, SkPMColor a01, 136 SkPMColor a10, SkPMColor a11, 137 SkPMColor* dstColor, 138 unsigned alphaScale) { 139 SkASSERT((unsigned)x <= 0xF); 140 SkASSERT((unsigned)y <= 0xF); 141 SkASSERT(alphaScale <= 256); 142 143 int xy = x * y; 144 uint32_t mask = gMask_00FF00FF; //0xFF00FF; 145 146 int scale = 256 - 16*y - 16*x + xy; 147 uint32_t lo = (a00 & mask) * scale; 148 uint32_t hi = ((a00 >> 8) & mask) * scale; 149 150 scale = 16*x - xy; 151 lo += (a01 & mask) * scale; 152 hi += ((a01 >> 8) & mask) * scale; 153 154 scale = 16*y - xy; 155 lo += (a10 & mask) * scale; 156 hi += ((a10 >> 8) & mask) * scale; 157 158 lo += (a11 & mask) * xy; 159 hi += ((a11 >> 8) & mask) * xy; 160 161 lo = ((lo >> 8) & mask) * alphaScale; 162 hi = ((hi >> 8) & mask) * alphaScale; 163 164 *dstColor = ((lo >> 8) & mask) | (hi & ~mask); 165 } 166 #define Filter_32_opaque Filter_32_opaque_portable 167 #define Filter_32_alpha Filter_32_alpha_portable 168 #endif 169 170