1 2 /* 3 * Copyright 2009 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #include "SkColorPriv.h" 11 12 /* 13 Filter_32_opaque 14 15 There is no hard-n-fast rule that the filtering must produce 16 exact results for the color components, but if the 4 incoming colors are 17 all opaque, then the output color must also be opaque. Subsequent parts of 18 the drawing pipeline may rely on this (e.g. which blitrow proc to use). 19 */ 20 21 #if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) 22 static inline void Filter_32_opaque_neon(unsigned x, unsigned y, 23 SkPMColor a00, SkPMColor a01, 24 SkPMColor a10, SkPMColor a11, 25 SkPMColor *dst) { 26 asm volatile( 27 "vdup.8 d0, %[y] \n\t" // duplicate y into d0 28 "vmov.u8 d16, #16 \n\t" // set up constant in d16 29 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y 30 31 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 32 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 33 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 34 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 35 36 "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) 37 "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y 38 39 "vdup.16 d5, %[x] \n\t" // duplicate x into d5 40 "vmov.u16 d16, #16 \n\t" // set up constant in d16 41 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x 42 43 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x 44 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x 45 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) 46 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) 47 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 48 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result 49 : 50 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst) 51 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" 52 ); 53 } 54 55 static inline void Filter_32_alpha_neon(unsigned x, unsigned y, 56 SkPMColor a00, SkPMColor a01, 57 SkPMColor a10, SkPMColor a11, 58 SkPMColor *dst, uint16_t scale) { 59 asm volatile( 60 "vdup.8 d0, %[y] \n\t" // duplicate y into d0 61 "vmov.u8 d16, #16 \n\t" // set up constant in d16 62 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y 63 64 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 65 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 66 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 67 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 68 69 "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) 70 "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y 71 72 "vdup.16 d5, %[x] \n\t" // duplicate x into d5 73 "vmov.u16 d16, #16 \n\t" // set up constant in d16 74 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x 75 76 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x 77 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x 78 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) 79 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) 80 "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3 81 "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8 82 "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale 83 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 84 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result 85 : 86 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale) 87 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" 88 ); 89 } 90 #define Filter_32_opaque Filter_32_opaque_neon 91 #define Filter_32_alpha Filter_32_alpha_neon 92 #else 93 static inline void Filter_32_opaque_portable(unsigned x, unsigned y, 94 SkPMColor a00, SkPMColor a01, 95 SkPMColor a10, SkPMColor a11, 96 SkPMColor* dstColor) { 97 SkASSERT((unsigned)x <= 0xF); 98 SkASSERT((unsigned)y <= 0xF); 99 100 int xy = x * y; 101 static const uint32_t mask = gMask_00FF00FF; //0xFF00FF; 102 103 int scale = 256 - 16*y - 16*x + xy; 104 uint32_t lo = (a00 & mask) * scale; 105 uint32_t hi = ((a00 >> 8) & mask) * scale; 106 107 scale = 16*x - xy; 108 lo += (a01 & mask) * scale; 109 hi += ((a01 >> 8) & mask) * scale; 110 111 scale = 16*y - xy; 112 lo += (a10 & mask) * scale; 113 hi += ((a10 >> 8) & mask) * scale; 114 115 lo += (a11 & mask) * xy; 116 hi += ((a11 >> 8) & mask) * xy; 117 118 *dstColor = ((lo >> 8) & mask) | (hi & ~mask); 119 } 120 121 static inline void Filter_32_alpha_portable(unsigned x, unsigned y, 122 SkPMColor a00, SkPMColor a01, 123 SkPMColor a10, SkPMColor a11, 124 SkPMColor* dstColor, 125 unsigned alphaScale) { 126 SkASSERT((unsigned)x <= 0xF); 127 SkASSERT((unsigned)y <= 0xF); 128 SkASSERT(alphaScale <= 256); 129 130 int xy = x * y; 131 static const uint32_t mask = gMask_00FF00FF; //0xFF00FF; 132 133 int scale = 256 - 16*y - 16*x + xy; 134 uint32_t lo = (a00 & mask) * scale; 135 uint32_t hi = ((a00 >> 8) & mask) * scale; 136 137 scale = 16*x - xy; 138 lo += (a01 & mask) * scale; 139 hi += ((a01 >> 8) & mask) * scale; 140 141 scale = 16*y - xy; 142 lo += (a10 & mask) * scale; 143 hi += ((a10 >> 8) & mask) * scale; 144 145 lo += (a11 & mask) * xy; 146 hi += ((a11 >> 8) & mask) * xy; 147 148 lo = ((lo >> 8) & mask) * alphaScale; 149 hi = ((hi >> 8) & mask) * alphaScale; 150 151 *dstColor = ((lo >> 8) & mask) | (hi & ~mask); 152 } 153 #define Filter_32_opaque Filter_32_opaque_portable 154 #define Filter_32_alpha Filter_32_alpha_portable 155 #endif 156 157