/*
 * Copyright (C) 2012 University of Szeged
 * Copyright (C) 2012 Gabor Rapcsanyi
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 26 27 #ifndef FEBlendNEON_h 28 #define FEBlendNEON_h 29 30 #include "platform/graphics/filters/FEBlend.h" 31 32 #if HAVE(ARM_NEON_INTRINSICS) 33 34 #include <arm_neon.h> 35 36 namespace blink { 37 38 class FEBlendUtilitiesNEON { 39 public: 40 static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 41 { 42 uint16x8_t quotient = vshrq_n_u16(num, 8); 43 uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne); 44 return vaddq_u16(quotient, vshrq_n_u16(remainder, 8)); 45 } 46 47 static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t, 48 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 49 { 50 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 51 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 52 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 53 return vaddq_u16(tmp3, pixelA); 54 } 55 56 static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 57 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 58 { 59 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 60 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 61 uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB); 62 uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA); 63 uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4); 64 return div255(tmp5, sixteenConst255, sixteenConstOne); 65 } 66 67 static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t, 68 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 69 { 70 uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB); 71 uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB); 72 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 73 return vsubq_u16(tmp1, tmp3); 74 } 75 76 static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 77 uint16x8_t sixteenConst255, uint16x8_t 
sixteenConstOne) 78 { 79 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 80 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 81 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 82 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 83 84 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 85 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 86 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 87 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 88 89 return vminq_u16(tmp4, tmp8); 90 } 91 92 static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 93 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 94 { 95 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 96 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 97 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 98 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 99 100 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 101 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 102 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 103 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 104 105 return vmaxq_u16(tmp4, tmp8); 106 } 107 }; 108 109 void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray, 110 unsigned colorArrayLength) 111 { 112 uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA); 113 uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB); 114 uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray); 115 116 uint16x8_t sixteenConst255 = vdupq_n_u16(255); 117 uint16x8_t sixteenConstOne = vdupq_n_u16(1); 118 119 unsigned colorOffset = 0; 120 while (colorOffset < colorArrayLength) { 121 unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3]; 122 unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3]; 123 unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7]; 124 unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7]; 125 126 
uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset)); 127 uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset)); 128 uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2)); 129 uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2)); 130 131 uint16x8_t result; 132 switch (m_mode) { 133 case WebBlendModeNormal: 134 result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 135 break; 136 case WebBlendModeMultiply: 137 result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 138 break; 139 case WebBlendModeScreen: 140 result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 141 break; 142 case WebBlendModeDarken: 143 result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 144 break; 145 case WebBlendModeLighten: 146 result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 147 break; 148 default: 149 result = vdupq_n_u16(0); 150 break; 151 } 152 153 vst1_u8(destinationPixel + colorOffset, vmovn_u16(result)); 154 155 unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255; 156 unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255; 157 158 dstPixelArray[colorOffset + 3] = alphaR1; 159 dstPixelArray[colorOffset + 7] = alphaR2; 160 161 colorOffset += 8; 162 if (colorOffset > colorArrayLength) { 163 ASSERT(colorOffset - 4 == colorArrayLength); 164 colorOffset = colorArrayLength - 8; 165 } 166 } 167 } 168 169 } // namespace blink 170 171 #endif // HAVE(ARM_NEON_INTRINSICS) 172 173 #endif // FEBlendNEON_h 174