1 /* 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor (at) inf.u-szeged.hu), University of Szeged 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef GraphicsContext3DNEON_h 27 #define GraphicsContext3DNEON_h 28 29 #if HAVE(ARM_NEON_INTRINSICS) 30 31 #include <arm_neon.h> 32 33 namespace WebCore { 34 35 namespace SIMD { 36 37 ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 38 { 39 unsigned componentsPerRow = pixelsPerRow * 4; 40 unsigned tailComponents = componentsPerRow % 16; 41 unsigned componentsSize = componentsPerRow - tailComponents; 42 const uint8_t* src = reinterpret_cast<const uint8_t*>(source); 43 44 for (unsigned i = 0; i < componentsSize; i += 16) { 45 uint8x16x2_t components = vld2q_u8(src + i * 2); 46 vst1q_u8(destination + i, components.val[1]); 47 } 48 49 source += componentsSize; 50 destination += componentsSize; 51 pixelsPerRow = tailComponents / 4; 52 } 53 54 ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 55 { 56 unsigned componentsPerRow = pixelsPerRow * 3; 57 unsigned tailComponents = componentsPerRow % 24; 58 unsigned componentsSize = componentsPerRow - tailComponents; 59 60 uint8x8_t componentA = vdup_n_u8(0xFF); 61 for (unsigned i = 0; i < componentsSize; i += 24) { 62 uint16x8x3_t RGB16 = vld3q_u16(source + i); 63 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8)); 64 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8)); 65 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8)); 66 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 67 vst4_u8(destination, RGBA8); 68 destination += 32; 69 } 70 71 source += componentsSize; 72 pixelsPerRow = tailComponents / 3; 73 } 74 75 ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 76 { 77 unsigned componentsPerRow = pixelsPerRow * 4; 78 unsigned tailComponents = componentsPerRow % 32; 79 unsigned componentsSize = componentsPerRow - tailComponents; 80 81 for (unsigned i = 0; i < componentsSize; i += 32) { 82 uint16x8x4_t ARGB16 = vld4q_u16(source + i); 83 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); 84 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); 85 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); 86 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); 87 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 88 vst4_u8(destination + i, RGBA8); 89 } 90 91 source += componentsSize; 92 destination += componentsSize; 93 pixelsPerRow = tailComponents / 4; 94 } 95 96 ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 97 { 98 unsigned componentsPerRow = pixelsPerRow * 4; 99 unsigned tailComponents = componentsPerRow % 32; 100 unsigned componentsSize = componentsPerRow - tailComponents; 101 102 for (unsigned i = 0; i < componentsSize; i += 32) { 103 uint16x8x4_t ARGB16 = vld4q_u16(source + i); 104 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); 105 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); 106 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); 107 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); 108 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 109 vst4_u8(destination + i, RGBA8); 110 } 111 112 source += componentsSize; 113 destination += componentsSize; 114 pixelsPerRow = tailComponents / 4; 115 } 116 117 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 118 { 119 unsigned tailPixels = pixelsPerRow % 8; 120 unsigned pixelSize = pixelsPerRow - tailPixels; 121 122 uint16x8_t immediate0x0f = vdupq_n_u16(0x0F); 123 for (unsigned i = 0; i < pixelSize; i += 8) { 124 uint16x8_t eightPixels = vld1q_u16(source + i); 125 126 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); 127 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), immediate0x0f)); 128 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), immediate0x0f)); 129 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f)); 130 131 componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); 132 componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG); 133 componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB); 134 componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA); 135 136 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 137 vst4_u8(destination, destComponents); 138 destination += 32; 139 } 140 141 source += pixelSize; 142 pixelsPerRow = tailPixels; 143 } 144 145 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 146 { 147 unsigned componentsPerRow = pixelsPerRow * 4; 148 unsigned tailComponents = componentsPerRow % 32; 149 unsigned componentsSize = componentsPerRow - tailComponents; 150 151 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 152 uint8x8_t immediate0xf0 = vdup_n_u8(0xF0); 153 for (unsigned i = 0; i < componentsSize; i += 32) { 154 uint8x8x4_t RGBA8 = vld4_u8(source + i); 155 156 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0); 157 uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4); 158 uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0); 159 uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4); 160 161 uint8x8x2_t RGBA4; 162 RGBA4.val[0] = vorr_u8(componentB, componentA); 163 RGBA4.val[1] = vorr_u8(componentR, componentG); 164 vst2_u8(dst, RGBA4); 165 dst += 16; 166 } 167 168 source += componentsSize; 169 destination += componentsSize / 4; 170 pixelsPerRow = tailComponents / 4; 171 } 172 173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 174 { 175 unsigned tailPixels = pixelsPerRow % 8; 176 unsigned pixelSize = pixelsPerRow - tailPixels; 177 178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); 179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); 180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); 181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); 182 183 for (unsigned i = 0; i < pixelSize; i += 8) { 184 uint16x8_t eightPixels = vld1q_u16(source + i); 185 186 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); 187 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6), immediate0x1f)); 188 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1), immediate0x1f)); 189 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1)); 190 191 componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); 192 componentG = vorr_u8(vshl_n_u8(componentG, 3), vand_u8(componentG, immediate0x7)); 193 componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); 194 componentA = vmul_u8(componentA, immediate0xff); 195 196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 197 vst4_u8(destination, destComponents); 198 destination += 32; 199 } 200 201 source += pixelSize; 202 pixelsPerRow = tailPixels; 203 } 204 205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 206 { 207 unsigned componentsPerRow = pixelsPerRow * 4; 208 unsigned tailComponents = componentsPerRow % 32; 209 unsigned componentsSize = componentsPerRow - tailComponents; 210 211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 212 213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); 214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); 215 for (unsigned i = 0; i < componentsSize; i += 32) { 216 uint8x8x4_t RGBA8 = vld4_u8(source + i); 217 218 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); 219 uint8x8_t componentG3bit = vshr_n_u8(RGBA8.val[1], 5); 220 221 uint8x8_t componentG2bit = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x18), 3); 222 uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 2); 223 uint8x8_t componentA = vshr_n_u8(RGBA8.val[3], 7); 224 225 uint8x8x2_t RGBA5551; 226 RGBA5551.val[0] = vorr_u8(vorr_u8(componentG2bit, componentB), componentA); 227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); 228 vst2_u8(dst, RGBA5551); 229 dst += 16; 230 } 231 232 source += componentsSize; 233 destination += componentsSize / 4; 234 pixelsPerRow = tailComponents / 4; 235 } 236 237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 238 { 239 unsigned tailPixels = pixelsPerRow % 8; 240 unsigned pixelSize = pixelsPerRow - tailPixels; 241 242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); 243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); 244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); 245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); 246 247 uint8x8_t componentA = vdup_n_u8(0xFF); 248 249 for (unsigned i = 0; i < pixelSize; i += 8) { 250 uint16x8_t eightPixels = vld1q_u16(source + i); 251 252 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); 253 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5), immediate0x3f)); 254 uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f)); 255 256 componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); 257 componentG = vorr_u8(vshl_n_u8(componentG, 2), vand_u8(componentG, immediate0x3)); 258 componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); 259 260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 261 vst4_u8(destination, destComponents); 262 destination += 32; 263 } 264 265 source += pixelSize; 266 pixelsPerRow = tailPixels; 267 } 268 269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 270 { 271 unsigned componentsPerRow = pixelsPerRow * 4; 272 unsigned tailComponents = componentsPerRow % 32; 273 unsigned componentsSize = componentsPerRow - tailComponents; 274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 275 276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); 277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); 278 for (unsigned i = 0; i < componentsSize; i += 32) { 279 uint8x8x4_t RGBA8 = vld4_u8(source + i); 280 281 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); 282 uint8x8_t componentGLeft = vshr_n_u8(RGBA8.val[1], 5); 283 uint8x8_t componentGRight = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x1c), 3); 284 uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 3); 285 286 uint8x8x2_t RGB565; 287 RGB565.val[0] = vorr_u8(componentGRight, componentB); 288 RGB565.val[1] = vorr_u8(componentR, componentGLeft); 289 vst2_u8(dst, RGB565); 290 dst += 16; 291 } 292 293 source += componentsSize; 294 destination += componentsSize / 4; 295 pixelsPerRow = tailComponents / 4; 296 } 297 298 } // namespace SIMD 299 300 } // namespace WebCore 301 302 #endif // HAVE(ARM_NEON_INTRINSICS) 303 304 #endif // GraphicsContext3DNEON_h 305