1 /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 
20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __ARM_NEON_H 25 #define __ARM_NEON_H 26 27 #ifndef __ARM_NEON__ 28 #error "NEON support not enabled" 29 #endif 30 31 #include <stdint.h> 32 33 typedef float float32_t; 34 typedef int8_t poly8_t; 35 typedef int16_t poly16_t; 36 typedef uint16_t float16_t; 37 typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; 38 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; 39 typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; 40 typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; 41 typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; 42 typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; 43 typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; 44 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; 45 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; 46 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; 47 typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; 48 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; 49 typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; 50 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; 51 typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; 52 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; 53 typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; 54 typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; 55 typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; 56 typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; 57 typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; 58 typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; 59 typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; 60 typedef __attribute__((neon_polyvector_type(8))) 
poly16_t poly16x8_t; 61 62 typedef struct int8x8x2_t { 63 int8x8_t val[2]; 64 } int8x8x2_t; 65 66 typedef struct int8x16x2_t { 67 int8x16_t val[2]; 68 } int8x16x2_t; 69 70 typedef struct int16x4x2_t { 71 int16x4_t val[2]; 72 } int16x4x2_t; 73 74 typedef struct int16x8x2_t { 75 int16x8_t val[2]; 76 } int16x8x2_t; 77 78 typedef struct int32x2x2_t { 79 int32x2_t val[2]; 80 } int32x2x2_t; 81 82 typedef struct int32x4x2_t { 83 int32x4_t val[2]; 84 } int32x4x2_t; 85 86 typedef struct int64x1x2_t { 87 int64x1_t val[2]; 88 } int64x1x2_t; 89 90 typedef struct int64x2x2_t { 91 int64x2_t val[2]; 92 } int64x2x2_t; 93 94 typedef struct uint8x8x2_t { 95 uint8x8_t val[2]; 96 } uint8x8x2_t; 97 98 typedef struct uint8x16x2_t { 99 uint8x16_t val[2]; 100 } uint8x16x2_t; 101 102 typedef struct uint16x4x2_t { 103 uint16x4_t val[2]; 104 } uint16x4x2_t; 105 106 typedef struct uint16x8x2_t { 107 uint16x8_t val[2]; 108 } uint16x8x2_t; 109 110 typedef struct uint32x2x2_t { 111 uint32x2_t val[2]; 112 } uint32x2x2_t; 113 114 typedef struct uint32x4x2_t { 115 uint32x4_t val[2]; 116 } uint32x4x2_t; 117 118 typedef struct uint64x1x2_t { 119 uint64x1_t val[2]; 120 } uint64x1x2_t; 121 122 typedef struct uint64x2x2_t { 123 uint64x2_t val[2]; 124 } uint64x2x2_t; 125 126 typedef struct float16x4x2_t { 127 float16x4_t val[2]; 128 } float16x4x2_t; 129 130 typedef struct float16x8x2_t { 131 float16x8_t val[2]; 132 } float16x8x2_t; 133 134 typedef struct float32x2x2_t { 135 float32x2_t val[2]; 136 } float32x2x2_t; 137 138 typedef struct float32x4x2_t { 139 float32x4_t val[2]; 140 } float32x4x2_t; 141 142 typedef struct poly8x8x2_t { 143 poly8x8_t val[2]; 144 } poly8x8x2_t; 145 146 typedef struct poly8x16x2_t { 147 poly8x16_t val[2]; 148 } poly8x16x2_t; 149 150 typedef struct poly16x4x2_t { 151 poly16x4_t val[2]; 152 } poly16x4x2_t; 153 154 typedef struct poly16x8x2_t { 155 poly16x8_t val[2]; 156 } poly16x8x2_t; 157 158 typedef struct int8x8x3_t { 159 int8x8_t val[3]; 160 } int8x8x3_t; 161 162 typedef 
struct int8x16x3_t { 163 int8x16_t val[3]; 164 } int8x16x3_t; 165 166 typedef struct int16x4x3_t { 167 int16x4_t val[3]; 168 } int16x4x3_t; 169 170 typedef struct int16x8x3_t { 171 int16x8_t val[3]; 172 } int16x8x3_t; 173 174 typedef struct int32x2x3_t { 175 int32x2_t val[3]; 176 } int32x2x3_t; 177 178 typedef struct int32x4x3_t { 179 int32x4_t val[3]; 180 } int32x4x3_t; 181 182 typedef struct int64x1x3_t { 183 int64x1_t val[3]; 184 } int64x1x3_t; 185 186 typedef struct int64x2x3_t { 187 int64x2_t val[3]; 188 } int64x2x3_t; 189 190 typedef struct uint8x8x3_t { 191 uint8x8_t val[3]; 192 } uint8x8x3_t; 193 194 typedef struct uint8x16x3_t { 195 uint8x16_t val[3]; 196 } uint8x16x3_t; 197 198 typedef struct uint16x4x3_t { 199 uint16x4_t val[3]; 200 } uint16x4x3_t; 201 202 typedef struct uint16x8x3_t { 203 uint16x8_t val[3]; 204 } uint16x8x3_t; 205 206 typedef struct uint32x2x3_t { 207 uint32x2_t val[3]; 208 } uint32x2x3_t; 209 210 typedef struct uint32x4x3_t { 211 uint32x4_t val[3]; 212 } uint32x4x3_t; 213 214 typedef struct uint64x1x3_t { 215 uint64x1_t val[3]; 216 } uint64x1x3_t; 217 218 typedef struct uint64x2x3_t { 219 uint64x2_t val[3]; 220 } uint64x2x3_t; 221 222 typedef struct float16x4x3_t { 223 float16x4_t val[3]; 224 } float16x4x3_t; 225 226 typedef struct float16x8x3_t { 227 float16x8_t val[3]; 228 } float16x8x3_t; 229 230 typedef struct float32x2x3_t { 231 float32x2_t val[3]; 232 } float32x2x3_t; 233 234 typedef struct float32x4x3_t { 235 float32x4_t val[3]; 236 } float32x4x3_t; 237 238 typedef struct poly8x8x3_t { 239 poly8x8_t val[3]; 240 } poly8x8x3_t; 241 242 typedef struct poly8x16x3_t { 243 poly8x16_t val[3]; 244 } poly8x16x3_t; 245 246 typedef struct poly16x4x3_t { 247 poly16x4_t val[3]; 248 } poly16x4x3_t; 249 250 typedef struct poly16x8x3_t { 251 poly16x8_t val[3]; 252 } poly16x8x3_t; 253 254 typedef struct int8x8x4_t { 255 int8x8_t val[4]; 256 } int8x8x4_t; 257 258 typedef struct int8x16x4_t { 259 int8x16_t val[4]; 260 } int8x16x4_t; 261 262 
typedef struct int16x4x4_t { 263 int16x4_t val[4]; 264 } int16x4x4_t; 265 266 typedef struct int16x8x4_t { 267 int16x8_t val[4]; 268 } int16x8x4_t; 269 270 typedef struct int32x2x4_t { 271 int32x2_t val[4]; 272 } int32x2x4_t; 273 274 typedef struct int32x4x4_t { 275 int32x4_t val[4]; 276 } int32x4x4_t; 277 278 typedef struct int64x1x4_t { 279 int64x1_t val[4]; 280 } int64x1x4_t; 281 282 typedef struct int64x2x4_t { 283 int64x2_t val[4]; 284 } int64x2x4_t; 285 286 typedef struct uint8x8x4_t { 287 uint8x8_t val[4]; 288 } uint8x8x4_t; 289 290 typedef struct uint8x16x4_t { 291 uint8x16_t val[4]; 292 } uint8x16x4_t; 293 294 typedef struct uint16x4x4_t { 295 uint16x4_t val[4]; 296 } uint16x4x4_t; 297 298 typedef struct uint16x8x4_t { 299 uint16x8_t val[4]; 300 } uint16x8x4_t; 301 302 typedef struct uint32x2x4_t { 303 uint32x2_t val[4]; 304 } uint32x2x4_t; 305 306 typedef struct uint32x4x4_t { 307 uint32x4_t val[4]; 308 } uint32x4x4_t; 309 310 typedef struct uint64x1x4_t { 311 uint64x1_t val[4]; 312 } uint64x1x4_t; 313 314 typedef struct uint64x2x4_t { 315 uint64x2_t val[4]; 316 } uint64x2x4_t; 317 318 typedef struct float16x4x4_t { 319 float16x4_t val[4]; 320 } float16x4x4_t; 321 322 typedef struct float16x8x4_t { 323 float16x8_t val[4]; 324 } float16x8x4_t; 325 326 typedef struct float32x2x4_t { 327 float32x2_t val[4]; 328 } float32x2x4_t; 329 330 typedef struct float32x4x4_t { 331 float32x4_t val[4]; 332 } float32x4x4_t; 333 334 typedef struct poly8x8x4_t { 335 poly8x8_t val[4]; 336 } poly8x8x4_t; 337 338 typedef struct poly8x16x4_t { 339 poly8x16_t val[4]; 340 } poly8x16x4_t; 341 342 typedef struct poly16x4x4_t { 343 poly16x4_t val[4]; 344 } poly16x4x4_t; 345 346 typedef struct poly16x8x4_t { 347 poly16x8_t val[4]; 348 } poly16x8x4_t; 349 350 #define __ai static inline __attribute__((__always_inline__, __nodebug__)) 351 352 __ai int16x8_t vmovl_s8(int8x8_t __a) { 353 return (int16x8_t)__builtin_neon_vmovl_v(__a, 33); } 354 __ai int32x4_t vmovl_s16(int16x4_t __a) { 355 
return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 34); } 356 __ai int64x2_t vmovl_s32(int32x2_t __a) { 357 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 35); } 358 __ai uint16x8_t vmovl_u8(uint8x8_t __a) { 359 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 49); } 360 __ai uint32x4_t vmovl_u16(uint16x4_t __a) { 361 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 50); } 362 __ai uint64x2_t vmovl_u32(uint32x2_t __a) { 363 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 51); } 364 365 __ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { 366 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 33); } 367 __ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { 368 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } 369 __ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { 370 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } 371 __ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { 372 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); } 373 __ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { 374 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); } 375 __ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { 376 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); } 377 __ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { 378 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); } 379 380 __ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { 381 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); } 382 __ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { 383 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 384 __ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { 385 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 386 __ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { 387 return 
(uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); } 388 __ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { 389 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 390 __ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { 391 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); } 392 __ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { 393 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 7); } 394 __ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { 395 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 32); } 396 __ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { 397 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 398 __ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { 399 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 400 __ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) { 401 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 402 __ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { 403 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 404 __ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { 405 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 406 __ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { 407 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 39); } 408 409 __ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { 410 return __a + vabd_s8(__b, __c); } 411 __ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { 412 return __a + vabd_s16(__b, __c); } 413 __ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { 414 return __a + vabd_s32(__b, __c); } 415 __ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { 416 return __a + vabd_u8(__b, __c); } 417 __ai uint16x4_t 
vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { 418 return __a + vabd_u16(__b, __c); } 419 __ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { 420 return __a + vabd_u32(__b, __c); } 421 __ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { 422 return __a + vabdq_s8(__b, __c); } 423 __ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { 424 return __a + vabdq_s16(__b, __c); } 425 __ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { 426 return __a + vabdq_s32(__b, __c); } 427 __ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { 428 return __a + vabdq_u8(__b, __c); } 429 __ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { 430 return __a + vabdq_u16(__b, __c); } 431 __ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { 432 return __a + vabdq_u32(__b, __c); } 433 434 __ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { 435 return __a + (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__b, __c)); } 436 __ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { 437 return __a + (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__b, __c)); } 438 __ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { 439 return __a + (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__b, __c)); } 440 __ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { 441 return __a + vmovl_u8(vabd_u8(__b, __c)); } 442 __ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { 443 return __a + vmovl_u16(vabd_u16(__b, __c)); } 444 __ai uint64x2_t vabal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { 445 return __a + vmovl_u32(vabd_u32(__b, __c)); } 446 447 __ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) { 448 return (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__a, __b)); } 449 __ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) { 450 return (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__a, 
__b)); } 451 __ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) { 452 return (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__a, __b)); } 453 __ai uint16x8_t vabdl_u8(uint8x8_t __a, uint8x8_t __b) { 454 return vmovl_u8(vabd_u8(__a, __b)); } 455 __ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) { 456 return vmovl_u16(vabd_u16(__a, __b)); } 457 __ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { 458 return vmovl_u32(vabd_u32(__a, __b)); } 459 460 __ai int8x8_t vabs_s8(int8x8_t __a) { 461 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); } 462 __ai int16x4_t vabs_s16(int16x4_t __a) { 463 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); } 464 __ai int32x2_t vabs_s32(int32x2_t __a) { 465 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); } 466 __ai float32x2_t vabs_f32(float32x2_t __a) { 467 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 7); } 468 __ai int8x16_t vabsq_s8(int8x16_t __a) { 469 return (int8x16_t)__builtin_neon_vabsq_v(__a, 32); } 470 __ai int16x8_t vabsq_s16(int16x8_t __a) { 471 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 33); } 472 __ai int32x4_t vabsq_s32(int32x4_t __a) { 473 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 34); } 474 __ai float32x4_t vabsq_f32(float32x4_t __a) { 475 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 39); } 476 477 __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { 478 return __a + __b; } 479 __ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { 480 return __a + __b; } 481 __ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { 482 return __a + __b; } 483 __ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { 484 return __a + __b; } 485 __ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { 486 return __a + __b; } 487 __ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) { 488 return __a + __b; } 489 __ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) { 490 return __a + __b; } 491 __ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) 
{ 492 return __a + __b; } 493 __ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) { 494 return __a + __b; } 495 __ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) { 496 return __a + __b; } 497 __ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) { 498 return __a + __b; } 499 __ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) { 500 return __a + __b; } 501 __ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) { 502 return __a + __b; } 503 __ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) { 504 return __a + __b; } 505 __ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) { 506 return __a + __b; } 507 __ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) { 508 return __a + __b; } 509 __ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) { 510 return __a + __b; } 511 __ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { 512 return __a + __b; } 513 514 __ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { 515 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 516 __ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { 517 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 518 __ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { 519 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 520 __ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { 521 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } 522 __ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { 523 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } 524 __ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { 525 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } 526 527 __ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) { 528 return vmovl_s8(__a) + vmovl_s8(__b); } 529 __ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { 530 return vmovl_s16(__a) + vmovl_s16(__b); } 
531 __ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { 532 return vmovl_s32(__a) + vmovl_s32(__b); } 533 __ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { 534 return vmovl_u8(__a) + vmovl_u8(__b); } 535 __ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { 536 return vmovl_u16(__a) + vmovl_u16(__b); } 537 __ai uint64x2_t vaddl_u32(uint32x2_t __a, uint32x2_t __b) { 538 return vmovl_u32(__a) + vmovl_u32(__b); } 539 540 __ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) { 541 return __a + vmovl_s8(__b); } 542 __ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) { 543 return __a + vmovl_s16(__b); } 544 __ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) { 545 return __a + vmovl_s32(__b); } 546 __ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) { 547 return __a + vmovl_u8(__b); } 548 __ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) { 549 return __a + vmovl_u16(__b); } 550 __ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) { 551 return __a + vmovl_u32(__b); } 552 553 __ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) { 554 return __a & __b; } 555 __ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) { 556 return __a & __b; } 557 __ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) { 558 return __a & __b; } 559 __ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) { 560 return __a & __b; } 561 __ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) { 562 return __a & __b; } 563 __ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) { 564 return __a & __b; } 565 __ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) { 566 return __a & __b; } 567 __ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) { 568 return __a & __b; } 569 __ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) { 570 return __a & __b; } 571 __ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) { 572 return __a & __b; } 573 __ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) { 574 return __a & __b; } 575 __ai int64x2_t vandq_s64(int64x2_t __a, 
int64x2_t __b) { 576 return __a & __b; } 577 __ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) { 578 return __a & __b; } 579 __ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) { 580 return __a & __b; } 581 __ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) { 582 return __a & __b; } 583 __ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) { 584 return __a & __b; } 585 586 __ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) { 587 return __a & ~__b; } 588 __ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) { 589 return __a & ~__b; } 590 __ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) { 591 return __a & ~__b; } 592 __ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) { 593 return __a & ~__b; } 594 __ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) { 595 return __a & ~__b; } 596 __ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) { 597 return __a & ~__b; } 598 __ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) { 599 return __a & ~__b; } 600 __ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) { 601 return __a & ~__b; } 602 __ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) { 603 return __a & ~__b; } 604 __ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) { 605 return __a & ~__b; } 606 __ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) { 607 return __a & ~__b; } 608 __ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) { 609 return __a & ~__b; } 610 __ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) { 611 return __a & ~__b; } 612 __ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) { 613 return __a & ~__b; } 614 __ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) { 615 return __a & ~__b; } 616 __ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) { 617 return __a & ~__b; } 618 619 __ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) { 620 return (int8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, __b, __c, 0); } 621 __ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t 
__c) { 622 return (int16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 1); } 623 __ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) { 624 return (int32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 2); } 625 __ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) { 626 return (int64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 3); } 627 __ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { 628 return (uint8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); } 629 __ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { 630 return (uint16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 17); } 631 __ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { 632 return (uint32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); } 633 __ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { 634 return (uint64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); } 635 __ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) { 636 return (float32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } 637 __ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { 638 return (poly8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); } 639 __ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { 640 return (poly16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); } 641 __ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) { 642 return (int8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, __b, __c, 32); } 643 __ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) { 644 return (int16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } 645 
__ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) { 646 return (int32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } 647 __ai int64x2_t vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) { 648 return (int64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); } 649 __ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { 650 return (uint8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } 651 __ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { 652 return (uint16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } 653 __ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { 654 return (uint32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } 655 __ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { 656 return (uint64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 51); } 657 __ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { 658 return (float32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } 659 __ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { 660 return (poly8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); } 661 __ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { 662 return (poly16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 37); } 663 664 __ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { 665 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); } 666 __ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { 667 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 668 669 __ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { 670 
return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); } 671 __ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) { 672 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 673 674 __ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { 675 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); } 676 __ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { 677 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 678 679 __ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { 680 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); } 681 __ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { 682 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 683 684 __ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { 685 return (uint8x8_t)(__a == __b); } 686 __ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { 687 return (uint16x4_t)(__a == __b); } 688 __ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { 689 return (uint32x2_t)(__a == __b); } 690 __ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { 691 return (uint32x2_t)(__a == __b); } 692 __ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { 693 return (uint8x8_t)(__a == __b); } 694 __ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) { 695 return (uint16x4_t)(__a == __b); } 696 __ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) { 697 return (uint32x2_t)(__a == __b); } 698 __ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) { 699 return (uint8x8_t)(__a == __b); } 700 __ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) { 701 return (uint8x16_t)(__a == __b); } 702 __ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) { 703 return (uint16x8_t)(__a == __b); } 704 __ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) { 705 return (uint32x4_t)(__a == __b); } 706 __ai uint32x4_t vceqq_f32(float32x4_t __a, 
float32x4_t __b) { 707 return (uint32x4_t)(__a == __b); } 708 __ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) { 709 return (uint8x16_t)(__a == __b); } 710 __ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) { 711 return (uint16x8_t)(__a == __b); } 712 __ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) { 713 return (uint32x4_t)(__a == __b); } 714 __ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) { 715 return (uint8x16_t)(__a == __b); } 716 717 __ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) { 718 return (uint8x8_t)(__a >= __b); } 719 __ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) { 720 return (uint16x4_t)(__a >= __b); } 721 __ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) { 722 return (uint32x2_t)(__a >= __b); } 723 __ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) { 724 return (uint32x2_t)(__a >= __b); } 725 __ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) { 726 return (uint8x8_t)(__a >= __b); } 727 __ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) { 728 return (uint16x4_t)(__a >= __b); } 729 __ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) { 730 return (uint32x2_t)(__a >= __b); } 731 __ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) { 732 return (uint8x16_t)(__a >= __b); } 733 __ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) { 734 return (uint16x8_t)(__a >= __b); } 735 __ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) { 736 return (uint32x4_t)(__a >= __b); } 737 __ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) { 738 return (uint32x4_t)(__a >= __b); } 739 __ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) { 740 return (uint8x16_t)(__a >= __b); } 741 __ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) { 742 return (uint16x8_t)(__a >= __b); } 743 __ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) { 744 return (uint32x4_t)(__a >= __b); } 745 746 __ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) { 747 return (uint8x8_t)(__a > __b); } 748 __ai 
uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) { 749 return (uint16x4_t)(__a > __b); } 750 __ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) { 751 return (uint32x2_t)(__a > __b); } 752 __ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) { 753 return (uint32x2_t)(__a > __b); } 754 __ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) { 755 return (uint8x8_t)(__a > __b); } 756 __ai uint16x4_t vcgt_u16(uint16x4_t __a, uint16x4_t __b) { 757 return (uint16x4_t)(__a > __b); } 758 __ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) { 759 return (uint32x2_t)(__a > __b); } 760 __ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) { 761 return (uint8x16_t)(__a > __b); } 762 __ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) { 763 return (uint16x8_t)(__a > __b); } 764 __ai uint32x4_t vcgtq_s32(int32x4_t __a, int32x4_t __b) { 765 return (uint32x4_t)(__a > __b); } 766 __ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) { 767 return (uint32x4_t)(__a > __b); } 768 __ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) { 769 return (uint8x16_t)(__a > __b); } 770 __ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) { 771 return (uint16x8_t)(__a > __b); } 772 __ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) { 773 return (uint32x4_t)(__a > __b); } 774 775 __ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) { 776 return (uint8x8_t)(__a <= __b); } 777 __ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) { 778 return (uint16x4_t)(__a <= __b); } 779 __ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) { 780 return (uint32x2_t)(__a <= __b); } 781 __ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) { 782 return (uint32x2_t)(__a <= __b); } 783 __ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) { 784 return (uint8x8_t)(__a <= __b); } 785 __ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) { 786 return (uint16x4_t)(__a <= __b); } 787 __ai uint32x2_t vcle_u32(uint32x2_t __a, uint32x2_t __b) { 788 return (uint32x2_t)(__a <= __b); 
} 789 __ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) { 790 return (uint8x16_t)(__a <= __b); } 791 __ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) { 792 return (uint16x8_t)(__a <= __b); } 793 __ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) { 794 return (uint32x4_t)(__a <= __b); } 795 __ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) { 796 return (uint32x4_t)(__a <= __b); } 797 __ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) { 798 return (uint8x16_t)(__a <= __b); } 799 __ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) { 800 return (uint16x8_t)(__a <= __b); } 801 __ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { 802 return (uint32x4_t)(__a <= __b); } 803 804 __ai int8x8_t vcls_s8(int8x8_t __a) { 805 return (int8x8_t)__builtin_neon_vcls_v(__a, 0); } 806 __ai int16x4_t vcls_s16(int16x4_t __a) { 807 return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); } 808 __ai int32x2_t vcls_s32(int32x2_t __a) { 809 return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); } 810 __ai int8x16_t vclsq_s8(int8x16_t __a) { 811 return (int8x16_t)__builtin_neon_vclsq_v(__a, 32); } 812 __ai int16x8_t vclsq_s16(int16x8_t __a) { 813 return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 33); } 814 __ai int32x4_t vclsq_s32(int32x4_t __a) { 815 return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 34); } 816 817 __ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { 818 return (uint8x8_t)(__a < __b); } 819 __ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { 820 return (uint16x4_t)(__a < __b); } 821 __ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { 822 return (uint32x2_t)(__a < __b); } 823 __ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { 824 return (uint32x2_t)(__a < __b); } 825 __ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { 826 return (uint8x8_t)(__a < __b); } 827 __ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) { 828 return (uint16x4_t)(__a < __b); } 829 __ai uint32x2_t 
vclt_u32(uint32x2_t __a, uint32x2_t __b) { 830 return (uint32x2_t)(__a < __b); } 831 __ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) { 832 return (uint8x16_t)(__a < __b); } 833 __ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) { 834 return (uint16x8_t)(__a < __b); } 835 __ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) { 836 return (uint32x4_t)(__a < __b); } 837 __ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) { 838 return (uint32x4_t)(__a < __b); } 839 __ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) { 840 return (uint8x16_t)(__a < __b); } 841 __ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) { 842 return (uint16x8_t)(__a < __b); } 843 __ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { 844 return (uint32x4_t)(__a < __b); } 845 846 __ai int8x8_t vclz_s8(int8x8_t __a) { 847 return (int8x8_t)__builtin_neon_vclz_v(__a, 0); } 848 __ai int16x4_t vclz_s16(int16x4_t __a) { 849 return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); } 850 __ai int32x2_t vclz_s32(int32x2_t __a) { 851 return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); } 852 __ai uint8x8_t vclz_u8(uint8x8_t __a) { 853 return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 16); } 854 __ai uint16x4_t vclz_u16(uint16x4_t __a) { 855 return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 17); } 856 __ai uint32x2_t vclz_u32(uint32x2_t __a) { 857 return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 18); } 858 __ai int8x16_t vclzq_s8(int8x16_t __a) { 859 return (int8x16_t)__builtin_neon_vclzq_v(__a, 32); } 860 __ai int16x8_t vclzq_s16(int16x8_t __a) { 861 return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 33); } 862 __ai int32x4_t vclzq_s32(int32x4_t __a) { 863 return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 34); } 864 __ai uint8x16_t vclzq_u8(uint8x16_t __a) { 865 return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 48); } 866 __ai uint16x8_t vclzq_u16(uint16x8_t __a) { 867 return 
(uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 49); } 868 __ai uint32x4_t vclzq_u32(uint32x4_t __a) { 869 return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 50); } 870 871 __ai uint8x8_t vcnt_u8(uint8x8_t __a) { 872 return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 16); } 873 __ai int8x8_t vcnt_s8(int8x8_t __a) { 874 return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); } 875 __ai poly8x8_t vcnt_p8(poly8x8_t __a) { 876 return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 4); } 877 __ai uint8x16_t vcntq_u8(uint8x16_t __a) { 878 return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 48); } 879 __ai int8x16_t vcntq_s8(int8x16_t __a) { 880 return (int8x16_t)__builtin_neon_vcntq_v(__a, 32); } 881 __ai poly8x16_t vcntq_p8(poly8x16_t __a) { 882 return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 36); } 883 884 __ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { 885 return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 886 __ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { 887 return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 888 __ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { 889 return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 890 __ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { 891 return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 892 __ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { 893 return (float16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 894 __ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) { 895 return (float32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 896 __ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) { 897 return (uint8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 898 __ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) { 899 return 
(uint16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 900 __ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) { 901 return (uint32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 902 __ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) { 903 return (uint64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 904 __ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) { 905 return (poly8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 906 __ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) { 907 return (poly16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 908 909 __ai int8x8_t vcreate_s8(uint64_t __a) { 910 return (int8x8_t)__a; } 911 __ai int16x4_t vcreate_s16(uint64_t __a) { 912 return (int16x4_t)__a; } 913 __ai int32x2_t vcreate_s32(uint64_t __a) { 914 return (int32x2_t)__a; } 915 __ai float16x4_t vcreate_f16(uint64_t __a) { 916 return (float16x4_t)__a; } 917 __ai float32x2_t vcreate_f32(uint64_t __a) { 918 return (float32x2_t)__a; } 919 __ai uint8x8_t vcreate_u8(uint64_t __a) { 920 return (uint8x8_t)__a; } 921 __ai uint16x4_t vcreate_u16(uint64_t __a) { 922 return (uint16x4_t)__a; } 923 __ai uint32x2_t vcreate_u32(uint64_t __a) { 924 return (uint32x2_t)__a; } 925 __ai uint64x1_t vcreate_u64(uint64_t __a) { 926 return (uint64x1_t)__a; } 927 __ai poly8x8_t vcreate_p8(uint64_t __a) { 928 return (poly8x8_t)__a; } 929 __ai poly16x4_t vcreate_p16(uint64_t __a) { 930 return (poly16x4_t)__a; } 931 __ai int64x1_t vcreate_s64(uint64_t __a) { 932 return (int64x1_t)__a; } 933 934 __ai float16x4_t vcvt_f16_f32(float32x4_t __a) { 935 return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 6); } 936 937 __ai float32x2_t vcvt_f32_s32(int32x2_t __a) { 938 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); } 939 __ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { 940 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 18); } 
941 __ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { 942 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 34); } 943 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { 944 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 50); } 945 946 __ai float32x4_t vcvt_f32_f16(float16x4_t __a) { 947 return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 6); } 948 949 #define vcvt_n_f32_s32(a, __b) __extension__ ({ \ 950 int32x2_t __a = (a); \ 951 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); }) 952 #define vcvt_n_f32_u32(a, __b) __extension__ ({ \ 953 uint32x2_t __a = (a); \ 954 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 18); }) 955 #define vcvtq_n_f32_s32(a, __b) __extension__ ({ \ 956 int32x4_t __a = (a); \ 957 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); }) 958 #define vcvtq_n_f32_u32(a, __b) __extension__ ({ \ 959 uint32x4_t __a = (a); \ 960 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); }) 961 962 #define vcvt_n_s32_f32(a, __b) __extension__ ({ \ 963 float32x2_t __a = (a); \ 964 (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); }) 965 #define vcvtq_n_s32_f32(a, __b) __extension__ ({ \ 966 float32x4_t __a = (a); \ 967 (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); }) 968 969 #define vcvt_n_u32_f32(a, __b) __extension__ ({ \ 970 float32x2_t __a = (a); \ 971 (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 18); }) 972 #define vcvtq_n_u32_f32(a, __b) __extension__ ({ \ 973 float32x4_t __a = (a); \ 974 (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); }) 975 976 __ai int32x2_t vcvt_s32_f32(float32x2_t __a) { 977 return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); } 978 __ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { 979 return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 34); } 980 981 __ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { 982 return 
(uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 18); } 983 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { 984 return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 50); } 985 986 #define vdup_lane_u8(a, __b) __extension__ ({ \ 987 uint8x8_t __a = (a); \ 988 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 989 #define vdup_lane_u16(a, __b) __extension__ ({ \ 990 uint16x4_t __a = (a); \ 991 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 992 #define vdup_lane_u32(a, __b) __extension__ ({ \ 993 uint32x2_t __a = (a); \ 994 __builtin_shufflevector(__a, __a, __b, __b); }) 995 #define vdup_lane_s8(a, __b) __extension__ ({ \ 996 int8x8_t __a = (a); \ 997 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 998 #define vdup_lane_s16(a, __b) __extension__ ({ \ 999 int16x4_t __a = (a); \ 1000 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 1001 #define vdup_lane_s32(a, __b) __extension__ ({ \ 1002 int32x2_t __a = (a); \ 1003 __builtin_shufflevector(__a, __a, __b, __b); }) 1004 #define vdup_lane_p8(a, __b) __extension__ ({ \ 1005 poly8x8_t __a = (a); \ 1006 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 1007 #define vdup_lane_p16(a, __b) __extension__ ({ \ 1008 poly16x4_t __a = (a); \ 1009 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 1010 #define vdup_lane_f32(a, __b) __extension__ ({ \ 1011 float32x2_t __a = (a); \ 1012 __builtin_shufflevector(__a, __a, __b, __b); }) 1013 #define vdupq_lane_u8(a, __b) __extension__ ({ \ 1014 uint8x8_t __a = (a); \ 1015 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) 1016 #define vdupq_lane_u16(a, __b) __extension__ ({ \ 1017 uint16x4_t __a = (a); \ 1018 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 1019 #define vdupq_lane_u32(a, __b) __extension__ ({ \ 1020 uint32x2_t __a = (a); \ 1021 
__builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 1022 #define vdupq_lane_s8(a, __b) __extension__ ({ \ 1023 int8x8_t __a = (a); \ 1024 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) 1025 #define vdupq_lane_s16(a, __b) __extension__ ({ \ 1026 int16x4_t __a = (a); \ 1027 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 1028 #define vdupq_lane_s32(a, __b) __extension__ ({ \ 1029 int32x2_t __a = (a); \ 1030 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 1031 #define vdupq_lane_p8(a, __b) __extension__ ({ \ 1032 poly8x8_t __a = (a); \ 1033 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) 1034 #define vdupq_lane_p16(a, __b) __extension__ ({ \ 1035 poly16x4_t __a = (a); \ 1036 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 1037 #define vdupq_lane_f32(a, __b) __extension__ ({ \ 1038 float32x2_t __a = (a); \ 1039 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 1040 #define vdup_lane_s64(a, __b) __extension__ ({ \ 1041 int64x1_t __a = (a); \ 1042 __builtin_shufflevector(__a, __a, __b); }) 1043 #define vdup_lane_u64(a, __b) __extension__ ({ \ 1044 uint64x1_t __a = (a); \ 1045 __builtin_shufflevector(__a, __a, __b); }) 1046 #define vdupq_lane_s64(a, __b) __extension__ ({ \ 1047 int64x1_t __a = (a); \ 1048 __builtin_shufflevector(__a, __a, __b, __b); }) 1049 #define vdupq_lane_u64(a, __b) __extension__ ({ \ 1050 uint64x1_t __a = (a); \ 1051 __builtin_shufflevector(__a, __a, __b, __b); }) 1052 1053 __ai uint8x8_t vdup_n_u8(uint8_t __a) { 1054 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 1055 __ai uint16x4_t vdup_n_u16(uint16_t __a) { 1056 return (uint16x4_t){ __a, __a, __a, __a }; } 1057 __ai uint32x2_t vdup_n_u32(uint32_t __a) { 1058 return (uint32x2_t){ __a, __a }; } 1059 __ai int8x8_t vdup_n_s8(int8_t __a) { 1060 return (int8x8_t){ 
__a, __a, __a, __a, __a, __a, __a, __a }; } 1061 __ai int16x4_t vdup_n_s16(int16_t __a) { 1062 return (int16x4_t){ __a, __a, __a, __a }; } 1063 __ai int32x2_t vdup_n_s32(int32_t __a) { 1064 return (int32x2_t){ __a, __a }; } 1065 __ai poly8x8_t vdup_n_p8(poly8_t __a) { 1066 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 1067 __ai poly16x4_t vdup_n_p16(poly16_t __a) { 1068 return (poly16x4_t){ __a, __a, __a, __a }; } 1069 __ai float32x2_t vdup_n_f32(float32_t __a) { 1070 return (float32x2_t){ __a, __a }; } 1071 __ai uint8x16_t vdupq_n_u8(uint8_t __a) { 1072 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 1073 __ai uint16x8_t vdupq_n_u16(uint16_t __a) { 1074 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 1075 __ai uint32x4_t vdupq_n_u32(uint32_t __a) { 1076 return (uint32x4_t){ __a, __a, __a, __a }; } 1077 __ai int8x16_t vdupq_n_s8(int8_t __a) { 1078 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 1079 __ai int16x8_t vdupq_n_s16(int16_t __a) { 1080 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 1081 __ai int32x4_t vdupq_n_s32(int32_t __a) { 1082 return (int32x4_t){ __a, __a, __a, __a }; } 1083 __ai poly8x16_t vdupq_n_p8(poly8_t __a) { 1084 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 1085 __ai poly16x8_t vdupq_n_p16(poly16_t __a) { 1086 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 1087 __ai float32x4_t vdupq_n_f32(float32_t __a) { 1088 return (float32x4_t){ __a, __a, __a, __a }; } 1089 __ai int64x1_t vdup_n_s64(int64_t __a) { 1090 return (int64x1_t){ __a }; } 1091 __ai uint64x1_t vdup_n_u64(uint64_t __a) { 1092 return (uint64x1_t){ __a }; } 1093 __ai int64x2_t vdupq_n_s64(int64_t __a) { 1094 return (int64x2_t){ __a, __a }; } 1095 __ai uint64x2_t vdupq_n_u64(uint64_t __a) { 1096 return (uint64x2_t){ __a, __a }; } 1097 1098 __ai 
int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) { 1099 return __a ^ __b; } 1100 __ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) { 1101 return __a ^ __b; } 1102 __ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) { 1103 return __a ^ __b; } 1104 __ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) { 1105 return __a ^ __b; } 1106 __ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) { 1107 return __a ^ __b; } 1108 __ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) { 1109 return __a ^ __b; } 1110 __ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) { 1111 return __a ^ __b; } 1112 __ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) { 1113 return __a ^ __b; } 1114 __ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) { 1115 return __a ^ __b; } 1116 __ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) { 1117 return __a ^ __b; } 1118 __ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) { 1119 return __a ^ __b; } 1120 __ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) { 1121 return __a ^ __b; } 1122 __ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) { 1123 return __a ^ __b; } 1124 __ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) { 1125 return __a ^ __b; } 1126 __ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { 1127 return __a ^ __b; } 1128 __ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { 1129 return __a ^ __b; } 1130 1131 #define vext_s8(a, b, __c) __extension__ ({ \ 1132 int8x8_t __a = (a); int8x8_t __b = (b); \ 1133 (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); }) 1134 #define vext_u8(a, b, __c) __extension__ ({ \ 1135 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 1136 (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) 1137 #define vext_p8(a, b, __c) __extension__ ({ \ 1138 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 1139 (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) 1140 #define vext_s16(a, b, __c) __extension__ ({ \ 1141 int16x4_t __a = (a); int16x4_t 
__b = (b); \ 1142 (int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 1143 #define vext_u16(a, b, __c) __extension__ ({ \ 1144 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 1145 (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) 1146 #define vext_p16(a, b, __c) __extension__ ({ \ 1147 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 1148 (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 1149 #define vext_s32(a, b, __c) __extension__ ({ \ 1150 int32x2_t __a = (a); int32x2_t __b = (b); \ 1151 (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 1152 #define vext_u32(a, b, __c) __extension__ ({ \ 1153 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 1154 (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) 1155 #define vext_s64(a, b, __c) __extension__ ({ \ 1156 int64x1_t __a = (a); int64x1_t __b = (b); \ 1157 (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 1158 #define vext_u64(a, b, __c) __extension__ ({ \ 1159 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 1160 (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) 1161 #define vext_f32(a, b, __c) __extension__ ({ \ 1162 float32x2_t __a = (a); float32x2_t __b = (b); \ 1163 (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 7); }) 1164 #define vextq_s8(a, b, __c) __extension__ ({ \ 1165 int8x16_t __a = (a); int8x16_t __b = (b); \ 1166 (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 32); }) 1167 #define vextq_u8(a, b, __c) __extension__ ({ \ 1168 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 1169 (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) 1170 #define vextq_p8(a, b, __c) __extension__ ({ \ 1171 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 1172 (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) 1173 #define vextq_s16(a, b, __c) __extension__ ({ \ 1174 int16x8_t __a = (a); 
int16x8_t __b = (b); \ 1175 (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 1176 #define vextq_u16(a, b, __c) __extension__ ({ \ 1177 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 1178 (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) 1179 #define vextq_p16(a, b, __c) __extension__ ({ \ 1180 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 1181 (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) 1182 #define vextq_s32(a, b, __c) __extension__ ({ \ 1183 int32x4_t __a = (a); int32x4_t __b = (b); \ 1184 (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) 1185 #define vextq_u32(a, b, __c) __extension__ ({ \ 1186 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 1187 (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) 1188 #define vextq_s64(a, b, __c) __extension__ ({ \ 1189 int64x2_t __a = (a); int64x2_t __b = (b); \ 1190 (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) 1191 #define vextq_u64(a, b, __c) __extension__ ({ \ 1192 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 1193 (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) 1194 #define vextq_f32(a, b, __c) __extension__ ({ \ 1195 float32x4_t __a = (a); float32x4_t __b = (b); \ 1196 (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 39); }) 1197 1198 __ai float32x2_t vfma_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { 1199 return (float32x2_t)__builtin_neon_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } 1200 __ai float32x4_t vfmaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { 1201 return (float32x4_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } 1202 1203 __ai int8x8_t vget_high_s8(int8x16_t __a) { 1204 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1205 __ai int16x4_t vget_high_s16(int16x8_t __a) { 1206 
return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1207 __ai int32x2_t vget_high_s32(int32x4_t __a) { 1208 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1209 __ai int64x1_t vget_high_s64(int64x2_t __a) { 1210 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1211 __ai float16x4_t vget_high_f16(float16x8_t __a) { 1212 return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1213 __ai float32x2_t vget_high_f32(float32x4_t __a) { 1214 return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1215 __ai uint8x8_t vget_high_u8(uint8x16_t __a) { 1216 return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1217 __ai uint16x4_t vget_high_u16(uint16x8_t __a) { 1218 return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1219 __ai uint32x2_t vget_high_u32(uint32x4_t __a) { 1220 return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1221 __ai uint64x1_t vget_high_u64(uint64x2_t __a) { 1222 return (uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1223 __ai poly8x8_t vget_high_p8(poly8x16_t __a) { 1224 return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1225 __ai poly16x4_t vget_high_p16(poly16x8_t __a) { 1226 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1227 1228 #define vget_lane_u8(a, __b) __extension__ ({ \ 1229 uint8x8_t __a = (a); \ 1230 (uint8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); }) 1231 #define vget_lane_u16(a, __b) __extension__ ({ \ 1232 uint16x4_t __a = (a); \ 1233 (uint16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); }) 1234 #define vget_lane_u32(a, __b) __extension__ ({ \ 1235 uint32x2_t __a = (a); \ 1236 (uint32_t)__builtin_neon_vget_lane_i32((int32x2_t)__a, __b); }) 1237 #define vget_lane_s8(a, __b) __extension__ ({ \ 1238 int8x8_t __a = (a); \ 
1239 (int8_t)__builtin_neon_vget_lane_i8(__a, __b); }) 1240 #define vget_lane_s16(a, __b) __extension__ ({ \ 1241 int16x4_t __a = (a); \ 1242 (int16_t)__builtin_neon_vget_lane_i16(__a, __b); }) 1243 #define vget_lane_s32(a, __b) __extension__ ({ \ 1244 int32x2_t __a = (a); \ 1245 (int32_t)__builtin_neon_vget_lane_i32(__a, __b); }) 1246 #define vget_lane_p8(a, __b) __extension__ ({ \ 1247 poly8x8_t __a = (a); \ 1248 (poly8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); }) 1249 #define vget_lane_p16(a, __b) __extension__ ({ \ 1250 poly16x4_t __a = (a); \ 1251 (poly16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); }) 1252 #define vget_lane_f32(a, __b) __extension__ ({ \ 1253 float32x2_t __a = (a); \ 1254 (float32_t)__builtin_neon_vget_lane_f32(__a, __b); }) 1255 #define vgetq_lane_u8(a, __b) __extension__ ({ \ 1256 uint8x16_t __a = (a); \ 1257 (uint8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); }) 1258 #define vgetq_lane_u16(a, __b) __extension__ ({ \ 1259 uint16x8_t __a = (a); \ 1260 (uint16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); }) 1261 #define vgetq_lane_u32(a, __b) __extension__ ({ \ 1262 uint32x4_t __a = (a); \ 1263 (uint32_t)__builtin_neon_vgetq_lane_i32((int32x4_t)__a, __b); }) 1264 #define vgetq_lane_s8(a, __b) __extension__ ({ \ 1265 int8x16_t __a = (a); \ 1266 (int8_t)__builtin_neon_vgetq_lane_i8(__a, __b); }) 1267 #define vgetq_lane_s16(a, __b) __extension__ ({ \ 1268 int16x8_t __a = (a); \ 1269 (int16_t)__builtin_neon_vgetq_lane_i16(__a, __b); }) 1270 #define vgetq_lane_s32(a, __b) __extension__ ({ \ 1271 int32x4_t __a = (a); \ 1272 (int32_t)__builtin_neon_vgetq_lane_i32(__a, __b); }) 1273 #define vgetq_lane_p8(a, __b) __extension__ ({ \ 1274 poly8x16_t __a = (a); \ 1275 (poly8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); }) 1276 #define vgetq_lane_p16(a, __b) __extension__ ({ \ 1277 poly16x8_t __a = (a); \ 1278 (poly16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); }) 1279 #define vgetq_lane_f32(a, __b) 
__extension__ ({ \ 1280 float32x4_t __a = (a); \ 1281 (float32_t)__builtin_neon_vgetq_lane_f32(__a, __b); }) 1282 #define vget_lane_s64(a, __b) __extension__ ({ \ 1283 int64x1_t __a = (a); \ 1284 (int64_t)__builtin_neon_vget_lane_i64(__a, __b); }) 1285 #define vget_lane_u64(a, __b) __extension__ ({ \ 1286 uint64x1_t __a = (a); \ 1287 (uint64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); }) 1288 #define vgetq_lane_s64(a, __b) __extension__ ({ \ 1289 int64x2_t __a = (a); \ 1290 (int64_t)__builtin_neon_vgetq_lane_i64(__a, __b); }) 1291 #define vgetq_lane_u64(a, __b) __extension__ ({ \ 1292 uint64x2_t __a = (a); \ 1293 (uint64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); }) 1294 1295 __ai int8x8_t vget_low_s8(int8x16_t __a) { 1296 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1297 __ai int16x4_t vget_low_s16(int16x8_t __a) { 1298 return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1299 __ai int32x2_t vget_low_s32(int32x4_t __a) { 1300 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1301 __ai int64x1_t vget_low_s64(int64x2_t __a) { 1302 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1303 __ai float16x4_t vget_low_f16(float16x8_t __a) { 1304 return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1305 __ai float32x2_t vget_low_f32(float32x4_t __a) { 1306 return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1307 __ai uint8x8_t vget_low_u8(uint8x16_t __a) { 1308 return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1309 __ai uint16x4_t vget_low_u16(uint16x8_t __a) { 1310 return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1311 __ai uint32x2_t vget_low_u32(uint32x4_t __a) { 1312 return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1313 __ai uint64x1_t vget_low_u64(uint64x2_t __a) { 1314 return 
(uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1315 __ai poly8x8_t vget_low_p8(poly8x16_t __a) { 1316 return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1317 __ai poly16x4_t vget_low_p16(poly16x8_t __a) { 1318 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1319 1320 __ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { 1321 return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); } 1322 __ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { 1323 return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1324 __ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { 1325 return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1326 __ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { 1327 return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } 1328 __ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { 1329 return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 1330 __ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { 1331 return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } 1332 __ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) { 1333 return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 32); } 1334 __ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { 1335 return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 1336 __ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { 1337 return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 1338 __ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { 1339 return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 1340 __ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { 1341 return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 1342 __ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { 
1343 return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 1344 1345 __ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { 1346 return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); } 1347 __ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { 1348 return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1349 __ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { 1350 return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1351 __ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { 1352 return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } 1353 __ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { 1354 return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } 1355 __ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { 1356 return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } 1357 __ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { 1358 return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 32); } 1359 __ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { 1360 return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 1361 __ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { 1362 return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 1363 __ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { 1364 return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 1365 __ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { 1366 return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 1367 __ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { 1368 return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 1369 1370 #define vld1q_u8(__a) __extension__ ({ \ 1371 (uint8x16_t)__builtin_neon_vld1q_v(__a, 48); }) 1372 #define vld1q_u16(__a) __extension__ ({ \ 1373 
(uint16x8_t)__builtin_neon_vld1q_v(__a, 49); \
})
/* vld1q_* / vld1_*: macros that pass the argument `__a` unchanged to
 * __builtin_neon_vld1q_v / __builtin_neon_vld1_v and cast the result to the
 * named vector type.
 * NOTE(review): `__a` is presumably a pointer to the data to load, and the
 * integer literal appears to select the element type -- confirm both. */
#define vld1q_u32(__a) __extension__ ({ \
  (uint32x4_t)__builtin_neon_vld1q_v(__a, 50); \
})
#define vld1q_u64(__a) __extension__ ({ \
  (uint64x2_t)__builtin_neon_vld1q_v(__a, 51); \
})
#define vld1q_s8(__a) __extension__ ({ \
  (int8x16_t)__builtin_neon_vld1q_v(__a, 32); \
})
#define vld1q_s16(__a) __extension__ ({ \
  (int16x8_t)__builtin_neon_vld1q_v(__a, 33); \
})
#define vld1q_s32(__a) __extension__ ({ \
  (int32x4_t)__builtin_neon_vld1q_v(__a, 34); \
})
#define vld1q_s64(__a) __extension__ ({ \
  (int64x2_t)__builtin_neon_vld1q_v(__a, 35); \
})
#define vld1q_f16(__a) __extension__ ({ \
  (float16x8_t)__builtin_neon_vld1q_v(__a, 38); \
})
#define vld1q_f32(__a) __extension__ ({ \
  (float32x4_t)__builtin_neon_vld1q_v(__a, 39); \
})
#define vld1q_p8(__a) __extension__ ({ \
  (poly8x16_t)__builtin_neon_vld1q_v(__a, 36); \
})
#define vld1q_p16(__a) __extension__ ({ \
  (poly16x8_t)__builtin_neon_vld1q_v(__a, 37); \
})
#define vld1_u8(__a) __extension__ ({ \
  (uint8x8_t)__builtin_neon_vld1_v(__a, 16); \
})
#define vld1_u16(__a) __extension__ ({ \
  (uint16x4_t)__builtin_neon_vld1_v(__a, 17); \
})
#define vld1_u32(__a) __extension__ ({ \
  (uint32x2_t)__builtin_neon_vld1_v(__a, 18); \
})
#define vld1_u64(__a) __extension__ ({ \
  (uint64x1_t)__builtin_neon_vld1_v(__a, 19); \
})
#define vld1_s8(__a) __extension__ ({ \
  (int8x8_t)__builtin_neon_vld1_v(__a, 0); \
})
#define vld1_s16(__a) __extension__ ({ \
  (int16x4_t)__builtin_neon_vld1_v(__a, 1); \
})
#define vld1_s32(__a) __extension__ ({ \
  (int32x2_t)__builtin_neon_vld1_v(__a, 2); \
})
#define vld1_s64(__a) __extension__ ({ \
  (int64x1_t)__builtin_neon_vld1_v(__a, 3); \
})
#define vld1_f16(__a) __extension__ ({ \
  (float16x4_t)__builtin_neon_vld1_v(__a, 6); \
})
#define vld1_f32(__a) __extension__ ({ \
(float32x2_t)__builtin_neon_vld1_v(__a, 7); }) 1414 #define vld1_p8(__a) __extension__ ({ \ 1415 (poly8x8_t)__builtin_neon_vld1_v(__a, 4); }) 1416 #define vld1_p16(__a) __extension__ ({ \ 1417 (poly16x4_t)__builtin_neon_vld1_v(__a, 5); }) 1418 1419 #define vld1q_dup_u8(__a) __extension__ ({ \ 1420 (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 48); }) 1421 #define vld1q_dup_u16(__a) __extension__ ({ \ 1422 (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 49); }) 1423 #define vld1q_dup_u32(__a) __extension__ ({ \ 1424 (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 50); }) 1425 #define vld1q_dup_u64(__a) __extension__ ({ \ 1426 (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 51); }) 1427 #define vld1q_dup_s8(__a) __extension__ ({ \ 1428 (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 32); }) 1429 #define vld1q_dup_s16(__a) __extension__ ({ \ 1430 (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 33); }) 1431 #define vld1q_dup_s32(__a) __extension__ ({ \ 1432 (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 34); }) 1433 #define vld1q_dup_s64(__a) __extension__ ({ \ 1434 (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 35); }) 1435 #define vld1q_dup_f16(__a) __extension__ ({ \ 1436 (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 38); }) 1437 #define vld1q_dup_f32(__a) __extension__ ({ \ 1438 (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 39); }) 1439 #define vld1q_dup_p8(__a) __extension__ ({ \ 1440 (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 36); }) 1441 #define vld1q_dup_p16(__a) __extension__ ({ \ 1442 (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 37); }) 1443 #define vld1_dup_u8(__a) __extension__ ({ \ 1444 (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 16); }) 1445 #define vld1_dup_u16(__a) __extension__ ({ \ 1446 (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 17); }) 1447 #define vld1_dup_u32(__a) __extension__ ({ \ 1448 (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 18); }) 1449 #define vld1_dup_u64(__a) __extension__ ({ \ 1450 (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 19); }) 1451 #define vld1_dup_s8(__a) 
__extension__ ({ \ 1452 (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); }) 1453 #define vld1_dup_s16(__a) __extension__ ({ \ 1454 (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); }) 1455 #define vld1_dup_s32(__a) __extension__ ({ \ 1456 (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); }) 1457 #define vld1_dup_s64(__a) __extension__ ({ \ 1458 (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); }) 1459 #define vld1_dup_f16(__a) __extension__ ({ \ 1460 (float16x4_t)__builtin_neon_vld1_dup_v(__a, 6); }) 1461 #define vld1_dup_f32(__a) __extension__ ({ \ 1462 (float32x2_t)__builtin_neon_vld1_dup_v(__a, 7); }) 1463 #define vld1_dup_p8(__a) __extension__ ({ \ 1464 (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 4); }) 1465 #define vld1_dup_p16(__a) __extension__ ({ \ 1466 (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 5); }) 1467 1468 #define vld1q_lane_u8(__a, b, __c) __extension__ ({ \ 1469 uint8x16_t __b = (b); \ 1470 (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) 1471 #define vld1q_lane_u16(__a, b, __c) __extension__ ({ \ 1472 uint16x8_t __b = (b); \ 1473 (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) 1474 #define vld1q_lane_u32(__a, b, __c) __extension__ ({ \ 1475 uint32x4_t __b = (b); \ 1476 (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) 1477 #define vld1q_lane_u64(__a, b, __c) __extension__ ({ \ 1478 uint64x2_t __b = (b); \ 1479 (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) 1480 #define vld1q_lane_s8(__a, b, __c) __extension__ ({ \ 1481 int8x16_t __b = (b); \ 1482 (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 32); }) 1483 #define vld1q_lane_s16(__a, b, __c) __extension__ ({ \ 1484 int16x8_t __b = (b); \ 1485 (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) 1486 #define vld1q_lane_s32(__a, b, __c) __extension__ ({ \ 1487 int32x4_t __b = (b); \ 1488 (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) 1489 #define vld1q_lane_s64(__a, 
b, __c) __extension__ ({ \ 1490 int64x2_t __b = (b); \ 1491 (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) 1492 #define vld1q_lane_f16(__a, b, __c) __extension__ ({ \ 1493 float16x8_t __b = (b); \ 1494 (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) 1495 #define vld1q_lane_f32(__a, b, __c) __extension__ ({ \ 1496 float32x4_t __b = (b); \ 1497 (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) 1498 #define vld1q_lane_p8(__a, b, __c) __extension__ ({ \ 1499 poly8x16_t __b = (b); \ 1500 (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) 1501 #define vld1q_lane_p16(__a, b, __c) __extension__ ({ \ 1502 poly16x8_t __b = (b); \ 1503 (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) 1504 #define vld1_lane_u8(__a, b, __c) __extension__ ({ \ 1505 uint8x8_t __b = (b); \ 1506 (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 16); }) 1507 #define vld1_lane_u16(__a, b, __c) __extension__ ({ \ 1508 uint16x4_t __b = (b); \ 1509 (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); }) 1510 #define vld1_lane_u32(__a, b, __c) __extension__ ({ \ 1511 uint32x2_t __b = (b); \ 1512 (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); }) 1513 #define vld1_lane_u64(__a, b, __c) __extension__ ({ \ 1514 uint64x1_t __b = (b); \ 1515 (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); }) 1516 #define vld1_lane_s8(__a, b, __c) __extension__ ({ \ 1517 int8x8_t __b = (b); \ 1518 (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); }) 1519 #define vld1_lane_s16(__a, b, __c) __extension__ ({ \ 1520 int16x4_t __b = (b); \ 1521 (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); }) 1522 #define vld1_lane_s32(__a, b, __c) __extension__ ({ \ 1523 int32x2_t __b = (b); \ 1524 (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); }) 1525 #define vld1_lane_s64(__a, b, __c) __extension__ ({ \ 
1526 int64x1_t __b = (b); \ 1527 (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); }) 1528 #define vld1_lane_f16(__a, b, __c) __extension__ ({ \ 1529 float16x4_t __b = (b); \ 1530 (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); }) 1531 #define vld1_lane_f32(__a, b, __c) __extension__ ({ \ 1532 float32x2_t __b = (b); \ 1533 (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); }) 1534 #define vld1_lane_p8(__a, b, __c) __extension__ ({ \ 1535 poly8x8_t __b = (b); \ 1536 (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); }) 1537 #define vld1_lane_p16(__a, b, __c) __extension__ ({ \ 1538 poly16x4_t __b = (b); \ 1539 (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); }) 1540 1541 #define vld2q_u8(__a) __extension__ ({ \ 1542 uint8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 48); r; }) 1543 #define vld2q_u16(__a) __extension__ ({ \ 1544 uint16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 49); r; }) 1545 #define vld2q_u32(__a) __extension__ ({ \ 1546 uint32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 50); r; }) 1547 #define vld2q_s8(__a) __extension__ ({ \ 1548 int8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 32); r; }) 1549 #define vld2q_s16(__a) __extension__ ({ \ 1550 int16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 33); r; }) 1551 #define vld2q_s32(__a) __extension__ ({ \ 1552 int32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 34); r; }) 1553 #define vld2q_f16(__a) __extension__ ({ \ 1554 float16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 38); r; }) 1555 #define vld2q_f32(__a) __extension__ ({ \ 1556 float32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 39); r; }) 1557 #define vld2q_p8(__a) __extension__ ({ \ 1558 poly8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 36); r; }) 1559 #define vld2q_p16(__a) __extension__ ({ \ 1560 poly16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 37); r; }) 1561 #define vld2_u8(__a) __extension__ ({ \ 1562 uint8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 16); r; }) 1563 #define 
vld2_u16(__a) __extension__ ({ \ 1564 uint16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 17); r; }) 1565 #define vld2_u32(__a) __extension__ ({ \ 1566 uint32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 18); r; }) 1567 #define vld2_u64(__a) __extension__ ({ \ 1568 uint64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 19); r; }) 1569 #define vld2_s8(__a) __extension__ ({ \ 1570 int8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 0); r; }) 1571 #define vld2_s16(__a) __extension__ ({ \ 1572 int16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 1); r; }) 1573 #define vld2_s32(__a) __extension__ ({ \ 1574 int32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 2); r; }) 1575 #define vld2_s64(__a) __extension__ ({ \ 1576 int64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 3); r; }) 1577 #define vld2_f16(__a) __extension__ ({ \ 1578 float16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 6); r; }) 1579 #define vld2_f32(__a) __extension__ ({ \ 1580 float32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 7); r; }) 1581 #define vld2_p8(__a) __extension__ ({ \ 1582 poly8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 4); r; }) 1583 #define vld2_p16(__a) __extension__ ({ \ 1584 poly16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 5); r; }) 1585 1586 #define vld2_dup_u8(__a) __extension__ ({ \ 1587 uint8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 16); r; }) 1588 #define vld2_dup_u16(__a) __extension__ ({ \ 1589 uint16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 17); r; }) 1590 #define vld2_dup_u32(__a) __extension__ ({ \ 1591 uint32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 18); r; }) 1592 #define vld2_dup_u64(__a) __extension__ ({ \ 1593 uint64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 19); r; }) 1594 #define vld2_dup_s8(__a) __extension__ ({ \ 1595 int8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 0); r; }) 1596 #define vld2_dup_s16(__a) __extension__ ({ \ 1597 int16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 1); r; }) 1598 #define vld2_dup_s32(__a) __extension__ ({ \ 1599 int32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 2); r; }) 1600 #define 
vld2_dup_s64(__a) __extension__ ({ \ 1601 int64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 3); r; }) 1602 #define vld2_dup_f16(__a) __extension__ ({ \ 1603 float16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 6); r; }) 1604 #define vld2_dup_f32(__a) __extension__ ({ \ 1605 float32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 7); r; }) 1606 #define vld2_dup_p8(__a) __extension__ ({ \ 1607 poly8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 4); r; }) 1608 #define vld2_dup_p16(__a) __extension__ ({ \ 1609 poly16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 5); r; }) 1610 1611 #define vld2q_lane_u16(__a, b, __c) __extension__ ({ \ 1612 uint16x8x2_t __b = (b); \ 1613 uint16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); r; }) 1614 #define vld2q_lane_u32(__a, b, __c) __extension__ ({ \ 1615 uint32x4x2_t __b = (b); \ 1616 uint32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); r; }) 1617 #define vld2q_lane_s16(__a, b, __c) __extension__ ({ \ 1618 int16x8x2_t __b = (b); \ 1619 int16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); r; }) 1620 #define vld2q_lane_s32(__a, b, __c) __extension__ ({ \ 1621 int32x4x2_t __b = (b); \ 1622 int32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); r; }) 1623 #define vld2q_lane_f16(__a, b, __c) __extension__ ({ \ 1624 float16x8x2_t __b = (b); \ 1625 float16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); r; }) 1626 #define vld2q_lane_f32(__a, b, __c) __extension__ ({ \ 1627 float32x4x2_t __b = (b); \ 1628 float32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); r; }) 1629 #define vld2q_lane_p16(__a, b, __c) __extension__ ({ \ 1630 poly16x8x2_t __b = (b); \ 1631 poly16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, 
(int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); r; }) 1632 #define vld2_lane_u8(__a, b, __c) __extension__ ({ \ 1633 uint8x8x2_t __b = (b); \ 1634 uint8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); r; }) 1635 #define vld2_lane_u16(__a, b, __c) __extension__ ({ \ 1636 uint16x4x2_t __b = (b); \ 1637 uint16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); r; }) 1638 #define vld2_lane_u32(__a, b, __c) __extension__ ({ \ 1639 uint32x2x2_t __b = (b); \ 1640 uint32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); r; }) 1641 #define vld2_lane_s8(__a, b, __c) __extension__ ({ \ 1642 int8x8x2_t __b = (b); \ 1643 int8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; }) 1644 #define vld2_lane_s16(__a, b, __c) __extension__ ({ \ 1645 int16x4x2_t __b = (b); \ 1646 int16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; }) 1647 #define vld2_lane_s32(__a, b, __c) __extension__ ({ \ 1648 int32x2x2_t __b = (b); \ 1649 int32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; }) 1650 #define vld2_lane_f16(__a, b, __c) __extension__ ({ \ 1651 float16x4x2_t __b = (b); \ 1652 float16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; }) 1653 #define vld2_lane_f32(__a, b, __c) __extension__ ({ \ 1654 float32x2x2_t __b = (b); \ 1655 float32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; }) 1656 #define vld2_lane_p8(__a, b, __c) __extension__ ({ \ 1657 poly8x8x2_t __b = (b); \ 1658 poly8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; }) 1659 #define vld2_lane_p16(__a, b, __c) __extension__ ({ \ 1660 poly16x4x2_t __b = (b); \ 1661 poly16x4x2_t r; 
__builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; }) 1662 1663 #define vld3q_u8(__a) __extension__ ({ \ 1664 uint8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 48); r; }) 1665 #define vld3q_u16(__a) __extension__ ({ \ 1666 uint16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 49); r; }) 1667 #define vld3q_u32(__a) __extension__ ({ \ 1668 uint32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 50); r; }) 1669 #define vld3q_s8(__a) __extension__ ({ \ 1670 int8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 32); r; }) 1671 #define vld3q_s16(__a) __extension__ ({ \ 1672 int16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 33); r; }) 1673 #define vld3q_s32(__a) __extension__ ({ \ 1674 int32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 34); r; }) 1675 #define vld3q_f16(__a) __extension__ ({ \ 1676 float16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 38); r; }) 1677 #define vld3q_f32(__a) __extension__ ({ \ 1678 float32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 39); r; }) 1679 #define vld3q_p8(__a) __extension__ ({ \ 1680 poly8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 36); r; }) 1681 #define vld3q_p16(__a) __extension__ ({ \ 1682 poly16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 37); r; }) 1683 #define vld3_u8(__a) __extension__ ({ \ 1684 uint8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 16); r; }) 1685 #define vld3_u16(__a) __extension__ ({ \ 1686 uint16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 17); r; }) 1687 #define vld3_u32(__a) __extension__ ({ \ 1688 uint32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 18); r; }) 1689 #define vld3_u64(__a) __extension__ ({ \ 1690 uint64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 19); r; }) 1691 #define vld3_s8(__a) __extension__ ({ \ 1692 int8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 0); r; }) 1693 #define vld3_s16(__a) __extension__ ({ \ 1694 int16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 1); r; }) 1695 #define vld3_s32(__a) __extension__ ({ \ 1696 int32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 2); r; }) 1697 #define vld3_s64(__a) __extension__ 
({ \ 1698 int64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 3); r; }) 1699 #define vld3_f16(__a) __extension__ ({ \ 1700 float16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 6); r; }) 1701 #define vld3_f32(__a) __extension__ ({ \ 1702 float32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 7); r; }) 1703 #define vld3_p8(__a) __extension__ ({ \ 1704 poly8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 4); r; }) 1705 #define vld3_p16(__a) __extension__ ({ \ 1706 poly16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 5); r; }) 1707 1708 #define vld3_dup_u8(__a) __extension__ ({ \ 1709 uint8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 16); r; }) 1710 #define vld3_dup_u16(__a) __extension__ ({ \ 1711 uint16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 17); r; }) 1712 #define vld3_dup_u32(__a) __extension__ ({ \ 1713 uint32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 18); r; }) 1714 #define vld3_dup_u64(__a) __extension__ ({ \ 1715 uint64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 19); r; }) 1716 #define vld3_dup_s8(__a) __extension__ ({ \ 1717 int8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 0); r; }) 1718 #define vld3_dup_s16(__a) __extension__ ({ \ 1719 int16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 1); r; }) 1720 #define vld3_dup_s32(__a) __extension__ ({ \ 1721 int32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 2); r; }) 1722 #define vld3_dup_s64(__a) __extension__ ({ \ 1723 int64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 3); r; }) 1724 #define vld3_dup_f16(__a) __extension__ ({ \ 1725 float16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 6); r; }) 1726 #define vld3_dup_f32(__a) __extension__ ({ \ 1727 float32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 7); r; }) 1728 #define vld3_dup_p8(__a) __extension__ ({ \ 1729 poly8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 4); r; }) 1730 #define vld3_dup_p16(__a) __extension__ ({ \ 1731 poly16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 5); r; }) 1732 1733 #define vld3q_lane_u16(__a, b, __c) __extension__ ({ \ 1734 uint16x8x3_t __b = (b); \ 1735 
uint16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); r; }) 1736 #define vld3q_lane_u32(__a, b, __c) __extension__ ({ \ 1737 uint32x4x3_t __b = (b); \ 1738 uint32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); r; }) 1739 #define vld3q_lane_s16(__a, b, __c) __extension__ ({ \ 1740 int16x8x3_t __b = (b); \ 1741 int16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); r; }) 1742 #define vld3q_lane_s32(__a, b, __c) __extension__ ({ \ 1743 int32x4x3_t __b = (b); \ 1744 int32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); r; }) 1745 #define vld3q_lane_f16(__a, b, __c) __extension__ ({ \ 1746 float16x8x3_t __b = (b); \ 1747 float16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); r; }) 1748 #define vld3q_lane_f32(__a, b, __c) __extension__ ({ \ 1749 float32x4x3_t __b = (b); \ 1750 float32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); r; }) 1751 #define vld3q_lane_p16(__a, b, __c) __extension__ ({ \ 1752 poly16x8x3_t __b = (b); \ 1753 poly16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); r; }) 1754 #define vld3_lane_u8(__a, b, __c) __extension__ ({ \ 1755 uint8x8x3_t __b = (b); \ 1756 uint8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); r; }) 1757 #define vld3_lane_u16(__a, b, __c) __extension__ ({ \ 1758 uint16x4x3_t __b = (b); \ 1759 uint16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); r; }) 
1760 #define vld3_lane_u32(__a, b, __c) __extension__ ({ \ 1761 uint32x2x3_t __b = (b); \ 1762 uint32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); r; }) 1763 #define vld3_lane_s8(__a, b, __c) __extension__ ({ \ 1764 int8x8x3_t __b = (b); \ 1765 int8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); r; }) 1766 #define vld3_lane_s16(__a, b, __c) __extension__ ({ \ 1767 int16x4x3_t __b = (b); \ 1768 int16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); r; }) 1769 #define vld3_lane_s32(__a, b, __c) __extension__ ({ \ 1770 int32x2x3_t __b = (b); \ 1771 int32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); r; }) 1772 #define vld3_lane_f16(__a, b, __c) __extension__ ({ \ 1773 float16x4x3_t __b = (b); \ 1774 float16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); r; }) 1775 #define vld3_lane_f32(__a, b, __c) __extension__ ({ \ 1776 float32x2x3_t __b = (b); \ 1777 float32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); r; }) 1778 #define vld3_lane_p8(__a, b, __c) __extension__ ({ \ 1779 poly8x8x3_t __b = (b); \ 1780 poly8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); r; }) 1781 #define vld3_lane_p16(__a, b, __c) __extension__ ({ \ 1782 poly16x4x3_t __b = (b); \ 1783 poly16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); r; }) 1784 1785 #define vld4q_u8(__a) __extension__ ({ \ 1786 uint8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 48); r; }) 1787 #define vld4q_u16(__a) __extension__ ({ \ 1788 uint16x8x4_t r; __builtin_neon_vld4q_v(&r, 
__a, 49); r; }) 1789 #define vld4q_u32(__a) __extension__ ({ \ 1790 uint32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 50); r; }) 1791 #define vld4q_s8(__a) __extension__ ({ \ 1792 int8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 32); r; }) 1793 #define vld4q_s16(__a) __extension__ ({ \ 1794 int16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 33); r; }) 1795 #define vld4q_s32(__a) __extension__ ({ \ 1796 int32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 34); r; }) 1797 #define vld4q_f16(__a) __extension__ ({ \ 1798 float16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 38); r; }) 1799 #define vld4q_f32(__a) __extension__ ({ \ 1800 float32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 39); r; }) 1801 #define vld4q_p8(__a) __extension__ ({ \ 1802 poly8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 36); r; }) 1803 #define vld4q_p16(__a) __extension__ ({ \ 1804 poly16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 37); r; }) 1805 #define vld4_u8(__a) __extension__ ({ \ 1806 uint8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 16); r; }) 1807 #define vld4_u16(__a) __extension__ ({ \ 1808 uint16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 17); r; }) 1809 #define vld4_u32(__a) __extension__ ({ \ 1810 uint32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 18); r; }) 1811 #define vld4_u64(__a) __extension__ ({ \ 1812 uint64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 19); r; }) 1813 #define vld4_s8(__a) __extension__ ({ \ 1814 int8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 0); r; }) 1815 #define vld4_s16(__a) __extension__ ({ \ 1816 int16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 1); r; }) 1817 #define vld4_s32(__a) __extension__ ({ \ 1818 int32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 2); r; }) 1819 #define vld4_s64(__a) __extension__ ({ \ 1820 int64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 3); r; }) 1821 #define vld4_f16(__a) __extension__ ({ \ 1822 float16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 6); r; }) 1823 #define vld4_f32(__a) __extension__ ({ \ 1824 float32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 7); r; }) 1825 #define 
vld4_p8(__a) __extension__ ({ \ 1826 poly8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 4); r; }) 1827 #define vld4_p16(__a) __extension__ ({ \ 1828 poly16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 5); r; }) 1829 1830 #define vld4_dup_u8(__a) __extension__ ({ \ 1831 uint8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 16); r; }) 1832 #define vld4_dup_u16(__a) __extension__ ({ \ 1833 uint16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 17); r; }) 1834 #define vld4_dup_u32(__a) __extension__ ({ \ 1835 uint32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 18); r; }) 1836 #define vld4_dup_u64(__a) __extension__ ({ \ 1837 uint64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 19); r; }) 1838 #define vld4_dup_s8(__a) __extension__ ({ \ 1839 int8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 0); r; }) 1840 #define vld4_dup_s16(__a) __extension__ ({ \ 1841 int16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 1); r; }) 1842 #define vld4_dup_s32(__a) __extension__ ({ \ 1843 int32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 2); r; }) 1844 #define vld4_dup_s64(__a) __extension__ ({ \ 1845 int64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 3); r; }) 1846 #define vld4_dup_f16(__a) __extension__ ({ \ 1847 float16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 6); r; }) 1848 #define vld4_dup_f32(__a) __extension__ ({ \ 1849 float32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 7); r; }) 1850 #define vld4_dup_p8(__a) __extension__ ({ \ 1851 poly8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 4); r; }) 1852 #define vld4_dup_p16(__a) __extension__ ({ \ 1853 poly16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 5); r; }) 1854 1855 #define vld4q_lane_u16(__a, b, __c) __extension__ ({ \ 1856 uint16x8x4_t __b = (b); \ 1857 uint16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); r; }) 1858 #define vld4q_lane_u32(__a, b, __c) __extension__ ({ \ 1859 uint32x4x4_t __b = (b); \ 1860 uint32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, 
__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); r; }) 1861 #define vld4q_lane_s16(__a, b, __c) __extension__ ({ \ 1862 int16x8x4_t __b = (b); \ 1863 int16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); r; }) 1864 #define vld4q_lane_s32(__a, b, __c) __extension__ ({ \ 1865 int32x4x4_t __b = (b); \ 1866 int32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); r; }) 1867 #define vld4q_lane_f16(__a, b, __c) __extension__ ({ \ 1868 float16x8x4_t __b = (b); \ 1869 float16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); r; }) 1870 #define vld4q_lane_f32(__a, b, __c) __extension__ ({ \ 1871 float32x4x4_t __b = (b); \ 1872 float32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); r; }) 1873 #define vld4q_lane_p16(__a, b, __c) __extension__ ({ \ 1874 poly16x8x4_t __b = (b); \ 1875 poly16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); r; }) 1876 #define vld4_lane_u8(__a, b, __c) __extension__ ({ \ 1877 uint8x8x4_t __b = (b); \ 1878 uint8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); r; }) 1879 #define vld4_lane_u16(__a, b, __c) __extension__ ({ \ 1880 uint16x4x4_t __b = (b); \ 1881 uint16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); r; }) 1882 #define vld4_lane_u32(__a, b, __c) __extension__ ({ \ 1883 uint32x2x4_t __b = (b); \ 1884 
uint32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); r; }) 1885 #define vld4_lane_s8(__a, b, __c) __extension__ ({ \ 1886 int8x8x4_t __b = (b); \ 1887 int8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; }) 1888 #define vld4_lane_s16(__a, b, __c) __extension__ ({ \ 1889 int16x4x4_t __b = (b); \ 1890 int16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; }) 1891 #define vld4_lane_s32(__a, b, __c) __extension__ ({ \ 1892 int32x2x4_t __b = (b); \ 1893 int32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; }) 1894 #define vld4_lane_f16(__a, b, __c) __extension__ ({ \ 1895 float16x4x4_t __b = (b); \ 1896 float16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; }) 1897 #define vld4_lane_f32(__a, b, __c) __extension__ ({ \ 1898 float32x2x4_t __b = (b); \ 1899 float32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; }) 1900 #define vld4_lane_p8(__a, b, __c) __extension__ ({ \ 1901 poly8x8x4_t __b = (b); \ 1902 poly8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; }) 1903 #define vld4_lane_p16(__a, b, __c) __extension__ ({ \ 1904 poly16x4x4_t __b = (b); \ 1905 poly16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; }) 1906 1907 __ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { 1908 return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); } 1909 __ai int16x4_t 
vmax_s16(int16x4_t __a, int16x4_t __b) { 1910 return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1911 __ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { 1912 return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1913 __ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { 1914 return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } 1915 __ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { 1916 return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } 1917 __ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { 1918 return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } 1919 __ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { 1920 return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } 1921 __ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { 1922 return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 32); } 1923 __ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { 1924 return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 1925 __ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { 1926 return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 1927 __ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) { 1928 return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 1929 __ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { 1930 return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 1931 __ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { 1932 return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 1933 __ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { 1934 return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } 1935 1936 __ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { 1937 return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); } 
1938 __ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { 1939 return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1940 __ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { 1941 return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1942 __ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { 1943 return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } 1944 __ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { 1945 return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } 1946 __ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { 1947 return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } 1948 __ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { 1949 return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } 1950 __ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { 1951 return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 32); } 1952 __ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { 1953 return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 1954 __ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { 1955 return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 1956 __ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { 1957 return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 1958 __ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { 1959 return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 1960 __ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { 1961 return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 1962 __ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { 1963 return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 39); } 1964 1965 __ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { 1966 return __a + (__b * 
__c); } 1967 __ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { 1968 return __a + (__b * __c); } 1969 __ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { 1970 return __a + (__b * __c); } 1971 __ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { 1972 return __a + (__b * __c); } 1973 __ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { 1974 return __a + (__b * __c); } 1975 __ai uint16x4_t vmla_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { 1976 return __a + (__b * __c); } 1977 __ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { 1978 return __a + (__b * __c); } 1979 __ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { 1980 return __a + (__b * __c); } 1981 __ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { 1982 return __a + (__b * __c); } 1983 __ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { 1984 return __a + (__b * __c); } 1985 __ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { 1986 return __a + (__b * __c); } 1987 __ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { 1988 return __a + (__b * __c); } 1989 __ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { 1990 return __a + (__b * __c); } 1991 __ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { 1992 return __a + (__b * __c); } 1993 1994 __ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { 1995 return __a + vmull_s8(__b, __c); } 1996 __ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { 1997 return __a + vmull_s16(__b, __c); } 1998 __ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { 1999 return __a + vmull_s32(__b, __c); } 2000 __ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { 2001 return __a + vmull_u8(__b, __c); } 2002 __ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t 
__b, uint16x4_t __c) { 2003 return __a + vmull_u16(__b, __c); } 2004 __ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { 2005 return __a + vmull_u32(__b, __c); } 2006 2007 #define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \ 2008 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2009 __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2010 #define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \ 2011 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2012 __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2013 #define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \ 2014 uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ 2015 __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2016 #define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \ 2017 uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ 2018 __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2019 2020 __ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { 2021 return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } 2022 __ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { 2023 return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); } 2024 __ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { 2025 return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } 2026 __ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { 2027 return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); } 2028 2029 #define vmla_lane_s16(a, b, c, __d) __extension__ ({ \ 2030 int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2031 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2032 #define vmla_lane_s32(a, b, c, __d) __extension__ ({ \ 2033 int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2034 __a + (__b * 
__builtin_shufflevector(__c, __c, __d, __d)); }) 2035 #define vmla_lane_u16(a, b, c, __d) __extension__ ({ \ 2036 uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ 2037 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2038 #define vmla_lane_u32(a, b, c, __d) __extension__ ({ \ 2039 uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ 2040 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) 2041 #define vmla_lane_f32(a, b, c, __d) __extension__ ({ \ 2042 float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ 2043 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) 2044 #define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \ 2045 int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ 2046 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) 2047 #define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \ 2048 int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ 2049 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2050 #define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \ 2051 uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ 2052 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) 2053 #define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \ 2054 uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ 2055 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2056 #define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \ 2057 float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ 2058 __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2059 2060 __ai int16x4_t vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { 2061 return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); } 2062 __ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { 2063 return __a + (__b * 
(int32x2_t){ __c, __c }); } 2064 __ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { 2065 return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); } 2066 __ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { 2067 return __a + (__b * (uint32x2_t){ __c, __c }); } 2068 __ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { 2069 return __a + (__b * (float32x2_t){ __c, __c }); } 2070 __ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { 2071 return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } 2072 __ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { 2073 return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); } 2074 __ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { 2075 return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } 2076 __ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { 2077 return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); } 2078 __ai float32x4_t vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { 2079 return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); } 2080 2081 __ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { 2082 return __a - (__b * __c); } 2083 __ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { 2084 return __a - (__b * __c); } 2085 __ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { 2086 return __a - (__b * __c); } 2087 __ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { 2088 return __a - (__b * __c); } 2089 __ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { 2090 return __a - (__b * __c); } 2091 __ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { 2092 return __a - (__b * __c); } 2093 __ai uint32x2_t vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { 2094 return __a - (__b * __c); } 2095 __ai 
int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { 2096 return __a - (__b * __c); } 2097 __ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { 2098 return __a - (__b * __c); } 2099 __ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { 2100 return __a - (__b * __c); } 2101 __ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { 2102 return __a - (__b * __c); } 2103 __ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { 2104 return __a - (__b * __c); } 2105 __ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { 2106 return __a - (__b * __c); } 2107 __ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { 2108 return __a - (__b * __c); } 2109 2110 __ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { 2111 return __a - vmull_s8(__b, __c); } 2112 __ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { 2113 return __a - vmull_s16(__b, __c); } 2114 __ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { 2115 return __a - vmull_s32(__b, __c); } 2116 __ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { 2117 return __a - vmull_u8(__b, __c); } 2118 __ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { 2119 return __a - vmull_u16(__b, __c); } 2120 __ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { 2121 return __a - vmull_u32(__b, __c); } 2122 2123 #define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ 2124 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2125 __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2126 #define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ 2127 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2128 __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2129 #define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \ 2130 uint32x4_t 
__a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ 2131 __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2132 #define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \ 2133 uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ 2134 __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2135 2136 __ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { 2137 return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } 2138 __ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { 2139 return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); } 2140 __ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { 2141 return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } 2142 __ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { 2143 return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); } 2144 2145 #define vmls_lane_s16(a, b, c, __d) __extension__ ({ \ 2146 int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2147 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2148 #define vmls_lane_s32(a, b, c, __d) __extension__ ({ \ 2149 int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2150 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) 2151 #define vmls_lane_u16(a, b, c, __d) __extension__ ({ \ 2152 uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ 2153 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2154 #define vmls_lane_u32(a, b, c, __d) __extension__ ({ \ 2155 uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ 2156 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) 2157 #define vmls_lane_f32(a, b, c, __d) __extension__ ({ \ 2158 float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ 2159 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) 2160 #define vmlsq_lane_s16(a, b, c, 
__d) __extension__ ({ \ 2161 int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ 2162 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) 2163 #define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \ 2164 int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ 2165 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2166 #define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \ 2167 uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ 2168 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) 2169 #define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \ 2170 uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ 2171 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2172 #define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \ 2173 float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ 2174 __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2175 2176 __ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { 2177 return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); } 2178 __ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { 2179 return __a - (__b * (int32x2_t){ __c, __c }); } 2180 __ai uint16x4_t vmls_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { 2181 return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); } 2182 __ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { 2183 return __a - (__b * (uint32x2_t){ __c, __c }); } 2184 __ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { 2185 return __a - (__b * (float32x2_t){ __c, __c }); } 2186 __ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { 2187 return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } 2188 __ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { 2189 return __a - 
(__b * (int32x4_t){ __c, __c, __c, __c }); } 2190 __ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { 2191 return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } 2192 __ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { 2193 return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); } 2194 __ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { 2195 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); } 2196 2197 __ai int8x8_t vmovn_s16(int16x8_t __a) { 2198 return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); } 2199 __ai int16x4_t vmovn_s32(int32x4_t __a) { 2200 return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); } 2201 __ai int32x2_t vmovn_s64(int64x2_t __a) { 2202 return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); } 2203 __ai uint8x8_t vmovn_u16(uint16x8_t __a) { 2204 return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 16); } 2205 __ai uint16x4_t vmovn_u32(uint32x4_t __a) { 2206 return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 17); } 2207 __ai uint32x2_t vmovn_u64(uint64x2_t __a) { 2208 return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 18); } 2209 2210 __ai uint8x8_t vmov_n_u8(uint8_t __a) { 2211 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2212 __ai uint16x4_t vmov_n_u16(uint16_t __a) { 2213 return (uint16x4_t){ __a, __a, __a, __a }; } 2214 __ai uint32x2_t vmov_n_u32(uint32_t __a) { 2215 return (uint32x2_t){ __a, __a }; } 2216 __ai int8x8_t vmov_n_s8(int8_t __a) { 2217 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2218 __ai int16x4_t vmov_n_s16(int16_t __a) { 2219 return (int16x4_t){ __a, __a, __a, __a }; } 2220 __ai int32x2_t vmov_n_s32(int32_t __a) { 2221 return (int32x2_t){ __a, __a }; } 2222 __ai poly8x8_t vmov_n_p8(poly8_t __a) { 2223 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2224 __ai poly16x4_t vmov_n_p16(poly16_t __a) { 2225 return (poly16x4_t){ __a, 
__a, __a, __a }; } 2226 __ai float32x2_t vmov_n_f32(float32_t __a) { 2227 return (float32x2_t){ __a, __a }; } 2228 __ai uint8x16_t vmovq_n_u8(uint8_t __a) { 2229 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 2230 __ai uint16x8_t vmovq_n_u16(uint16_t __a) { 2231 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2232 __ai uint32x4_t vmovq_n_u32(uint32_t __a) { 2233 return (uint32x4_t){ __a, __a, __a, __a }; } 2234 __ai int8x16_t vmovq_n_s8(int8_t __a) { 2235 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 2236 __ai int16x8_t vmovq_n_s16(int16_t __a) { 2237 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2238 __ai int32x4_t vmovq_n_s32(int32_t __a) { 2239 return (int32x4_t){ __a, __a, __a, __a }; } 2240 __ai poly8x16_t vmovq_n_p8(poly8_t __a) { 2241 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } 2242 __ai poly16x8_t vmovq_n_p16(poly16_t __a) { 2243 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2244 __ai float32x4_t vmovq_n_f32(float32_t __a) { 2245 return (float32x4_t){ __a, __a, __a, __a }; } 2246 __ai int64x1_t vmov_n_s64(int64_t __a) { 2247 return (int64x1_t){ __a }; } 2248 __ai uint64x1_t vmov_n_u64(uint64_t __a) { 2249 return (uint64x1_t){ __a }; } 2250 __ai int64x2_t vmovq_n_s64(int64_t __a) { 2251 return (int64x2_t){ __a, __a }; } 2252 __ai uint64x2_t vmovq_n_u64(uint64_t __a) { 2253 return (uint64x2_t){ __a, __a }; } 2254 2255 __ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) { 2256 return __a * __b; } 2257 __ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) { 2258 return __a * __b; } 2259 __ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) { 2260 return __a * __b; } 2261 __ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) { 2262 return __a * __b; } 2263 __ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) { 2264 return __a * 
__b; } 2265 __ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) { 2266 return __a * __b; } 2267 __ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) { 2268 return __a * __b; } 2269 __ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) { 2270 return __a * __b; } 2271 __ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) { 2272 return __a * __b; } 2273 __ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) { 2274 return __a * __b; } 2275 __ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t __b) { 2276 return __a * __b; } 2277 __ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) { 2278 return __a * __b; } 2279 __ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) { 2280 return __a * __b; } 2281 __ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) { 2282 return __a * __b; } 2283 2284 #define vmull_lane_s16(a, b, __c) __extension__ ({ \ 2285 int16x4_t __a = (a); int16x4_t __b = (b); \ 2286 vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2287 #define vmull_lane_s32(a, b, __c) __extension__ ({ \ 2288 int32x2_t __a = (a); int32x2_t __b = (b); \ 2289 vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2290 #define vmull_lane_u16(a, b, __c) __extension__ ({ \ 2291 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 2292 vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2293 #define vmull_lane_u32(a, b, __c) __extension__ ({ \ 2294 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 2295 vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2296 2297 __ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { 2298 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } 2299 __ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { 2300 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } 2301 __ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { 2302 return 
(uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 50); } 2303 __ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { 2304 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 51); } 2305 2306 __ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { 2307 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); } 2308 __ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { 2309 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); } 2310 2311 #define vmul_lane_s16(a, b, __c) __extension__ ({ \ 2312 int16x4_t __a = (a); int16x4_t __b = (b); \ 2313 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2314 #define vmul_lane_s32(a, b, __c) __extension__ ({ \ 2315 int32x2_t __a = (a); int32x2_t __b = (b); \ 2316 __a * __builtin_shufflevector(__b, __b, __c, __c); }) 2317 #define vmul_lane_f32(a, b, __c) __extension__ ({ \ 2318 float32x2_t __a = (a); float32x2_t __b = (b); \ 2319 __a * __builtin_shufflevector(__b, __b, __c, __c); }) 2320 #define vmul_lane_u16(a, b, __c) __extension__ ({ \ 2321 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 2322 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2323 #define vmul_lane_u32(a, b, __c) __extension__ ({ \ 2324 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 2325 __a * __builtin_shufflevector(__b, __b, __c, __c); }) 2326 #define vmulq_lane_s16(a, b, __c) __extension__ ({ \ 2327 int16x8_t __a = (a); int16x4_t __b = (b); \ 2328 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) 2329 #define vmulq_lane_s32(a, b, __c) __extension__ ({ \ 2330 int32x4_t __a = (a); int32x2_t __b = (b); \ 2331 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2332 #define vmulq_lane_f32(a, b, __c) __extension__ ({ \ 2333 float32x4_t __a = (a); float32x2_t __b = (b); \ 2334 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2335 #define 
vmulq_lane_u16(a, b, __c) __extension__ ({ \ 2336 uint16x8_t __a = (a); uint16x4_t __b = (b); \ 2337 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) 2338 #define vmulq_lane_u32(a, b, __c) __extension__ ({ \ 2339 uint32x4_t __a = (a); uint32x2_t __b = (b); \ 2340 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2341 2342 __ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) { 2343 return __a * (int16x4_t){ __b, __b, __b, __b }; } 2344 __ai int32x2_t vmul_n_s32(int32x2_t __a, int32_t __b) { 2345 return __a * (int32x2_t){ __b, __b }; } 2346 __ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) { 2347 return __a * (float32x2_t){ __b, __b }; } 2348 __ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) { 2349 return __a * (uint16x4_t){ __b, __b, __b, __b }; } 2350 __ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) { 2351 return __a * (uint32x2_t){ __b, __b }; } 2352 __ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) { 2353 return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } 2354 __ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) { 2355 return __a * (int32x4_t){ __b, __b, __b, __b }; } 2356 __ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) { 2357 return __a * (float32x4_t){ __b, __b, __b, __b }; } 2358 __ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) { 2359 return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } 2360 __ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) { 2361 return __a * (uint32x4_t){ __b, __b, __b, __b }; } 2362 2363 __ai int8x8_t vmvn_s8(int8x8_t __a) { 2364 return ~__a; } 2365 __ai int16x4_t vmvn_s16(int16x4_t __a) { 2366 return ~__a; } 2367 __ai int32x2_t vmvn_s32(int32x2_t __a) { 2368 return ~__a; } 2369 __ai uint8x8_t vmvn_u8(uint8x8_t __a) { 2370 return ~__a; } 2371 __ai uint16x4_t vmvn_u16(uint16x4_t __a) { 2372 return ~__a; } 2373 __ai uint32x2_t vmvn_u32(uint32x2_t __a) { 2374 return ~__a; } 2375 __ai poly8x8_t 
vmvn_p8(poly8x8_t __a) { 2376 return ~__a; } 2377 __ai int8x16_t vmvnq_s8(int8x16_t __a) { 2378 return ~__a; } 2379 __ai int16x8_t vmvnq_s16(int16x8_t __a) { 2380 return ~__a; } 2381 __ai int32x4_t vmvnq_s32(int32x4_t __a) { 2382 return ~__a; } 2383 __ai uint8x16_t vmvnq_u8(uint8x16_t __a) { 2384 return ~__a; } 2385 __ai uint16x8_t vmvnq_u16(uint16x8_t __a) { 2386 return ~__a; } 2387 __ai uint32x4_t vmvnq_u32(uint32x4_t __a) { 2388 return ~__a; } 2389 __ai poly8x16_t vmvnq_p8(poly8x16_t __a) { 2390 return ~__a; } 2391 2392 __ai int8x8_t vneg_s8(int8x8_t __a) { 2393 return -__a; } 2394 __ai int16x4_t vneg_s16(int16x4_t __a) { 2395 return -__a; } 2396 __ai int32x2_t vneg_s32(int32x2_t __a) { 2397 return -__a; } 2398 __ai float32x2_t vneg_f32(float32x2_t __a) { 2399 return -__a; } 2400 __ai int8x16_t vnegq_s8(int8x16_t __a) { 2401 return -__a; } 2402 __ai int16x8_t vnegq_s16(int16x8_t __a) { 2403 return -__a; } 2404 __ai int32x4_t vnegq_s32(int32x4_t __a) { 2405 return -__a; } 2406 __ai float32x4_t vnegq_f32(float32x4_t __a) { 2407 return -__a; } 2408 2409 __ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) { 2410 return __a | ~__b; } 2411 __ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) { 2412 return __a | ~__b; } 2413 __ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) { 2414 return __a | ~__b; } 2415 __ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) { 2416 return __a | ~__b; } 2417 __ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) { 2418 return __a | ~__b; } 2419 __ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) { 2420 return __a | ~__b; } 2421 __ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) { 2422 return __a | ~__b; } 2423 __ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) { 2424 return __a | ~__b; } 2425 __ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) { 2426 return __a | ~__b; } 2427 __ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) { 2428 return __a | ~__b; } 2429 __ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) 
{ 2430 return __a | ~__b; } 2431 __ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) { 2432 return __a | ~__b; } 2433 __ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) { 2434 return __a | ~__b; } 2435 __ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) { 2436 return __a | ~__b; } 2437 __ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) { 2438 return __a | ~__b; } 2439 __ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) { 2440 return __a | ~__b; } 2441 2442 __ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) { 2443 return __a | __b; } 2444 __ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t __b) { 2445 return __a | __b; } 2446 __ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) { 2447 return __a | __b; } 2448 __ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) { 2449 return __a | __b; } 2450 __ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) { 2451 return __a | __b; } 2452 __ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) { 2453 return __a | __b; } 2454 __ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) { 2455 return __a | __b; } 2456 __ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) { 2457 return __a | __b; } 2458 __ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) { 2459 return __a | __b; } 2460 __ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) { 2461 return __a | __b; } 2462 __ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) { 2463 return __a | __b; } 2464 __ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) { 2465 return __a | __b; } 2466 __ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) { 2467 return __a | __b; } 2468 __ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) { 2469 return __a | __b; } 2470 __ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) { 2471 return __a | __b; } 2472 __ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { 2473 return __a | __b; } 2474 2475 __ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { 2476 return 
(int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); } 2477 __ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { 2478 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2479 __ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { 2480 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2481 __ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { 2482 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2483 __ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { 2484 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2485 __ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { 2486 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2487 __ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { 2488 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 33); } 2489 __ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { 2490 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2491 __ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { 2492 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 2493 __ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { 2494 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 2495 __ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { 2496 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 2497 __ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { 2498 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 2499 2500 __ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { 2501 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); } 2502 __ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { 2503 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2504 __ai int32x2_t 
vpadd_s32(int32x2_t __a, int32x2_t __b) { 2505 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2506 __ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { 2507 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } 2508 __ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { 2509 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2510 __ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { 2511 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2512 __ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { 2513 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 7); } 2514 2515 __ai int16x4_t vpaddl_s8(int8x8_t __a) { 2516 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); } 2517 __ai int32x2_t vpaddl_s16(int16x4_t __a) { 2518 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); } 2519 __ai int64x1_t vpaddl_s32(int32x2_t __a) { 2520 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); } 2521 __ai uint16x4_t vpaddl_u8(uint8x8_t __a) { 2522 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 17); } 2523 __ai uint32x2_t vpaddl_u16(uint16x4_t __a) { 2524 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 18); } 2525 __ai uint64x1_t vpaddl_u32(uint32x2_t __a) { 2526 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 19); } 2527 __ai int16x8_t vpaddlq_s8(int8x16_t __a) { 2528 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 33); } 2529 __ai int32x4_t vpaddlq_s16(int16x8_t __a) { 2530 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 34); } 2531 __ai int64x2_t vpaddlq_s32(int32x4_t __a) { 2532 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 35); } 2533 __ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { 2534 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 49); } 2535 __ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { 2536 return 
(uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 50); } 2537 __ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { 2538 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 51); } 2539 2540 __ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { 2541 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); } 2542 __ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { 2543 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2544 __ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { 2545 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2546 __ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { 2547 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } 2548 __ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { 2549 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2550 __ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { 2551 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2552 __ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { 2553 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } 2554 2555 __ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { 2556 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); } 2557 __ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { 2558 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2559 __ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { 2560 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2561 __ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { 2562 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } 2563 __ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { 2564 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2565 __ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { 2566 return 
(uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2567 __ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { 2568 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } 2569 2570 /* vqabs_* / vqabsq_*: one-operand wrappers over __builtin_neon_vqabs_v (8-byte vectors) and __builtin_neon_vqabsq_v (16-byte vectors). Only signed 8-, 16- and 32-bit element types are provided; the q forms pass the same integer literal plus 32 (0/1/2 versus 32/33/34). */ __ai int8x8_t vqabs_s8(int8x8_t __a) { 2571 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); } 2572 __ai int16x4_t vqabs_s16(int16x4_t __a) { 2573 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); } 2574 __ai int32x2_t vqabs_s32(int32x2_t __a) { 2575 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); } 2576 __ai int8x16_t vqabsq_s8(int8x16_t __a) { 2577 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 32); } 2578 __ai int16x8_t vqabsq_s16(int16x8_t __a) { 2579 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 33); } 2580 __ai int32x4_t vqabsq_s32(int32x4_t __a) { 2581 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 34); } 2582 2583 /* vqadd_*: two-operand wrappers over __builtin_neon_vqadd_v for every signed and unsigned 8-byte integer vector type. Both operands are cast to int8x8_t (unless already that type); the integer literal is 0-3 for s8..s64 and 16-19 for u8..u64. */ __ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { 2584 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); } 2585 __ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { 2586 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2587 __ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { 2588 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2589 __ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { 2590 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2591 __ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { 2592 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } 2593 __ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { 2594 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2595 __ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { 2596 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2597 __ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { 2598 return
(uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2599 /* vqaddq_*: 16-byte-vector wrappers over __builtin_neon_vqaddq_v. Operands are cast to int8x16_t (unless already that type); the integer literal is 32-35 for s8..s64 and 48-51 for u8..u64. */ __ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { 2600 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 32); } 2601 __ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { 2602 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2603 __ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { 2604 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2605 __ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { 2606 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 2607 __ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { 2608 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 2609 __ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { 2610 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 2611 __ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { 2612 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 2613 __ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { 2614 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 2615 2616 /* vqdmlal_*: three-operand wrappers over __builtin_neon_vqdmlal_v. The first operand and the result are 16-byte vectors (int32x4_t / int64x2_t); the other two operands are 8-byte vectors whose elements are half as wide. The integer literal (34, 35) follows the result type. */ __ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { 2617 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); } 2618 __ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { 2619 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); } 2620 2621 /* vqdmlal_lane_*: statement-expression macros. a, b and c are copied into typed locals so each argument expression is evaluated once; __d is pasted unmodified into __builtin_shufflevector, which builds a vector whose every lane is lane __d of c, and that vector becomes the third argument of the matching vqdmlal_* function. NOTE(review): __d presumably has to be an integer constant expression - confirm against the Clang builtin documentation. */ #define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \ 2622 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2623 vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2624 #define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \ 2625 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2626 vqdmlal_s32(__a,
__b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2627 2628 /* vqdmlal_n_*: scalar-operand forms. The scalar __c is replicated into every lane of a vector literal, which takes the place of the third vector operand in the call to __builtin_neon_vqdmlal_v. */ __ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { 2629 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); } 2630 __ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { 2631 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); } 2632 2633 /* vqdmlsl_*: three-operand wrappers over __builtin_neon_vqdmlsl_v. The first operand and the result are 16-byte vectors; the second and third operands are 8-byte vectors with elements half as wide. The integer literal is 34 for the int32x4_t result and 35 for int64x2_t. */ __ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { 2634 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); } 2635 __ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { 2636 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); } 2637 2638 /* vqdmlsl_lane_*: macros that copy a, b and c into typed locals, use __builtin_shufflevector to build a vector repeating lane __d of c, and call the matching vqdmlsl_* function with it. __d is pasted directly into the builtin; NOTE(review): presumably it must be a constant lane index - confirm. */ #define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ 2639 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2640 vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2641 #define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ 2642 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2643 vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2644 2645 /* vqdmlsl_n_*: the scalar __c is replicated into a vector literal and passed as the third operand of __builtin_neon_vqdmlsl_v. */ __ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { 2646 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); } 2647 __ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { 2648 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); } 2649 2650 /* vqdmulh_* / vqdmulhq_*: two-operand wrappers over __builtin_neon_vqdmulh_v and __builtin_neon_vqdmulhq_v. Only signed 16- and 32-bit element types are provided, and the result type equals the operand type. */ __ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { 2651 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2652 __ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { 2653 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2654 __ai
int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { 2655 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2656 __ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { 2657 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2658 2659 /* vqdmulh_lane_* / vqdmulhq_lane_*: macros that copy a and b into typed locals, repeat lane __c of b across a vector with __builtin_shufflevector, and pass that vector as the second operand of the matching vqdmulh function. In the q forms b is still an 8-byte vector (int16x4_t / int32x2_t); the shuffle lists 8 or 4 indices so its result has as many lanes as the 16-byte first operand. NOTE(review): __c presumably must be a constant lane index - confirm. */ #define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \ 2660 int16x4_t __a = (a); int16x4_t __b = (b); \ 2661 vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2662 #define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \ 2663 int32x2_t __a = (a); int32x2_t __b = (b); \ 2664 vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2665 #define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \ 2666 int16x8_t __a = (a); int16x4_t __b = (b); \ 2667 vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) 2668 #define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \ 2669 int32x4_t __a = (a); int32x2_t __b = (b); \ 2670 vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2671 2672 /* vqdmulh_n_* / vqdmulhq_n_*: the scalar __b is replicated into a vector literal of the same type as __a and used as the second operand of the builtin. */ __ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { 2673 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } 2674 __ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { 2675 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } 2676 __ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { 2677 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } 2678 __ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { 2679 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } 2680 2681 /* vqdmull_*: two 8-byte operands in, one 16-byte result with elements twice as wide out (int16x4_t to int32x4_t, int32x2_t to int64x2_t). The integer literal (34, 35) follows the result type. */ __ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { 2682 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } 2683 __ai int64x2_t
vqdmull_s32(int32x2_t __a, int32x2_t __b) { 2684 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } 2685 2686 /* vqdmull_lane_*: macros that copy a and b into typed locals, repeat lane __c of b across a vector with __builtin_shufflevector, and call the matching vqdmull_* function. NOTE(review): __c presumably must be a constant lane index - confirm. */ #define vqdmull_lane_s16(a, b, __c) __extension__ ({ \ 2687 int16x4_t __a = (a); int16x4_t __b = (b); \ 2688 vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2689 #define vqdmull_lane_s32(a, b, __c) __extension__ ({ \ 2690 int32x2_t __a = (a); int32x2_t __b = (b); \ 2691 vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2692 2693 /* vqdmull_n_*: the scalar __b is replicated into a vector literal of the same type as __a and passed as the second operand of __builtin_neon_vqdmull_v. */ __ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { 2694 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } 2695 __ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { 2696 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } 2697 2698 /* vqmovn_*: one 16-byte operand in, one 8-byte result with elements half as wide out (e.g. int16x8_t to int8x8_t). The integer literal (0-2 signed, 16-18 unsigned) follows the result type. */ __ai int8x8_t vqmovn_s16(int16x8_t __a) { 2699 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); } 2700 __ai int16x4_t vqmovn_s32(int32x4_t __a) { 2701 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); } 2702 __ai int32x2_t vqmovn_s64(int64x2_t __a) { 2703 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); } 2704 __ai uint8x8_t vqmovn_u16(uint16x8_t __a) { 2705 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 16); } 2706 __ai uint16x4_t vqmovn_u32(uint32x4_t __a) { 2707 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 17); } 2708 __ai uint32x2_t vqmovn_u64(uint64x2_t __a) { 2709 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 18); } 2710 2711 /* vqmovun_*: same operand and result sizes as the vqmovn_* wrappers, but the parameter is a signed vector while the return type is unsigned; the integer literals 16-18 are the ones the vqmovn_u* wrappers pass for the same result types. */ __ai uint8x8_t vqmovun_s16(int16x8_t __a) { 2712 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 16); } 2713 __ai uint16x4_t vqmovun_s32(int32x4_t __a) { 2714 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 17); } 2715 __ai uint32x2_t vqmovun_s64(int64x2_t __a) { 2716 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 18); } 2717 2718 /* vqneg_* / vqnegq_*: one-operand wrappers over __builtin_neon_vqneg_v and __builtin_neon_vqnegq_v for signed 8-, 16- and 32-bit elements. */ __ai int8x8_t vqneg_s8(int8x8_t __a) { 2719
return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); } 2720 __ai int16x4_t vqneg_s16(int16x4_t __a) { 2721 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); } 2722 __ai int32x2_t vqneg_s32(int32x2_t __a) { 2723 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); } 2724 __ai int8x16_t vqnegq_s8(int8x16_t __a) { 2725 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 32); } 2726 __ai int16x8_t vqnegq_s16(int16x8_t __a) { 2727 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 33); } 2728 __ai int32x4_t vqnegq_s32(int32x4_t __a) { 2729 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 34); } 2730 2731 /* vqrdmulh_* / vqrdmulhq_*: two-operand wrappers over __builtin_neon_vqrdmulh_v and __builtin_neon_vqrdmulhq_v. Only signed 16- and 32-bit element types are provided; the result type equals the operand type, and the integer literals are 1/2 for the 8-byte forms and 33/34 for the 16-byte forms. */ __ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { 2732 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2733 __ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { 2734 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2735 __ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { 2736 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2737 __ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { 2738 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2739 2740 /* vqrdmulh_lane_* / vqrdmulhq_lane_*: macros that copy a and b into typed locals, repeat lane __c of b across a vector with __builtin_shufflevector, and pass that vector as the second operand of the matching vqrdmulh function. In the q forms b remains an 8-byte vector and the shuffle lists as many indices as the 16-byte first operand has lanes. NOTE(review): __c presumably must be a constant lane index - confirm. */ #define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \ 2741 int16x4_t __a = (a); int16x4_t __b = (b); \ 2742 vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2743 #define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \ 2744 int32x2_t __a = (a); int32x2_t __b = (b); \ 2745 vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2746 #define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \ 2747 int16x8_t __a = (a); int16x4_t __b = (b); \ 2748 vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) 2749 #define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \ 2750 int32x4_t __a = (a); int32x2_t __b = (b); \ 2751 vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b,
__c, __c, __c, __c)); }) 2752 2753 /* vqrdmulh_n_* / vqrdmulhq_n_*: the scalar __b is replicated into a vector literal of the same type as __a and used as the second operand of the builtin. */ __ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { 2754 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } 2755 __ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { 2756 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } 2757 __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { 2758 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } 2759 __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { 2760 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } 2761 2762 /* vqrshl_*: two-operand wrappers over __builtin_neon_vqrshl_v. In the unsigned variants the second parameter is still a signed vector (e.g. vqrshl_u8 takes uint8x8_t and int8x8_t); the return type matches the first parameter. */ __ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { 2763 return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); } 2764 __ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { 2765 return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2766 __ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { 2767 return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2768 __ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) { 2769 return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2770 __ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) { 2771 return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 16); } 2772 __ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { 2773 return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2774 __ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { 2775 return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2776 __ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) { 2777 return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2778 __ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { 2779 return
(int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 32); } 2780 /* vqrshlq_*: 16-byte-vector wrappers over __builtin_neon_vqrshlq_v. As in the 8-byte forms, the second parameter of the unsigned variants is a signed vector; integer literals are 32-35 (signed) and 48-51 (unsigned). */ __ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { 2781 return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2782 __ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { 2783 return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2784 __ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) { 2785 return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 2786 __ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { 2787 return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 48); } 2788 __ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { 2789 return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 2790 __ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { 2791 return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 2792 __ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { 2793 return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 2794 2795 /* vqrshrn_n_*: statement-expression macros. a is copied into a typed 16-byte local (evaluated once) and passed, cast to int8x16_t, to __builtin_neon_vqrshrn_n_v; __b is pasted through untouched. The value of the macro is the builtin result cast to an 8-byte vector whose elements are half as wide as those of a. NOTE(review): __b presumably must be an integer constant expression - confirm. */ #define vqrshrn_n_s16(a, __b) __extension__ ({ \ 2796 int16x8_t __a = (a); \ 2797 (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); }) 2798 #define vqrshrn_n_s32(a, __b) __extension__ ({ \ 2799 int32x4_t __a = (a); \ 2800 (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); }) 2801 #define vqrshrn_n_s64(a, __b) __extension__ ({ \ 2802 int64x2_t __a = (a); \ 2803 (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); }) 2804 #define vqrshrn_n_u16(a, __b) __extension__ ({ \ 2805 uint16x8_t __a = (a); \ 2806 (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 16); }) 2807 #define vqrshrn_n_u32(a, __b) __extension__ ({ \ 2808 uint32x4_t __a = (a); \ 2809 (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 17); }) 2810 #define vqrshrn_n_u64(a, __b) __extension__ ({ \ 2811 uint64x2_t __a = (a); \ 2812
(uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 18); }) 2813 2814 /* vqrshrun_n_*: macros taking a signed 16-byte vector a and yielding an unsigned 8-byte vector with elements half as wide. a is copied into a typed local; __b is pasted unmodified into __builtin_neon_vqrshrun_n_v. NOTE(review): __b presumably must be an integer constant expression - confirm. */ #define vqrshrun_n_s16(a, __b) __extension__ ({ \ 2815 int16x8_t __a = (a); \ 2816 (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 16); }) 2817 #define vqrshrun_n_s32(a, __b) __extension__ ({ \ 2818 int32x4_t __a = (a); \ 2819 (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 17); }) 2820 #define vqrshrun_n_s64(a, __b) __extension__ ({ \ 2821 int64x2_t __a = (a); \ 2822 (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 18); }) 2823 2824 /* vqshl_* / vqshlq_*: two-operand wrappers over __builtin_neon_vqshl_v (8-byte vectors) and __builtin_neon_vqshlq_v (16-byte vectors). In the unsigned variants the second parameter is still a signed vector; the return type matches the first parameter. */ __ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { 2825 return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); } 2826 __ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { 2827 return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2828 __ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { 2829 return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2830 __ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) { 2831 return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2832 __ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { 2833 return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 16); } 2834 __ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { 2835 return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2836 __ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { 2837 return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2838 __ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) { 2839 return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2840 __ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { 2841 return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 32); } 2842 __ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { 2843 return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2844 __ai int32x4_t vqshlq_s32(int32x4_t __a,
int32x4_t __b) { 2845 return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2846 __ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { 2847 return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 2848 __ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { 2849 return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 48); } 2850 __ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { 2851 return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 2852 __ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { 2853 return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 2854 __ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { 2855 return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 2856 2857 /* vqshlu_n_* / vqshluq_n_*: statement-expression macros taking a signed vector a and yielding the unsigned vector type of the same element width and lane count. a is copied into a typed local (evaluated once); __b is pasted unmodified into __builtin_neon_vqshlu_n_v or __builtin_neon_vqshluq_n_v. The integer literals are the unsigned ones (16-19 for 8-byte, 48-51 for 16-byte vectors). NOTE(review): __b presumably must be an integer constant expression - confirm. */ #define vqshlu_n_s8(a, __b) __extension__ ({ \ 2858 int8x8_t __a = (a); \ 2859 (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 16); }) 2860 #define vqshlu_n_s16(a, __b) __extension__ ({ \ 2861 int16x4_t __a = (a); \ 2862 (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 17); }) 2863 #define vqshlu_n_s32(a, __b) __extension__ ({ \ 2864 int32x2_t __a = (a); \ 2865 (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 18); }) 2866 #define vqshlu_n_s64(a, __b) __extension__ ({ \ 2867 int64x1_t __a = (a); \ 2868 (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 19); }) 2869 #define vqshluq_n_s8(a, __b) __extension__ ({ \ 2870 int8x16_t __a = (a); \ 2871 (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 48); }) 2872 #define vqshluq_n_s16(a, __b) __extension__ ({ \ 2873 int16x8_t __a = (a); \ 2874 (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 49); }) 2875 #define vqshluq_n_s32(a, __b) __extension__ ({ \ 2876 int32x4_t __a = (a); \ 2877 (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 50); }) 2878 #define vqshluq_n_s64(a, __b) __extension__ ({ \ 2879 int64x2_t __a = (a); \ 2880
(uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 51); }) 2881 2882 /* vqshl_n_*: statement-expression macros over __builtin_neon_vqshl_n_v for every signed and unsigned 8-byte integer vector type. a is copied into a typed local (evaluated once), cast to int8x8_t where needed, and the result is cast back to the type of a; __b is pasted through untouched. NOTE(review): __b presumably must be an integer constant expression - confirm. */ #define vqshl_n_s8(a, __b) __extension__ ({ \ 2883 int8x8_t __a = (a); \ 2884 (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); }) 2885 #define vqshl_n_s16(a, __b) __extension__ ({ \ 2886 int16x4_t __a = (a); \ 2887 (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); }) 2888 #define vqshl_n_s32(a, __b) __extension__ ({ \ 2889 int32x2_t __a = (a); \ 2890 (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); }) 2891 #define vqshl_n_s64(a, __b) __extension__ ({ \ 2892 int64x1_t __a = (a); \ 2893 (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); }) 2894 #define vqshl_n_u8(a, __b) __extension__ ({ \ 2895 uint8x8_t __a = (a); \ 2896 (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 16); }) 2897 #define vqshl_n_u16(a, __b) __extension__ ({ \ 2898 uint16x4_t __a = (a); \ 2899 (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 17); }) 2900 #define vqshl_n_u32(a, __b) __extension__ ({ \ 2901 uint32x2_t __a = (a); \ 2902 (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 18); }) 2903 #define vqshl_n_u64(a, __b) __extension__ ({ \ 2904 uint64x1_t __a = (a); \ 2905 (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 19); }) 2906 /* vqshlq_n_*: the 16-byte-vector counterparts, calling __builtin_neon_vqshlq_n_v with a cast to int8x16_t and integer literals 32-35 (signed) and 48-51 (unsigned). */ #define vqshlq_n_s8(a, __b) __extension__ ({ \ 2907 int8x16_t __a = (a); \ 2908 (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 32); }) 2909 #define vqshlq_n_s16(a, __b) __extension__ ({ \ 2910 int16x8_t __a = (a); \ 2911 (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 33); }) 2912 #define vqshlq_n_s32(a, __b) __extension__ ({ \ 2913 int32x4_t __a = (a); \ 2914 (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 34); }) 2915 #define vqshlq_n_s64(a, __b) __extension__ ({ \ 2916 int64x2_t __a = (a); \ 2917 (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 35); }) 2918 #define vqshlq_n_u8(a, __b) __extension__ ({ \ 2919 uint8x16_t __a = (a); \ 2920 (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 48);
}) 2921 #define vqshlq_n_u16(a, __b) __extension__ ({ \ 2922 uint16x8_t __a = (a); \ 2923 (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 49); }) 2924 #define vqshlq_n_u32(a, __b) __extension__ ({ \ 2925 uint32x4_t __a = (a); \ 2926 (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 50); }) 2927 #define vqshlq_n_u64(a, __b) __extension__ ({ \ 2928 uint64x2_t __a = (a); \ 2929 (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 51); }) 2930 2931 /* vqshrn_n_*: statement-expression macros. a is copied into a typed 16-byte local (evaluated once) and passed, cast to int8x16_t, to __builtin_neon_vqshrn_n_v; __b is pasted through untouched. The value of the macro is the builtin result cast to an 8-byte vector whose elements are half as wide as those of a, and the integer literal follows that result type. NOTE(review): __b presumably must be an integer constant expression - confirm. */ #define vqshrn_n_s16(a, __b) __extension__ ({ \ 2932 int16x8_t __a = (a); \ 2933 (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); }) 2934 #define vqshrn_n_s32(a, __b) __extension__ ({ \ 2935 int32x4_t __a = (a); \ 2936 (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); }) 2937 #define vqshrn_n_s64(a, __b) __extension__ ({ \ 2938 int64x2_t __a = (a); \ 2939 (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); }) 2940 #define vqshrn_n_u16(a, __b) __extension__ ({ \ 2941 uint16x8_t __a = (a); \ 2942 (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 16); }) 2943 #define vqshrn_n_u32(a, __b) __extension__ ({ \ 2944 uint32x4_t __a = (a); \ 2945 (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 17); }) 2946 #define vqshrn_n_u64(a, __b) __extension__ ({ \ 2947 uint64x2_t __a = (a); \ 2948 (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 18); }) 2949 2950 /* vqshrun_n_*: macros taking a signed 16-byte vector a and yielding an unsigned 8-byte vector with elements half as wide, via __builtin_neon_vqshrun_n_v; __b is pasted through untouched. */ #define vqshrun_n_s16(a, __b) __extension__ ({ \ 2951 int16x8_t __a = (a); \ 2952 (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 16); }) 2953 #define vqshrun_n_s32(a, __b) __extension__ ({ \ 2954 int32x4_t __a = (a); \ 2955 (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 17); }) 2956 #define vqshrun_n_s64(a, __b) __extension__ ({ \ 2957 int64x2_t __a = (a); \ 2958 (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 18); }) 2959 2960 /* vqsub_*: two-operand wrappers over __builtin_neon_vqsub_v for every signed and unsigned 8-byte integer vector type; integer literals are 0-3 for s8..s64 and 16-19 for u8..u64. */ __ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { 2961 return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); } 2962 __ai int16x4_t
vqsub_s16(int16x4_t __a, int16x4_t __b) { 2963 return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2964 __ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { 2965 return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2966 __ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { 2967 return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2968 __ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { 2969 return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } 2970 __ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { 2971 return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } 2972 __ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { 2973 return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2974 __ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { 2975 return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2976 /* vqsubq_*: 16-byte-vector wrappers over __builtin_neon_vqsubq_v. Operands are cast to int8x16_t (unless already that type) and the result is cast back to the declared return type; integer literals are 32-35 for s8..s64 and 48-51 for u8..u64. */ __ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { 2977 return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 32); } 2978 __ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { 2979 return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 2980 __ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { 2981 return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 2982 __ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { 2983 return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 2984 __ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { 2985 return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 2986 __ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { 2987 return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 2988 __ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { 2989 return
(uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 2990 __ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { 2991 return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 2992 2993 /* vraddhn_*: two 16-byte operands in, one 8-byte result with elements half as wide out (e.g. two int16x8_t in, int8x8_t out), via __builtin_neon_vraddhn_v. The integer literal (0-2 signed, 16-18 unsigned) follows the result type. */ __ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { 2994 return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 2995 __ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { 2996 return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 2997 __ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { 2998 return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 2999 __ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { 3000 return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } 3001 __ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { 3002 return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } 3003 __ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { 3004 return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } 3005 3006 /* vrecpe_* / vrecpeq_*: one-operand wrappers over __builtin_neon_vrecpe_v and __builtin_neon_vrecpeq_v, provided for float32 and uint32 elements only; integer literals are 7 and 18 for the 8-byte forms, 39 and 50 for the 16-byte forms. */ __ai float32x2_t vrecpe_f32(float32x2_t __a) { 3007 return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 7); } 3008 __ai uint32x2_t vrecpe_u32(uint32x2_t __a) { 3009 return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 18); } 3010 __ai float32x4_t vrecpeq_f32(float32x4_t __a) { 3011 return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 39); } 3012 __ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { 3013 return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 50); } 3014 3015 /* vrecps_f32 / vrecpsq_f32: two-operand float32 wrappers over __builtin_neon_vrecps_v and __builtin_neon_vrecpsq_v. */ __ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { 3016 return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 7); } 3017 __ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { 3018 return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 39); } 3019 3020 /* vreinterpret_DST_SRC: each function is a single C cast from the SRC vector type to the DST vector type, with no builtin call. This first group produces int8x8_t. NOTE(review): with Clang's vector extensions a cast between same-size vector types should reinterpret the bits rather than convert element values - confirm. */ __ai int8x8_t
vreinterpret_s8_s16(int16x4_t __a) { 3021 return (int8x8_t)__a; } 3022 __ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { 3023 return (int8x8_t)__a; } 3024 __ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { 3025 return (int8x8_t)__a; } 3026 __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { 3027 return (int8x8_t)__a; } 3028 __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { 3029 return (int8x8_t)__a; } 3030 __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) { 3031 return (int8x8_t)__a; } 3032 __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) { 3033 return (int8x8_t)__a; } 3034 __ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) { 3035 return (int8x8_t)__a; } 3036 __ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) { 3037 return (int8x8_t)__a; } 3038 __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) { 3039 return (int8x8_t)__a; } 3040 __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) { 3041 return (int8x8_t)__a; } 3042 /* vreinterpret_s16_*: return the argument cast to int16x4_t, one function per other 8-byte vector type; no builtin is called. */ __ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) { 3043 return (int16x4_t)__a; } 3044 __ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) { 3045 return (int16x4_t)__a; } 3046 __ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) { 3047 return (int16x4_t)__a; } 3048 __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) { 3049 return (int16x4_t)__a; } 3050 __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __a) { 3051 return (int16x4_t)__a; } 3052 __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) { 3053 return (int16x4_t)__a; } 3054 __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) { 3055 return (int16x4_t)__a; } 3056 __ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) { 3057 return (int16x4_t)__a; } 3058 __ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) { 3059 return (int16x4_t)__a; } 3060 __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) { 3061 return (int16x4_t)__a; } 3062 __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) { 3063 return (int16x4_t)__a; } 3064 /* vreinterpret_s32_*: return the argument cast to int32x2_t; no builtin is called. */ __ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) { 3065 return (int32x2_t)__a; } 3066 __ai int32x2_t
vreinterpret_s32_s16(int16x4_t __a) { 3067 return (int32x2_t)__a; } 3068 __ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) { 3069 return (int32x2_t)__a; } 3070 __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) { 3071 return (int32x2_t)__a; } 3072 __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) { 3073 return (int32x2_t)__a; } 3074 __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) { 3075 return (int32x2_t)__a; } 3076 __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) { 3077 return (int32x2_t)__a; } 3078 __ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) { 3079 return (int32x2_t)__a; } 3080 __ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) { 3081 return (int32x2_t)__a; } 3082 __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) { 3083 return (int32x2_t)__a; } 3084 __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) { 3085 return (int32x2_t)__a; } 3086 /* vreinterpret_s64_*: return the argument cast to int64x1_t, one function per other 8-byte vector type; no builtin is called. */ __ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) { 3087 return (int64x1_t)__a; } 3088 __ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) { 3089 return (int64x1_t)__a; } 3090 __ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) { 3091 return (int64x1_t)__a; } 3092 __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) { 3093 return (int64x1_t)__a; } 3094 __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) { 3095 return (int64x1_t)__a; } 3096 __ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) { 3097 return (int64x1_t)__a; } 3098 __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) { 3099 return (int64x1_t)__a; } 3100 __ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) { 3101 return (int64x1_t)__a; } 3102 __ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) { 3103 return (int64x1_t)__a; } 3104 __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) { 3105 return (int64x1_t)__a; } 3106 __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) { 3107 return (int64x1_t)__a; } 3108 /* vreinterpret_u8_*: return the argument cast to uint8x8_t; no builtin is called. */ __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) { 3109 return (uint8x8_t)__a; } 3110 __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) { 3111 return
(uint8x8_t)__a; } 3112 __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) { 3113 return (uint8x8_t)__a; } 3114 __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) { 3115 return (uint8x8_t)__a; } 3116 __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) { 3117 return (uint8x8_t)__a; } 3118 __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) { 3119 return (uint8x8_t)__a; } 3120 __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) { 3121 return (uint8x8_t)__a; } 3122 __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) { 3123 return (uint8x8_t)__a; } 3124 __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) { 3125 return (uint8x8_t)__a; } 3126 __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) { 3127 return (uint8x8_t)__a; } 3128 __ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) { 3129 return (uint8x8_t)__a; } 3130 /* vreinterpret_u16_*: return the argument cast to uint16x4_t, one function per other 8-byte vector type; no builtin is called. */ __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) { 3131 return (uint16x4_t)__a; } 3132 __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) { 3133 return (uint16x4_t)__a; } 3134 __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) { 3135 return (uint16x4_t)__a; } 3136 __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) { 3137 return (uint16x4_t)__a; } 3138 __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) { 3139 return (uint16x4_t)__a; } 3140 __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) { 3141 return (uint16x4_t)__a; } 3142 __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) { 3143 return (uint16x4_t)__a; } 3144 __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) { 3145 return (uint16x4_t)__a; } 3146 __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) { 3147 return (uint16x4_t)__a; } 3148 __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) { 3149 return (uint16x4_t)__a; } 3150 __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) { 3151 return (uint16x4_t)__a; } 3152 /* vreinterpret_u32_*: return the argument cast to uint32x2_t; no builtin is called. */ __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) { 3153 return (uint32x2_t)__a; } 3154 __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) { 3155 return (uint32x2_t)__a; } 3156 __ai uint32x2_t
vreinterpret_u32_s32(int32x2_t __a) { 3157 return (uint32x2_t)__a; } 3158 __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) { 3159 return (uint32x2_t)__a; } 3160 __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) { 3161 return (uint32x2_t)__a; } 3162 __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) { 3163 return (uint32x2_t)__a; } 3164 __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) { 3165 return (uint32x2_t)__a; } 3166 __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) { 3167 return (uint32x2_t)__a; } 3168 __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) { 3169 return (uint32x2_t)__a; } 3170 __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) { 3171 return (uint32x2_t)__a; } 3172 __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) { 3173 return (uint32x2_t)__a; } 3174 __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) { 3175 return (uint64x1_t)__a; } 3176 __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) { 3177 return (uint64x1_t)__a; } 3178 __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) { 3179 return (uint64x1_t)__a; } 3180 __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) { 3181 return (uint64x1_t)__a; } 3182 __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) { 3183 return (uint64x1_t)__a; } 3184 __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) { 3185 return (uint64x1_t)__a; } 3186 __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) { 3187 return (uint64x1_t)__a; } 3188 __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) { 3189 return (uint64x1_t)__a; } 3190 __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) { 3191 return (uint64x1_t)__a; } 3192 __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) { 3193 return (uint64x1_t)__a; } 3194 __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) { 3195 return (uint64x1_t)__a; } 3196 __ai float16x4_t vreinterpret_f16_s8(int8x8_t __a) { 3197 return (float16x4_t)__a; } 3198 __ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) { 3199 return (float16x4_t)__a; } 3200 __ai float16x4_t 
vreinterpret_f16_s32(int32x2_t __a) { 3201 return (float16x4_t)__a; } 3202 __ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) { 3203 return (float16x4_t)__a; } 3204 __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) { 3205 return (float16x4_t)__a; } 3206 __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) { 3207 return (float16x4_t)__a; } 3208 __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) { 3209 return (float16x4_t)__a; } 3210 __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) { 3211 return (float16x4_t)__a; } 3212 __ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) { 3213 return (float16x4_t)__a; } 3214 __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __a) { 3215 return (float16x4_t)__a; } 3216 __ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) { 3217 return (float16x4_t)__a; } 3218 __ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) { 3219 return (float32x2_t)__a; } 3220 __ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) { 3221 return (float32x2_t)__a; } 3222 __ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) { 3223 return (float32x2_t)__a; } 3224 __ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) { 3225 return (float32x2_t)__a; } 3226 __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) { 3227 return (float32x2_t)__a; } 3228 __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) { 3229 return (float32x2_t)__a; } 3230 __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) { 3231 return (float32x2_t)__a; } 3232 __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) { 3233 return (float32x2_t)__a; } 3234 __ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) { 3235 return (float32x2_t)__a; } 3236 __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) { 3237 return (float32x2_t)__a; } 3238 __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) { 3239 return (float32x2_t)__a; } 3240 __ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) { 3241 return (poly8x8_t)__a; } 3242 __ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) { 3243 return (poly8x8_t)__a; } 
3244 __ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) { 3245 return (poly8x8_t)__a; } 3246 __ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) { 3247 return (poly8x8_t)__a; } 3248 __ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) { 3249 return (poly8x8_t)__a; } 3250 __ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) { 3251 return (poly8x8_t)__a; } 3252 __ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) { 3253 return (poly8x8_t)__a; } 3254 __ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) { 3255 return (poly8x8_t)__a; } 3256 __ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) { 3257 return (poly8x8_t)__a; } 3258 __ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) { 3259 return (poly8x8_t)__a; } 3260 __ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) { 3261 return (poly8x8_t)__a; } 3262 __ai poly16x4_t vreinterpret_p16_s8(int8x8_t __a) { 3263 return (poly16x4_t)__a; } 3264 __ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) { 3265 return (poly16x4_t)__a; } 3266 __ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) { 3267 return (poly16x4_t)__a; } 3268 __ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) { 3269 return (poly16x4_t)__a; } 3270 __ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) { 3271 return (poly16x4_t)__a; } 3272 __ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) { 3273 return (poly16x4_t)__a; } 3274 __ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) { 3275 return (poly16x4_t)__a; } 3276 __ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) { 3277 return (poly16x4_t)__a; } 3278 __ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) { 3279 return (poly16x4_t)__a; } 3280 __ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) { 3281 return (poly16x4_t)__a; } 3282 __ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) { 3283 return (poly16x4_t)__a; } 3284 __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) { 3285 return (int8x16_t)__a; } 3286 __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) { 3287 return (int8x16_t)__a; } 3288 __ai int8x16_t 
vreinterpretq_s8_s64(int64x2_t __a) { 3289 return (int8x16_t)__a; } 3290 __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) { 3291 return (int8x16_t)__a; } 3292 __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) { 3293 return (int8x16_t)__a; } 3294 __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) { 3295 return (int8x16_t)__a; } 3296 __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) { 3297 return (int8x16_t)__a; } 3298 __ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) { 3299 return (int8x16_t)__a; } 3300 __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) { 3301 return (int8x16_t)__a; } 3302 __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) { 3303 return (int8x16_t)__a; } 3304 __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) { 3305 return (int8x16_t)__a; } 3306 __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) { 3307 return (int16x8_t)__a; } 3308 __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) { 3309 return (int16x8_t)__a; } 3310 __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __a) { 3311 return (int16x8_t)__a; } 3312 __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) { 3313 return (int16x8_t)__a; } 3314 __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) { 3315 return (int16x8_t)__a; } 3316 __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) { 3317 return (int16x8_t)__a; } 3318 __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) { 3319 return (int16x8_t)__a; } 3320 __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) { 3321 return (int16x8_t)__a; } 3322 __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) { 3323 return (int16x8_t)__a; } 3324 __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) { 3325 return (int16x8_t)__a; } 3326 __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) { 3327 return (int16x8_t)__a; } 3328 __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) { 3329 return (int32x4_t)__a; } 3330 __ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) { 3331 return (int32x4_t)__a; } 3332 __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) 
{ 3333 return (int32x4_t)__a; } 3334 __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) { 3335 return (int32x4_t)__a; } 3336 __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) { 3337 return (int32x4_t)__a; } 3338 __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) { 3339 return (int32x4_t)__a; } 3340 __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __a) { 3341 return (int32x4_t)__a; } 3342 __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) { 3343 return (int32x4_t)__a; } 3344 __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) { 3345 return (int32x4_t)__a; } 3346 __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) { 3347 return (int32x4_t)__a; } 3348 __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) { 3349 return (int32x4_t)__a; } 3350 __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) { 3351 return (int64x2_t)__a; } 3352 __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) { 3353 return (int64x2_t)__a; } 3354 __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) { 3355 return (int64x2_t)__a; } 3356 __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) { 3357 return (int64x2_t)__a; } 3358 __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __a) { 3359 return (int64x2_t)__a; } 3360 __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) { 3361 return (int64x2_t)__a; } 3362 __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) { 3363 return (int64x2_t)__a; } 3364 __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) { 3365 return (int64x2_t)__a; } 3366 __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) { 3367 return (int64x2_t)__a; } 3368 __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) { 3369 return (int64x2_t)__a; } 3370 __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) { 3371 return (int64x2_t)__a; } 3372 __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) { 3373 return (uint8x16_t)__a; } 3374 __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) { 3375 return (uint8x16_t)__a; } 3376 __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) { 3377 return 
(uint8x16_t)__a; } 3378 __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) { 3379 return (uint8x16_t)__a; } 3380 __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) { 3381 return (uint8x16_t)__a; } 3382 __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) { 3383 return (uint8x16_t)__a; } 3384 __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) { 3385 return (uint8x16_t)__a; } 3386 __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) { 3387 return (uint8x16_t)__a; } 3388 __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __a) { 3389 return (uint8x16_t)__a; } 3390 __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) { 3391 return (uint8x16_t)__a; } 3392 __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) { 3393 return (uint8x16_t)__a; } 3394 __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) { 3395 return (uint16x8_t)__a; } 3396 __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) { 3397 return (uint16x8_t)__a; } 3398 __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) { 3399 return (uint16x8_t)__a; } 3400 __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) { 3401 return (uint16x8_t)__a; } 3402 __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) { 3403 return (uint16x8_t)__a; } 3404 __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) { 3405 return (uint16x8_t)__a; } 3406 __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) { 3407 return (uint16x8_t)__a; } 3408 __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) { 3409 return (uint16x8_t)__a; } 3410 __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) { 3411 return (uint16x8_t)__a; } 3412 __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) { 3413 return (uint16x8_t)__a; } 3414 __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) { 3415 return (uint16x8_t)__a; } 3416 __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) { 3417 return (uint32x4_t)__a; } 3418 __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) { 3419 return (uint32x4_t)__a; } 3420 __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) { 3421 
return (uint32x4_t)__a; } 3422 __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) { 3423 return (uint32x4_t)__a; } 3424 __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) { 3425 return (uint32x4_t)__a; } 3426 __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) { 3427 return (uint32x4_t)__a; } 3428 __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) { 3429 return (uint32x4_t)__a; } 3430 __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) { 3431 return (uint32x4_t)__a; } 3432 __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) { 3433 return (uint32x4_t)__a; } 3434 __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) { 3435 return (uint32x4_t)__a; } 3436 __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) { 3437 return (uint32x4_t)__a; } 3438 __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) { 3439 return (uint64x2_t)__a; } 3440 __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) { 3441 return (uint64x2_t)__a; } 3442 __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) { 3443 return (uint64x2_t)__a; } 3444 __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) { 3445 return (uint64x2_t)__a; } 3446 __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) { 3447 return (uint64x2_t)__a; } 3448 __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) { 3449 return (uint64x2_t)__a; } 3450 __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) { 3451 return (uint64x2_t)__a; } 3452 __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) { 3453 return (uint64x2_t)__a; } 3454 __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) { 3455 return (uint64x2_t)__a; } 3456 __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) { 3457 return (uint64x2_t)__a; } 3458 __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) { 3459 return (uint64x2_t)__a; } 3460 __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) { 3461 return (float16x8_t)__a; } 3462 __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) { 3463 return (float16x8_t)__a; } 3464 __ai float16x8_t 
vreinterpretq_f16_s32(int32x4_t __a) { 3465 return (float16x8_t)__a; } 3466 __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) { 3467 return (float16x8_t)__a; } 3468 __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) { 3469 return (float16x8_t)__a; } 3470 __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) { 3471 return (float16x8_t)__a; } 3472 __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) { 3473 return (float16x8_t)__a; } 3474 __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) { 3475 return (float16x8_t)__a; } 3476 __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) { 3477 return (float16x8_t)__a; } 3478 __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) { 3479 return (float16x8_t)__a; } 3480 __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) { 3481 return (float16x8_t)__a; } 3482 __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __a) { 3483 return (float32x4_t)__a; } 3484 __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) { 3485 return (float32x4_t)__a; } 3486 __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) { 3487 return (float32x4_t)__a; } 3488 __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) { 3489 return (float32x4_t)__a; } 3490 __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) { 3491 return (float32x4_t)__a; } 3492 __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) { 3493 return (float32x4_t)__a; } 3494 __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) { 3495 return (float32x4_t)__a; } 3496 __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) { 3497 return (float32x4_t)__a; } 3498 __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) { 3499 return (float32x4_t)__a; } 3500 __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) { 3501 return (float32x4_t)__a; } 3502 __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) { 3503 return (float32x4_t)__a; } 3504 __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) { 3505 return (poly8x16_t)__a; } 3506 __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) { 
3507 return (poly8x16_t)__a; } 3508 __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) { 3509 return (poly8x16_t)__a; } 3510 __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) { 3511 return (poly8x16_t)__a; } 3512 __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) { 3513 return (poly8x16_t)__a; } 3514 __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) { 3515 return (poly8x16_t)__a; } 3516 __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) { 3517 return (poly8x16_t)__a; } 3518 __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) { 3519 return (poly8x16_t)__a; } 3520 __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) { 3521 return (poly8x16_t)__a; } 3522 __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) { 3523 return (poly8x16_t)__a; } 3524 __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) { 3525 return (poly8x16_t)__a; } 3526 __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) { 3527 return (poly16x8_t)__a; } 3528 __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __a) { 3529 return (poly16x8_t)__a; } 3530 __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) { 3531 return (poly16x8_t)__a; } 3532 __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) { 3533 return (poly16x8_t)__a; } 3534 __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) { 3535 return (poly16x8_t)__a; } 3536 __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) { 3537 return (poly16x8_t)__a; } 3538 __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) { 3539 return (poly16x8_t)__a; } 3540 __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) { 3541 return (poly16x8_t)__a; } 3542 __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) { 3543 return (poly16x8_t)__a; } 3544 __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) { 3545 return (poly16x8_t)__a; } 3546 __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __a) { 3547 return (poly16x8_t)__a; } 3548 3549 __ai int8x8_t vrev16_s8(int8x8_t __a) { 3550 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3551 __ai uint8x8_t 
vrev16_u8(uint8x8_t __a) { 3552 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3553 __ai poly8x8_t vrev16_p8(poly8x8_t __a) { 3554 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3555 __ai int8x16_t vrev16q_s8(int8x16_t __a) { 3556 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } 3557 __ai uint8x16_t vrev16q_u8(uint8x16_t __a) { 3558 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } 3559 __ai poly8x16_t vrev16q_p8(poly8x16_t __a) { 3560 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } 3561 3562 __ai int8x8_t vrev32_s8(int8x8_t __a) { 3563 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3564 __ai int16x4_t vrev32_s16(int16x4_t __a) { 3565 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3566 __ai uint8x8_t vrev32_u8(uint8x8_t __a) { 3567 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3568 __ai uint16x4_t vrev32_u16(uint16x4_t __a) { 3569 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3570 __ai poly8x8_t vrev32_p8(poly8x8_t __a) { 3571 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3572 __ai poly16x4_t vrev32_p16(poly16x4_t __a) { 3573 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3574 __ai int8x16_t vrev32q_s8(int8x16_t __a) { 3575 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } 3576 __ai int16x8_t vrev32q_s16(int16x8_t __a) { 3577 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3578 __ai uint8x16_t vrev32q_u8(uint8x16_t __a) { 3579 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } 3580 __ai uint16x8_t vrev32q_u16(uint16x8_t __a) { 3581 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3582 __ai poly8x16_t vrev32q_p8(poly8x16_t __a) { 3583 return 
__builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } 3584 __ai poly16x8_t vrev32q_p16(poly16x8_t __a) { 3585 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } 3586 3587 __ai int8x8_t vrev64_s8(int8x8_t __a) { 3588 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } 3589 __ai int16x4_t vrev64_s16(int16x4_t __a) { 3590 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } 3591 __ai int32x2_t vrev64_s32(int32x2_t __a) { 3592 return __builtin_shufflevector(__a, __a, 1, 0); } 3593 __ai uint8x8_t vrev64_u8(uint8x8_t __a) { 3594 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } 3595 __ai uint16x4_t vrev64_u16(uint16x4_t __a) { 3596 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } 3597 __ai uint32x2_t vrev64_u32(uint32x2_t __a) { 3598 return __builtin_shufflevector(__a, __a, 1, 0); } 3599 __ai poly8x8_t vrev64_p8(poly8x8_t __a) { 3600 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } 3601 __ai poly16x4_t vrev64_p16(poly16x4_t __a) { 3602 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } 3603 __ai float32x2_t vrev64_f32(float32x2_t __a) { 3604 return __builtin_shufflevector(__a, __a, 1, 0); } 3605 __ai int8x16_t vrev64q_s8(int8x16_t __a) { 3606 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } 3607 __ai int16x8_t vrev64q_s16(int16x8_t __a) { 3608 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3609 __ai int32x4_t vrev64q_s32(int32x4_t __a) { 3610 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3611 __ai uint8x16_t vrev64q_u8(uint8x16_t __a) { 3612 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } 3613 __ai uint16x8_t vrev64q_u16(uint16x8_t __a) { 3614 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3615 __ai uint32x4_t vrev64q_u32(uint32x4_t __a) { 3616 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 
3617 __ai poly8x16_t vrev64q_p8(poly8x16_t __a) { 3618 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } 3619 __ai poly16x8_t vrev64q_p16(poly16x8_t __a) { 3620 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } 3621 __ai float32x4_t vrev64q_f32(float32x4_t __a) { 3622 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3623 3624 __ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { 3625 return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); } 3626 __ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { 3627 return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3628 __ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { 3629 return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3630 __ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { 3631 return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } 3632 __ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { 3633 return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 3634 __ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { 3635 return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } 3636 __ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { 3637 return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 32); } 3638 __ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { 3639 return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 3640 __ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { 3641 return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 3642 __ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { 3643 return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 3644 __ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { 3645 return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 
3646 __ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { 3647 return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 3648 3649 __ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { 3650 return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); } 3651 __ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { 3652 return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3653 __ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { 3654 return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3655 __ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) { 3656 return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 3657 __ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { 3658 return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 16); } 3659 __ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { 3660 return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } 3661 __ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { 3662 return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } 3663 __ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) { 3664 return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } 3665 __ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { 3666 return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 32); } 3667 __ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { 3668 return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 3669 __ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { 3670 return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 3671 __ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { 3672 return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 3673 __ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { 3674 return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 48); 
} 3675 __ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) { 3676 return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 3677 __ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { 3678 return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 3679 __ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { 3680 return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 3681 3682 #define vrshrn_n_s16(a, __b) __extension__ ({ \ 3683 int16x8_t __a = (a); \ 3684 (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); }) 3685 #define vrshrn_n_s32(a, __b) __extension__ ({ \ 3686 int32x4_t __a = (a); \ 3687 (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); }) 3688 #define vrshrn_n_s64(a, __b) __extension__ ({ \ 3689 int64x2_t __a = (a); \ 3690 (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); }) 3691 #define vrshrn_n_u16(a, __b) __extension__ ({ \ 3692 uint16x8_t __a = (a); \ 3693 (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 16); }) 3694 #define vrshrn_n_u32(a, __b) __extension__ ({ \ 3695 uint32x4_t __a = (a); \ 3696 (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 17); }) 3697 #define vrshrn_n_u64(a, __b) __extension__ ({ \ 3698 uint64x2_t __a = (a); \ 3699 (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 18); }) 3700 3701 #define vrshr_n_s8(a, __b) __extension__ ({ \ 3702 int8x8_t __a = (a); \ 3703 (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); }) 3704 #define vrshr_n_s16(a, __b) __extension__ ({ \ 3705 int16x4_t __a = (a); \ 3706 (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); }) 3707 #define vrshr_n_s32(a, __b) __extension__ ({ \ 3708 int32x2_t __a = (a); \ 3709 (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); }) 3710 #define vrshr_n_s64(a, __b) __extension__ ({ \ 3711 int64x1_t __a = (a); \ 3712 (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); }) 3713 #define vrshr_n_u8(a, __b) 
__extension__ ({ \ 3714 uint8x8_t __a = (a); \ 3715 (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 16); }) 3716 #define vrshr_n_u16(a, __b) __extension__ ({ \ 3717 uint16x4_t __a = (a); \ 3718 (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 17); }) 3719 #define vrshr_n_u32(a, __b) __extension__ ({ \ 3720 uint32x2_t __a = (a); \ 3721 (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 18); }) 3722 #define vrshr_n_u64(a, __b) __extension__ ({ \ 3723 uint64x1_t __a = (a); \ 3724 (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 19); }) 3725 #define vrshrq_n_s8(a, __b) __extension__ ({ \ 3726 int8x16_t __a = (a); \ 3727 (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 32); }) 3728 #define vrshrq_n_s16(a, __b) __extension__ ({ \ 3729 int16x8_t __a = (a); \ 3730 (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 33); }) 3731 #define vrshrq_n_s32(a, __b) __extension__ ({ \ 3732 int32x4_t __a = (a); \ 3733 (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 34); }) 3734 #define vrshrq_n_s64(a, __b) __extension__ ({ \ 3735 int64x2_t __a = (a); \ 3736 (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 35); }) 3737 #define vrshrq_n_u8(a, __b) __extension__ ({ \ 3738 uint8x16_t __a = (a); \ 3739 (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 48); }) 3740 #define vrshrq_n_u16(a, __b) __extension__ ({ \ 3741 uint16x8_t __a = (a); \ 3742 (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 49); }) 3743 #define vrshrq_n_u32(a, __b) __extension__ ({ \ 3744 uint32x4_t __a = (a); \ 3745 (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 50); }) 3746 #define vrshrq_n_u64(a, __b) __extension__ ({ \ 3747 uint64x2_t __a = (a); \ 3748 (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 51); }) 3749 3750 __ai float32x2_t vrsqrte_f32(float32x2_t __a) { 3751 return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 7); } 3752 __ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { 3753 return 
(uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 18); } 3754 __ai float32x4_t vrsqrteq_f32(float32x4_t __a) { 3755 return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 39); } 3756 __ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { 3757 return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 50); } 3758 3759 __ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { 3760 return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 7); } 3761 __ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { 3762 return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 39); } 3763 3764 #define vrsra_n_s8(a, b, __c) __extension__ ({ \ 3765 int8x8_t __a = (a); int8x8_t __b = (b); \ 3766 (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); }) 3767 #define vrsra_n_s16(a, b, __c) __extension__ ({ \ 3768 int16x4_t __a = (a); int16x4_t __b = (b); \ 3769 (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 3770 #define vrsra_n_s32(a, b, __c) __extension__ ({ \ 3771 int32x2_t __a = (a); int32x2_t __b = (b); \ 3772 (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 3773 #define vrsra_n_s64(a, b, __c) __extension__ ({ \ 3774 int64x1_t __a = (a); int64x1_t __b = (b); \ 3775 (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 3776 #define vrsra_n_u8(a, b, __c) __extension__ ({ \ 3777 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 3778 (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) 3779 #define vrsra_n_u16(a, b, __c) __extension__ ({ \ 3780 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 3781 (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) 3782 #define vrsra_n_u32(a, b, __c) __extension__ ({ \ 3783 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 3784 (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) 3785 #define vrsra_n_u64(a, b, __c) __extension__ ({ \ 3786 
uint64x1_t __a = (a); uint64x1_t __b = (b); \ 3787 (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) 3788 #define vrsraq_n_s8(a, b, __c) __extension__ ({ \ 3789 int8x16_t __a = (a); int8x16_t __b = (b); \ 3790 (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 32); }) 3791 #define vrsraq_n_s16(a, b, __c) __extension__ ({ \ 3792 int16x8_t __a = (a); int16x8_t __b = (b); \ 3793 (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 3794 #define vrsraq_n_s32(a, b, __c) __extension__ ({ \ 3795 int32x4_t __a = (a); int32x4_t __b = (b); \ 3796 (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) 3797 #define vrsraq_n_s64(a, b, __c) __extension__ ({ \ 3798 int64x2_t __a = (a); int64x2_t __b = (b); \ 3799 (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) 3800 #define vrsraq_n_u8(a, b, __c) __extension__ ({ \ 3801 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 3802 (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) 3803 #define vrsraq_n_u16(a, b, __c) __extension__ ({ \ 3804 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 3805 (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) 3806 #define vrsraq_n_u32(a, b, __c) __extension__ ({ \ 3807 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 3808 (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) 3809 #define vrsraq_n_u64(a, b, __c) __extension__ ({ \ 3810 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 3811 (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) 3812 3813 __ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { 3814 return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 3815 __ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { 3816 return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 3817 __ai int32x2_t vrsubhn_s64(int64x2_t 
__a, int64x2_t __b) { 3818 return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 3819 __ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { 3820 return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } 3821 __ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { 3822 return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } 3823 __ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { 3824 return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } 3825 3826 #define vset_lane_u8(a, b, __c) __extension__ ({ \ 3827 uint8_t __a = (a); uint8x8_t __b = (b); \ 3828 (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); }) 3829 #define vset_lane_u16(a, b, __c) __extension__ ({ \ 3830 uint16_t __a = (a); uint16x4_t __b = (b); \ 3831 (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); }) 3832 #define vset_lane_u32(a, b, __c) __extension__ ({ \ 3833 uint32_t __a = (a); uint32x2_t __b = (b); \ 3834 (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); }) 3835 #define vset_lane_s8(a, b, __c) __extension__ ({ \ 3836 int8_t __a = (a); int8x8_t __b = (b); \ 3837 (int8x8_t)__builtin_neon_vset_lane_i8(__a, __b, __c); }) 3838 #define vset_lane_s16(a, b, __c) __extension__ ({ \ 3839 int16_t __a = (a); int16x4_t __b = (b); \ 3840 (int16x4_t)__builtin_neon_vset_lane_i16(__a, __b, __c); }) 3841 #define vset_lane_s32(a, b, __c) __extension__ ({ \ 3842 int32_t __a = (a); int32x2_t __b = (b); \ 3843 (int32x2_t)__builtin_neon_vset_lane_i32(__a, __b, __c); }) 3844 #define vset_lane_p8(a, b, __c) __extension__ ({ \ 3845 poly8_t __a = (a); poly8x8_t __b = (b); \ 3846 (poly8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); }) 3847 #define vset_lane_p16(a, b, __c) __extension__ ({ \ 3848 poly16_t __a = (a); poly16x4_t __b = (b); \ 3849 (poly16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); }) 3850 #define 
vset_lane_f32(a, b, __c) __extension__ ({ \ 3851 float32_t __a = (a); float32x2_t __b = (b); \ 3852 (float32x2_t)__builtin_neon_vset_lane_f32(__a, __b, __c); }) 3853 #define vsetq_lane_u8(a, b, __c) __extension__ ({ \ 3854 uint8_t __a = (a); uint8x16_t __b = (b); \ 3855 (uint8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) 3856 #define vsetq_lane_u16(a, b, __c) __extension__ ({ \ 3857 uint16_t __a = (a); uint16x8_t __b = (b); \ 3858 (uint16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) 3859 #define vsetq_lane_u32(a, b, __c) __extension__ ({ \ 3860 uint32_t __a = (a); uint32x4_t __b = (b); \ 3861 (uint32x4_t)__builtin_neon_vsetq_lane_i32(__a, (int32x4_t)__b, __c); }) 3862 #define vsetq_lane_s8(a, b, __c) __extension__ ({ \ 3863 int8_t __a = (a); int8x16_t __b = (b); \ 3864 (int8x16_t)__builtin_neon_vsetq_lane_i8(__a, __b, __c); }) 3865 #define vsetq_lane_s16(a, b, __c) __extension__ ({ \ 3866 int16_t __a = (a); int16x8_t __b = (b); \ 3867 (int16x8_t)__builtin_neon_vsetq_lane_i16(__a, __b, __c); }) 3868 #define vsetq_lane_s32(a, b, __c) __extension__ ({ \ 3869 int32_t __a = (a); int32x4_t __b = (b); \ 3870 (int32x4_t)__builtin_neon_vsetq_lane_i32(__a, __b, __c); }) 3871 #define vsetq_lane_p8(a, b, __c) __extension__ ({ \ 3872 poly8_t __a = (a); poly8x16_t __b = (b); \ 3873 (poly8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) 3874 #define vsetq_lane_p16(a, b, __c) __extension__ ({ \ 3875 poly16_t __a = (a); poly16x8_t __b = (b); \ 3876 (poly16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) 3877 #define vsetq_lane_f32(a, b, __c) __extension__ ({ \ 3878 float32_t __a = (a); float32x4_t __b = (b); \ 3879 (float32x4_t)__builtin_neon_vsetq_lane_f32(__a, __b, __c); }) 3880 #define vset_lane_s64(a, b, __c) __extension__ ({ \ 3881 int64_t __a = (a); int64x1_t __b = (b); \ 3882 (int64x1_t)__builtin_neon_vset_lane_i64(__a, __b, __c); }) 3883 #define vset_lane_u64(a, b, __c) __extension__ ({ \ 3884 uint64_t 
__a = (a); uint64x1_t __b = (b); \ 3885 (uint64x1_t)__builtin_neon_vset_lane_i64(__a, (int64x1_t)__b, __c); }) 3886 #define vsetq_lane_s64(a, b, __c) __extension__ ({ \ 3887 int64_t __a = (a); int64x2_t __b = (b); \ 3888 (int64x2_t)__builtin_neon_vsetq_lane_i64(__a, __b, __c); }) 3889 #define vsetq_lane_u64(a, b, __c) __extension__ ({ \ 3890 uint64_t __a = (a); uint64x2_t __b = (b); \ 3891 (uint64x2_t)__builtin_neon_vsetq_lane_i64(__a, (int64x2_t)__b, __c); }) 3892 3893 __ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { 3894 return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); } 3895 __ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { 3896 return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3897 __ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { 3898 return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3899 __ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) { 3900 return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 3901 __ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { 3902 return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 16); } 3903 __ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { 3904 return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } 3905 __ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) { 3906 return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } 3907 __ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) { 3908 return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } 3909 __ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { 3910 return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 32); } 3911 __ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { 3912 return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } 3913 __ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { 3914 return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } 
3915 __ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { 3916 return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } 3917 __ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { 3918 return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 48); } 3919 __ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) { 3920 return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 3921 __ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { 3922 return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 3923 __ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { 3924 return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } 3925 3926 #define vshll_n_s8(a, __b) __extension__ ({ \ 3927 int8x8_t __a = (a); \ 3928 (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 33); }) 3929 #define vshll_n_s16(a, __b) __extension__ ({ \ 3930 int16x4_t __a = (a); \ 3931 (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 34); }) 3932 #define vshll_n_s32(a, __b) __extension__ ({ \ 3933 int32x2_t __a = (a); \ 3934 (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 35); }) 3935 #define vshll_n_u8(a, __b) __extension__ ({ \ 3936 uint8x8_t __a = (a); \ 3937 (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 49); }) 3938 #define vshll_n_u16(a, __b) __extension__ ({ \ 3939 uint16x4_t __a = (a); \ 3940 (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 50); }) 3941 #define vshll_n_u32(a, __b) __extension__ ({ \ 3942 uint32x2_t __a = (a); \ 3943 (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 51); }) 3944 3945 #define vshl_n_s8(a, __b) __extension__ ({ \ 3946 int8x8_t __a = (a); \ 3947 (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); }) 3948 #define vshl_n_s16(a, __b) __extension__ ({ \ 3949 int16x4_t __a = (a); \ 3950 (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); }) 3951 #define vshl_n_s32(a, __b) __extension__ ({ \ 3952 int32x2_t __a = (a); \ 3953 
(int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); }) 3954 #define vshl_n_s64(a, __b) __extension__ ({ \ 3955 int64x1_t __a = (a); \ 3956 (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); }) 3957 #define vshl_n_u8(a, __b) __extension__ ({ \ 3958 uint8x8_t __a = (a); \ 3959 (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 16); }) 3960 #define vshl_n_u16(a, __b) __extension__ ({ \ 3961 uint16x4_t __a = (a); \ 3962 (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 17); }) 3963 #define vshl_n_u32(a, __b) __extension__ ({ \ 3964 uint32x2_t __a = (a); \ 3965 (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 18); }) 3966 #define vshl_n_u64(a, __b) __extension__ ({ \ 3967 uint64x1_t __a = (a); \ 3968 (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 19); }) 3969 #define vshlq_n_s8(a, __b) __extension__ ({ \ 3970 int8x16_t __a = (a); \ 3971 (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 32); }) 3972 #define vshlq_n_s16(a, __b) __extension__ ({ \ 3973 int16x8_t __a = (a); \ 3974 (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 33); }) 3975 #define vshlq_n_s32(a, __b) __extension__ ({ \ 3976 int32x4_t __a = (a); \ 3977 (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 34); }) 3978 #define vshlq_n_s64(a, __b) __extension__ ({ \ 3979 int64x2_t __a = (a); \ 3980 (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 35); }) 3981 #define vshlq_n_u8(a, __b) __extension__ ({ \ 3982 uint8x16_t __a = (a); \ 3983 (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 48); }) 3984 #define vshlq_n_u16(a, __b) __extension__ ({ \ 3985 uint16x8_t __a = (a); \ 3986 (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 49); }) 3987 #define vshlq_n_u32(a, __b) __extension__ ({ \ 3988 uint32x4_t __a = (a); \ 3989 (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 50); }) 3990 #define vshlq_n_u64(a, __b) __extension__ ({ \ 3991 uint64x2_t __a = (a); \ 3992 (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 51); }) 

/*
 * vshrn_n_*: macros over __builtin_neon_vshrn_n_v.
 *
 * The vector argument `a` is evaluated once into a typed local and handed
 * to the builtin as int8x16_t; `__b` is pasted into the call unchanged.
 * The value of the statement expression is the builtin's result cast to a
 * vector with the same lane count as the input but a narrower element
 * (e.g. int16x8_t in, int8x8_t out).
 *
 * NOTE(review): `__b` is presumably required to be a compile-time constant,
 * which would explain why it is not copied into a local -- confirm.
 * NOTE(review): the trailing constant looks like an element-type selector
 * for the builtin -- confirm against the builtin's definition.
 */
#define vshrn_n_s16(a, __b) \
  __extension__({ \
    int16x8_t __a = (a); \
    (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); \
  })

#define vshrn_n_s32(a, __b) \
  __extension__({ \
    int32x4_t __a = (a); \
    (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); \
  })

#define vshrn_n_s64(a, __b) \
  __extension__({ \
    int64x2_t __a = (a); \
    (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); \
  })

#define vshrn_n_u16(a, __b) \
  __extension__({ \
    uint16x8_t __a = (a); \
    (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 16); \
  })

#define vshrn_n_u32(a, __b) \
  __extension__({ \
    uint32x4_t __a = (a); \
    (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 17); \
  })

#define vshrn_n_u64(a, __b) \
  __extension__({ \
    uint64x2_t __a = (a); \
    (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 18); \
  })

/*
 * vshr_n_*: macros over __builtin_neon_vshr_n_v for the int8x8_t-sized
 * vector types.  `a` is copied into a typed local, passed to the builtin
 * as int8x8_t together with `__b` and a constant, and the result is cast
 * back to the type of `a`.
 */
#define vshr_n_s8(a, __b) \
  __extension__({ \
    int8x8_t __a = (a); \
    (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); \
  })

#define vshr_n_s16(a, __b) \
  __extension__({ \
    int16x4_t __a = (a); \
    (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); \
  })

#define vshr_n_s32(a, __b) \
  __extension__({ \
    int32x2_t __a = (a); \
    (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); \
  })

#define vshr_n_s64(a, __b) \
  __extension__({ \
    int64x1_t __a = (a); \
    (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); \
  })

#define vshr_n_u8(a, __b) \
  __extension__({ \
    uint8x8_t __a = (a); \
    (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 16); \
  })

#define vshr_n_u16(a, __b) \
  __extension__({ \
    uint16x4_t __a = (a); \
    (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 17); \
  })

#define vshr_n_u32(a, __b) \
  __extension__({ \
    uint32x2_t __a = (a); \
    (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 18); \
  })

#define vshr_n_u64(a, __b) \
  __extension__({ \
    uint64x1_t __a = (a); \
    (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 19); \
  })

/*
 * vshrq_n_*: the int8x16_t-sized counterparts, routed through
 * __builtin_neon_vshrq_n_v.
 */
#define vshrq_n_s8(a, __b) \
  __extension__({ \
    int8x16_t __a = (a); \
    (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 32); \
  })

#define vshrq_n_s16(a, __b) \
  __extension__({ \
    int16x8_t __a = (a); \
    (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 33); \
  })

#define vshrq_n_s32(a, __b) \
  __extension__({ \
    int32x4_t __a = (a); \
    (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 34); \
  })

#define vshrq_n_s64(a, __b) \
  __extension__({ \
    int64x2_t __a = (a); \
    (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 35); \
  })

#define vshrq_n_u8(a, __b) \
  __extension__({ \
    uint8x16_t __a = (a); \
    (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 48); \
  })

#define vshrq_n_u16(a, __b) \
  __extension__({ \
    uint16x8_t __a = (a); \
    (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 49); \
  })

#define vshrq_n_u32(a, __b) \
  __extension__({ \
    uint32x4_t __a = (a); \
    (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 50); \
  })

#define vshrq_n_u64(a, __b) \
  __extension__({ \
    uint64x2_t __a = (a); \
    (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 51); \
  })

/*
 * vsli_n_* (signed int8x8_t-sized variants): macros over
 * __builtin_neon_vsli_n_v.
 *
 * Both vector arguments `a` and `b` are evaluated once, in that order,
 * into typed locals and passed to the builtin as int8x8_t; `__c` is pasted
 * into the call unchanged, followed by a constant.  The result is cast
 * back to the type of `a`.
 */
#define vsli_n_s8(a, b, __c) \
  __extension__({ \
    int8x8_t __a = (a); \
    int8x8_t __b = (b); \
    (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); \
  })

#define vsli_n_s16(a, b, __c) \
  __extension__({ \
    int16x4_t __a = (a); \
    int16x4_t __b = (b); \
    (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); \
  })

#define vsli_n_s32(a, b, __c) \
  __extension__({ \
    int32x2_t __a = (a); \
    int32x2_t __b = (b); \
    (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); \
  })

#define vsli_n_s64(a, b, __c) \
  __extension__({ \
    int64x1_t __a = (a); \
    int64x1_t __b = (b); \
    (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); \
  })
#define vsli_n_u8(a, b, __c) __extension__ ({ \ 4075 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4076 (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) 4077 #define vsli_n_u16(a, b, __c) __extension__ ({ \ 4078 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4079 (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) 4080 #define vsli_n_u32(a, b, __c) __extension__ ({ \ 4081 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4082 (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) 4083 #define vsli_n_u64(a, b, __c) __extension__ ({ \ 4084 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4085 (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) 4086 #define vsli_n_p8(a, b, __c) __extension__ ({ \ 4087 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 4088 (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) 4089 #define vsli_n_p16(a, b, __c) __extension__ ({ \ 4090 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 4091 (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 4092 #define vsliq_n_s8(a, b, __c) __extension__ ({ \ 4093 int8x16_t __a = (a); int8x16_t __b = (b); \ 4094 (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 32); }) 4095 #define vsliq_n_s16(a, b, __c) __extension__ ({ \ 4096 int16x8_t __a = (a); int16x8_t __b = (b); \ 4097 (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 4098 #define vsliq_n_s32(a, b, __c) __extension__ ({ \ 4099 int32x4_t __a = (a); int32x4_t __b = (b); \ 4100 (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) 4101 #define vsliq_n_s64(a, b, __c) __extension__ ({ \ 4102 int64x2_t __a = (a); int64x2_t __b = (b); \ 4103 (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) 4104 #define vsliq_n_u8(a, b, __c) __extension__ ({ \ 4105 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4106 
(uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) 4107 #define vsliq_n_u16(a, b, __c) __extension__ ({ \ 4108 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4109 (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) 4110 #define vsliq_n_u32(a, b, __c) __extension__ ({ \ 4111 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4112 (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) 4113 #define vsliq_n_u64(a, b, __c) __extension__ ({ \ 4114 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4115 (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) 4116 #define vsliq_n_p8(a, b, __c) __extension__ ({ \ 4117 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 4118 (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) 4119 #define vsliq_n_p16(a, b, __c) __extension__ ({ \ 4120 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 4121 (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) 4122 4123 #define vsra_n_s8(a, b, __c) __extension__ ({ \ 4124 int8x8_t __a = (a); int8x8_t __b = (b); \ 4125 (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); }) 4126 #define vsra_n_s16(a, b, __c) __extension__ ({ \ 4127 int16x4_t __a = (a); int16x4_t __b = (b); \ 4128 (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 4129 #define vsra_n_s32(a, b, __c) __extension__ ({ \ 4130 int32x2_t __a = (a); int32x2_t __b = (b); \ 4131 (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 4132 #define vsra_n_s64(a, b, __c) __extension__ ({ \ 4133 int64x1_t __a = (a); int64x1_t __b = (b); \ 4134 (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 4135 #define vsra_n_u8(a, b, __c) __extension__ ({ \ 4136 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4137 (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) 4138 #define vsra_n_u16(a, b, __c) 
__extension__ ({ \ 4139 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4140 (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) 4141 #define vsra_n_u32(a, b, __c) __extension__ ({ \ 4142 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4143 (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) 4144 #define vsra_n_u64(a, b, __c) __extension__ ({ \ 4145 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4146 (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) 4147 #define vsraq_n_s8(a, b, __c) __extension__ ({ \ 4148 int8x16_t __a = (a); int8x16_t __b = (b); \ 4149 (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 32); }) 4150 #define vsraq_n_s16(a, b, __c) __extension__ ({ \ 4151 int16x8_t __a = (a); int16x8_t __b = (b); \ 4152 (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 4153 #define vsraq_n_s32(a, b, __c) __extension__ ({ \ 4154 int32x4_t __a = (a); int32x4_t __b = (b); \ 4155 (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) 4156 #define vsraq_n_s64(a, b, __c) __extension__ ({ \ 4157 int64x2_t __a = (a); int64x2_t __b = (b); \ 4158 (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) 4159 #define vsraq_n_u8(a, b, __c) __extension__ ({ \ 4160 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4161 (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) 4162 #define vsraq_n_u16(a, b, __c) __extension__ ({ \ 4163 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4164 (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) 4165 #define vsraq_n_u32(a, b, __c) __extension__ ({ \ 4166 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4167 (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) 4168 #define vsraq_n_u64(a, b, __c) __extension__ ({ \ 4169 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4170 
(uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) 4171 4172 #define vsri_n_s8(a, b, __c) __extension__ ({ \ 4173 int8x8_t __a = (a); int8x8_t __b = (b); \ 4174 (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); }) 4175 #define vsri_n_s16(a, b, __c) __extension__ ({ \ 4176 int16x4_t __a = (a); int16x4_t __b = (b); \ 4177 (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 4178 #define vsri_n_s32(a, b, __c) __extension__ ({ \ 4179 int32x2_t __a = (a); int32x2_t __b = (b); \ 4180 (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 4181 #define vsri_n_s64(a, b, __c) __extension__ ({ \ 4182 int64x1_t __a = (a); int64x1_t __b = (b); \ 4183 (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 4184 #define vsri_n_u8(a, b, __c) __extension__ ({ \ 4185 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4186 (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) 4187 #define vsri_n_u16(a, b, __c) __extension__ ({ \ 4188 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4189 (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) 4190 #define vsri_n_u32(a, b, __c) __extension__ ({ \ 4191 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4192 (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) 4193 #define vsri_n_u64(a, b, __c) __extension__ ({ \ 4194 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4195 (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) 4196 #define vsri_n_p8(a, b, __c) __extension__ ({ \ 4197 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 4198 (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) 4199 #define vsri_n_p16(a, b, __c) __extension__ ({ \ 4200 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 4201 (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 4202 #define vsriq_n_s8(a, b, __c) __extension__ ({ \ 4203 int8x16_t __a 
= (a); int8x16_t __b = (b); \ 4204 (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 32); }) 4205 #define vsriq_n_s16(a, b, __c) __extension__ ({ \ 4206 int16x8_t __a = (a); int16x8_t __b = (b); \ 4207 (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 4208 #define vsriq_n_s32(a, b, __c) __extension__ ({ \ 4209 int32x4_t __a = (a); int32x4_t __b = (b); \ 4210 (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) 4211 #define vsriq_n_s64(a, b, __c) __extension__ ({ \ 4212 int64x2_t __a = (a); int64x2_t __b = (b); \ 4213 (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) 4214 #define vsriq_n_u8(a, b, __c) __extension__ ({ \ 4215 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4216 (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) 4217 #define vsriq_n_u16(a, b, __c) __extension__ ({ \ 4218 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4219 (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) 4220 #define vsriq_n_u32(a, b, __c) __extension__ ({ \ 4221 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4222 (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) 4223 #define vsriq_n_u64(a, b, __c) __extension__ ({ \ 4224 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4225 (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) 4226 #define vsriq_n_p8(a, b, __c) __extension__ ({ \ 4227 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 4228 (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) 4229 #define vsriq_n_p16(a, b, __c) __extension__ ({ \ 4230 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 4231 (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) 4232 4233 #define vst1q_u8(__a, b) __extension__ ({ \ 4234 uint8x16_t __b = (b); \ 4235 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 48); }) 4236 #define vst1q_u16(__a, b) 
__extension__ ({ \ 4237 uint16x8_t __b = (b); \ 4238 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 49); }) 4239 #define vst1q_u32(__a, b) __extension__ ({ \ 4240 uint32x4_t __b = (b); \ 4241 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 50); }) 4242 #define vst1q_u64(__a, b) __extension__ ({ \ 4243 uint64x2_t __b = (b); \ 4244 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 51); }) 4245 #define vst1q_s8(__a, b) __extension__ ({ \ 4246 int8x16_t __b = (b); \ 4247 __builtin_neon_vst1q_v(__a, __b, 32); }) 4248 #define vst1q_s16(__a, b) __extension__ ({ \ 4249 int16x8_t __b = (b); \ 4250 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 33); }) 4251 #define vst1q_s32(__a, b) __extension__ ({ \ 4252 int32x4_t __b = (b); \ 4253 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 34); }) 4254 #define vst1q_s64(__a, b) __extension__ ({ \ 4255 int64x2_t __b = (b); \ 4256 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 35); }) 4257 #define vst1q_f16(__a, b) __extension__ ({ \ 4258 float16x8_t __b = (b); \ 4259 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 38); }) 4260 #define vst1q_f32(__a, b) __extension__ ({ \ 4261 float32x4_t __b = (b); \ 4262 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 39); }) 4263 #define vst1q_p8(__a, b) __extension__ ({ \ 4264 poly8x16_t __b = (b); \ 4265 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 36); }) 4266 #define vst1q_p16(__a, b) __extension__ ({ \ 4267 poly16x8_t __b = (b); \ 4268 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 37); }) 4269 #define vst1_u8(__a, b) __extension__ ({ \ 4270 uint8x8_t __b = (b); \ 4271 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 16); }) 4272 #define vst1_u16(__a, b) __extension__ ({ \ 4273 uint16x4_t __b = (b); \ 4274 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 17); }) 4275 #define vst1_u32(__a, b) __extension__ ({ \ 4276 uint32x2_t __b = (b); \ 4277 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 18); }) 4278 #define vst1_u64(__a, b) __extension__ ({ \ 4279 uint64x1_t __b = (b); \ 4280 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 19); }) 4281 #define 
vst1_s8(__a, b) __extension__ ({ \ 4282 int8x8_t __b = (b); \ 4283 __builtin_neon_vst1_v(__a, __b, 0); }) 4284 #define vst1_s16(__a, b) __extension__ ({ \ 4285 int16x4_t __b = (b); \ 4286 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); }) 4287 #define vst1_s32(__a, b) __extension__ ({ \ 4288 int32x2_t __b = (b); \ 4289 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); }) 4290 #define vst1_s64(__a, b) __extension__ ({ \ 4291 int64x1_t __b = (b); \ 4292 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); }) 4293 #define vst1_f16(__a, b) __extension__ ({ \ 4294 float16x4_t __b = (b); \ 4295 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); }) 4296 #define vst1_f32(__a, b) __extension__ ({ \ 4297 float32x2_t __b = (b); \ 4298 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); }) 4299 #define vst1_p8(__a, b) __extension__ ({ \ 4300 poly8x8_t __b = (b); \ 4301 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); }) 4302 #define vst1_p16(__a, b) __extension__ ({ \ 4303 poly16x4_t __b = (b); \ 4304 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); }) 4305 4306 #define vst1q_lane_u8(__a, b, __c) __extension__ ({ \ 4307 uint8x16_t __b = (b); \ 4308 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) 4309 #define vst1q_lane_u16(__a, b, __c) __extension__ ({ \ 4310 uint16x8_t __b = (b); \ 4311 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) 4312 #define vst1q_lane_u32(__a, b, __c) __extension__ ({ \ 4313 uint32x4_t __b = (b); \ 4314 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) 4315 #define vst1q_lane_u64(__a, b, __c) __extension__ ({ \ 4316 uint64x2_t __b = (b); \ 4317 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) 4318 #define vst1q_lane_s8(__a, b, __c) __extension__ ({ \ 4319 int8x16_t __b = (b); \ 4320 __builtin_neon_vst1q_lane_v(__a, __b, __c, 32); }) 4321 #define vst1q_lane_s16(__a, b, __c) __extension__ ({ \ 4322 int16x8_t __b = (b); \ 4323 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) 4324 #define vst1q_lane_s32(__a, b, __c) 
__extension__ ({ \ 4325 int32x4_t __b = (b); \ 4326 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) 4327 #define vst1q_lane_s64(__a, b, __c) __extension__ ({ \ 4328 int64x2_t __b = (b); \ 4329 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) 4330 #define vst1q_lane_f16(__a, b, __c) __extension__ ({ \ 4331 float16x8_t __b = (b); \ 4332 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) 4333 #define vst1q_lane_f32(__a, b, __c) __extension__ ({ \ 4334 float32x4_t __b = (b); \ 4335 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) 4336 #define vst1q_lane_p8(__a, b, __c) __extension__ ({ \ 4337 poly8x16_t __b = (b); \ 4338 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) 4339 #define vst1q_lane_p16(__a, b, __c) __extension__ ({ \ 4340 poly16x8_t __b = (b); \ 4341 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) 4342 #define vst1_lane_u8(__a, b, __c) __extension__ ({ \ 4343 uint8x8_t __b = (b); \ 4344 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); }) 4345 #define vst1_lane_u16(__a, b, __c) __extension__ ({ \ 4346 uint16x4_t __b = (b); \ 4347 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); }) 4348 #define vst1_lane_u32(__a, b, __c) __extension__ ({ \ 4349 uint32x2_t __b = (b); \ 4350 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); }) 4351 #define vst1_lane_u64(__a, b, __c) __extension__ ({ \ 4352 uint64x1_t __b = (b); \ 4353 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); }) 4354 #define vst1_lane_s8(__a, b, __c) __extension__ ({ \ 4355 int8x8_t __b = (b); \ 4356 __builtin_neon_vst1_lane_v(__a, __b, __c, 0); }) 4357 #define vst1_lane_s16(__a, b, __c) __extension__ ({ \ 4358 int16x4_t __b = (b); \ 4359 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); }) 4360 #define vst1_lane_s32(__a, b, __c) __extension__ ({ \ 4361 int32x2_t __b = (b); \ 4362 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); }) 4363 #define vst1_lane_s64(__a, b, __c) __extension__ 
({ \ 4364 int64x1_t __b = (b); \ 4365 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); }) 4366 #define vst1_lane_f16(__a, b, __c) __extension__ ({ \ 4367 float16x4_t __b = (b); \ 4368 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); }) 4369 #define vst1_lane_f32(__a, b, __c) __extension__ ({ \ 4370 float32x2_t __b = (b); \ 4371 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); }) 4372 #define vst1_lane_p8(__a, b, __c) __extension__ ({ \ 4373 poly8x8_t __b = (b); \ 4374 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); }) 4375 #define vst1_lane_p16(__a, b, __c) __extension__ ({ \ 4376 poly16x4_t __b = (b); \ 4377 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); }) 4378 4379 #define vst2q_u8(__a, b) __extension__ ({ \ 4380 uint8x16x2_t __b = (b); \ 4381 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); }) 4382 #define vst2q_u16(__a, b) __extension__ ({ \ 4383 uint16x8x2_t __b = (b); \ 4384 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); }) 4385 #define vst2q_u32(__a, b) __extension__ ({ \ 4386 uint32x4x2_t __b = (b); \ 4387 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); }) 4388 #define vst2q_s8(__a, b) __extension__ ({ \ 4389 int8x16x2_t __b = (b); \ 4390 __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 32); }) 4391 #define vst2q_s16(__a, b) __extension__ ({ \ 4392 int16x8x2_t __b = (b); \ 4393 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); }) 4394 #define vst2q_s32(__a, b) __extension__ ({ \ 4395 int32x4x2_t __b = (b); \ 4396 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); }) 4397 #define vst2q_f16(__a, b) __extension__ ({ \ 4398 float16x8x2_t __b = (b); \ 4399 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 38); }) 4400 #define vst2q_f32(__a, b) __extension__ ({ \ 4401 float32x4x2_t __b = (b); \ 4402 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], 
(int8x16_t)__b.val[1], 39); }) 4403 #define vst2q_p8(__a, b) __extension__ ({ \ 4404 poly8x16x2_t __b = (b); \ 4405 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); }) 4406 #define vst2q_p16(__a, b) __extension__ ({ \ 4407 poly16x8x2_t __b = (b); \ 4408 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); }) 4409 #define vst2_u8(__a, b) __extension__ ({ \ 4410 uint8x8x2_t __b = (b); \ 4411 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); }) 4412 #define vst2_u16(__a, b) __extension__ ({ \ 4413 uint16x4x2_t __b = (b); \ 4414 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); }) 4415 #define vst2_u32(__a, b) __extension__ ({ \ 4416 uint32x2x2_t __b = (b); \ 4417 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); }) 4418 #define vst2_u64(__a, b) __extension__ ({ \ 4419 uint64x1x2_t __b = (b); \ 4420 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); }) 4421 #define vst2_s8(__a, b) __extension__ ({ \ 4422 int8x8x2_t __b = (b); \ 4423 __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); }) 4424 #define vst2_s16(__a, b) __extension__ ({ \ 4425 int16x4x2_t __b = (b); \ 4426 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); }) 4427 #define vst2_s32(__a, b) __extension__ ({ \ 4428 int32x2x2_t __b = (b); \ 4429 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); }) 4430 #define vst2_s64(__a, b) __extension__ ({ \ 4431 int64x1x2_t __b = (b); \ 4432 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); }) 4433 #define vst2_f16(__a, b) __extension__ ({ \ 4434 float16x4x2_t __b = (b); \ 4435 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); }) 4436 #define vst2_f32(__a, b) __extension__ ({ \ 4437 float32x2x2_t __b = (b); \ 4438 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); }) 4439 #define vst2_p8(__a, 
b) __extension__ ({ \ 4440 poly8x8x2_t __b = (b); \ 4441 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); }) 4442 #define vst2_p16(__a, b) __extension__ ({ \ 4443 poly16x4x2_t __b = (b); \ 4444 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); }) 4445 4446 /* vst2q_lane_<type>(__a, b, __c): b is copied once into a local __b; __a, both members cast to int8x16_t, __c, and a per-type integer literal are forwarded to __builtin_neon_vst2q_lane_v. NOTE(review): __c is presumably the lane index and is expected to be a constant - confirm against the builtin's requirements. */ #define vst2q_lane_u16(__a, b, __c) __extension__ ({ \ 4447 uint16x8x2_t __b = (b); \ 4448 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); }) 4449 #define vst2q_lane_u32(__a, b, __c) __extension__ ({ \ 4450 uint32x4x2_t __b = (b); \ 4451 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); }) 4452 #define vst2q_lane_s16(__a, b, __c) __extension__ ({ \ 4453 int16x8x2_t __b = (b); \ 4454 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); }) 4455 #define vst2q_lane_s32(__a, b, __c) __extension__ ({ \ 4456 int32x4x2_t __b = (b); \ 4457 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); }) 4458 #define vst2q_lane_f16(__a, b, __c) __extension__ ({ \ 4459 float16x8x2_t __b = (b); \ 4460 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); }) 4461 #define vst2q_lane_f32(__a, b, __c) __extension__ ({ \ 4462 float32x4x2_t __b = (b); \ 4463 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); }) 4464 #define vst2q_lane_p16(__a, b, __c) __extension__ ({ \ 4465 poly16x8x2_t __b = (b); \ 4466 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); }) 4467 /* vst2_lane_<type>(__a, b, __c): same argument order as the q forms, with members cast to int8x8_t and forwarded to __builtin_neon_vst2_lane_v. */ #define vst2_lane_u8(__a, b, __c) __extension__ ({ \ 4468 uint8x8x2_t __b = (b); \ 4469 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); }) 4470 #define vst2_lane_u16(__a, b, __c) __extension__ ({ \ 4471 uint16x4x2_t __b = (b); \ 4472 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); }) 4473 #define 
vst2_lane_u32(__a, b, __c) __extension__ ({ \ 4474 uint32x2x2_t __b = (b); \ 4475 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); }) 4476 /* The s8 variant forwards __b.val[0] and __b.val[1] without a cast; every other variant casts them to int8x8_t first. */ #define vst2_lane_s8(__a, b, __c) __extension__ ({ \ 4477 int8x8x2_t __b = (b); \ 4478 __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); }) 4479 #define vst2_lane_s16(__a, b, __c) __extension__ ({ \ 4480 int16x4x2_t __b = (b); \ 4481 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); }) 4482 #define vst2_lane_s32(__a, b, __c) __extension__ ({ \ 4483 int32x2x2_t __b = (b); \ 4484 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); }) 4485 #define vst2_lane_f16(__a, b, __c) __extension__ ({ \ 4486 float16x4x2_t __b = (b); \ 4487 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); }) 4488 #define vst2_lane_f32(__a, b, __c) __extension__ ({ \ 4489 float32x2x2_t __b = (b); \ 4490 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); }) 4491 #define vst2_lane_p8(__a, b, __c) __extension__ ({ \ 4492 poly8x8x2_t __b = (b); \ 4493 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); }) 4494 #define vst2_lane_p16(__a, b, __c) __extension__ ({ \ 4495 poly16x4x2_t __b = (b); \ 4496 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); }) 4497 4498 /* vst3q_<type>(__a, b): b is copied once into a local three-vector struct __b; __a, the three members cast to int8x16_t, and a per-type integer literal are forwarded to __builtin_neon_vst3q_v. */ #define vst3q_u8(__a, b) __extension__ ({ \ 4499 uint8x16x3_t __b = (b); \ 4500 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); }) 4501 #define vst3q_u16(__a, b) __extension__ ({ \ 4502 uint16x8x3_t __b = (b); \ 4503 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); }) 4504 #define vst3q_u32(__a, b) __extension__ ({ \ 4505 uint32x4x3_t __b = (b); \ 4506 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 
50); }) 4507 /* The s8 variant passes its three members without a cast; the remaining vst3q_* variants cast each member to int8x16_t. */ #define vst3q_s8(__a, b) __extension__ ({ \ 4508 int8x16x3_t __b = (b); \ 4509 __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); }) 4510 #define vst3q_s16(__a, b) __extension__ ({ \ 4511 int16x8x3_t __b = (b); \ 4512 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); }) 4513 #define vst3q_s32(__a, b) __extension__ ({ \ 4514 int32x4x3_t __b = (b); \ 4515 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); }) 4516 #define vst3q_f16(__a, b) __extension__ ({ \ 4517 float16x8x3_t __b = (b); \ 4518 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 38); }) 4519 #define vst3q_f32(__a, b) __extension__ ({ \ 4520 float32x4x3_t __b = (b); \ 4521 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); }) 4522 #define vst3q_p8(__a, b) __extension__ ({ \ 4523 poly8x16x3_t __b = (b); \ 4524 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); }) 4525 #define vst3q_p16(__a, b) __extension__ ({ \ 4526 poly16x8x3_t __b = (b); \ 4527 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); }) 4528 /* vst3_<type>(__a, b): b is copied once into a local __b; __a, the three members cast to int8x8_t, and a per-type integer literal are forwarded to __builtin_neon_vst3_v. */ #define vst3_u8(__a, b) __extension__ ({ \ 4529 uint8x8x3_t __b = (b); \ 4530 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); }) 4531 #define vst3_u16(__a, b) __extension__ ({ \ 4532 uint16x4x3_t __b = (b); \ 4533 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); }) 4534 #define vst3_u32(__a, b) __extension__ ({ \ 4535 uint32x2x3_t __b = (b); \ 4536 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); }) 4537 #define vst3_u64(__a, b) __extension__ ({ \ 4538 uint64x1x3_t __b = (b); \ 4539 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], 
(int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); }) 4540 /* The s8 variant passes its three members without a cast; the remaining vst3_* variants cast each member to int8x8_t. */ #define vst3_s8(__a, b) __extension__ ({ \ 4541 int8x8x3_t __b = (b); \ 4542 __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); }) 4543 #define vst3_s16(__a, b) __extension__ ({ \ 4544 int16x4x3_t __b = (b); \ 4545 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); }) 4546 #define vst3_s32(__a, b) __extension__ ({ \ 4547 int32x2x3_t __b = (b); \ 4548 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); }) 4549 #define vst3_s64(__a, b) __extension__ ({ \ 4550 int64x1x3_t __b = (b); \ 4551 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); }) 4552 #define vst3_f16(__a, b) __extension__ ({ \ 4553 float16x4x3_t __b = (b); \ 4554 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); }) 4555 #define vst3_f32(__a, b) __extension__ ({ \ 4556 float32x2x3_t __b = (b); \ 4557 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); }) 4558 #define vst3_p8(__a, b) __extension__ ({ \ 4559 poly8x8x3_t __b = (b); \ 4560 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); }) 4561 #define vst3_p16(__a, b) __extension__ ({ \ 4562 poly16x4x3_t __b = (b); \ 4563 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); }) 4564 4565 /* vst3q_lane_<type>(__a, b, __c): b is copied once into a local __b; __a, the three members cast to int8x16_t, __c, and a per-type integer literal are forwarded to __builtin_neon_vst3q_lane_v. NOTE(review): __c is presumably a constant lane index - confirm. */ #define vst3q_lane_u16(__a, b, __c) __extension__ ({ \ 4566 uint16x8x3_t __b = (b); \ 4567 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); }) 4568 #define vst3q_lane_u32(__a, b, __c) __extension__ ({ \ 4569 uint32x4x3_t __b = (b); \ 4570 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); }) 4571 #define vst3q_lane_s16(__a, b, __c) __extension__ ({ \ 4572 int16x8x3_t 
__b = (b); \ 4573 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); }) 4574 #define vst3q_lane_s32(__a, b, __c) __extension__ ({ \ 4575 int32x4x3_t __b = (b); \ 4576 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); }) 4577 #define vst3q_lane_f16(__a, b, __c) __extension__ ({ \ 4578 float16x8x3_t __b = (b); \ 4579 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); }) 4580 #define vst3q_lane_f32(__a, b, __c) __extension__ ({ \ 4581 float32x4x3_t __b = (b); \ 4582 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); }) 4583 #define vst3q_lane_p16(__a, b, __c) __extension__ ({ \ 4584 poly16x8x3_t __b = (b); \ 4585 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); }) 4586 /* vst3_lane_<type>(__a, b, __c): b is copied once into a local __b; __a, the three members (cast to int8x8_t, except in the s8 variant), __c, and a per-type integer literal are forwarded to __builtin_neon_vst3_lane_v. */ #define vst3_lane_u8(__a, b, __c) __extension__ ({ \ 4587 uint8x8x3_t __b = (b); \ 4588 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); }) 4589 #define vst3_lane_u16(__a, b, __c) __extension__ ({ \ 4590 uint16x4x3_t __b = (b); \ 4591 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); }) 4592 #define vst3_lane_u32(__a, b, __c) __extension__ ({ \ 4593 uint32x2x3_t __b = (b); \ 4594 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); }) 4595 #define vst3_lane_s8(__a, b, __c) __extension__ ({ \ 4596 int8x8x3_t __b = (b); \ 4597 __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }) 4598 #define vst3_lane_s16(__a, b, __c) __extension__ ({ \ 4599 int16x4x3_t __b = (b); \ 4600 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); }) 4601 #define 
vst3_lane_s32(__a, b, __c) __extension__ ({ \ 4602 int32x2x3_t __b = (b); \ 4603 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); }) 4604 #define vst3_lane_f16(__a, b, __c) __extension__ ({ \ 4605 float16x4x3_t __b = (b); \ 4606 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); }) 4607 #define vst3_lane_f32(__a, b, __c) __extension__ ({ \ 4608 float32x2x3_t __b = (b); \ 4609 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); }) 4610 #define vst3_lane_p8(__a, b, __c) __extension__ ({ \ 4611 poly8x8x3_t __b = (b); \ 4612 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); }) 4613 #define vst3_lane_p16(__a, b, __c) __extension__ ({ \ 4614 poly16x4x3_t __b = (b); \ 4615 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); }) 4616 4617 /* vst4q_<type>(__a, b): b is copied once into a local four-vector struct __b; __a, the four members (cast to int8x16_t, except in the s8 variant), and a per-type integer literal are forwarded to __builtin_neon_vst4q_v. */ #define vst4q_u8(__a, b) __extension__ ({ \ 4618 uint8x16x4_t __b = (b); \ 4619 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); }) 4620 #define vst4q_u16(__a, b) __extension__ ({ \ 4621 uint16x8x4_t __b = (b); \ 4622 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); }) 4623 #define vst4q_u32(__a, b) __extension__ ({ \ 4624 uint32x4x4_t __b = (b); \ 4625 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); }) 4626 #define vst4q_s8(__a, b) __extension__ ({ \ 4627 int8x16x4_t __b = (b); \ 4628 __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); }) 4629 #define vst4q_s16(__a, b) __extension__ ({ \ 4630 int16x8x4_t __b = (b); \ 4631 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 
(int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); }) 4632 #define vst4q_s32(__a, b) __extension__ ({ \ 4633 int32x4x4_t __b = (b); \ 4634 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); }) 4635 #define vst4q_f16(__a, b) __extension__ ({ \ 4636 float16x8x4_t __b = (b); \ 4637 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 38); }) 4638 #define vst4q_f32(__a, b) __extension__ ({ \ 4639 float32x4x4_t __b = (b); \ 4640 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); }) 4641 #define vst4q_p8(__a, b) __extension__ ({ \ 4642 poly8x16x4_t __b = (b); \ 4643 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); }) 4644 #define vst4q_p16(__a, b) __extension__ ({ \ 4645 poly16x8x4_t __b = (b); \ 4646 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); }) 4647 /* vst4_<type>(__a, b): b is copied once into a local __b; __a, the four members cast to int8x8_t, and a per-type integer literal are forwarded to __builtin_neon_vst4_v. */ #define vst4_u8(__a, b) __extension__ ({ \ 4648 uint8x8x4_t __b = (b); \ 4649 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); }) 4650 #define vst4_u16(__a, b) __extension__ ({ \ 4651 uint16x4x4_t __b = (b); \ 4652 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); }) 4653 #define vst4_u32(__a, b) __extension__ ({ \ 4654 uint32x2x4_t __b = (b); \ 4655 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); }) 4656 #define vst4_u64(__a, b) __extension__ ({ \ 4657 uint64x1x4_t __b = (b); \ 4658 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); }) 4659 #define vst4_s8(__a, b) __extension__ ({ 
\ 4660 int8x8x4_t __b = (b); \ 4661 __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); }) 4662 /* Remaining vst4_<type> variants: each casts the four members of the local copy to int8x8_t before calling __builtin_neon_vst4_v with its own integer literal. */ #define vst4_s16(__a, b) __extension__ ({ \ 4663 int16x4x4_t __b = (b); \ 4664 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); }) 4665 #define vst4_s32(__a, b) __extension__ ({ \ 4666 int32x2x4_t __b = (b); \ 4667 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); }) 4668 #define vst4_s64(__a, b) __extension__ ({ \ 4669 int64x1x4_t __b = (b); \ 4670 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); }) 4671 #define vst4_f16(__a, b) __extension__ ({ \ 4672 float16x4x4_t __b = (b); \ 4673 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); }) 4674 #define vst4_f32(__a, b) __extension__ ({ \ 4675 float32x2x4_t __b = (b); \ 4676 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); }) 4677 #define vst4_p8(__a, b) __extension__ ({ \ 4678 poly8x8x4_t __b = (b); \ 4679 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); }) 4680 #define vst4_p16(__a, b) __extension__ ({ \ 4681 poly16x4x4_t __b = (b); \ 4682 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); }) 4683 4684 /* vst4q_lane_<type>(__a, b, __c): b is copied once into a local __b; __a, the four members cast to int8x16_t, __c, and a per-type integer literal are forwarded to __builtin_neon_vst4q_lane_v. NOTE(review): __c is presumably a constant lane index - confirm. */ #define vst4q_lane_u16(__a, b, __c) __extension__ ({ \ 4685 uint16x8x4_t __b = (b); \ 4686 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); }) 4687 #define vst4q_lane_u32(__a, b, __c) __extension__ ({ \ 4688 uint32x4x4_t __b = (b); \ 4689 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 
(int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); }) 4690 #define vst4q_lane_s16(__a, b, __c) __extension__ ({ \ 4691 int16x8x4_t __b = (b); \ 4692 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); }) 4693 #define vst4q_lane_s32(__a, b, __c) __extension__ ({ \ 4694 int32x4x4_t __b = (b); \ 4695 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); }) 4696 #define vst4q_lane_f16(__a, b, __c) __extension__ ({ \ 4697 float16x8x4_t __b = (b); \ 4698 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); }) 4699 #define vst4q_lane_f32(__a, b, __c) __extension__ ({ \ 4700 float32x4x4_t __b = (b); \ 4701 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); }) 4702 #define vst4q_lane_p16(__a, b, __c) __extension__ ({ \ 4703 poly16x8x4_t __b = (b); \ 4704 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); }) 4705 /* vst4_lane_<type>(__a, b, __c): b is copied once into a local __b; __a, the four members cast to int8x8_t, __c, and a per-type integer literal are forwarded to __builtin_neon_vst4_lane_v. */ #define vst4_lane_u8(__a, b, __c) __extension__ ({ \ 4706 uint8x8x4_t __b = (b); \ 4707 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); }) 4708 #define vst4_lane_u16(__a, b, __c) __extension__ ({ \ 4709 uint16x4x4_t __b = (b); \ 4710 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); }) 4711 #define vst4_lane_u32(__a, b, __c) __extension__ ({ \ 4712 uint32x2x4_t __b = (b); \ 4713 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); }) 4714 #define vst4_lane_s8(__a, b, __c) __extension__ ({ \ 4715 int8x8x4_t __b = (b); \ 
4716 __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }) 4717 #define vst4_lane_s16(__a, b, __c) __extension__ ({ \ 4718 int16x4x4_t __b = (b); \ 4719 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); }) 4720 #define vst4_lane_s32(__a, b, __c) __extension__ ({ \ 4721 int32x2x4_t __b = (b); \ 4722 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); }) 4723 #define vst4_lane_f16(__a, b, __c) __extension__ ({ \ 4724 float16x4x4_t __b = (b); \ 4725 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); }) 4726 #define vst4_lane_f32(__a, b, __c) __extension__ ({ \ 4727 float32x2x4_t __b = (b); \ 4728 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); }) 4729 #define vst4_lane_p8(__a, b, __c) __extension__ ({ \ 4730 poly8x8x4_t __b = (b); \ 4731 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); }) 4732 #define vst4_lane_p16(__a, b, __c) __extension__ ({ \ 4733 poly16x4x4_t __b = (b); \ 4734 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); }) 4735 4736 /* vsub_<type>(__a, __b): returns __a - __b using the vector subtraction operator directly; no builtin call is involved. Operands and result share one vector type. */ __ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { 4737 return __a - __b; } 4738 __ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { 4739 return __a - __b; } 4740 __ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { 4741 return __a - __b; } 4742 __ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { 4743 return __a - __b; } 4744 __ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { 4745 return __a - __b; } 4746 __ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) { 4747 return __a - __b; } 4748 __ai uint16x4_t 
vsub_u16(uint16x4_t __a, uint16x4_t __b) { 4749 return __a - __b; } 4750 __ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) { 4751 return __a - __b; } 4752 __ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) { 4753 return __a - __b; } 4754 /* vsubq_<type>(__a, __b): same as the vsub_* forms (plain __a - __b) on the wider x16/x8/x4/x2 vector types. */ __ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) { 4755 return __a - __b; } 4756 __ai int16x8_t vsubq_s16(int16x8_t __a, int16x8_t __b) { 4757 return __a - __b; } 4758 __ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) { 4759 return __a - __b; } 4760 __ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) { 4761 return __a - __b; } 4762 __ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) { 4763 return __a - __b; } 4764 __ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) { 4765 return __a - __b; } 4766 __ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) { 4767 return __a - __b; } 4768 __ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) { 4769 return __a - __b; } 4770 __ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { 4771 return __a - __b; } 4772 4773 /* vsubhn_<type>(__a, __b): both operands are cast to int8x16_t and passed with a per-type integer literal to __builtin_neon_vsubhn_v; the result is cast to the return type, whose elements are half the width of the operands' elements. NOTE(review): presumably "subtract and keep the high half of each element" per the intrinsic name - the arithmetic lives in the builtin, confirm there. */ __ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { 4774 return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 4775 __ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { 4776 return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 4777 __ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { 4778 return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 4779 __ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { 4780 return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } 4781 __ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { 4782 return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } 4783 __ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { 4784 return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } 4785 4786 __ai int16x8_t vsubl_s8(int8x8_t __a, 
int8x8_t __b) { 4787 return vmovl_s8(__a) - vmovl_s8(__b); } 4788 /* vsubl_<type>(__a, __b): passes each operand through the matching vmovl_<type> and subtracts the two results; the return type has elements twice as wide as the operands'. */ __ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { 4789 return vmovl_s16(__a) - vmovl_s16(__b); } 4790 __ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { 4791 return vmovl_s32(__a) - vmovl_s32(__b); } 4792 __ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { 4793 return vmovl_u8(__a) - vmovl_u8(__b); } 4794 __ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { 4795 return vmovl_u16(__a) - vmovl_u16(__b); } 4796 __ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { 4797 return vmovl_u32(__a) - vmovl_u32(__b); } 4798 4799 /* vsubw_<type>(__a, __b): __a already has the wide type; only __b is passed through vmovl_<type> before the subtraction. */ __ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { 4800 return __a - vmovl_s8(__b); } 4801 __ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { 4802 return __a - vmovl_s16(__b); } 4803 __ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { 4804 return __a - vmovl_s32(__b); } 4805 __ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { 4806 return __a - vmovl_u8(__b); } 4807 __ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { 4808 return __a - vmovl_u16(__b); } 4809 __ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { 4810 return __a - vmovl_u32(__b); } 4811 4812 /* vtblN_<type>(__a, __b): forwards __a (a single vector for N=1, otherwise the N members of the struct) and __b, all cast to int8x8_t except in the s8 variants, plus a per-type integer literal to __builtin_neon_vtblN_v, and casts the result to the return type. The p8 variants take a uint8x8_t as __b. NOTE(review): __a is presumably the lookup table and __b the index vector - confirm against the intrinsic reference. */ __ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) { 4813 return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 16); } 4814 __ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) { 4815 return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); } 4816 __ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) { 4817 return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 4); } 4818 4819 __ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) { 4820 return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 16); } 4821 __ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) { 4822 return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); } 4823 __ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) { 
4824 return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 4); } 4825 4826 /* vtbl3_* / vtbl4_*: same pattern as the smaller forms - the 3 or 4 members of __a and then __b are forwarded (cast to int8x8_t except in the s8 variants) with a per-type integer literal to __builtin_neon_vtbl3_v / __builtin_neon_vtbl4_v. */ __ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) { 4827 return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 16); } 4828 __ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) { 4829 return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); } 4830 __ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) { 4831 return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 4); } 4832 4833 __ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) { 4834 return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 16); } 4835 __ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) { 4836 return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); } 4837 __ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) { 4838 return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 4); } 4839 4840 /* vtbxN_<type>(__a, __b, __c): forwards __a, then __b (a single vector for N=1, otherwise its N members), then __c, cast to int8x8_t except in the s8 variants, plus a per-type integer literal to __builtin_neon_vtbxN_v; the result is cast to the type of __a. NOTE(review): __a is presumably the fallback vector, __b the table and __c the indices - confirm against the intrinsic reference. */ __ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { 4841 return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); } 4842 __ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { 4843 return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); } 4844 __ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) { 4845 return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); } 4846 4847 __ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) { 4848 return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 16); } 4849 __ai int8x8_t 
vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) { 4850 return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); } 4851 __ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) { 4852 return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 4); } 4853 4854 /* vtbx3_* / vtbx4_*: __a, the 3 or 4 members of __b, and __c are forwarded (cast to int8x8_t except in the s8 variants) with a per-type integer literal to __builtin_neon_vtbx3_v / __builtin_neon_vtbx4_v. */ __ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) { 4855 return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 16); } 4856 __ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) { 4857 return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); } 4858 __ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) { 4859 return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 4); } 4860 4861 __ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) { 4862 return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 16); } 4863 __ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) { 4864 return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); } 4865 __ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) { 4866 return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 4); } 4867 4868 /* vtrn_<type>(__a, __b): declares an uninitialised two-vector result struct r, passes &r together with both operands (cast to int8x8_t except in the s8 variant) and a per-type integer literal to __builtin_neon_vtrn_v, then returns r by value. The builtin is relied on to fill r. NOTE(review): presumably an element transpose per the intrinsic name - confirm against the intrinsic reference. */ __ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { 4869 int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; } 4870 __ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { 4871 int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4872 __ai int32x2x2_t vtrn_s32(int32x2_t __a, 
int32x2_t __b) { 4873 int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4874 __ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { 4875 uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } 4876 __ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { 4877 uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } 4878 __ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { 4879 uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } 4880 __ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { 4881 float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } 4882 __ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { 4883 poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4884 __ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { 4885 poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4886 /* vtrnq_<type>(__a, __b): same out-parameter pattern as vtrn_* - a local result struct r is filled through &r by __builtin_neon_vtrnq_v and returned by value; operands are cast to int8x16_t except in the s8 variant. In these definitions each integer literal equals the corresponding vtrn_* literal plus 32. */ __ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { 4887 int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 32); return r; } 4888 __ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { 4889 int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } 4890 __ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { 4891 int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } 4892 __ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { 4893 uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } 4894 __ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { 4895 uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } 4896 __ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { 4897 uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); 
return r; } 4898 __ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { 4899 float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } 4900 __ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { 4901 poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } 4902 __ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { 4903 poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } 4904 4905 /* vtst_<type>(__a, __b): forwards both operands (cast to int8x8_t except in the s8 variant) to __builtin_neon_vtst_v and casts the result to an unsigned vector of the same element width. The signed and p8 variants pass the same integer literal as the unsigned variant of that width (16, 17 or 18). NOTE(review): presumably a per-element bit test producing a mask - confirm against the intrinsic reference. */ __ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) { 4906 return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 16); } 4907 __ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) { 4908 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } 4909 __ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) { 4910 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } 4911 __ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) { 4912 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } 4913 __ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) { 4914 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } 4915 __ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) { 4916 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } 4917 __ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) { 4918 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } 4919 /* vtstq_<type>(__a, __b): same pattern on the wider vector types via __builtin_neon_vtstq_v; the literals used are 48, 49 and 50. */ __ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) { 4920 return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 48); } 4921 __ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) { 4922 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 4923 __ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) { 4924 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 4925 __ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) { 4926 return 
(uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 4927 __ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) { 4928 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } 4929 __ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) { 4930 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); } 4931 __ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) { 4932 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } 4933 4934 /* vuzp_<type>(__a, __b): declares an uninitialised two-vector result struct r, passes &r together with both operands (cast to int8x8_t except in the s8 variant) and a per-type integer literal to __builtin_neon_vuzp_v, then returns r by value. The builtin is relied on to fill r. NOTE(review): presumably a de-interleave ("unzip") per the intrinsic name - confirm against the intrinsic reference. */ __ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { 4935 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; } 4936 __ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { 4937 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4938 __ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { 4939 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4940 __ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { 4941 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } 4942 __ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { 4943 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } 4944 __ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { 4945 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } 4946 __ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { 4947 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } 4948 __ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { 4949 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4950 __ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { 4951 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4952 __ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { 4953 
int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 32); return r; } 4954 __ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { 4955 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } 4956 __ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { 4957 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } 4958 __ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { 4959 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } 4960 __ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { 4961 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } 4962 __ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { 4963 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } 4964 __ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { 4965 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } 4966 __ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { 4967 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } 4968 __ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { 4969 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } 4970 4971 __ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { 4972 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; } 4973 __ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) { 4974 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4975 __ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { 4976 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4977 __ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { 4978 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } 4979 __ai uint16x4x2_t 
vzip_u16(uint16x4_t __a, uint16x4_t __b) { 4980 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } 4981 __ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { 4982 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } 4983 __ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { 4984 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } 4985 __ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) { 4986 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4987 __ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { 4988 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4989 __ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { 4990 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 32); return r; } 4991 __ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { 4992 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } 4993 __ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { 4994 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } 4995 __ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { 4996 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } 4997 __ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { 4998 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } 4999 __ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { 5000 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } 5001 __ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { 5002 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } 5003 __ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { 5004 poly8x16x2_t r; 
__builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } 5005 __ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { 5006 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } 5007 5008 #undef __ai 5009 5010 #endif /* __ARM_NEON_H */ 5011